# Region data source declared with no arguments. Nothing in the visible part of
# this file reads it — presumably consumed elsewhere in the module; confirm
# before removing.
data "aws_region" "current" {}

# Subnets of var.vpc_id that do not auto-assign a public IP on launch. The ECS
# service in this file places its tasks in these subnets (see subnet_ids).
data "aws_subnets" "humand" {
  filter {
    name   = "vpc-id"
    values = [var.vpc_id]
  }

  # Filter values are strings, so the flag is spelled "false" explicitly
  # instead of relying on Terraform's implicit bool-to-string conversion.
  filter {
    name   = "map-public-ip-on-launch"
    values = ["false"]
  }
}

# Local Datadog module. Its outputs (log_configuration, sidecar_datadog-agent
# and sidecar_log-router) are wired into the ECS container definitions in this
# file.
module "datadog" {
  source = "../datadog"

  env       = var.env
  service   = var.service_name
  dd_source = var.dd_source
}

# ECS service for var.service_name on Fargate / Fargate Spot (ARM64 Linux).
# The task runs three containers: the application container (keyed by the
# service name) plus the datadog-agent and log-router sidecars supplied by
# module.datadog. Tasks are placed in the subnets found by
# data.aws_subnets.humand and registered with var.target_group_arn.
module "ecs_service" {
  source  = "terraform-aws-modules/ecs/aws//modules/service"
  version = "v5.12.1"

  name        = var.service_name
  cluster_arn = var.target_cluster_arn

  cpu    = var.cpu
  memory = var.memory

  capacity_provider_strategy = {
    FARGATE = {
      capacity_provider = "FARGATE"
      weight            = var.fargate_weight
    }
    FARGATE_SPOT = {
      capacity_provider = "FARGATE_SPOT"
      weight            = var.fargate_spot_weight
    }
  }

  runtime_platform = {
    cpu_architecture        = "ARM64"
    operating_system_family = "LINUX"
  }

  ephemeral_storage = {
    size_in_gib = var.ephemeral_storage_gib
  }

  # Seed the service with the intended task count at creation time. Without
  # this the service starts at the upstream module's default count and depends
  # on the pinned autoscaling range below to correct it.
  # NOTE(review): the upstream service module is understood to ignore later
  # changes to desired_count, so this should not fight autoscaling — confirm
  # against the module docs for v5.12.1.
  desired_count = var.desired_count

  # min == max pins the autoscaling range to exactly var.desired_count tasks.
  enable_autoscaling       = true
  autoscaling_min_capacity = var.desired_count
  autoscaling_max_capacity = var.desired_count

  # Keep the old healthy task serving while the new task runs its boot prewarm
  # (which holds /health at 503 for minutes). max 200% lets ECS run the new task
  # alongside the old one; min 100% forbids stopping the old task until the new
  # one is healthy — so the ALB always has a healthy target, no deploy gap.
  deployment_minimum_healthy_percent = 100
  deployment_maximum_percent         = 200

  # Reuse an externally managed task role instead of letting the module create one.
  create_tasks_iam_role = false
  tasks_iam_role_arn    = var.service_role_arn

  container_definitions = {
    # Parenthesised key: evaluates var.service_name as the map key without an
    # interpolation-only string.
    (var.service_name) = {
      essential = true
      image     = var.container_image
      # Task memory minus 256 and 128 (MiB).
      # NOTE(review): presumably the headroom reserved for the datadog-agent
      # and log-router sidecars — confirm against module.datadog.
      memory       = var.memory - 256 - 128
      stop_timeout = var.stop_timeout

      # Start the app only after the Datadog agent reports healthy, so the first
      # DogStatsD emissions (e.g. the AuthChecker boot round) aren't dropped with
      # ECONNREFUSED while the sidecar is still starting. The agent is already
      # `essential`, so this adds startup ordering without new failure coupling.
      dependencies = [
        {
          condition     = "HEALTHY"
          containerName = "datadog-agent"
        }
      ]

      # Plain environment variables: one { name, value } entry per map element.
      environment = [
        for k, v in var.env_vars : {
          name  = k,
          value = v
        }
      ]

      # Secrets: one { name, valueFrom } entry per map element; the map value
      # is passed through as valueFrom.
      secrets = [
        for k, v in var.secrets : {
          name      = k,
          valueFrom = v
        }
      ]

      port_mappings = [
        {
          containerPort = var.container_port
          protocol      = "tcp"
        }
      ]

      docker_labels = {
        "com.datadoghq.tags.service" = var.service_name,
        "com.datadoghq.tags.env"     = var.env
      }

      # Container-level health check: curl the app's health endpoint from
      # inside the container; any non-2xx/3xx response or connection failure
      # exits 1.
      health_check = {
        command = [
          "CMD-SHELL",
          "curl -f -s localhost:${var.container_port}${var.health_check_path} || exit 1"
        ]
        interval = 30
        timeout  = 5
        retries  = 5
        # 300 is the ECS maximum for startPeriod. The boot prewarm fails-open at
        # CLONE.PREWARM_TIMEOUT_MS (240s) < 300s, so /health flips to 200 before
        # failed checks start counting toward a task kill.
        startPeriod = 300
      }

      # Logs use the configuration from module.datadog rather than the
      # module's CloudWatch logging.
      enable_cloudwatch_logging = false
      log_configuration         = module.datadog.log_configuration

      readonly_root_filesystem = false
    }

    datadog-agent = module.datadog.sidecar_datadog-agent
    log-router    = module.datadog.sidecar_log-router
  }

  subnet_ids = data.aws_subnets.humand.ids

  security_group_rules = {
    # NOTE(review): ingress on the application port is open to 0.0.0.0/0.
    # Consider restricting it to the load balancer's security group or the VPC
    # CIDR — not changed here because the required inputs are not visible in
    # this file.
    ingress_app = {
      type        = "ingress"
      from_port   = var.container_port
      to_port     = var.container_port
      protocol    = "tcp"
      description = "Application port"
      cidr_blocks = ["0.0.0.0/0"]
    }
    egress_all = {
      type        = "egress"
      from_port   = 0
      to_port     = 0
      protocol    = "-1"
      cidr_blocks = ["0.0.0.0/0"]
    }
  }

  # Register the application container's port with the external target group.
  load_balancer = {
    app = {
      target_group_arn = var.target_group_arn
      container_name   = var.service_name
      container_port   = var.container_port
    }
  }

  # Give the ALB target group time to reach healthy while the boot prewarm clones
  # all managed repos before /health flips to 200 (clone-only, well under 600s —
  # see cold-start-repo-prewarm spec).
  health_check_grace_period_seconds = 600
}
