EC2 Auto Scaling: Right-Sizing Your Fleet
Master EC2 Auto Scaling to automatically adjust capacity based on demand. Learn scaling policies, predictive scaling, and cost optimization strategies.
EC2 Auto Scaling dynamically adjusts your fleet size based on demand, ensuring you have enough capacity during traffic spikes while minimizing costs during quiet periods. But getting it right requires understanding the nuances of scaling policies, health checks, and instance lifecycle management.
Auto Scaling Group Basics
# Auto Scaling Group with Launch Template
# Launch template for the application tier.
# Instances run in private subnets (no public IP) and serve traffic through
# the load balancer; user_data boots a single Docker container from var.app_image.
resource "aws_launch_template" "app" {
  name_prefix   = "app-"
  image_id      = data.aws_ami.amazon_linux.id
  instance_type = "t3.medium"

  network_interfaces {
    # Private-subnet instances: no public IP, locked to the app security group.
    associate_public_ip_address = false
    security_groups             = [aws_security_group.app.id]
  }

  iam_instance_profile {
    name = aws_iam_instance_profile.app.name
  }

  # Bootstrap: install Docker and launch the app container on port 80.
  # NOTE(review): `yum update -y` at boot makes launch time variable — consider
  # baking updates into the AMI so scale-out stays fast and predictable.
  user_data = base64encode(<<-EOF
#!/bin/bash
yum update -y
amazon-linux-extras install docker -y
systemctl start docker
docker run -d -p 80:80 ${var.app_image}
EOF
  )

  tag_specifications {
    resource_type = "instance"
    tags = {
      Name = "app-server"
    }
  }

  # New template versions are created before old ones are destroyed, so the
  # ASG never references a deleted template mid-apply.
  lifecycle {
    create_before_destroy = true
  }
}
# Application Auto Scaling group: 1-10 instances across the private subnets,
# registered with the ALB target group and rolled automatically on template change.
resource "aws_autoscaling_group" "app" {
  name                = "app-asg"
  desired_capacity    = 2
  min_size            = 1
  max_size            = 10
  vpc_zone_identifier = var.private_subnet_ids

  # ELB health checks replace instances the target group marks unhealthy;
  # the grace period gives new instances time to boot before checks count.
  health_check_type         = "ELB"
  health_check_grace_period = 300

  launch_template {
    id = aws_launch_template.app.id
    # Fix: track the template version Terraform manages instead of the
    # literal "$Latest". With "$Latest" this attribute never changes in
    # state, so launch-template updates do NOT trigger the instance_refresh
    # below — instances silently keep running the old configuration.
    version = aws_launch_template.app.latest_version
  }

  target_group_arns = [aws_lb_target_group.app.arn]

  # Roll the fleet when the launch template changes, always keeping at
  # least half the group in service.
  instance_refresh {
    strategy = "Rolling"
    preferences {
      min_healthy_percentage = 50
    }
  }

  tag {
    key                 = "Environment"
    value               = var.environment
    propagate_at_launch = true
  }
}
Scaling Policies
Target Tracking (Recommended)
# Scale based on average CPU utilization
# Target tracking on fleet-average CPU: the service creates and manages the
# scale-out/scale-in CloudWatch alarms itself, adjusting capacity to hold
# average CPU near the target.
resource "aws_autoscaling_policy" "cpu_target" {
  name                   = "cpu-target-tracking"
  autoscaling_group_name = aws_autoscaling_group.app.name
  policy_type            = "TargetTrackingScaling"
  target_tracking_configuration {
    predefined_metric_specification {
      predefined_metric_type = "ASGAverageCPUUtilization"
    }
    # 60% leaves headroom to absorb bursts while new instances launch.
    target_value = 60.0
  }
}
# Scale based on requests per target
# Target tracking on ALB request count per target: scales so each instance
# handles roughly target_value requests per interval.
resource "aws_autoscaling_policy" "requests_target" {
  name                   = "requests-target-tracking"
  autoscaling_group_name = aws_autoscaling_group.app.name
  policy_type            = "TargetTrackingScaling"
  target_tracking_configuration {
    predefined_metric_specification {
      predefined_metric_type = "ALBRequestCountPerTarget"
      # Required for ALB metrics: "<lb arn_suffix>/<target group arn_suffix>"
      # identifies which load balancer + target group the metric comes from.
      resource_label = "${aws_lb.main.arn_suffix}/${aws_lb_target_group.app.arn_suffix}"
    }
    target_value = 1000.0 # 1000 requests per instance
  }
}
# Custom metric scaling
# Target tracking on SQS queue depth.
# Fix: inside customized_metric_specification the dimension block is named
# `metric_dimension`, not `dimensions` — the original fails `terraform validate`.
resource "aws_autoscaling_policy" "custom_metric" {
  name                   = "queue-depth-tracking"
  autoscaling_group_name = aws_autoscaling_group.app.name
  policy_type            = "TargetTrackingScaling"
  target_tracking_configuration {
    customized_metric_specification {
      metric_name = "ApproximateNumberOfMessagesVisible"
      namespace   = "AWS/SQS"
      statistic   = "Average"
      metric_dimension {
        name  = "QueueName"
        value = aws_sqs_queue.work.name
      }
    }
    # NOTE(review): this tracks TOTAL visible messages, not messages per
    # instance — AWS recommends publishing a computed backlog-per-instance
    # metric for accurate queue-driven scaling. Confirm intent before relying
    # on the "per instance" interpretation below.
    target_value = 100.0 # 100 messages per instance
  }
}
Step Scaling for Bursty Workloads
# Step scaling for bursty traffic: larger threshold breaches add more capacity.
# Interval bounds are OFFSETS from the triggering alarm's threshold (70% CPU,
# see the high_cpu alarm below).
# Fix: the original bounds of 20/40 mapped to 90-110% and >=110% CPU — the
# largest step could never fire because CPUUtilization tops out at 100%.
resource "aws_autoscaling_policy" "step_scale_out" {
  name                   = "step-scale-out"
  autoscaling_group_name = aws_autoscaling_group.app.name
  policy_type            = "StepScaling"
  adjustment_type        = "ChangeInCapacity"

  # 70-80% CPU: add 1 instance
  step_adjustment {
    scaling_adjustment          = 1
    metric_interval_lower_bound = 0
    metric_interval_upper_bound = 10
  }
  # 80-90% CPU: add 2 instances
  step_adjustment {
    scaling_adjustment          = 2
    metric_interval_lower_bound = 10
    metric_interval_upper_bound = 20
  }
  # 90%+ CPU: add 4 instances
  step_adjustment {
    scaling_adjustment          = 4
    metric_interval_lower_bound = 20
  }
}
# Alarm that drives the step-scaling policy above: fires after 2 consecutive
# 60-second periods with average ASG CPU above 70%. The step policy's
# interval bounds are interpreted as offsets from this 70% threshold.
resource "aws_cloudwatch_metric_alarm" "high_cpu" {
  alarm_name          = "high-cpu-alarm"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = 2
  metric_name         = "CPUUtilization"
  namespace           = "AWS/EC2"
  period              = 60
  statistic           = "Average"
  threshold           = 70
  # Scope the metric to this Auto Scaling group's instances.
  dimensions = {
    AutoScalingGroupName = aws_autoscaling_group.app.name
  }
  alarm_actions = [aws_autoscaling_policy.step_scale_out.arn]
}
Predictive Scaling
# Predictive scaling: forecasts demand from historical load and launches
# capacity ahead of the predicted need.
resource "aws_autoscaling_policy" "predictive" {
  name                   = "predictive-scaling"
  autoscaling_group_name = aws_autoscaling_group.app.name
  policy_type            = "PredictiveScaling"
  predictive_scaling_configuration {
    # "ForecastAndScale" acts on forecasts; use "ForecastOnly" first to
    # evaluate forecast quality without changing capacity.
    mode                   = "ForecastAndScale"
    scheduling_buffer_time = 300 # Pre-launch 5 min early
    metric_specification {
      target_value = 60
      # Load metric: the aggregate demand signal used to build the forecast.
      predefined_load_metric_specification {
        predefined_metric_type = "ASGTotalCPUUtilization"
      }
      # Scaling metric: the per-instance utilization the plan targets.
      predefined_scaling_metric_specification {
        predefined_metric_type = "ASGAverageCPUUtilization"
      }
    }
  }
}
Mixed Instance Types for Cost Optimization
# Cost-optimized ASG mixing On-Demand and Spot across several instance types.
# Multiple override types give the Spot allocator more pools to choose from,
# reducing interruption risk.
resource "aws_autoscaling_group" "mixed" {
  name                = "mixed-instances-asg"
  desired_capacity    = 4
  min_size            = 2
  max_size            = 20
  vpc_zone_identifier = var.private_subnet_ids

  mixed_instances_policy {
    instances_distribution {
      on_demand_base_capacity                  = 2  # Always have 2 on-demand
      on_demand_percentage_above_base_capacity = 25 # 25% on-demand above base
      # Picks Spot pools with the deepest capacity to minimize interruptions.
      spot_allocation_strategy = "capacity-optimized"
    }

    launch_template {
      launch_template_specification {
        launch_template_id = aws_launch_template.app.id
        # Fix: track the Terraform-managed template version instead of the
        # literal "$Latest", so template changes are reflected in state and
        # can roll the group (see the main ASG's instance_refresh note).
        version = aws_launch_template.app.latest_version
      }
      # Equivalent-size types across families broaden the Spot pool.
      override {
        instance_type = "t3.medium"
      }
      override {
        instance_type = "t3a.medium"
      }
      override {
        instance_type = "t2.medium"
      }
      override {
        instance_type = "m5.large"
      }
    }
  }
}
Health Checks and Instance Lifecycle
# Lifecycle hook for graceful shutdown
# Pause terminating instances in Terminating:Wait so the drain Lambda
# (subscribed via the SNS topic) can deregister them from the load balancer
# before the instance actually goes away.
resource "aws_autoscaling_lifecycle_hook" "terminate" {
  name                   = "graceful-shutdown"
  autoscaling_group_name = aws_autoscaling_group.app.name
  lifecycle_transition   = "autoscaling:EC2_INSTANCE_TERMINATING"
  # If no complete-lifecycle-action call arrives before the heartbeat
  # expires, proceed with termination rather than blocking the ASG.
  default_result          = "CONTINUE"
  heartbeat_timeout       = 300
  notification_target_arn = aws_sns_topic.lifecycle.arn
  role_arn                = aws_iam_role.lifecycle.arn
}
# Lambda to drain connections before termination
# Lambda that drains ELB connections before an instance terminates
# (invoked via the lifecycle hook's SNS notification).
resource "aws_lambda_function" "drain" {
  function_name = "instance-drain"
  runtime       = "python3.12"
  handler       = "handler.handler"
  role          = aws_iam_role.drain_lambda.arn
  filename      = "drain.zip"
  # Fix: Lambda's default 3-second timeout is far too short — the
  # target-deregistration waiter routinely takes minutes. Match the
  # lifecycle hook's 300-second heartbeat so draining can finish.
  timeout = 300
  # Fix: without a hash, repackaging drain.zip never triggers a redeploy.
  source_code_hash = filebase64sha256("drain.zip")
}
# drain_handler.py
import boto3
import json

elbv2 = boto3.client('elbv2')
asg = boto3.client('autoscaling')


def get_target_groups(asg_name):
    """Return the target group ARNs attached to an Auto Scaling group.

    Fix: handler() called this helper but it was never defined in the
    original module, so every invocation crashed with a NameError.
    """
    response = asg.describe_auto_scaling_groups(
        AutoScalingGroupNames=[asg_name]
    )
    groups = response.get('AutoScalingGroups', [])
    if not groups:
        return []
    return groups[0].get('TargetGroupARNs', [])


def handler(event, context):
    """Drain an instance's load-balancer connections, then let it terminate.

    Triggered by the SNS notification from the EC2_INSTANCE_TERMINATING
    lifecycle hook. Deregisters the instance from every attached target
    group, waits for deregistration (connection draining) to complete,
    then completes the lifecycle action so the ASG proceeds.
    """
    message = json.loads(event['Records'][0]['Sns']['Message'])
    instance_id = message['EC2InstanceId']
    lifecycle_hook = message['LifecycleHookName']
    asg_name = message['AutoScalingGroupName']

    try:
        # Deregister from all attached target groups so the load balancer
        # stops routing new requests to this instance.
        target_groups = get_target_groups(asg_name)
        for tg_arn in target_groups:
            elbv2.deregister_targets(
                TargetGroupArn=tg_arn,
                Targets=[{'Id': instance_id}]
            )

        # Block until in-flight connections have drained.
        for tg_arn in target_groups:
            waiter = elbv2.get_waiter('target_deregistered')
            waiter.wait(
                TargetGroupArn=tg_arn,
                Targets=[{'Id': instance_id}]
            )
    finally:
        # Always release the hook — otherwise a drain failure leaves the
        # instance stuck in Terminating:Wait until the heartbeat expires.
        asg.complete_lifecycle_action(
            AutoScalingGroupName=asg_name,
            LifecycleHookName=lifecycle_hook,
            InstanceId=instance_id,
            LifecycleActionResult='CONTINUE'
        )
Warm Pools for Faster Scaling
# ASG with a warm pool: pre-initialized instances kept stopped, so scale-out
# only has to start them instead of running the full bootstrap — useful when
# initialization is slow.
resource "aws_autoscaling_group" "with_warm_pool" {
  name             = "app-with-warm-pool"
  desired_capacity = 2
  min_size         = 2
  max_size         = 10
  # ... other config ...
  warm_pool {
    # "Stopped" incurs only storage cost; "Running" starts faster but is
    # billed for compute while idle.
    pool_state                  = "Stopped"
    min_size                    = 2
    max_group_prepared_capacity = 5
    instance_reuse_policy {
      # On scale-in, return instances to the warm pool instead of
      # terminating them, preserving the initialization work.
      reuse_on_scale_in = true
    }
  }
}
Monitoring and Alarms
# Alert when the ASG drops below 2 healthy instances for 2 consecutive minutes.
# NOTE(review): GroupInServiceInstances is only published when group metrics
# collection is enabled on the ASG (enabled_metrics) — confirm it is enabled,
# otherwise this alarm stays in INSUFFICIENT_DATA.
resource "aws_cloudwatch_metric_alarm" "group_in_service" {
  alarm_name          = "asg-insufficient-capacity"
  comparison_operator = "LessThanThreshold"
  evaluation_periods  = 2
  metric_name         = "GroupInServiceInstances"
  namespace           = "AWS/AutoScaling"
  period              = 60
  statistic           = "Average"
  threshold           = 2
  alarm_description   = "ASG has fewer than 2 healthy instances"
  dimensions = {
    AutoScalingGroupName = aws_autoscaling_group.app.name
  }
  alarm_actions = [aws_sns_topic.alerts.arn]
}
# CLI: Check scaling activities
# Lists the 10 most recent scale-out/scale-in events with cause and status —
# the first place to look when scaling behaves unexpectedly.
aws autoscaling describe-scaling-activities \
  --auto-scaling-group-name app-asg \
  --max-items 10
# View current capacity
# JMESPath query summarizing min/max/desired and the count of instances
# currently in the InService lifecycle state.
aws autoscaling describe-auto-scaling-groups \
  --auto-scaling-group-names app-asg \
  --query 'AutoScalingGroups[0].{Min:MinSize,Max:MaxSize,Desired:DesiredCapacity,InService:length(Instances[?LifecycleState==`InService`])}'
Key Takeaways
- Use Target Tracking as your default scaling policy — it’s simpler and handles scale-in/out automatically
- Predictive Scaling is great for workloads with predictable patterns (business hours, weekly cycles)
- Mixed instance types with Spot reduce costs by 60-90% — use capacity-optimized allocation
- Warm Pools cut scale-out time from minutes to seconds for instances that need lengthy initialization
- Lifecycle hooks ensure graceful drain before termination — don’t drop in-flight requests
- Health check grace period prevents thrashing — give instances time to warm up
“The best scaling policy is the one you don’t have to think about. Target tracking gets you 80% there — optimize only when you need to.”