EC2 Auto Scaling: Right-Sizing Your Fleet
Master EC2 Auto Scaling to automatically adjust capacity based on demand. Learn scaling policies, predictive scaling, and cost optimization strategies.
EC2 Auto Scaling dynamically adjusts your fleet size based on demand, ensuring you have enough capacity during traffic spikes while minimizing costs during quiet periods. But getting it right requires understanding the nuances of scaling policies, health checks, and instance lifecycle management.
Auto Scaling Group Basics
# Auto Scaling Group with Launch Template
# Launch template for the application tier.
# Instances run in private subnets (no public IP) and serve traffic through
# the load balancer; user_data boots a single Docker container from var.app_image.
resource "aws_launch_template" "app" {
  name_prefix   = "app-"
  image_id      = data.aws_ami.amazon_linux.id
  instance_type = "t3.medium"

  network_interfaces {
    # Private-subnet instances: no public IP, locked to the app security group.
    associate_public_ip_address = false
    security_groups             = [aws_security_group.app.id]
  }

  iam_instance_profile {
    name = aws_iam_instance_profile.app.name
  }

  # Bootstrap: install Docker and launch the app container on port 80.
  # NOTE(review): `yum update -y` at boot makes launch time variable — consider
  # baking updates into the AMI so scale-out stays fast and predictable.
  user_data = base64encode(<<-EOF
#!/bin/bash
yum update -y
amazon-linux-extras install docker -y
systemctl start docker
docker run -d -p 80:80 ${var.app_image}
EOF
  )

  tag_specifications {
    resource_type = "instance"
    tags = {
      Name = "app-server"
    }
  }

  # New template versions are created before old ones are destroyed, so the
  # ASG never references a deleted template mid-apply.
  lifecycle {
    create_before_destroy = true
  }
}
# Application Auto Scaling group: 1-10 instances across the private subnets,
# registered with the ALB target group and rolled automatically on template change.
resource "aws_autoscaling_group" "app" {
  name                = "app-asg"
  desired_capacity    = 2
  min_size            = 1
  max_size            = 10
  vpc_zone_identifier = var.private_subnet_ids

  # ELB health checks replace instances the target group marks unhealthy;
  # the grace period gives new instances time to boot before checks count.
  health_check_type         = "ELB"
  health_check_grace_period = 300

  launch_template {
    id = aws_launch_template.app.id
    # Fix: track the template version Terraform manages instead of the
    # literal "$Latest". With "$Latest" this attribute never changes in
    # state, so launch-template updates do NOT trigger the instance_refresh
    # below — instances silently keep running the old configuration.
    version = aws_launch_template.app.latest_version
  }

  target_group_arns = [aws_lb_target_group.app.arn]

  # Roll the fleet when the launch template changes, always keeping at
  # least half the group in service.
  instance_refresh {
    strategy = "Rolling"
    preferences {
      min_healthy_percentage = 50
    }
  }

  tag {
    key                 = "Environment"
    value               = var.environment
    propagate_at_launch = true
  }
}
Scaling Policies
Target Tracking (Recommended)
# Scale based on average CPU utilization
# Target tracking on fleet-average CPU: the service creates and manages the
# scale-out/scale-in CloudWatch alarms itself, adjusting capacity to hold
# average CPU near the target.
resource "aws_autoscaling_policy" "cpu_target" {
  name                   = "cpu-target-tracking"
  autoscaling_group_name = aws_autoscaling_group.app.name
  policy_type            = "TargetTrackingScaling"
  target_tracking_configuration {
    predefined_metric_specification {
      predefined_metric_type = "ASGAverageCPUUtilization"
    }
    # 60% leaves headroom to absorb bursts while new instances launch.
    target_value = 60.0
  }
}
# Scale based on requests per target
# Target tracking on ALB request count per target: scales so each instance
# handles roughly target_value requests per interval.
resource "aws_autoscaling_policy" "requests_target" {
  name                   = "requests-target-tracking"
  autoscaling_group_name = aws_autoscaling_group.app.name
  policy_type            = "TargetTrackingScaling"
  target_tracking_configuration {
    predefined_metric_specification {
      predefined_metric_type = "ALBRequestCountPerTarget"
      # Required for ALB metrics: "<lb arn_suffix>/<target group arn_suffix>"
      # identifies which load balancer + target group the metric comes from.
      resource_label = "${aws_lb.main.arn_suffix}/${aws_lb_target_group.app.arn_suffix}"
    }
    target_value = 1000.0 # 1000 requests per instance
  }
}
# Custom metric scaling
# Target tracking on SQS queue depth.
# Fix: inside customized_metric_specification the dimension block is named
# `metric_dimension`, not `dimensions` — the original fails `terraform validate`.
resource "aws_autoscaling_policy" "custom_metric" {
  name                   = "queue-depth-tracking"
  autoscaling_group_name = aws_autoscaling_group.app.name
  policy_type            = "TargetTrackingScaling"
  target_tracking_configuration {
    customized_metric_specification {
      metric_name = "ApproximateNumberOfMessagesVisible"
      namespace   = "AWS/SQS"
      statistic   = "Average"
      metric_dimension {
        name  = "QueueName"
        value = aws_sqs_queue.work.name
      }
    }
    # NOTE(review): this tracks TOTAL visible messages, not messages per
    # instance — AWS recommends publishing a computed backlog-per-instance
    # metric for accurate queue-driven scaling. Confirm intent before relying
    # on the "per instance" interpretation below.
    target_value = 100.0 # 100 messages per instance
  }
}
Step Scaling for Bursty Workloads
# Step scaling for bursty traffic: larger threshold breaches add more capacity.
# Interval bounds are OFFSETS from the triggering alarm's threshold (70% CPU,
# see the high_cpu alarm below).
# Fix: the original bounds of 20/40 mapped to 90-110% and >=110% CPU — the
# largest step could never fire because CPUUtilization tops out at 100%.
resource "aws_autoscaling_policy" "step_scale_out" {
  name                   = "step-scale-out"
  autoscaling_group_name = aws_autoscaling_group.app.name
  policy_type            = "StepScaling"
  adjustment_type        = "ChangeInCapacity"

  # 70-80% CPU: add 1 instance
  step_adjustment {
    scaling_adjustment          = 1
    metric_interval_lower_bound = 0
    metric_interval_upper_bound = 10
  }
  # 80-90% CPU: add 2 instances
  step_adjustment {
    scaling_adjustment          = 2
    metric_interval_lower_bound = 10
    metric_interval_upper_bound = 20
  }
  # 90%+ CPU: add 4 instances
  step_adjustment {
    scaling_adjustment          = 4
    metric_interval_lower_bound = 20
  }
}
# Alarm that drives the step-scaling policy above: fires after 2 consecutive
# 60-second periods with average ASG CPU above 70%. The step policy's
# interval bounds are interpreted as offsets from this 70% threshold.
resource "aws_cloudwatch_metric_alarm" "high_cpu" {
  alarm_name          = "high-cpu-alarm"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = 2
  metric_name         = "CPUUtilization"
  namespace           = "AWS/EC2"
  period              = 60
  statistic           = "Average"
  threshold           = 70
  # Scope the metric to this Auto Scaling group's instances.
  dimensions = {
    AutoScalingGroupName = aws_autoscaling_group.app.name
  }
  alarm_actions = [aws_autoscaling_policy.step_scale_out.arn]
}
Predictive Scaling
# Predictive scaling: forecasts demand from historical load and launches
# capacity ahead of the predicted need.
resource "aws_autoscaling_policy" "predictive" {
  name                   = "predictive-scaling"
  autoscaling_group_name = aws_autoscaling_group.app.name
  policy_type            = "PredictiveScaling"
  predictive_scaling_configuration {
    # "ForecastAndScale" acts on forecasts; use "ForecastOnly" first to
    # evaluate forecast quality without changing capacity.
    mode                   = "ForecastAndScale"
    scheduling_buffer_time = 300 # Pre-launch 5 min early
    metric_specification {
      target_value = 60
      # Load metric: the aggregate demand signal used to build the forecast.
      predefined_load_metric_specification {
        predefined_metric_type = "ASGTotalCPUUtilization"
      }
      # Scaling metric: the per-instance utilization the plan targets.
      predefined_scaling_metric_specification {
        predefined_metric_type = "ASGAverageCPUUtilization"
      }
    }
  }
}
Mixed Instance Types for Cost Optimization
# Cost-optimized ASG mixing On-Demand and Spot across several instance types.
# Multiple override types give the Spot allocator more pools to choose from,
# reducing interruption risk.
resource "aws_autoscaling_group" "mixed" {
  name                = "mixed-instances-asg"
  desired_capacity    = 4
  min_size            = 2
  max_size            = 20
  vpc_zone_identifier = var.private_subnet_ids

  mixed_instances_policy {
    instances_distribution {
      on_demand_base_capacity                  = 2  # Always have 2 on-demand
      on_demand_percentage_above_base_capacity = 25 # 25% on-demand above base
      # Picks Spot pools with the deepest capacity to minimize interruptions.
      spot_allocation_strategy = "capacity-optimized"
    }

    launch_template {
      launch_template_specification {
        launch_template_id = aws_launch_template.app.id
        # Fix: track the Terraform-managed template version instead of the
        # literal "$Latest", so template changes are reflected in state and
        # can roll the group (see the main ASG's instance_refresh note).
        version = aws_launch_template.app.latest_version
      }
      # Equivalent-size types across families broaden the Spot pool.
      override {
        instance_type = "t3.medium"
      }
      override {
        instance_type = "t3a.medium"
      }
      override {
        instance_type = "t2.medium"
      }
      override {
        instance_type = "m5.large"
      }
    }
  }
}
Health Checks and Instance Lifecycle
# Lifecycle hook for graceful shutdown
# Pause terminating instances in Terminating:Wait so the drain Lambda
# (subscribed via the SNS topic) can deregister them from the load balancer
# before the instance actually goes away.
resource "aws_autoscaling_lifecycle_hook" "terminate" {
  name                   = "graceful-shutdown"
  autoscaling_group_name = aws_autoscaling_group.app.name
  lifecycle_transition   = "autoscaling:EC2_INSTANCE_TERMINATING"
  # If no complete-lifecycle-action call arrives before the heartbeat
  # expires, proceed with termination rather than blocking the ASG.
  default_result          = "CONTINUE"
  heartbeat_timeout       = 300
  notification_target_arn = aws_sns_topic.lifecycle.arn
  role_arn                = aws_iam_role.lifecycle.arn
}
# Lambda to drain connections before termination
# Lambda that drains ELB connections before an instance terminates
# (invoked via the lifecycle hook's SNS notification).
resource "aws_lambda_function" "drain" {
  function_name = "instance-drain"
  runtime       = "python3.12"
  handler       = "handler.handler"
  role          = aws_iam_role.drain_lambda.arn
  filename      = "drain.zip"
  # Fix: Lambda's default 3-second timeout is far too short — the
  # target-deregistration waiter routinely takes minutes. Match the
  # lifecycle hook's 300-second heartbeat so draining can finish.
  timeout = 300
  # Fix: without a hash, repackaging drain.zip never triggers a redeploy.
  source_code_hash = filebase64sha256("drain.zip")
}
# drain_handler.py
import boto3
import json

elbv2 = boto3.client('elbv2')
asg = boto3.client('autoscaling')


def get_target_groups(asg_name):
    """Return the target group ARNs attached to an Auto Scaling group.

    Fix: handler() called this helper but it was never defined in the
    original module, so every invocation crashed with a NameError.
    """
    response = asg.describe_auto_scaling_groups(
        AutoScalingGroupNames=[asg_name]
    )
    groups = response.get('AutoScalingGroups', [])
    if not groups:
        return []
    return groups[0].get('TargetGroupARNs', [])


def handler(event, context):
    """Drain an instance's load-balancer connections, then let it terminate.

    Triggered by the SNS notification from the EC2_INSTANCE_TERMINATING
    lifecycle hook. Deregisters the instance from every attached target
    group, waits for deregistration (connection draining) to complete,
    then completes the lifecycle action so the ASG proceeds.
    """
    message = json.loads(event['Records'][0]['Sns']['Message'])
    instance_id = message['EC2InstanceId']
    lifecycle_hook = message['LifecycleHookName']
    asg_name = message['AutoScalingGroupName']

    try:
        # Deregister from all attached target groups so the load balancer
        # stops routing new requests to this instance.
        target_groups = get_target_groups(asg_name)
        for tg_arn in target_groups:
            elbv2.deregister_targets(
                TargetGroupArn=tg_arn,
                Targets=[{'Id': instance_id}]
            )

        # Block until in-flight connections have drained.
        for tg_arn in target_groups:
            waiter = elbv2.get_waiter('target_deregistered')
            waiter.wait(
                TargetGroupArn=tg_arn,
                Targets=[{'Id': instance_id}]
            )
    finally:
        # Always release the hook — otherwise a drain failure leaves the
        # instance stuck in Terminating:Wait until the heartbeat expires.
        asg.complete_lifecycle_action(
            AutoScalingGroupName=asg_name,
            LifecycleHookName=lifecycle_hook,
            InstanceId=instance_id,
            LifecycleActionResult='CONTINUE'
        )
Warm Pools for Faster Scaling
# ASG with a warm pool: pre-initialized instances kept stopped, so scale-out
# only has to start them instead of running the full bootstrap — useful when
# initialization is slow.
resource "aws_autoscaling_group" "with_warm_pool" {
  name             = "app-with-warm-pool"
  desired_capacity = 2
  min_size         = 2
  max_size         = 10
  # ... other config ...
  warm_pool {
    # "Stopped" incurs only storage cost; "Running" starts faster but is
    # billed for compute while idle.
    pool_state                  = "Stopped"
    min_size                    = 2
    max_group_prepared_capacity = 5
    instance_reuse_policy {
      # On scale-in, return instances to the warm pool instead of
      # terminating them, preserving the initialization work.
      reuse_on_scale_in = true
    }
  }
}
Monitoring and Alarms
# Alert when the ASG drops below 2 healthy instances for 2 consecutive minutes.
# NOTE(review): GroupInServiceInstances is only published when group metrics
# collection is enabled on the ASG (enabled_metrics) — confirm it is enabled,
# otherwise this alarm stays in INSUFFICIENT_DATA.
resource "aws_cloudwatch_metric_alarm" "group_in_service" {
  alarm_name          = "asg-insufficient-capacity"
  comparison_operator = "LessThanThreshold"
  evaluation_periods  = 2
  metric_name         = "GroupInServiceInstances"
  namespace           = "AWS/AutoScaling"
  period              = 60
  statistic           = "Average"
  threshold           = 2
  alarm_description   = "ASG has fewer than 2 healthy instances"
  dimensions = {
    AutoScalingGroupName = aws_autoscaling_group.app.name
  }
  alarm_actions = [aws_sns_topic.alerts.arn]
}
# CLI: Check scaling activities
# Lists the 10 most recent scale-out/scale-in events with cause and status —
# the first place to look when scaling behaves unexpectedly.
aws autoscaling describe-scaling-activities \
  --auto-scaling-group-name app-asg \
  --max-items 10
# View current capacity
# JMESPath query summarizing min/max/desired and the count of instances
# currently in the InService lifecycle state.
aws autoscaling describe-auto-scaling-groups \
  --auto-scaling-group-names app-asg \
  --query 'AutoScalingGroups[0].{Min:MinSize,Max:MaxSize,Desired:DesiredCapacity,InService:length(Instances[?LifecycleState==`InService`])}'
Key Takeaways
- Use Target Tracking as your default scaling policy — it’s simpler and handles scale-in/out automatically
- Predictive Scaling is great for workloads with predictable patterns (business hours, weekly cycles)
- Mixed instance types with Spot reduce costs by 60-90% — use capacity-optimized allocation
- Warm Pools cut scale-out time from minutes to seconds for instances that need lengthy initialization
- Lifecycle hooks ensure graceful drain before termination — don’t drop in-flight requests
- Health check grace period prevents thrashing — give instances time to warm up
“The best scaling policy is the one you don’t have to think about. Target tracking gets you 80% there — optimize only when you need to.”