Cold starts are the Achilles’ heel of serverless. When Lambda creates a new execution environment, there’s latency for container provisioning, runtime initialization, and your code startup. This guide covers every technique to minimize that tax.

Anatomy of a Cold Start

Cold Start Timeline:
├── Container Creation (~100-500ms)      ← AWS-managed, can't optimize
├── Runtime Init (~50-200ms)             ← Varies by language
├── Handler Init (your code)             ← Optimize this
│   ├── Import dependencies
│   ├── SDK client initialization
│   └── Connection establishment
└── Handler Execution                    ← Warm path

Typical cold start times by runtime:

  • Python: 200-500ms
  • Node.js: 150-400ms
  • Java: 3-10 seconds (!)
  • Go/Rust: 50-150ms
  • .NET: 400-800ms

Provisioned Concurrency

The nuclear option — pre-warm execution environments.

# Function must publish a version (publish = true) because provisioned
# concurrency can only be attached to a version or alias, never $LATEST.
resource "aws_lambda_function" "api" {
  function_name = "low-latency-api"
  runtime       = "python3.12"
  handler       = "handler.handler"
  memory_size   = 1024
  timeout       = 30

  filename         = "deployment.zip"
  source_code_hash = filebase64sha256("deployment.zip")
  role             = aws_iam_role.lambda.arn

  publish = true  # Required for provisioned concurrency
}

# Stable pointer to the latest published version; clients invoke the alias
# so provisioned environments survive redeployments.
resource "aws_lambda_alias" "live" {
  name             = "live"
  function_name    = aws_lambda_function.api.function_name
  function_version = aws_lambda_function.api.version
}

# Keeps 10 execution environments initialized at all times (billed even
# while idle).
# NOTE(review): if Application Auto Scaling also manages this alias's
# provisioned concurrency (see the aws_appautoscaling_target below), this
# static value will fight the scaler on every apply — confirm only one
# mechanism owns the setting.
resource "aws_lambda_provisioned_concurrency_config" "api" {
  function_name                     = aws_lambda_function.api.function_name
  qualifier                         = aws_lambda_alias.live.name
  provisioned_concurrent_executions = 10
}

Scheduled Scaling for Provisioned Concurrency

# Registers the alias's provisioned concurrency as a scalable target so
# Application Auto Scaling can adjust it on a schedule.
resource "aws_appautoscaling_target" "lambda" {
  max_capacity       = 50
  min_capacity       = 5
  resource_id        = "function:${aws_lambda_function.api.function_name}:${aws_lambda_alias.live.name}"
  scalable_dimension = "lambda:function:ProvisionedConcurrency"
  service_namespace  = "lambda"
}

# Scale up ahead of the working day. NOTE(review): these cron expressions
# are evaluated in UTC — confirm 08:00 UTC matches the intended business
# hours for your users.
resource "aws_appautoscaling_scheduled_action" "business_hours" {
  name               = "business-hours-scale-up"
  service_namespace  = aws_appautoscaling_target.lambda.service_namespace
  resource_id        = aws_appautoscaling_target.lambda.resource_id
  scalable_dimension = aws_appautoscaling_target.lambda.scalable_dimension
  schedule           = "cron(0 8 ? * MON-FRI *)"

  scalable_target_action {
    min_capacity = 20
    max_capacity = 50
  }
}

# Scale down for the night; runs every day at 20:00 (weekends included).
resource "aws_appautoscaling_scheduled_action" "night" {
  name               = "night-scale-down"
  service_namespace  = aws_appautoscaling_target.lambda.service_namespace
  resource_id        = aws_appautoscaling_target.lambda.resource_id
  scalable_dimension = aws_appautoscaling_target.lambda.scalable_dimension
  schedule           = "cron(0 20 ? * * *)"

  scalable_target_action {
    min_capacity = 2
    max_capacity = 10
  }
}

SnapStart for Java

# SnapStart: Lambda snapshots the initialized JVM when a version is
# published and restores from that snapshot on invoke, cutting Java cold
# starts from seconds to a few hundred milliseconds.
resource "aws_lambda_function" "java_api" {
  function_name = "java-api"
  runtime       = "java21"
  handler       = "com.example.Handler::handleRequest"
  memory_size   = 2048
  timeout       = 30

  filename = "app.jar"
  role     = aws_iam_role.lambda.arn

  # NOTE(review): no source_code_hash is set, so Terraform will not detect
  # changes to app.jar — confirm this is intentional.

  snap_start {
    apply_on = "PublishedVersions"  # snapshots are taken for published versions only
  }

  publish = true  # SnapStart requires publishing a version
}
// Handler.java - SnapStart-optimized
public class Handler implements RequestHandler<APIGatewayProxyRequestEvent, APIGatewayProxyResponseEvent> {
    
    // Static initializers run during snapshot creation (once, at publish
    // time), so this client's setup cost is baked into the snapshot rather
    // than paid on every cold start.
    private static final DynamoDbClient dynamoDb = DynamoDbClient.builder()
        .httpClient(UrlConnectionHttpClient.builder().build())
        .build();
    
    private static final ObjectMapper mapper = new ObjectMapper();
    
    // NOTE(review): this is a JVM shutdown hook, NOT a CRaC checkpoint hook.
    // SnapStart's pre-snapshot / post-restore callbacks use the org.crac API
    // (Resource.beforeCheckpoint / afterRestore); a shutdown hook is not
    // guaranteed to run before the snapshot is taken — confirm and migrate
    // to org.crac if pre-checkpoint cleanup is actually required here.
    static {
        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
            // Cleanup before snapshot
        }));
    }
    
    @Override
    public APIGatewayProxyResponseEvent handleRequest(
            APIGatewayProxyRequestEvent event, 
            Context context) {
        // Handler runs from restored snapshot
        return processRequest(event);
    }
}

Runtime Optimization

Python: Minimize Import Time

# BAD: Heavy imports at module level
import pandas as pd
import numpy as np
import boto3
import json
import logging
from my_heavy_lib import everything

# GOOD: Lazy imports
import json
import logging

# Module-level caches: populated on first use, reused on every warm invoke.
_pandas = None
_boto3_session = None


def get_pandas():
    """Return the pandas module, importing it only on first call."""
    global _pandas
    if _pandas is not None:
        return _pandas
    import pandas
    _pandas = pandas
    return _pandas


def get_dynamodb():
    """Return a cached boto3 DynamoDB resource, created on first call."""
    global _boto3_session
    if _boto3_session is not None:
        return _boto3_session
    import boto3
    _boto3_session = boto3.resource('dynamodb')
    return _boto3_session

Node.js: Use ES Modules and Top-Level Await

// handler.mjs
import { DynamoDBClient } from "@aws-sdk/client-dynamodb";
import { DynamoDBDocumentClient, GetCommand } from "@aws-sdk/lib-dynamodb";

// Module scope: constructed once per execution environment, reused on
// every warm invocation.
const ddb = DynamoDBDocumentClient.from(new DynamoDBClient({}));

// Top-level await runs during INIT; issue one throwaway read so the TLS
// connection is established before the first real request arrives.
try {
  await ddb.send(new GetCommand({
    TableName: process.env.TABLE_NAME,
    Key: { id: "__warmup__" }
  }));
} catch {
  // Warmup failures are harmless — ignore them.
}

export const handler = async (event) => {
  // Connection is already warm here.
  const { Item } = await ddb.send(new GetCommand({
    TableName: process.env.TABLE_NAME,
    Key: { id: event.id }
  }));

  return { statusCode: 200, body: JSON.stringify(Item) };
};

Minimize Package Size

# Multi-stage build for minimal container
FROM public.ecr.aws/lambda/python:3.12 AS builder

COPY requirements.txt .
RUN pip install --no-cache-dir --target /asset -r requirements.txt

# Strip caches, tests, and bytecode the runtime never needs
RUN find /asset -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true \
 && find /asset -type d -name "tests" -exec rm -rf {} + 2>/dev/null || true \
 && find /asset -name "*.pyc" -delete \
 && find /asset -name "*.pyo" -delete

FROM public.ecr.aws/lambda/python:3.12

# Only the pruned dependency tree and the handler reach the final image
COPY --from=builder /asset ${LAMBDA_TASK_ROOT}
COPY handler.py ${LAMBDA_TASK_ROOT}

CMD ["handler.handler"]

Memory and CPU Optimization

More memory = more CPU = faster cold starts (sometimes).

# Benchmark cold-start time at different memory sizes.
# AWS CLI v2 needs --cli-binary-format raw-in-base64-out for an inline
# JSON payload; without it the invoke fails with an "Invalid base64" error.
for mem in 128 256 512 1024 2048; do
  aws lambda update-function-configuration \
    --function-name my-function \
    --memory-size "$mem"

  # Wait for the config update to finish
  aws lambda wait function-updated --function-name my-function

  for i in {1..5}; do
    # Touch an environment variable to force a fresh execution environment,
    # so every sample is a true cold start (otherwise only the first invoke
    # after the memory change is cold and the other four are warm).
    # NOTE: --environment replaces ALL variables — merge with existing ones
    # first if the function relies on any.
    aws lambda update-function-configuration \
      --function-name my-function \
      --environment "Variables={COLDSTART_NONCE=$mem-$i}" \
      >/dev/null
    aws lambda wait function-updated --function-name my-function

    # "Init Duration" appears in the REPORT line only on cold starts
    aws lambda invoke \
      --function-name my-function \
      --cli-binary-format raw-in-base64-out \
      --payload '{}' \
      --log-type Tail \
      --query 'LogResult' --output text \
      response.json | base64 -d | grep "Init Duration"
  done
done

Architectural Patterns

Keep Functions Warm with Scheduled Events

# Ping the function every 5 minutes so at least one environment stays warm.
resource "aws_cloudwatch_event_rule" "warmup" {
  name                = "lambda-warmup"
  schedule_expression = "rate(5 minutes)"
}

resource "aws_cloudwatch_event_target" "warmup" {
  rule  = aws_cloudwatch_event_rule.warmup.name
  arn   = aws_lambda_function.api.arn
  input = jsonencode({ warmup = true })
}

# Bug fix: without this resource-based permission, EventBridge's invocations
# are rejected and the schedule never actually warms the function.
resource "aws_lambda_permission" "warmup" {
  statement_id  = "AllowWarmupFromEventBridge"
  action        = "lambda:InvokeFunction"
  function_name = aws_lambda_function.api.function_name
  principal     = "events.amazonaws.com"
  source_arn    = aws_cloudwatch_event_rule.warmup.arn
}
def handler(event, context):
    """Short-circuit scheduled warmup pings before doing any real work."""
    if not event.get('warmup'):
        # Real logic here
        return process_request(event)

    print("Warmup ping")
    return {'statusCode': 200}

Response Streaming for Perceived Performance

// streaming-handler.mjs
import { DynamoDBClient } from "@aws-sdk/client-dynamodb";

// `awslambda` is a global injected by the Lambda Node.js managed runtime;
// it is not imported. streamifyResponse enables response streaming
// (supported via Lambda Function URLs).
export const handler = awslambda.streamifyResponse(
  async (event, responseStream, context) => {
    // Status code / headers must be attached before the first write.
    const metadata = { statusCode: 200 };
    responseStream = awslambda.HttpResponseStream.from(responseStream, metadata);
    
    // Open the JSON envelope immediately so the client receives first
    // bytes before the data fetch completes.
    responseStream.write('{"status":"processing","items":[');
    
    // Stream items as they're retrieved
    // NOTE(review): fetchItems() is awaited in full before any item is
    // written, so items are flushed together rather than incrementally —
    // confirm whether per-item streaming was intended.
    const items = await fetchItems();
    for (let i = 0; i < items.length; i++) {
      if (i > 0) responseStream.write(',');
      responseStream.write(JSON.stringify(items[i]));
    }
    
    responseStream.write(']}');
    responseStream.end();
  }
);

Use Lambda Extensions Wisely

# Extensions run during INIT phase - adds cold start time!
# Only use extensions that provide clear value

# Good use: AWS Parameters and Secrets Lambda Extension
# Caches SSM/Secrets Manager values, avoids API calls

import os
import json  # bug fix: json.loads was used below without being imported
import urllib.request

def get_secret():
    """Fetch a secret via the AWS Parameters and Secrets Lambda Extension.

    The extension runs a local HTTP cache (default port 2773), so repeated
    reads avoid a Secrets Manager API round trip.

    Returns:
        The secret's SecretString value.

    Raises:
        KeyError: if AWS_SESSION_TOKEN is not set.
        urllib.error.URLError: if the extension endpoint is unreachable.
    """
    port = os.environ.get("PARAMETERS_SECRETS_EXTENSION_HTTP_PORT", 2773)
    url = f"http://localhost:{port}/secretsmanager/get?secretId=my-secret"

    # The extension authenticates callers with the function's session token.
    headers = {"X-Aws-Parameters-Secrets-Token": os.environ["AWS_SESSION_TOKEN"]}
    req = urllib.request.Request(url, headers=headers)

    # Context manager closes the response promptly; timeout bounds the wait
    # instead of hanging the invocation if the extension is unresponsive.
    with urllib.request.urlopen(req, timeout=5) as response:
        return json.loads(response.read())["SecretString"]

Measuring Cold Starts

# Lambda Powertools for detailed metrics
from aws_lambda_powertools import Logger, Metrics, Tracer
from aws_lambda_powertools.metrics import MetricUnit

logger = Logger()
tracer = Tracer()
# NOTE(review): Metrics needs a namespace (POWERTOOLS_METRICS_NAMESPACE env
# var or Metrics(namespace=...)) — confirm one is configured for this function.
metrics = Metrics()

@logger.inject_lambda_context
@tracer.capture_lambda_handler
@metrics.log_metrics
def handler(event, context):
    # First call in a fresh execution environment finds no _cold_start
    # attribute on the function object, so getattr returns the default True;
    # every later (warm) call in the same environment sees the False below.
    is_cold = getattr(handler, '_cold_start', True)
    handler._cold_start = False
    
    if is_cold:
        metrics.add_metric(name="ColdStart", unit=MetricUnit.Count, value=1)
        logger.info("Cold start detected")
    
    return process_request(event)
# CloudWatch Insights query for cold start analysis
# Saved Logs Insights query. REPORT lines carry @initDuration only for cold
# starts, so summing the boolean (@initDuration > 0) counts cold starts per
# hourly bin while avg/max/pct summarize their duration.
resource "aws_cloudwatch_query_definition" "cold_starts" {
  name = "Lambda Cold Start Analysis"

  log_group_names = ["/aws/lambda/my-function"]

  query_string = <<-EOF
    filter @type = "REPORT"
    | stats 
        count(*) as invocations,
        sum(@initDuration > 0) as coldStarts,
        avg(@initDuration) as avgColdStartMs,
        max(@initDuration) as maxColdStartMs,
        pct(@initDuration, 99) as p99ColdStartMs
      by bin(1h)
  EOF
}

When to Accept Cold Starts

Not every function needs optimization:

| Use case               | Cold start OK? | Why                          |
|------------------------|----------------|------------------------------|
| Background jobs        | ✅ Yes         | Latency doesn't matter       |
| Scheduled tasks        | ✅ Yes         | No user waiting              |
| Async event processing | ✅ Yes         | Queues buffer delays         |
| User-facing API        | ⚠️ Depends     | Tolerable if <1% of requests |
| Real-time API          | ❌ No          | Use provisioned concurrency  |
| Chat/WebSocket         | ❌ No          | Every ms matters             |

Key Takeaways

  1. Provisioned concurrency eliminates cold starts entirely but costs money even when idle
  2. SnapStart makes Java viable for latency-sensitive workloads (200ms vs 3s cold starts)
  3. Lazy loading defers initialization cost to first use — helpful if not all code paths are hit
  4. More memory often means faster cold starts — the extra CPU speeds up initialization
  5. Minimize dependencies — every import adds milliseconds; tree-shake aggressively
  6. Stream responses for perceived performance when you can’t eliminate cold starts

“Cold starts are a tax on the first request. Decide whether to pay upfront (provisioned concurrency) or accept occasional slow responses.”