Cloud Run turns any container into a scalable, HTTPS-enabled service in minutes. No clusters, no nodes, no infrastructure to manage. This guide takes you from first deployment to production-ready patterns.

Your First Cloud Run Service

The Container

# Dockerfile
FROM python:3.12-slim

WORKDIR /app

# Install dependencies first (layer caching): this layer is rebuilt only
# when requirements.txt changes, not on every source edit.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# Run as a non-root user: Cloud Run does not require root, and non-root
# is the container security baseline.
RUN groupadd --system app && useradd --system --gid app app
USER app

# Cloud Run injects PORT (8080 by default); EXPOSE is documentation only.
ENV PORT=8080
EXPOSE 8080

# Shell form so $PORT is expanded at runtime (Cloud Run's container contract
# requires listening on the injected PORT, not a hardcoded one); `exec` makes
# gunicorn PID 1 so it receives SIGTERM for graceful shutdown.
CMD exec gunicorn --bind :$PORT --workers 1 --threads 8 --timeout 0 app:app
# app.py
from flask import Flask, request, jsonify
import os

app = Flask(__name__)

@app.route('/')
def hello():
    """Root endpoint: greeting plus the Cloud Run revision serving it."""
    payload = {
        'message': 'Hello from Cloud Run!',
        # K_REVISION is set by Cloud Run on every instance.
        'revision': os.environ.get('K_REVISION', 'unknown'),
    }
    return jsonify(payload)

@app.route('/health')
def health():
    """Cheap, side-effect-free probe target for startup/liveness checks."""
    return 'ok', 200

if __name__ == '__main__':
    # Local development entry point only — in the container, gunicorn
    # serves the app instead of the Flask dev server.
    port = int(os.environ.get('PORT', 8080))
    app.run(host='0.0.0.0', port=port)

Deploy

# Build the image with Cloud Build and push it to the gcr.io registry
# NOTE(review): gcr.io is the legacy Container Registry domain; for
# Artifact Registry proper, tag as REGION-docker.pkg.dev/$PROJECT_ID/REPO/my-service
gcloud builds submit --tag gcr.io/$PROJECT_ID/my-service

# Deploy the pushed image as a fully managed Cloud Run service.
# --allow-unauthenticated makes the service publicly reachable (no IAM check).
gcloud run deploy my-service \
  --image gcr.io/$PROJECT_ID/my-service \
  --region us-central1 \
  --platform managed \
  --allow-unauthenticated

# Output: Service URL: https://my-service-xxxxx-uc.a.run.app

Production Configuration

Terraform

resource "google_cloud_run_v2_service" "api" {
  name     = "api"
  location = "us-central1"

  template {
    # Scaling configuration
    scaling {
      min_instance_count = 1   # Always warm (no cold starts); billed even when idle
      max_instance_count = 100 # hard cap — guards cost and downstream quota
    }

    # Container configuration
    containers {
      image = "${var.artifact_registry}/${var.project_id}/api:${var.image_tag}"

      ports {
        container_port = 8080
      }

      # Resource allocation
      resources {
        limits = {
          cpu    = "2"
          memory = "1Gi"
        }
        # CPU throttled outside request handling (cost savings); note this
        # also throttles background work on the min instance kept warm above.
        cpu_idle = true
        startup_cpu_boost = true  # extra CPU during startup to cut cold-start time
      }

      # Environment variables
      env {
        name  = "PROJECT_ID"
        value = var.project_id
      }

      env {
        name  = "LOG_LEVEL"
        value = "INFO"
      }

      # Secrets from Secret Manager, exposed as an env var.
      # "latest" resolves at instance startup — pin a numbered version for
      # fully reproducible rollouts.
      env {
        name = "DATABASE_URL"
        value_source {
          secret_key_ref {
            secret  = google_secret_manager_secret.db_url.secret_id
            version = "latest"
          }
        }
      }

      # Volume mounts — pairs with the "secrets" volume declared below
      volume_mounts {
        name       = "secrets"
        mount_path = "/secrets"
      }

      # Health checks: startup_probe gates when the instance receives traffic
      startup_probe {
        http_get {
          path = "/health"
          port = 8080
        }
        initial_delay_seconds = 0
        period_seconds        = 10
        timeout_seconds       = 5
        failure_threshold     = 3
      }

      # liveness_probe restarts instances that stop responding
      liveness_probe {
        http_get {
          path = "/health"
          port = 8080
        }
        period_seconds    = 30
        timeout_seconds   = 5
        failure_threshold = 3
      }
    }

    # Volumes: secret material mounted as files under /secrets
    volumes {
      name = "secrets"
      secret {
        secret       = google_secret_manager_secret.api_keys.secret_id
        default_mode = 256  # 0400 — read-only for the container user
        items {
          version = "latest"
          path    = "api-key"  # mounted at /secrets/api-key
        }
      }
    }

    # Service account the container runs as (least-privilege identity)
    service_account = google_service_account.api.email

    # VPC Connector for reaching private resources (Cloud SQL, Memorystore)
    vpc_access {
      connector = google_vpc_access_connector.main.id
      egress    = "PRIVATE_RANGES_ONLY"
    }

    # Per-request timeout and per-instance concurrency
    timeout = "300s"
    max_instance_request_concurrency = 80

    # Gen2 execution environment (full Linux compatibility)
    execution_environment = "EXECUTION_ENVIRONMENT_GEN2"
  }

  # Traffic management: all traffic to the latest ready revision
  traffic {
    type    = "TRAFFIC_TARGET_ALLOCATION_TYPE_LATEST"
    percent = 100
  }

  depends_on = [
    google_project_service.run
  ]
}

# IAM: Allow public access — grants run.invoker to allUsers, so anyone on
# the internet can call the service without authentication. This is the
# Terraform equivalent of --allow-unauthenticated; delete this resource to
# require IAM-authenticated callers.
resource "google_cloud_run_v2_service_iam_member" "public" {
  location = google_cloud_run_v2_service.api.location
  name     = google_cloud_run_v2_service.api.name
  role     = "roles/run.invoker"
  member   = "allUsers"
}

Custom Domain

# Reserve a global static IP for the load balancer frontend
resource "google_compute_global_address" "api" {
  name = "api-ip"
}

# Google-managed SSL certificate — provisioning completes only after the
# domain's DNS A record points at the reserved IP above.
resource "google_compute_managed_ssl_certificate" "api" {
  name = "api-cert"
  managed {
    domains = ["api.example.com"]
  }
}

# Serverless NEG: lets the external load balancer route to Cloud Run.
# Must be in the same region as the Cloud Run service.
resource "google_compute_region_network_endpoint_group" "api" {
  name                  = "api-neg"
  region                = "us-central1"
  network_endpoint_type = "SERVERLESS"

  cloud_run {
    service = google_cloud_run_v2_service.api.name
  }
}

# Backend service wrapping the serverless NEG
resource "google_compute_backend_service" "api" {
  name                  = "api-backend"
  load_balancing_scheme = "EXTERNAL_MANAGED"
  protocol              = "HTTPS"

  backend {
    group = google_compute_region_network_endpoint_group.api.id
  }

  # Request logging at the load balancer (sample_rate 1.0 = every request)
  log_config {
    enable      = true
    sample_rate = 1.0
  }
}

# URL map: route everything to the single backend
resource "google_compute_url_map" "api" {
  name            = "api-urlmap"
  default_service = google_compute_backend_service.api.id
}

# HTTPS proxy terminating TLS with the managed certificate
resource "google_compute_target_https_proxy" "api" {
  name             = "api-https-proxy"
  url_map          = google_compute_url_map.api.id
  ssl_certificates = [google_compute_managed_ssl_certificate.api.id]
}

# Forwarding rule: binds the reserved IP, port 443, to the HTTPS proxy
resource "google_compute_global_forwarding_rule" "api" {
  name                  = "api-forwarding"
  load_balancing_scheme = "EXTERNAL_MANAGED"
  target                = google_compute_target_https_proxy.api.id
  ip_address            = google_compute_global_address.api.id
  port_range            = "443"
}

Traffic Splitting for Canary Deployments

# Deploy new revision without traffic: the revision is built and becomes
# ready, but serves 0% until traffic is explicitly shifted to it
gcloud run deploy api \
  --image gcr.io/$PROJECT_ID/api:v2 \
  --region us-central1 \
  --no-traffic

# Split traffic: 90% stable, 10% canary
# NOTE(review): real revision names are auto-generated (e.g. api-00042-abc)
# unless set with --revision-suffix; "api-v1"/"api-v2" are placeholders here.
gcloud run services update-traffic api \
  --region us-central1 \
  --to-revisions api-v1=90,api-v2=10

# Promote canary to 100% — route all traffic to the latest ready revision
gcloud run services update-traffic api \
  --region us-central1 \
  --to-latest
# Terraform: Gradual rollout — two traffic blocks on the same service split
# requests between a pinned stable revision and the latest revision.
resource "google_cloud_run_v2_service" "api" {
  # ...

  # 90% of traffic pinned to a named stable revision
  traffic {
    type     = "TRAFFIC_TARGET_ALLOCATION_TYPE_REVISION"
    revision = "api-stable"
    percent  = 90
  }

  # 10% to whatever revision is latest; the tag also exposes a dedicated
  # URL for targeting the canary directly, bypassing the percentage split
  traffic {
    type    = "TRAFFIC_TARGET_ALLOCATION_TYPE_LATEST"
    percent = 10
    tag     = "canary"  # canary---api-xxx.a.run.app
  }
}

Connecting to Private Resources

# VPC Connector: bridges Cloud Run egress into the VPC so the service can
# reach private-IP resources (Cloud SQL private IP, Memorystore, internal LBs)
resource "google_vpc_access_connector" "main" {
  name          = "vpc-connector"
  region        = "us-central1"
  network       = google_compute_network.main.id
  ip_cidr_range = "10.8.0.0/28"  # /28 is the required connector subnet size

  # Connector instances scale between these bounds with traffic
  min_instances = 2
  max_instances = 10
}

# Cloud Run service using VPC (illustrative fragment — same resource as
# the full definition above, not a second resource in a real config)
resource "google_cloud_run_v2_service" "api" {
  # ...

  template {
    vpc_access {
      connector = google_vpc_access_connector.main.id
      egress    = "PRIVATE_RANGES_ONLY"  # Only private IPs go through VPC
    }

    containers {
      # Point the app at the database's private IP, reachable via the connector
      env {
        name  = "DB_HOST"
        value = google_sql_database_instance.main.private_ip_address
      }
    }
  }
}

Cloud Run Jobs for Batch Processing

# job.yaml
# Cloud Run Jobs (v1 API) nesting, outer to inner:
#   spec.template.spec            -> execution template (parallelism, taskCount)
#   spec.template.spec.template   -> per-task template (container, retries, timeout)
apiVersion: run.googleapis.com/v1
kind: Job
metadata:
  name: data-processor
spec:
  template:
    spec:
      template:
        spec:
          containers:
          - image: gcr.io/my-project/processor:latest
            resources:
              limits:
                cpu: "2"
                memory: 4Gi
            env:
            - name: BATCH_SIZE
              value: "1000"
          maxRetries: 3         # retry budget per task
          timeoutSeconds: 3600  # per-task deadline (1 hour)
      parallelism: 10   # tasks running concurrently
      taskCount: 100    # total tasks per execution
# Create job — CLI equivalent of job.yaml above: 100 tasks, 10 at a time,
# each with 2 CPU / 4Gi, up to 3 retries, 1-hour task timeout
gcloud run jobs create data-processor \
  --image gcr.io/$PROJECT_ID/processor:latest \
  --tasks 100 \
  --parallelism 10 \
  --max-retries 3 \
  --task-timeout 3600 \
  --cpu 2 \
  --memory 4Gi \
  --region us-central1

# Execute job (one execution = all 100 tasks)
gcloud run jobs execute data-processor --region us-central1

# Schedule job: Cloud Scheduler POSTs to the Jobs :run endpoint daily at
# 02:00, authenticating as a service account that has run.jobs.run permission
gcloud scheduler jobs create http daily-processor \
  --location us-central1 \
  --schedule "0 2 * * *" \
  --uri "https://us-central1-run.googleapis.com/apis/run.googleapis.com/v1/namespaces/$PROJECT_ID/jobs/data-processor:run" \
  --http-method POST \
  --oauth-service-account-email scheduler@$PROJECT_ID.iam.gserviceaccount.com

Concurrency Optimization

# app.py - Async for high concurrency
from fastapi import FastAPI
import asyncio
import httpx

app = FastAPI()

# One shared AsyncClient so connection pools are reused across every
# request this instance serves.
http_client = httpx.AsyncClient()

@app.get("/aggregate")
async def aggregate():
    """Fan out to several upstreams concurrently and merge the answers."""
    urls = [
        "https://api1.example.com/data",
        "https://api2.example.com/data",
        "https://api3.example.com/data",
    ]

    async def fetch(url):
        resp = await http_client.get(url)
        return resp.json()

    results = await asyncio.gather(*(fetch(u) for u in urls))
    return {"results": results}

@app.on_event("shutdown")
async def shutdown():
    # Close pooled connections cleanly when the instance shuts down.
    await http_client.aclose()
# Set high concurrency for async apps.
# --concurrency: max simultaneous requests per instance (async servers can
# handle far more than the default 80).
# NOTE: an inline "#" comment cannot follow a "\" line continuation — the
# backslash then escapes a space instead of the newline and the command is
# cut short — so the per-flag notes live up here.
gcloud run deploy api \
  --concurrency 250 \
  --cpu 2 \
  --memory 1Gi

Monitoring and Observability

# Structured logging (auto-parsed by Cloud Logging)
import json
import sys

def log(severity, message, **kwargs):
    """Emit one structured JSON log line.

    Cloud Logging parses JSON written to stdout/stderr and promotes the
    "severity" field; extra keyword arguments become queryable fields.
    ERROR lines go to stderr, everything else to stdout.
    """
    stream = sys.stderr if severity == "ERROR" else sys.stdout
    payload = {"severity": severity, "message": message}
    payload.update(kwargs)
    print(json.dumps(payload), file=stream)

log("INFO", "Processing request", request_id="abc123", user_id="user456")
log("ERROR", "Database connection failed", error="timeout")
# View logs: query Cloud Logging for this service's revisions
# (resource.type=cloud_run_revision scopes to Cloud Run request/app logs)
gcloud logging read "resource.type=cloud_run_revision AND resource.labels.service_name=api" \
  --limit 50 \
  --format json

# Tail logs: stream new entries live (beta command)
gcloud beta run services logs tail api --region us-central1

Key Takeaways

  1. Zero to production in minutes — just provide a container
  2. Scales to zero — pay nothing when idle
  3. Concurrency matters — one instance handles many requests
  4. Min instances eliminate cold starts for critical paths
  5. VPC Connector for private Cloud SQL, Memorystore, etc.
  6. Traffic splitting enables safe canary deployments
  7. Gen2 execution provides faster cold starts and more features

“Cloud Run is the ‘just deploy it’ button for containers. Start here, and only move to GKE when you genuinely need Kubernetes features.”