Google Cloud Run: From Container to Production in Minutes
Deploy containerized applications to Cloud Run with zero infrastructure management. Learn deployment patterns, scaling, custom domains, and production best practices.
Cloud Run turns any container into a scalable, HTTPS-enabled service in minutes. No clusters, no nodes, no infrastructure to manage. This guide takes you from first deployment to production-ready patterns.
Your First Cloud Run Service
The Container
# Dockerfile
FROM python:3.12-slim
WORKDIR /app
# Install dependencies first (layer caching)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
# Cloud Run injects PORT (default 8080); this ENV is only a local-run fallback
ENV PORT=8080
EXPOSE 8080
# Shell form + exec so $PORT is expanded by the shell and gunicorn runs as
# PID 1 (receives SIGTERM for graceful shutdown). --timeout 0 disables the
# gunicorn worker timeout; Cloud Run enforces the request timeout itself.
CMD exec gunicorn --bind :$PORT --workers 1 --threads 8 --timeout 0 app:app
# app.py
"""Minimal Flask service for Cloud Run.

Serves a JSON hello endpoint plus a cheap health-check endpoint, and
listens on the port Cloud Run supplies via the PORT env var.
"""
# NOTE: `request` was imported but never used in this example; removed.
from flask import Flask, jsonify
import os

app = Flask(__name__)


@app.route('/')
def hello():
    """Return a greeting plus the serving revision (K_REVISION is set by Cloud Run)."""
    return jsonify({
        'message': 'Hello from Cloud Run!',
        'revision': os.environ.get('K_REVISION', 'unknown')
    })


@app.route('/health')
def health():
    """Probe target: dependency-free 200 for startup/liveness checks."""
    return 'ok', 200


if __name__ == '__main__':
    # Local development only; in the container gunicorn serves the app
    # (see the Dockerfile CMD).
    port = int(os.environ.get('PORT', 8080))
    app.run(host='0.0.0.0', port=port)
Deploy
# Build the image with Cloud Build and push it.
# NOTE(review): gcr.io is the legacy Container Registry domain (now served by
# Artifact Registry); new projects typically use REGION-docker.pkg.dev paths.
gcloud builds submit --tag gcr.io/$PROJECT_ID/my-service
# Deploy as a fully managed Cloud Run service with a public (unauthenticated)
# HTTPS endpoint.
gcloud run deploy my-service \
  --image gcr.io/$PROJECT_ID/my-service \
  --region us-central1 \
  --platform managed \
  --allow-unauthenticated
# Output: Service URL: https://my-service-xxxxx-uc.a.run.app
Production Configuration
Terraform
# Production Cloud Run service (v2 API).
# NOTE(review): references resources declared elsewhere in the module
# (secrets, service account, VPC connector, enabled APIs) — assumed to exist.
resource "google_cloud_run_v2_service" "api" {
  name     = "api"
  location = "us-central1"

  template {
    # Scaling configuration
    scaling {
      min_instance_count = 1   # Always warm (no cold starts)
      max_instance_count = 100
    }

    # Container configuration
    containers {
      image = "${var.artifact_registry}/${var.project_id}/api:${var.image_tag}"
      ports {
        container_port = 8080
      }

      # Resource allocation
      resources {
        limits = {
          cpu    = "2"
          memory = "1Gi"
        }
        cpu_idle          = true # CPU allocated only while serving requests (cost savings)
        startup_cpu_boost = true # extra CPU during container startup
      }

      # Environment variables
      env {
        name  = "PROJECT_ID"
        value = var.project_id
      }
      env {
        name  = "LOG_LEVEL"
        value = "INFO"
      }

      # Secrets injected from Secret Manager as env vars
      env {
        name = "DATABASE_URL"
        value_source {
          secret_key_ref {
            secret  = google_secret_manager_secret.db_url.secret_id
            version = "latest"
          }
        }
      }

      # Mount the "secrets" volume (declared below) into the container
      volume_mounts {
        name       = "secrets"
        mount_path = "/secrets"
      }

      # Health checks against the app's /health endpoint
      startup_probe {
        http_get {
          path = "/health"
          port = 8080
        }
        initial_delay_seconds = 0
        period_seconds        = 10
        timeout_seconds       = 5
        failure_threshold     = 3
      }
      liveness_probe {
        http_get {
          path = "/health"
          port = 8080
        }
        period_seconds    = 30
        timeout_seconds   = 5
        failure_threshold = 3
      }
    }

    # Volumes: secret material exposed as files
    volumes {
      name = "secrets"
      secret {
        secret       = google_secret_manager_secret.api_keys.secret_id
        default_mode = 256 # 0400 — owner read-only
        items {
          version = "latest"
          path    = "api-key"
        }
      }
    }

    # Dedicated service account (least privilege) instead of the default SA
    service_account = google_service_account.api.email

    # VPC Connector for reaching private resources (Cloud SQL, Memorystore)
    vpc_access {
      connector = google_vpc_access_connector.main.id
      egress    = "PRIVATE_RANGES_ONLY"
    }

    # Per-request timeout and max concurrent requests per instance
    timeout                          = "300s"
    max_instance_request_concurrency = 80

    # Gen2 execution environment
    execution_environment = "EXECUTION_ENVIRONMENT_GEN2"
  }

  # Traffic management: all traffic to the latest ready revision
  traffic {
    type    = "TRAFFIC_TARGET_ALLOCATION_TYPE_LATEST"
    percent = 100
  }

  depends_on = [
    google_project_service.run
  ]
}
# IAM: Allow public access
# IAM: allow unauthenticated invocation.
# allUsers grants run.invoker to anyone on the internet — intentional here.
resource "google_cloud_run_v2_service_iam_member" "public" {
  location = google_cloud_run_v2_service.api.location
  name     = google_cloud_run_v2_service.api.name
  role     = "roles/run.invoker"
  member   = "allUsers"
}
Custom Domain
# Reserve static IP
# Reserve a global static IP for the HTTPS load balancer frontend
resource "google_compute_global_address" "api" {
  name = "api-ip"
}
# SSL certificate
# Google-managed SSL certificate; provisions once DNS points the domain
# at the load balancer IP
resource "google_compute_managed_ssl_certificate" "api" {
  name = "api-cert"
  managed {
    domains = ["api.example.com"]
  }
}
# Load balancer backend
# Serverless NEG: the bridge that lets a global load balancer route
# requests to the Cloud Run service
resource "google_compute_region_network_endpoint_group" "api" {
  name                  = "api-neg"
  region                = "us-central1"
  network_endpoint_type = "SERVERLESS"
  cloud_run {
    service = google_cloud_run_v2_service.api.name
  }
}
# Backend service wrapping the serverless NEG, with request logging enabled
resource "google_compute_backend_service" "api" {
  name                  = "api-backend"
  load_balancing_scheme = "EXTERNAL_MANAGED"
  protocol              = "HTTPS"
  backend {
    group = google_compute_region_network_endpoint_group.api.id
  }
  log_config {
    enable      = true
    sample_rate = 1.0 # log every request
  }
}
# URL map
resource "google_compute_url_map" "api" {
name = "api-urlmap"
default_service = google_compute_backend_service.api.id
}
# HTTPS proxy
resource "google_compute_target_https_proxy" "api" {
name = "api-https-proxy"
url_map = google_compute_url_map.api.id
ssl_certificates = [google_compute_managed_ssl_certificate.api.id]
}
# Forwarding rule
resource "google_compute_global_forwarding_rule" "api" {
name = "api-forwarding"
load_balancing_scheme = "EXTERNAL_MANAGED"
target = google_compute_target_https_proxy.api.id
ip_address = google_compute_global_address.api.id
port_range = "443"
}
Traffic Splitting for Canary Deployments
# Deploy a new revision that receives no traffic yet
gcloud run deploy api \
  --image gcr.io/$PROJECT_ID/api:v2 \
  --region us-central1 \
  --no-traffic
# Split traffic: 90% stable, 10% canary.
# NOTE: revision names here are illustrative; real names look like api-00042-abc.
gcloud run services update-traffic api \
  --region us-central1 \
  --to-revisions api-v1=90,api-v2=10
# Promote the canary: send 100% of traffic to the latest revision
gcloud run services update-traffic api \
  --region us-central1 \
  --to-latest
# Terraform: gradual rollout via two traffic blocks on the same service
resource "google_cloud_run_v2_service" "api" {
  # ... (template etc. omitted — same service as above)

  # 90% pinned to a named stable revision
  traffic {
    type     = "TRAFFIC_TARGET_ALLOCATION_TYPE_REVISION"
    revision = "api-stable"
    percent  = 90
  }
  # 10% to the latest revision, also reachable directly via the tagged URL
  traffic {
    type    = "TRAFFIC_TARGET_ALLOCATION_TYPE_LATEST"
    percent = 10
    tag     = "canary" # canary---api-xxx.a.run.app
  }
}
Connecting to Private Resources
# VPC Connector: gives Cloud Run egress into the VPC for private resources
# (Cloud SQL private IP, Memorystore, internal services)
resource "google_vpc_access_connector" "main" {
  name          = "vpc-connector"
  region        = "us-central1"
  network       = google_compute_network.main.id
  ip_cidr_range = "10.8.0.0/28" # dedicated /28, must not overlap existing subnets
  min_instances = 2
  max_instances = 10
}
# Cloud Run service using the VPC connector
resource "google_cloud_run_v2_service" "api" {
  # ... (other settings omitted)
  template {
    vpc_access {
      connector = google_vpc_access_connector.main.id
      egress    = "PRIVATE_RANGES_ONLY" # Only private IPs go through VPC
    }
    containers {
      # Private IP of the Cloud SQL instance, reachable via the connector
      env {
        name  = "DB_HOST"
        value = google_sql_database_instance.main.private_ip_address
      }
    }
  }
}
Cloud Run Jobs for Batch Processing
# job.yaml — Cloud Run Job (Admin API v1 resource).
# NOTE(review): nesting reconstructed per the v1 Job schema
# (spec.template.spec = ExecutionSpec holds parallelism/taskCount;
# the inner template.spec = TaskSpec holds containers/maxRetries/
# timeoutSeconds) — verify against `gcloud run jobs describe` output.
apiVersion: run.googleapis.com/v1
kind: Job
metadata:
  name: data-processor
spec:
  template:                 # ExecutionTemplateSpec
    spec:                   # ExecutionSpec
      template:             # TaskTemplateSpec
        spec:               # TaskSpec (one per task)
          containers:
            - image: gcr.io/my-project/processor:latest
              resources:
                limits:
                  cpu: "2"
                  memory: 4Gi
              env:
                - name: BATCH_SIZE
                  value: "1000"
          maxRetries: 3       # retries per failed task
          timeoutSeconds: 3600 # max runtime per task
      parallelism: 10         # tasks running at once
      taskCount: 100          # total tasks per execution
# Create the job (definition only — does not run it)
gcloud run jobs create data-processor \
  --image gcr.io/$PROJECT_ID/processor:latest \
  --tasks 100 \
  --parallelism 10 \
  --max-retries 3 \
  --task-timeout 3600 \
  --cpu 2 \
  --memory 4Gi \
  --region us-central1
# Execute the job once, immediately
gcloud run jobs execute data-processor --region us-central1
# Schedule a daily 02:00 run: Cloud Scheduler POSTs to the Run Admin API's
# jobs.run endpoint. The scheduler service account needs permission to
# invoke the job (roles/run.invoker).
gcloud scheduler jobs create http daily-processor \
  --location us-central1 \
  --schedule "0 2 * * *" \
  --uri "https://us-central1-run.googleapis.com/apis/run.googleapis.com/v1/namespaces/$PROJECT_ID/jobs/data-processor:run" \
  --http-method POST \
  --oauth-service-account-email scheduler@$PROJECT_ID.iam.gserviceaccount.com
Concurrency Optimization
# app.py - Async for high concurrency
"""FastAPI example: fan out to several upstream APIs concurrently.

A single shared httpx client is reused across requests so connections
are pooled instead of re-established per request.
"""
from fastapi import FastAPI
import asyncio
import httpx

app = FastAPI()

# Reuse HTTP client across requests (connection pooling)
http_client = httpx.AsyncClient()


async def _fetch_json(url):
    """GET *url* with the shared client and decode the JSON body."""
    response = await http_client.get(url)
    return response.json()


@app.get("/aggregate")
async def aggregate():
    """Handle many concurrent requests efficiently."""
    upstreams = (
        "https://api1.example.com/data",
        "https://api2.example.com/data",
        "https://api3.example.com/data",
    )
    # All three upstream calls run concurrently; gather preserves order.
    results = await asyncio.gather(*(_fetch_json(u) for u in upstreams))
    return {"results": results}


@app.on_event("shutdown")
async def shutdown():
    # Close pooled connections cleanly when the instance shuts down.
    await http_client.aclose()
# Set high concurrency for async apps.
# --concurrency = max simultaneous requests per instance.
# (NOTE: the original had an inline comment after a trailing "\", which
# breaks the line continuation in bash — keep continued lines comment-free.)
gcloud run deploy api \
  --concurrency 250 \
  --cpu 2 \
  --memory 1Gi
Monitoring and Observability
# Structured logging (auto-parsed by Cloud Logging)
import json
import sys

# Severities at ERROR level and above are routed to stderr.
_ERROR_SEVERITIES = ("ERROR", "CRITICAL", "ALERT", "EMERGENCY")


def log(severity, message, **kwargs):
    """Emit one structured log entry as a single JSON line.

    Cloud Logging parses JSON written to stdout/stderr and promotes the
    "severity" and "message" fields; extra keyword args become payload
    fields. The original only sent the exact string "ERROR" to stderr;
    all error-level severities are routed there now.

    Returns the entry dict (useful for testing and chaining).
    """
    entry = {
        "severity": severity,
        "message": message,
        **kwargs
    }
    stream = sys.stderr if severity in _ERROR_SEVERITIES else sys.stdout
    print(json.dumps(entry), file=stream)
    return entry


log("INFO", "Processing request", request_id="abc123", user_id="user456")
log("ERROR", "Database connection failed", error="timeout")
# Query recent log entries for the service from Cloud Logging
gcloud logging read "resource.type=cloud_run_revision AND resource.labels.service_name=api" \
  --limit 50 \
  --format json
# Stream (tail) logs live
gcloud beta run services logs tail api --region us-central1
Key Takeaways
- Zero to production in minutes — just provide a container
- Scales to zero — pay nothing when idle
- Concurrency matters — one instance handles many requests
- Min instances eliminate cold starts for critical paths
- VPC Connector for private Cloud SQL, Memorystore, etc.
- Traffic splitting enables safe canary deployments
- Gen2 execution provides faster cold starts and more features
“Cloud Run is the ‘just deploy it’ button for containers. Start here, and only move to GKE when you genuinely need Kubernetes features.”