GKE Autopilot is Google’s fully managed Kubernetes mode. You define workloads; Google manages nodes, scaling, security, and optimization. It’s Kubernetes without the ops — but with trade-offs. This guide covers when Autopilot fits and how to use it effectively.

Autopilot vs Standard Mode

| Aspect | Autopilot | Standard |
| --- | --- | --- |
| Node management | Fully managed | You manage |
| Pricing | Per-pod resources | Per-node |
| Node access | No SSH/privileged | Full access |
| Cluster autoscaling | Automatic | Configure yourself |
| Security | Hardened by default | You configure |
| DaemonSets | Limited | Allowed |
| GPUs | Supported | Supported |
| Spot/Preemptible | Supported | Supported |

Creating an Autopilot Cluster

gcloud CLI

# Create Autopilot cluster.
# --enable-private-nodes: nodes receive internal IPs only.
# --master-ipv4-cidr: /28 range reserved for the control-plane peering.
# --cluster/services-secondary-range-name: pre-created alias-IP ranges on
#   the subnet (Autopilot clusters are always VPC-native).
gcloud container clusters create-auto production \
  --region=us-central1 \
  --release-channel=regular \
  --enable-private-nodes \
  --master-ipv4-cidr=172.16.0.0/28 \
  --network=my-vpc \
  --subnetwork=gke-subnet \
  --cluster-secondary-range-name=pods \
  --services-secondary-range-name=services

# Get credentials for kubectl; Autopilot clusters are regional, so pass --region.
gcloud container clusters get-credentials production --region=us-central1

Terraform

# Autopilot cluster: Google manages nodes, autoscaling, and node upgrades.
# Only cluster-scoped settings are declared here; node_pool blocks are not
# permitted when enable_autopilot is true.
resource "google_container_cluster" "autopilot" {
  name     = "production"
  # A region (not a zone): Autopilot clusters are always regional.
  location = "us-central1"

  # Enable Autopilot mode
  enable_autopilot = true

  # Network configuration
  network    = google_compute_network.main.id
  subnetwork = google_compute_subnetwork.gke.id

  # VPC-native (alias IP) ranges for Pods and Services; the named secondary
  # ranges must already exist on the subnetwork.
  ip_allocation_policy {
    cluster_secondary_range_name  = "pods"
    services_secondary_range_name = "services"
  }

  # Private cluster: nodes get no public IPs, but the control-plane endpoint
  # remains publicly reachable (enable_private_endpoint = false).
  private_cluster_config {
    enable_private_nodes    = true
    enable_private_endpoint = false
    master_ipv4_cidr_block  = "172.16.0.0/28"
  }

  # Release channel for automatic upgrades
  release_channel {
    channel = "REGULAR"
  }

  # Maintenance window: allow disruptive maintenance on weekends 09:00-17:00
  # UTC. The start/end dates only anchor the window; the RRULE drives the
  # recurring schedule.
  maintenance_policy {
    recurring_window {
      start_time = "2024-01-01T09:00:00Z"
      end_time   = "2024-01-01T17:00:00Z"
      recurrence = "FREQ=WEEKLY;BYDAY=SA,SU"
    }
  }

  # Workload Identity: lets Kubernetes ServiceAccounts impersonate GCP
  # service accounts instead of using exported keys.
  workload_identity_config {
    workload_pool = "${var.project_id}.svc.id.goog"
  }

  # Binary Authorization: enforce the project's singleton attestation policy
  # on images admitted to this cluster.
  binary_authorization {
    evaluation_mode = "PROJECT_SINGLETON_POLICY_ENFORCE"
  }

  # Deletion protection: terraform destroy will fail unless this is disabled.
  deletion_protection = true
}

Deploying Workloads

Basic Deployment

# deployment.yaml
# Baseline Autopilot Deployment: Autopilot bills on the pod resource
# *requests* below, so they are both mandatory and the primary cost lever.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: api
  namespace: production
spec:
  replicas: 3
  selector:
    matchLabels:
      app: api
  template:
    metadata:
      labels:
        app: api
    spec:
      containers:
      - name: api
        # NOTE(review): gcr.io (Container Registry) is deprecated in favor of
        # Artifact Registry (*.pkg.dev), and :latest defeats rollbacks — pin
        # an immutable tag or digest in real use. Kept as-is for the example.
        image: gcr.io/my-project/api:latest
        ports:
        - containerPort: 8080
        
        # REQUIRED in Autopilot: explicit resource requests
        resources:
          requests:
            cpu: 500m
            memory: 512Mi
            ephemeral-storage: 1Gi
          limits:
            cpu: 1000m
            memory: 1Gi
            ephemeral-storage: 2Gi
        
        # Gate traffic until /health responds.
        readinessProbe:
          httpGet:
            path: /health
            port: 8080
          initialDelaySeconds: 5
          periodSeconds: 10
        
        # Restart the container if /health stops responding.
        livenessProbe:
          httpGet:
            path: /health
            port: 8080
          initialDelaySeconds: 15
          periodSeconds: 20
      
      # Spread replicas across zones; DoNotSchedule makes the spread a hard
      # scheduling requirement rather than a preference.
      topologySpreadConstraints:
      - maxSkew: 1
        topologyKey: topology.kubernetes.io/zone
        whenUnsatisfiable: DoNotSchedule
        labelSelector:
          matchLabels:
            app: api
---
# ClusterIP Service fronting the Deployment: port 80 -> container port 8080.
apiVersion: v1
kind: Service
metadata:
  name: api
  namespace: production
spec:
  selector:
    app: api
  ports:
  - port: 80
    targetPort: 8080
  type: ClusterIP

Using Spot Pods

# spot-deployment.yaml
# Fault-tolerant worker on Spot Pods: steep discount in exchange for
# possible preemption on short notice.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: worker
  namespace: production
spec:
  replicas: 10
  selector:
    matchLabels:
      app: worker
  template:
    metadata:
      labels:
        app: worker
    spec:
      # Request Spot VMs (up to 91% cheaper)
      nodeSelector:
        cloud.google.com/gke-spot: "true"
      
      # Tolerate Spot termination.
      # NOTE(review): Autopilot injects this toleration automatically when the
      # Spot nodeSelector is set; kept explicit for clarity — confirm against
      # current GKE docs.
      tolerations:
      - key: cloud.google.com/gke-spot
        operator: Equal
        value: "true"
        effect: NoSchedule
      
      # Spot preemption gives ~25s before forced shutdown, so cap the pod's
      # grace period to match — TODO confirm the notice window in GKE docs.
      terminationGracePeriodSeconds: 25
      
      containers:
      - name: worker
        image: gcr.io/my-project/worker:latest
        resources:
          requests:
            cpu: 1000m
            memory: 2Gi
          limits:
            cpu: 2000m
            memory: 4Gi
        
        # Handle SIGTERM for graceful shutdown: the 20s preStop sleep plus
        # SIGTERM handling must fit inside the 25s grace period above.
        lifecycle:
          preStop:
            exec:
              command: ["/bin/sh", "-c", "sleep 20"]

GPU Workloads

# gpu-job.yaml
# One-shot training Job on an NVIDIA T4 GPU node.
apiVersion: batch/v1
kind: Job
metadata:
  name: ml-training
  namespace: ml
spec:
  template:
    spec:
      # Autopilot provisions a node matching this accelerator selector.
      nodeSelector:
        cloud.google.com/gke-accelerator: nvidia-tesla-t4
      
      containers:
      - name: trainer
        image: gcr.io/my-project/trainer:latest
        resources:
          # GPU count is identical in requests and limits — GPUs cannot be
          # overcommitted.
          requests:
            cpu: 4
            memory: 16Gi
            nvidia.com/gpu: 1
          limits:
            cpu: 8
            memory: 32Gi
            nvidia.com/gpu: 1
      
      # Never restart in place; retries are governed by backoffLimit below.
      restartPolicy: Never
  backoffLimit: 3

Horizontal Pod Autoscaling

# hpa.yaml
# Scale the api Deployment between 3 and 50 replicas on CPU and memory.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: api
  namespace: production
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: api
  minReplicas: 3
  maxReplicas: 50
  # Utilization targets are measured against pod *requests*, which in
  # Autopilot are also the billed resources.
  metrics:
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 70
  - type: Resource
    resource:
      name: memory
      target:
        type: Utilization
        averageUtilization: 80
  behavior:
    # Scale down conservatively: wait 5 minutes of stable metrics, then shed
    # at most 10% of replicas per minute.
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
      - type: Percent
        value: 10
        periodSeconds: 60
    # Scale up aggressively: no stabilization; allow doubling OR +4 pods per
    # 15s — selectPolicy Max picks whichever adds more.
    scaleUp:
      stabilizationWindowSeconds: 0
      policies:
      - type: Percent
        value: 100
        periodSeconds: 15
      - type: Pods
        value: 4
        periodSeconds: 15
      selectPolicy: Max

Ingress and Load Balancing

# ingress.yaml
# External HTTPS load balancer with a Google-managed certificate.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: api-ingress
  namespace: production
  annotations:
    # NOTE(review): the kubernetes.io/ingress.class annotation is deprecated
    # upstream in favor of spec.ingressClassName — confirm current GKE
    # guidance before changing.
    kubernetes.io/ingress.class: gce
    # Pre-reserved *global* static IP (gcloud compute addresses create --global).
    kubernetes.io/ingress.global-static-ip-name: api-ip
    networking.gke.io/managed-certificates: api-cert
    # HTTPS only: no port-80 forwarding rule is created.
    kubernetes.io/ingress.allow-http: "false"
spec:
  rules:
  - host: api.example.com
    http:
      paths:
      # GCE ingress uses its own wildcard path syntax, hence
      # ImplementationSpecific.
      - path: /*
        pathType: ImplementationSpecific
        backend:
          service:
            name: api
            port:
              number: 80
---
# Google-managed TLS cert; provisioning only completes once the domain's DNS
# resolves to the load balancer's IP.
apiVersion: networking.gke.io/v1
kind: ManagedCertificate
metadata:
  name: api-cert
  namespace: production
spec:
  domains:
  - api.example.com

Workload Identity

# service-account.yaml
# Kubernetes ServiceAccount linked to a GCP service account via Workload
# Identity. The namespace/name pair must match the [production/api] member
# string in the roles/iam.workloadIdentityUser binding.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: api
  namespace: production
  annotations:
    # NOTE(review): this value appears redacted; it should be the GSA email,
    # e.g. api@PROJECT_ID.iam.gserviceaccount.com.
    iam.gke.io/gcp-service-account: [email protected]
# Terraform: Bind K8s SA to GCP SA.
# Creates the GCP service account, grants it read access to GCS objects, and
# lets the Kubernetes SA production/api impersonate it via Workload Identity.
resource "google_service_account" "api" {
  account_id   = "api"
  display_name = "API Service Account"
}

# Project-level grant: the GSA may read objects in any bucket in the project.
resource "google_project_iam_member" "api_storage" {
  project = var.project_id
  role    = "roles/storage.objectViewer"
  member  = "serviceAccount:${google_service_account.api.email}"
}

# Allow the KSA (namespace "production", name "api") to act as the GSA; the
# bracketed pair must match the ServiceAccount manifest exactly.
resource "google_service_account_iam_member" "api_workload_identity" {
  service_account_id = google_service_account.api.name
  role               = "roles/iam.workloadIdentityUser"
  member             = "serviceAccount:${var.project_id}.svc.id.goog[production/api]"
}

Autopilot Limitations

What You CAN’T Do

# Illustrative pod-spec fragments (not complete manifests): each of these is
# rejected by Autopilot's admission policy.

# ❌ Privileged containers (blocked)
securityContext:
  privileged: true

# ❌ Host network (blocked)
hostNetwork: true

# ❌ Host PID namespace (blocked)
hostPID: true

# ❌ Most DaemonSets (blocked except GKE-managed)
# NOTE(review): newer Autopilot releases permit DaemonSets with restrictions —
# verify against current GKE documentation.
kind: DaemonSet

# ❌ Node SSH access (no nodes to SSH into)

What You CAN Do

# Illustrative pod-spec fragments (not complete manifests): each of these is
# supported on Autopilot.

# ✅ Spot/Preemptible pods
nodeSelector:
  cloud.google.com/gke-spot: "true"

# ✅ GPUs
resources:
  limits:
    nvidia.com/gpu: 1

# ✅ Arm64 workloads
nodeSelector:
  kubernetes.io/arch: arm64

# ✅ Windows containers
nodeSelector:
  kubernetes.io/os: windows

Cost Optimization

# Right-size your resources (you pay for requests, not limits)
resources:
  requests:
    cpu: 250m      # Start low
    memory: 256Mi  # Monitor and adjust
  limits:
    cpu: 1000m
    memory: 512Mi

# Use Spot for fault-tolerant workloads
nodeSelector:
  cloud.google.com/gke-spot: "true"

# Use Arm64 for compatible workloads (20% cheaper)
nodeSelector:
  kubernetes.io/arch: arm64
# Check resource recommendations: compare observed usage against requests and
# shrink requests that stay consistently idle.
kubectl top pods -n production
kubectl describe vpa api  # If VPA is enabled

When to Choose Autopilot

Choose Autopilot when:

  • You want zero node management
  • Standard workloads (web apps, APIs, batch jobs)
  • Team lacks deep Kubernetes expertise
  • Security-hardened by default is valuable
  • Cost predictability (pay per pod, not node)

Choose Standard when:

  • Need privileged containers or DaemonSets
  • Custom node configurations required
  • Specific kernel modules or drivers
  • Need to SSH into nodes for debugging
  • Running system-level monitoring tools

Key Takeaways

  1. Autopilot = Serverless Kubernetes — you focus on workloads, Google manages nodes
  2. Resource requests are billing — right-size to control costs
  3. Spot pods save up to 91% — use for fault-tolerant workloads
  4. No node access — can’t SSH, can’t run privileged containers
  5. Security hardened — many attack vectors blocked by default
  6. Best for standard workloads — APIs, web apps, jobs, ML inference

“Autopilot is Kubernetes for teams who want Kubernetes benefits without Kubernetes operations. If you’re fighting nodes more than building features, try Autopilot.”