Ansible’s power lies in its simplicity, but simple doesn’t mean easy. Writing idempotent, maintainable playbooks requires understanding patterns that separate production-grade automation from scripts that work once.

Idempotency: The Core Principle

An idempotent operation produces the same result regardless of how many times it runs. Every Ansible task should be idempotent.

The Wrong Way

# ❌ Not idempotent - `>>` appends another copy of the line on every run
- name: Add line to config
  shell: echo "export PATH=/opt/bin:$PATH" >> ~/.bashrc

The Right Way

# ✅ Idempotent - the file is only edited when the line is not already there
- name: Add PATH to bashrc
  lineinfile:
    state: present
    path: ~/.bashrc
    line: "export PATH=/opt/bin:$PATH"

Project Structure

A well-organized Ansible project:

ansible/
├── ansible.cfg                 # Ansible configuration
├── inventory/
│   ├── production/
│   │   ├── hosts.yml          # Production hosts
│   │   └── group_vars/
│   │       ├── all.yml        # Variables for all hosts
│   │       └── webservers.yml # Variables for webservers group
│   └── staging/
│       ├── hosts.yml
│       └── group_vars/
├── playbooks/
│   ├── site.yml               # Main playbook
│   ├── webservers.yml
│   └── databases.yml
├── roles/
│   ├── common/
│   ├── nginx/
│   └── postgresql/
├── group_vars/                 # Auto-loaded only when beside the playbooks or the inventory
│   └── all/
│       ├── vars.yml
│       └── vault.yml          # Encrypted secrets
└── requirements.yml            # Galaxy roles

Inventory Best Practices

Static YAML Inventory

# inventory/production/hosts.yml
---
all:
  children:
    # web-[01:03] is an inventory range covering web-01 through web-03.
    # Variables under `vars` apply to every host in the group.
    webservers:
      hosts:
        web-[01:03].prod.example.com:
      vars:
        http_port: 80

    # Host-specific variables are nested directly under the host name.
    databases:
      hosts:
        db-01.prod.example.com:
          postgresql_role: primary
        db-02.prod.example.com:
          postgresql_role: replica

    loadbalancers:
      hosts:
        lb-01.prod.example.com:

Group Variables

# inventory/production/group_vars/webservers.yml
---
# Application settings for this environment
app_env: production
app_debug: false

# nginx tuning
nginx_worker_connections: 1024
nginx_worker_processes: auto

# TLS file paths are derived per host from the inventory hostname
ssl_key: "/etc/ssl/private/{{ inventory_hostname }}.key"
ssl_certificate: "/etc/ssl/certs/{{ inventory_hostname }}.crt"

Role Structure

A complete role structure:

roles/nginx/
├── defaults/
│   └── main.yml      # Default variables (lowest precedence)
├── vars/
│   └── main.yml      # Role variables (higher precedence)
├── tasks/
│   ├── main.yml      # Main tasks entry point
│   ├── install.yml
│   ├── configure.yml
│   └── service.yml
├── handlers/
│   └── main.yml      # Handlers (restart, reload, etc.)
├── templates/
│   └── nginx.conf.j2
├── files/
│   └── ssl-params.conf
├── meta/
│   └── main.yml      # Role dependencies
└── molecule/         # Testing
    └── default/

Role Tasks

# roles/nginx/tasks/main.yml
---
# Load the first vars file that exists, from most specific (distribution plus
# version) down to the generic default.yml.
- name: Include OS-specific variables
  include_vars: "{{ item }}"
  with_first_found:
    - "{{ ansible_distribution }}-{{ ansible_distribution_version }}.yml"
    - "{{ ansible_distribution }}.yml"
    - "{{ ansible_os_family }}.yml"
    - default.yml

# Each phase lives in its own task file and carries both the role-wide
# `nginx` tag and a phase tag, so phases can be run selectively.
- name: Install nginx
  import_tasks: install.yml
  tags:
    - nginx
    - install

- name: Configure nginx
  import_tasks: configure.yml
  tags:
    - nginx
    - configure

- name: Manage nginx service
  import_tasks: service.yml
  tags:
    - nginx
    - service
# roles/nginx/tasks/install.yml
---
# `package` delegates to the platform's package manager.
- name: Install nginx package
  package:
    state: present
    name: nginx
  notify: Restart nginx

# conf.d plus the sites-available / sites-enabled pair used for virtual hosts
- name: Ensure nginx directories exist
  file:
    state: directory
    path: "{{ item }}"
    mode: '0755'
    owner: root
    group: root
  loop:
    - /etc/nginx/conf.d
    - /etc/nginx/sites-available
    - /etc/nginx/sites-enabled
# roles/nginx/tasks/configure.yml
---
# The rendered file is passed to `nginx -t` (as %s) for validation.
- name: Deploy nginx.conf
  template:
    src: nginx.conf.j2
    dest: /etc/nginx/nginx.conf
    mode: '0644'
    owner: root
    group: root
    validate: nginx -t -c %s
  notify: Reload nginx

# One rendered config file per entry in nginx_vhosts
- name: Deploy virtual host configs
  template:
    src: vhost.conf.j2
    dest: "/etc/nginx/sites-available/{{ item.name }}.conf"
    mode: '0644'
    owner: root
    group: root
  notify: Reload nginx
  loop: "{{ nginx_vhosts }}"

# Symlink into sites-enabled unless the vhost explicitly sets enabled: false
- name: Enable virtual hosts
  file:
    state: link
    src: "/etc/nginx/sites-available/{{ item.name }}.conf"
    dest: "/etc/nginx/sites-enabled/{{ item.name }}.conf"
  when: item.enabled | default(true)
  notify: Reload nginx
  loop: "{{ nginx_vhosts }}"

Handlers

# roles/nginx/handlers/main.yml
---
# Tasks trigger these handlers by name through `notify`. A `listen` topic
# that merely repeats the handler's own name adds nothing, so none is set.
- name: Restart nginx
  service:
    name: nginx
    state: restarted

- name: Reload nginx
  service:
    name: nginx
    state: reloaded

Default Variables

# roles/nginx/defaults/main.yml
---
# Log locations
nginx_access_log: /var/log/nginx/access.log
nginx_error_log: /var/log/nginx/error.log

# Worker and connection tuning
nginx_worker_processes: auto
nginx_worker_connections: 1024
nginx_keepalive_timeout: 65

# Virtual hosts to deploy; empty by default. Example entry:
# - name: example
#   server_name: example.com
#   root: /var/www/example
#   enabled: true
nginx_vhosts: []

Error Handling

Block with Rescue

# Deploy, migrate and health-check as one unit. A failure anywhere inside
# `block` triggers the rollback in `rescue`; `always` cleans up either way.
- name: Deploy application with rollback
  block:
    - name: Deploy new version
      copy:
        src: "{{ app_package }}"
        dest: /opt/myapp/
      notify: Restart myapp

    - name: Run database migrations
      command: /opt/myapp/migrate.sh
      register: migration_result

    # Notified handlers normally wait until the end of the play. Flush them
    # here so the health check below probes the freshly restarted service
    # rather than the previous process.
    - name: Apply pending restart before verification
      meta: flush_handlers

    - name: Verify deployment
      uri:
        url: "http://localhost:8080/health"
        status_code: 200
      register: health_result
      # An explicit `until` makes the retries effective on every Ansible
      # release; older releases ignore `retries` when `until` is absent.
      until: health_result.status == 200
      retries: 5
      delay: 10

  rescue:
    - name: Rollback to previous version
      copy:
        src: "{{ app_package_previous }}"
        dest: /opt/myapp/
      notify: Restart myapp

    - name: Notify about failed deployment
      slack:
        token: "{{ slack_token }}"
        channel: "#deployments"
        msg: "Deployment failed on {{ inventory_hostname }}"

  always:
    - name: Clean up temp files
      file:
        path: /tmp/deploy-artifacts
        state: absent

Assertions

# Stop early when the host is not a supported Ubuntu release or has too
# little memory; every condition in `that` must hold.
- name: Verify prerequisites
  assert:
    success_msg: "All prerequisites met"
    fail_msg: "Host does not meet minimum requirements"
    that:
      - "ansible_distribution == 'Ubuntu'"
      - "ansible_distribution_version is version('20.04', '>=')"
      - "ansible_memtotal_mb >= 2048"

Failed When

# Read-only probe: never reports "changed", and treats "inactive" as success.
- name: Check service status
  command: systemctl is-active myapp
  register: service_status
  failed_when: service_status.rc not in [0, 3]  # 3 = inactive (ok for first run)
  changed_when: false

# `failed_when` replaces the default return-code check, so the rc test must
# be repeated here; otherwise a missing VERSION file would pass silently.
- name: Get deployment version
  command: cat /opt/myapp/VERSION
  register: version_output
  changed_when: false  # reading a file never changes the host
  failed_when: "version_output.rc != 0 or 'error' in version_output.stderr"

Conditionals and Loops

Conditional Execution

# Only the task matching the host's OS family runs; the other is skipped.
- name: Install packages for Debian
  when: ansible_os_family == 'Debian'
  apt:
    state: present
    name: "{{ packages }}"

- name: Install packages for RedHat
  when: ansible_os_family == 'RedHat'
  yum:
    state: present
    name: "{{ packages }}"

Loops

# One user per entry in `users`; `groups` is omitted when the entry has
# none, and the login shell falls back to /bin/bash.
- name: Create users
  user:
    state: present
    name: "{{ item.name }}"
    shell: "{{ item.shell | default('/bin/bash') }}"
    groups: "{{ item.groups | default(omit) }}"
  loop_control:
    label: "{{ item.name }}"  # Only show name in output
  loop: "{{ users }}"

# Poll each webserver's health endpoint until it answers 200:
# up to 10 retries per host, 5 seconds apart.
- name: Wait for services to be ready
  uri:
    status_code: 200
    url: "http://{{ item }}:8080/health"
  register: health_check
  until: health_check.status == 200
  delay: 5
  retries: 10
  loop: "{{ groups['webservers'] }}"

Secrets Management with Vault

Encrypting Files

# Create a new encrypted file
ansible-vault create group_vars/all/vault.yml

# Edit an existing encrypted file
ansible-vault edit group_vars/all/vault.yml

# Encrypt an existing plain-text file
ansible-vault encrypt secrets.yml

# View the contents of an encrypted file
ansible-vault view group_vars/all/vault.yml

Using Vault in Playbooks

# group_vars/all/vars.yml
# Each variable here is a reference to a vault_-prefixed variable
# defined in vault.yml.
database_password: "{{ vault_database_password }}"
api_key: "{{ vault_api_key }}"

# group_vars/all/vault.yml (encrypted; contents shown here in decrypted form)
vault_database_password: supersecret123
vault_api_key: abc123xyz
# Run playbook and prompt for the vault password
ansible-playbook site.yml --ask-vault-pass

# Or read the vault password from a file
ansible-playbook site.yml --vault-password-file ~/.vault_pass

Testing with Molecule

Setup

# Install molecule
# NOTE(review): newer Molecule releases appear to ship the Docker driver in
# the molecule-plugins package instead — confirm for the version in use.
pip install molecule molecule-docker

# Initialize role with molecule
# NOTE(review): `molecule init role` appears to have been removed in
# Molecule 6 (replaced by `molecule init scenario` inside an existing
# role) — confirm for the version in use.
molecule init role myrole --driver-name docker

Molecule Configuration

# roles/nginx/molecule/default/molecule.yml
---
# Role dependencies are resolved through Ansible Galaxy.
dependency:
  name: galaxy

# Test instances are Docker containers.
driver:
  name: docker

# One container per target distribution.
# NOTE(review): with pre_build_image: true the image is presumably used as
# pulled; stock ubuntu/debian images may not include Python — confirm.
platforms:
  - name: ubuntu-22
    image: ubuntu:22.04
    pre_build_image: true
  - name: debian-12
    image: debian:12
    pre_build_image: true

# Ansible provisions the instances; the interpreter path is pinned for
# the ubuntu-22 instance only.
provisioner:
  name: ansible
  inventory:
    host_vars:
      ubuntu-22:
        ansible_python_interpreter: /usr/bin/python3

# Verification is written as an Ansible playbook.
verifier:
  name: ansible

Verification Tests

# roles/nginx/molecule/default/verify.yml
---
- name: Verify
  hosts: all
  gather_facts: false
  tasks:
    # A non-zero exit from `nginx -v` fails the check; the probe itself
    # never counts as a change.
    - name: Check nginx is installed
      command: nginx -v
      register: nginx_version
      failed_when: nginx_version.rc != 0
      changed_when: false

    # Runs in check mode, so nothing is started. A reported "changed"
    # means the service is not running, which fails the check.
    - name: Check nginx is running
      check_mode: true
      service:
        state: started
        name: nginx
      register: nginx_service
      failed_when: nginx_service.changed

    # Give the port up to 10 seconds to accept connections.
    - name: Check nginx is listening
      wait_for:
        timeout: 10
        port: 80
# Run the full test sequence
molecule test

# Run a single stage
molecule converge  # Run playbook
molecule verify    # Run tests
molecule destroy   # Clean up

Performance Optimization

Gathering Facts

# Disable fact gathering if not needed
# Disable fact gathering if not needed
- name: Run tasks that do not need facts
  hosts: all
  gather_facts: false
  tasks:
    - name: Simple task that doesn't need facts
      file:
        path: /tmp/test
        state: touch
        # Preserve existing timestamps so repeated runs report "ok" rather
        # than "changed"; a plain touch updates the times on every run.
        modification_time: preserve
        access_time: preserve

Async Tasks

# Start the job in the background and return immediately; the registered
# result carries the job id used to poll for completion later.
- name: Long running task
  command: /opt/scripts/long-process.sh
  register: long_task
  poll: 0      # Don't wait (fire and forget)
  async: 3600  # Timeout in seconds

- name: Other tasks can run here
  debug:
    msg: "Doing other work..."

# Check the background job every 30 seconds, up to 100 times.
- name: Wait for long task
  async_status:
    jid: "{{ long_task.ansible_job_id }}"
  register: job_result
  until: job_result.finished
  delay: 30
  retries: 100

Delegation

# Runs once for every host in the play, but executes on the load balancer.
# `run_once` is deliberately not set: it would limit the task to the first
# host in the batch, so only that host would be registered.
# NOTE(review): the inventory example names the load balancer
# lb-01.prod.example.com — confirm that loadbalancer-01 resolves to it.
- name: Add host to load balancer
  command: /opt/scripts/add-to-lb.sh {{ inventory_hostname }}
  delegate_to: loadbalancer-01

Key Takeaways

  1. Everything must be idempotent — use modules, not shell commands
  2. Use roles for reusable, testable components
  3. Separate variables by environment — use inventory group_vars
  4. Handle errors gracefully — use block/rescue/always
  5. Test with Molecule — before pushing to production
  6. Use Vault for secrets — never commit plain text passwords
  7. Tag everything — for selective execution

“Ansible’s simplicity is its greatest strength, but also its greatest trap. Without discipline, ‘simple’ becomes ‘unmaintainable’.”