Setting up Azure Kubernetes Service (AKS) with Terraform

Learn how to deploy and manage Azure Kubernetes Service clusters using Terraform, including networking, scaling, and monitoring

Setting up Azure Kubernetes Service (AKS) with Terraform

Azure Kubernetes Service (AKS) is a managed container orchestration service. This guide demonstrates how to set up and manage AKS using Terraform.

Video Tutorial

Learn more about managing Azure Kubernetes Service with Terraform in this comprehensive video tutorial:

View Source Code

Prerequisites

  • Azure CLI configured with appropriate permissions
  • Terraform installed (version 1.0.0 or later)
  • kubectl installed
  • Resource group created

Project Structure

terraform-azure-aks/
├── main.tf
├── variables.tf
├── outputs.tf
├── modules/
│   └── aks/
│       ├── main.tf
│       ├── variables.tf
│       └── outputs.tf
└── kubernetes/
    └── manifests/
        ├── deployment.yaml
        └── service.yaml

AKS Cluster Configuration

Create modules/aks/main.tf:

# AKS cluster: managed control plane plus the default node pool.
# NOTE(review): addon_profile and role_based_access_control are written in the
# azurerm 2.x block form — confirm the pinned provider version before upgrading.
resource "azurerm_kubernetes_cluster" "main" {
  name                = "${var.project_name}-aks"
  location            = var.location
  resource_group_name = var.resource_group_name
  dns_prefix          = "${var.project_name}-aks"
  kubernetes_version  = var.kubernetes_version

  default_node_pool {
    name            = "default"
    vm_size         = var.vm_size
    os_disk_size_gb = 30
    vnet_subnet_id  = var.subnet_id
    max_pods        = 100

    # node_count is only the initial size; with autoscaling enabled the
    # autoscaler moves it between min_count and max_count afterwards
    # (see the lifecycle block at the bottom of this resource).
    node_count          = var.node_count
    enable_auto_scaling = true
    min_count           = var.min_nodes
    max_count           = var.max_nodes

    node_labels = {
      "environment" = var.environment
      "nodepool"    = "default"
    }

    tags = var.tags
  }

  # Cluster identity is created and managed by Azure.
  identity {
    type = "SystemAssigned"
  }

  network_profile {
    network_plugin    = "azure"
    network_policy    = "calico"
    load_balancer_sku = "standard"

    # The service CIDR must not overlap the virtual network the nodes live in
    # (10.0.0.0/8 in this guide) or any network connected to it, so it is
    # placed in a separate private range.
    service_cidr = "172.16.0.0/16"
    # Must be an address inside service_cidr.
    dns_service_ip     = "172.16.0.10"
    docker_bridge_cidr = "172.17.0.1/16"
  }

  addon_profile {
    # Ships container logs/metrics to the Log Analytics workspace.
    oms_agent {
      enabled                    = true
      log_analytics_workspace_id = azurerm_log_analytics_workspace.main.id
    }

    azure_policy {
      enabled = true
    }

    http_application_routing {
      enabled = false
    }
  }

  role_based_access_control {
    enabled = true

    azure_active_directory {
      managed                = true
      admin_group_object_ids = [var.admin_group_id]
    }
  }

  tags = var.tags

  lifecycle {
    # The cluster autoscaler changes the live node count; without this,
    # every apply would try to reset the pool back to var.node_count.
    ignore_changes = [
      default_node_pool[0].node_count,
    ]
  }
}

# Additional node pool for user workloads, attached to the main AKS cluster.
resource "azurerm_kubernetes_cluster_node_pool" "user" {
  name                  = "user"
  kubernetes_cluster_id = azurerm_kubernetes_cluster.main.id
  vm_size               = var.user_pool_vm_size
  vnet_subnet_id        = var.subnet_id
  max_pods              = 100

  # node_count is only the initial size; the autoscaler adjusts it between
  # min_count and max_count afterwards (see the lifecycle block below).
  node_count          = var.user_pool_node_count
  enable_auto_scaling = true
  min_count           = var.user_pool_min_nodes
  max_count           = var.user_pool_max_nodes

  node_labels = {
    "environment" = var.environment
    "nodepool"    = "user"
  }

  # Only pods that tolerate this taint are scheduled onto the pool.
  node_taints = [
    "dedicated=user:NoSchedule"
  ]

  tags = var.tags

  lifecycle {
    # The autoscaler changes the live node count; without this, every apply
    # would try to reset the pool back to var.user_pool_node_count.
    ignore_changes = [
      node_count,
    ]
  }
}

# Log Analytics workspace that receives the cluster's container logs and
# diagnostic data.
resource "azurerm_log_analytics_workspace" "main" {
  name                = "${var.project_name}-law"
  resource_group_name = var.resource_group_name
  location            = var.location

  # Pricing tier and how long ingested data is kept.
  sku               = "PerGB2018"
  retention_in_days = 30

  tags = var.tags
}

Network Configuration

  1. Virtual Network Setup
# Virtual network that hosts the AKS and ingress subnets.
resource "azurerm_virtual_network" "main" {
  name                = "${var.project_name}-vnet"
  resource_group_name = var.resource_group_name
  location            = var.location

  # Address range from which the subnets are carved.
  address_space = ["10.0.0.0/8"]

  tags = var.tags
}

# Subnet for the AKS nodes.
resource "azurerm_subnet" "aks" {
  name                 = "aks-subnet"
  virtual_network_name = azurerm_virtual_network.main.name
  resource_group_name  = var.resource_group_name
  address_prefixes     = ["10.1.0.0/16"]

  # Service endpoints enabled on this subnet for Key Vault and Container Registry.
  service_endpoints = [
    "Microsoft.KeyVault",
    "Microsoft.ContainerRegistry"
  ]
}

# Subnet used by the Application Gateway (ingress).
resource "azurerm_subnet" "ingress" {
  name                 = "ingress-subnet"
  virtual_network_name = azurerm_virtual_network.main.name
  resource_group_name  = var.resource_group_name
  address_prefixes     = ["10.2.0.0/16"]
}

Container Registry Integration

  1. Azure Container Registry
# Private container registry for the cluster's images.
resource "azurerm_container_registry" "main" {
  # Registry names may contain only alphanumeric characters, while
  # var.project_name is also used for hyphenated resource names, so strip
  # anything that is not a letter or digit before appending the suffix.
  name                = "${replace(var.project_name, "/[^0-9A-Za-z]/", "")}acr"
  resource_group_name = var.resource_group_name
  location            = var.location
  sku                 = "Premium"
  admin_enabled       = false

  # Deny by default; allow only the configured IP range and the AKS subnet.
  network_rule_set {
    default_action = "Deny"

    ip_rule {
      action   = "Allow"
      ip_range = var.allowed_ip_range
    }

    virtual_network {
      action    = "Allow"
      subnet_id = azurerm_subnet.aks.id
    }
  }

  tags = var.tags
}

# Lets the cluster's kubelet identity pull images from the registry.
resource "azurerm_role_assignment" "aks_acr" {
  scope                = azurerm_container_registry.main.id
  role_definition_name = "AcrPull"
  principal_id         = azurerm_kubernetes_cluster.main.kubelet_identity[0].object_id

  # The kubelet identity is created in the same apply as the cluster; skipping
  # the AAD lookup avoids failures caused by directory replication lag.
  skip_service_principal_aad_check = true
}

Monitoring Configuration

  1. Azure Monitor for Containers
# Sends AKS control-plane logs and platform metrics to the Log Analytics workspace.
#
# Retention is intentionally disabled on every category: diagnostic-setting
# retention policies only ever applied to storage-account destinations, and
# Azure rejects non-zero retention on newly created settings. For a Log
# Analytics destination, retention is controlled by the workspace itself
# (retention_in_days on azurerm_log_analytics_workspace.main).
resource "azurerm_monitor_diagnostic_setting" "aks" {
  name                       = "${var.project_name}-diag"
  target_resource_id         = azurerm_kubernetes_cluster.main.id
  log_analytics_workspace_id = azurerm_log_analytics_workspace.main.id

  log {
    category = "kube-apiserver"
    enabled  = true

    retention_policy {
      enabled = false
    }
  }

  log {
    category = "kube-controller-manager"
    enabled  = true

    retention_policy {
      enabled = false
    }
  }

  metric {
    category = "AllMetrics"
    enabled  = true

    retention_policy {
      enabled = false
    }
  }
}

# Metric alert on the cluster's node CPU usage.
resource "azurerm_monitor_metric_alert" "node_cpu" {
  name                = "${var.project_name}-cpu-alert"
  description         = "Alert when CPU usage is high"
  resource_group_name = var.resource_group_name
  scopes              = [azurerm_kubernetes_cluster.main.id]

  # Fires when the average node CPU percentage is greater than 80.
  criteria {
    metric_namespace = "Microsoft.ContainerService/managedClusters"
    metric_name      = "node_cpu_usage_percentage"
    aggregation      = "Average"
    operator         = "GreaterThan"
    threshold        = 80
  }

  # Notification target supplied by the caller.
  action {
    action_group_id = var.action_group_id
  }
}

Security Configuration

  1. Pod Security Policies
# Restrictive pod security policy: no privileged containers, no privilege
# escalation, non-root users only, read-only root filesystem.
# NOTE(review): the PodSecurityPolicy API was removed in Kubernetes 1.25 —
# confirm var.kubernetes_version still serves it before applying.
resource "kubernetes_pod_security_policy" "restricted" {
  metadata {
    name = "restricted"
  }

  spec {
    # Privilege controls
    privileged                 = false
    allow_privilege_escalation = false
    read_only_root_filesystem  = true

    # Volume types pods may mount
    volumes = [
      "configMap",
      "emptyDir",
      "projected",
      "secret",
      "downwardAPI",
      "persistentVolumeClaim",
    ]

    # User / SELinux rules
    run_as_user {
      rule = "MustRunAsNonRoot"
    }

    se_linux {
      rule = "RunAsAny"
    }

    # Group IDs must fall in 1-65535
    supplemental_groups {
      rule = "MustRunAs"

      range {
        min = 1
        max = 65535
      }
    }

    fs_group {
      rule = "MustRunAs"

      range {
        min = 1
        max = 65535
      }
    }
  }
}
  2. Network Policies
# Default-deny network policy for the "default" namespace.
resource "kubernetes_network_policy" "default_deny" {
  metadata {
    namespace = "default"
    name      = "default-deny"
  }

  spec {
    # Both traffic directions are listed and no allow rules are defined.
    policy_types = ["Ingress", "Egress"]

    # Empty selector: applies to every pod in the namespace.
    pod_selector {}
  }
}

Best Practices

  1. Security

    • Enable RBAC
    • Use managed identities
    • Implement network policies
    • Regular updates
  2. High Availability

    • Multiple node pools
    • Zone redundancy
    • Cluster autoscaling
    • Node pool autoscaling
  3. Monitoring

    • Enable Container Insights
    • Configure alerts
    • Log aggregation
    • Performance monitoring
  4. Cost Optimization

    • Right-size nodes
    • Use spot instances
    • Implement autoscaling
    • Monitor usage

Advanced Features

  1. Application Gateway Ingress Controller
# WAF-enabled Application Gateway (v2) placed in the ingress subnet.
# NOTE(review): azurerm_public_ip.appgw is referenced below but is not defined
# in the snippets shown here — confirm it is declared elsewhere.
resource "azurerm_application_gateway" "main" {
  name                = "${var.project_name}-appgw"
  location            = var.location
  resource_group_name = var.resource_group_name

  sku {
    name     = "WAF_v2"
    tier     = "WAF_v2"
    capacity = 2
  }

  waf_configuration {
    enabled          = true
    firewall_mode    = "Prevention"
    rule_set_type    = "OWASP"
    rule_set_version = "3.1"
  }

  # --- Network attachment ---------------------------------------------------
  gateway_ip_configuration {
    name      = "gateway-ip-config"
    subnet_id = azurerm_subnet.ingress.id
  }

  # --- Frontend -------------------------------------------------------------
  frontend_ip_configuration {
    name                 = "frontend-ip-config"
    public_ip_address_id = azurerm_public_ip.appgw.id
  }

  frontend_port {
    name = "http"
    port = 80
  }

  http_listener {
    name                           = "http-listener"
    protocol                       = "Http"
    frontend_ip_configuration_name = "frontend-ip-config"
    frontend_port_name             = "http"
  }

  # --- Backend --------------------------------------------------------------
  backend_address_pool {
    name = "backend-pool"
  }

  backend_http_settings {
    name                  = "http-settings"
    protocol              = "Http"
    port                  = 80
    request_timeout       = 30
    cookie_based_affinity = "Disabled"
  }

  # --- Routing: listener -> backend pool using the HTTP settings above -------
  request_routing_rule {
    name                       = "routing-rule"
    rule_type                  = "Basic"
    priority                   = 1
    http_listener_name         = "http-listener"
    backend_address_pool_name  = "backend-pool"
    backend_http_settings_name = "http-settings"
  }

  tags = var.tags
}
  2. Azure Policy for AKS
# Assigns a built-in Azure Policy definition at the scope of the AKS cluster.
# NOTE(review): verify that this definition ID really is the disk-encryption
# policy named below and that it accepts "Deny" as an effect value.
resource "azurerm_policy_assignment" "aks" {
  name         = "aks-policy"
  display_name = "AKS Disk Encryption"
  description  = "Ensure AKS cluster uses disk encryption"

  scope                = azurerm_kubernetes_cluster.main.id
  policy_definition_id = "/providers/Microsoft.Authorization/policyDefinitions/a8eff44f-8c92-45c3-a3fb-9880802d67a7"

  # Policy parameters, passed as a JSON document.
  parameters = <<PARAMETERS
{
  "effect": {
    "value": "Deny"
  }
}
PARAMETERS
}

Conclusion

You’ve learned how to set up and manage Azure Kubernetes Service using Terraform. This setup provides:

  • Secure cluster deployment
  • Network isolation
  • Monitoring and alerts
  • Scalability and high availability

Remember to:

  • Monitor cluster health
  • Keep Kubernetes version updated
  • Review security settings
  • Optimize resource usage