Managing Azure Monitor with Terraform

Learn how to set up and manage Azure Monitor using Terraform, including metrics, logs, alerts, and workbooks

Managing Azure Monitor with Terraform

Azure Monitor is a comprehensive monitoring solution for collecting, analyzing, and acting on telemetry from cloud and on-premises environments. This guide demonstrates how to set up and manage Azure Monitor using Terraform.

Video Tutorial

Learn more about managing Azure Monitor with Terraform in this comprehensive video tutorial:

Prerequisites

  • Azure CLI configured with appropriate permissions
  • Terraform installed (version 1.0.0 or later)
  • Resource group created
  • Understanding of monitoring concepts

Project Structure

terraform-azure-monitor/
├── main.tf
├── variables.tf
├── outputs.tf
└── modules/
    └── monitor/
        ├── main.tf
        ├── variables.tf
        ├── outputs.tf
        ├── queries/
        │   ├── performance.kql
        │   ├── security.kql
        │   └── errors.kql
        ├── workbooks/
        │   └── performance.json
        └── dashboards/
            └── main.json

Monitor Configuration

Create modules/monitor/main.tf:

# Log Analytics workspace referenced by the Application Insights component,
# alerts, data collection rule and solutions defined in this module.
resource "azurerm_log_analytics_workspace" "main" {
  name                = "${var.project_name}-workspace"
  resource_group_name = var.resource_group_name
  location            = var.location
  tags                = var.tags

  # Pricing tier, retention period and the daily ingestion cap.
  sku               = "PerGB2018"
  retention_in_days = 30
  daily_quota_gb    = var.daily_quota_gb

  # Public-network ingestion and query access are both switched off.
  internet_query_enabled     = false
  internet_ingestion_enabled = false
}

# Workspace-based Application Insights component for a web application.
resource "azurerm_application_insights" "main" {
  name                = "${var.project_name}-appinsights"
  resource_group_name = var.resource_group_name
  location            = var.location
  tags                = var.tags

  application_type = "web"

  # Telemetry is stored in the Log Analytics workspace declared in this module.
  workspace_id = azurerm_log_analytics_workspace.main.id

  # Keep data for 90 days and ingest every telemetry item (no sampling).
  retention_in_days   = 90
  sampling_percentage = 100

  # IP masking stays enabled (it is not disabled).
  disable_ip_masking = false
}

# Action group shared by every alert in this module. It fans a fired alert out
# to e-mail, SMS, a webhook and an Azure Function.
resource "azurerm_monitor_action_group" "main" {
  name                = "${var.project_name}-action-group"
  short_name          = "alerts"
  resource_group_name = var.resource_group_name

  # Operations mailbox.
  email_receiver {
    name                    = "operations"
    use_common_alert_schema = true
    email_address           = var.ops_email
  }

  # On-call phone; the country code is fixed to "1".
  sms_receiver {
    name         = "oncall"
    phone_number = var.oncall_phone
    country_code = "1"
  }

  # Webhook receiver named "servicenow".
  webhook_receiver {
    name                    = "servicenow"
    use_common_alert_schema = true
    service_uri             = var.servicenow_webhook_url
  }

  # Azure Function receiver that invokes the "AlertProcessor" function.
  azure_function_receiver {
    name                     = "processor"
    use_common_alert_schema  = true
    function_name            = "AlertProcessor"
    function_app_resource_id = var.function_app_id
    http_trigger_url         = var.function_trigger_url
  }
}

# Scheduled log-query alert: runs queries/performance.kql (shipped inside this
# module) against the workspace and notifies the shared action group.
resource "azurerm_monitor_scheduled_query_rules_alert" "performance" {
  name                = "${var.project_name}-performance-alert"
  resource_group_name = var.resource_group_name
  location            = var.location

  description = "Alert when CPU usage is high"
  enabled     = true
  severity    = 2

  # What to query and where.
  data_source_id = azurerm_log_analytics_workspace.main.id
  query          = file("${path.module}/queries/performance.kql")

  # Evaluate every 5 minutes over the trailing 30 minutes.
  frequency   = 5
  time_window = 30

  # NOTE(review): without a metric_trigger block the threshold is compared with
  # the number of rows the query returns, not with a value inside those rows.
  # Confirm performance.kql already filters down to the high-CPU records.
  trigger {
    threshold = 90
    operator  = "GreaterThan"
  }

  action {
    action_group = [azurerm_monitor_action_group.main.id]
  }
}

# Metric alert on the Application Insights component: fires when the average
# "requests/duration" for the "web-api" role exceeds 1000.
resource "azurerm_monitor_metric_alert" "latency" {
  name                = "${var.project_name}-latency-alert"
  resource_group_name = var.resource_group_name
  description         = "Alert when request latency is high"
  severity            = 2

  # Single-resource scope: the Application Insights component.
  scopes               = [azurerm_application_insights.main.id]
  target_resource_type = "Microsoft.Insights/components"

  # Evaluate every minute over a five-minute window; resolve automatically
  # once the condition clears.
  frequency     = "PT1M"
  window_size   = "PT5M"
  auto_mitigate = true

  criteria {
    metric_namespace = "microsoft.insights/components"
    metric_name      = "requests/duration"
    aggregation      = "Average"
    operator         = "GreaterThan"
    threshold        = 1000

    # Only consider telemetry reported by the "web-api" cloud role.
    dimension {
      name     = "cloud/roleName"
      operator = "Include"
      values   = ["web-api"]
    }
  }

  action {
    action_group_id = azurerm_monitor_action_group.main.id
  }
}

# Identity of the caller running Terraform; supplies the subscription ID used
# for the subscription-wide alert scope below.
data "azurerm_client_config" "current" {}

# Activity Log Alert: notifies the shared action group about Azure Service
# Health incidents and planned maintenance for the listed services.
resource "azurerm_monitor_activity_log_alert" "service_health" {
  name                = "${var.project_name}-health-alert"
  resource_group_name = var.resource_group_name
  scopes              = ["/subscriptions/${data.azurerm_client_config.current.subscription_id}"]
  description         = "Alert on service health issues"

  # NOTE(review): recent azurerm major versions also expect a top-level
  # `location` on this resource (e.g. "global") - confirm against the provider
  # version you pin.

  criteria {
    category = "ServiceHealth"

    service_health {
      events   = ["Incident", "Maintenance"]
      services = ["App Service", "SQL Database"]

      # The provider argument is `locations`; `regions` is not part of the
      # service_health schema and fails validation.
      # NOTE(review): Service Health matches on region display names such as
      # "West Europe" - confirm var.location is supplied in that form.
      locations = [var.location]
    }
  }

  action {
    action_group_id = azurerm_monitor_action_group.main.id
  }
}

# Workbook rendered from workbooks/performance.json (shipped inside this module).
resource "azurerm_application_insights_workbook" "main" {
  # The provider requires the workbook name to be a lowercase GUID; a free-form
  # name such as "<project>-workbook" is rejected. uuidv5 derives a stable GUID
  # from the project name, so the value does not change between plans.
  name = uuidv5("dns", "${var.project_name}-workbook")

  resource_group_name = var.resource_group_name
  location            = var.location

  # Human-readable title shown in the portal.
  display_name = "Application Performance"
  data_json    = file("${path.module}/workbooks/performance.json")

  tags = var.tags
}

# Portal dashboard rendered from the dashboards/main.json template, which
# receives the subscription ID and the workspace resource ID as variables.
# NOTE(review): relies on a `data "azurerm_client_config" "current"` data source
# being declared in this module.
resource "azurerm_portal_dashboard" "main" {
  name                = "${var.project_name}-dashboard"
  location            = var.location
  resource_group_name = var.resource_group_name
  tags                = var.tags

  dashboard_properties = templatefile(
    "${path.module}/dashboards/main.json",
    {
      subscription_id = data.azurerm_client_config.current.subscription_id
      workspace_id    = azurerm_log_analytics_workspace.main.id
    }
  )
}

# Data collection rule: samples the total processor-time counter every 60
# seconds and routes it to the Log Analytics workspace.
resource "azurerm_monitor_data_collection_rule" "main" {
  name                = "${var.project_name}-dcr"
  location            = var.location
  resource_group_name = var.resource_group_name

  # What to collect.
  data_sources {
    performance_counter {
      name                          = "perfcounter"
      streams                       = ["Microsoft-InsightsMetrics"]
      counter_specifiers            = ["\\Processor(_Total)\\% Processor Time"]
      sampling_frequency_in_seconds = 60
    }
  }

  # Where it can go; the name "workspace" is what data_flow refers to.
  destinations {
    log_analytics {
      name                  = "workspace"
      workspace_resource_id = azurerm_log_analytics_workspace.main.id
    }
  }

  # Route the stream to the named destination.
  data_flow {
    streams      = ["Microsoft-InsightsMetrics"]
    destinations = ["workspace"]
  }
}

Network Configuration

  1. Private Link Configuration
# Azure Monitor Private Link Scope (AMPLS). A private endpoint with the
# "azuremonitor" subresource can only target a private link scope; it cannot
# target a Log Analytics workspace directly.
resource "azurerm_monitor_private_link_scope" "main" {
  name                = "${var.project_name}-ampls"
  resource_group_name = var.resource_group_name
}

# Attach the workspace to the scope so that traffic arriving through the
# private endpoint can reach it.
resource "azurerm_monitor_private_link_scoped_service" "workspace" {
  name                = "${var.project_name}-ampls-workspace"
  resource_group_name = var.resource_group_name
  scope_name          = azurerm_monitor_private_link_scope.main.name
  linked_resource_id  = azurerm_log_analytics_workspace.main.id
}

# Private endpoint that exposes the private link scope inside the monitoring
# subnet.
# NOTE(review): azurerm_subnet.monitor is not declared in the code shown here;
# it must exist elsewhere in the configuration.
resource "azurerm_private_endpoint" "monitor" {
  name                = "${var.project_name}-pe"
  location            = var.location
  resource_group_name = var.resource_group_name
  subnet_id           = azurerm_subnet.monitor.id

  private_service_connection {
    name                           = "${var.project_name}-psc"
    private_connection_resource_id = azurerm_monitor_private_link_scope.main.id
    is_manual_connection           = false
    subresource_names              = ["azuremonitor"]
  }

  # Registers the endpoint's records in the private DNS zone.
  private_dns_zone_group {
    name                 = "default"
    private_dns_zone_ids = [azurerm_private_dns_zone.monitor.id]
  }
}

# Private DNS zone for the Azure Monitor private link endpoints.
# NOTE(review): Azure Monitor private link normally also needs the
# privatelink.oms.opinsights.azure.com, privatelink.ods.opinsights.azure.com,
# privatelink.agentsvc.azure-automation.net and privatelink.blob.core.windows.net
# zones - confirm whether they are managed elsewhere.
resource "azurerm_private_dns_zone" "monitor" {
  resource_group_name = var.resource_group_name
  name                = "privatelink.monitor.azure.com"
}

# Links the private DNS zone to the virtual network so that hosts in the VNet
# resolve records from that zone.
# NOTE(review): azurerm_virtual_network.main is not declared in the code shown
# here; it must exist elsewhere in the configuration.
resource "azurerm_private_dns_zone_virtual_network_link" "monitor" {
  name                = "${var.project_name}-vnet-link"
  resource_group_name = var.resource_group_name

  virtual_network_id    = azurerm_virtual_network.main.id
  private_dns_zone_name = azurerm_private_dns_zone.monitor.name
}

Security Configuration

  1. Role Assignments
# Grants the contributor principal the "Log Analytics Contributor" role,
# scoped to this workspace only.
resource "azurerm_role_assignment" "monitor_contributor" {
  principal_id         = var.contributor_principal_id
  role_definition_name = "Log Analytics Contributor"
  scope                = azurerm_log_analytics_workspace.main.id
}

# Grants the reader principal the "Log Analytics Reader" role, scoped to this
# workspace only.
resource "azurerm_role_assignment" "monitor_reader" {
  principal_id         = var.reader_principal_id
  role_definition_name = "Log Analytics Reader"
  scope                = azurerm_log_analytics_workspace.main.id
}
  2. Customer-Managed Keys
# Customer-managed keys for Log Analytics are configured on a dedicated
# cluster, not on the workspace: the azurerm provider has no
# "azurerm_log_analytics_workspace_customer_managed_key" resource. The cluster
# carries the identity and the key; the workspace is then linked to the cluster.
#
# NOTE(review): a dedicated cluster is billed at a commitment tier - confirm the
# cost is acceptable before applying.
# NOTE(review): azurerm_user_assigned_identity.monitor and
# azurerm_key_vault_key.monitor are not declared in the code shown here; the
# identity presumably needs get/wrapKey/unwrapKey access on the Key Vault.
resource "azurerm_log_analytics_cluster" "main" {
  name                = "${var.project_name}-cluster"
  resource_group_name = var.resource_group_name
  location            = var.location

  identity {
    type         = "UserAssigned"
    identity_ids = [azurerm_user_assigned_identity.monitor.id]
  }

  tags = var.tags
}

# Points the cluster at the Key Vault key used to encrypt its data.
resource "azurerm_log_analytics_cluster_customer_managed_key" "main" {
  log_analytics_cluster_id = azurerm_log_analytics_cluster.main.id
  key_vault_key_id         = azurerm_key_vault_key.monitor.id
}

# Links the workspace to the cluster so that newly ingested data is encrypted
# with the customer-managed key.
resource "azurerm_log_analytics_linked_service" "cluster" {
  resource_group_name = var.resource_group_name
  workspace_id        = azurerm_log_analytics_workspace.main.id
  write_access_id     = azurerm_log_analytics_cluster.main.id

  # The key must be attached to the cluster before the workspace is linked.
  depends_on = [azurerm_log_analytics_cluster_customer_managed_key.main]
}

Advanced Features

  1. Log Analytics Solutions
# Installs the "Security" gallery solution on the workspace.
resource "azurerm_log_analytics_solution" "security" {
  solution_name       = "Security"
  resource_group_name = var.resource_group_name
  location            = var.location

  # Target workspace, identified by both name and resource ID.
  workspace_name        = azurerm_log_analytics_workspace.main.name
  workspace_resource_id = azurerm_log_analytics_workspace.main.id

  plan {
    product   = "OMSGallery/Security"
    publisher = "Microsoft"
  }
}

# Installs the "ContainerInsights" gallery solution on the workspace.
resource "azurerm_log_analytics_solution" "containers" {
  solution_name       = "ContainerInsights"
  resource_group_name = var.resource_group_name
  location            = var.location

  # Target workspace, identified by both name and resource ID.
  workspace_name        = azurerm_log_analytics_workspace.main.name
  workspace_resource_id = azurerm_log_analytics_workspace.main.id

  plan {
    product   = "OMSGallery/ContainerInsights"
    publisher = "Microsoft"
  }
}
  2. Saved Searches
# Saved search "Critical Errors" in the "Errors" category; the query text is
# read from queries/errors.kql inside this module.
resource "azurerm_log_analytics_saved_search" "errors" {
  log_analytics_workspace_id = azurerm_log_analytics_workspace.main.id

  name         = "critical-errors"
  display_name = "Critical Errors"
  category     = "Errors"

  query = file("${path.module}/queries/errors.kql")
}

Best Practices

  1. Data Collection

    • Configure appropriate retention
    • Use data collection rules
    • Enable sampling
    • Filter unnecessary logs
  2. Security

    • Enable private endpoints
    • Use RBAC
    • Implement encryption
    • Control data access
  3. Cost Management

    • Set daily quotas
    • Monitor data ingestion
    • Use appropriate retention
    • Configure sampling
  4. Alert Configuration

    • Set appropriate thresholds
    • Configure action groups
    • Use dynamic thresholds
    • Implement alert suppression

Conclusion

You’ve learned how to set up and manage Azure Monitor using Terraform. This setup provides:

  • Comprehensive monitoring
  • Log analytics
  • Alerting and dashboards
  • Security and compliance

Remember to:

  • Monitor data ingestion
  • Review alert configurations
  • Maintain dashboards
  • Update access controls