Managing AWS ElastiCache with Terraform

Learn how to provision and manage AWS ElastiCache Redis and Memcached clusters using Terraform, including replication, backup, and monitoring

Managing AWS ElastiCache with Terraform

AWS ElastiCache is a fully managed in-memory caching service supporting Redis and Memcached. This guide demonstrates how to set up and manage ElastiCache using Terraform.

Video Tutorial

Learn more about managing AWS ElastiCache with Terraform in this comprehensive video tutorial:

Prerequisites

  • AWS CLI configured with appropriate permissions
  • Terraform installed (version 1.0.0 or later)
  • Basic understanding of Redis/Memcached
  • VPC with private subnets

Project Structure

terraform-elasticache/
├── main.tf
├── variables.tf
├── outputs.tf
├── modules/
│   └── elasticache/
│       ├── main.tf
│       ├── variables.tf
│       └── outputs.tf
└── config/
    └── parameter-groups.json

ElastiCache Configuration

Create modules/elasticache/main.tf:

# Standalone Redis cache cluster (single node, no replication).
#
# ElastiCache only accepts one node for a standalone Redis cache cluster, so
# the node count is pinned to 1 instead of reading var.num_cache_nodes. That
# variable is also used by the Memcached cluster, whose cross-AZ mode needs a
# value greater than 1, so sharing it would make one of the two resources fail.
resource "aws_elasticache_cluster" "redis" {
  cluster_id           = "${var.project_name}-redis"
  engine               = "redis"
  node_type            = var.node_type
  num_cache_nodes      = 1
  parameter_group_name = aws_elasticache_parameter_group.redis.name
  port                 = 6379
  security_group_ids   = [aws_security_group.redis.id]
  subnet_group_name    = aws_elasticache_subnet_group.main.name

  # Daily snapshots are kept for 7 days; the snapshot window ends where the
  # weekly maintenance window begins, so the two never overlap.
  snapshot_retention_limit = 7
  snapshot_window          = "00:00-05:00"
  maintenance_window       = "mon:05:00-mon:09:00"

  # Minor engine upgrades are allowed, but changes are deferred to the
  # maintenance window rather than applied immediately.
  auto_minor_version_upgrade = true
  apply_immediately          = false

  # NOTE(review): aws_sns_topic.redis is not declared in the snippets of this
  # guide (only aws_sns_topic.alerts is) - confirm it exists elsewhere in the
  # module or point this at the alerts topic.
  notification_topic_arn = aws_sns_topic.redis.arn

  tags = merge(
    var.tags,
    {
      Name = "${var.project_name}-redis"
    }
  )
}

# Redis replication group: 2 node groups (shards) with 1 replica each,
# Multi-AZ with automatic failover.
#
# Uses the AWS provider v4+ argument names: `description` replaces the removed
# `replication_group_description`, and `num_node_groups` /
# `replicas_per_node_group` are top-level arguments replacing the removed
# `cluster_mode {}` block.
resource "aws_elasticache_replication_group" "redis" {
  replication_group_id = "${var.project_name}-redis-rep"
  description          = "Redis replication group"
  node_type            = var.node_type
  port                 = 6379

  # NOTE(review): more than one node group means cluster mode is enabled, which
  # presumably requires a parameter group with cluster-enabled = yes - confirm
  # that the referenced group is suitable.
  parameter_group_name = aws_elasticache_parameter_group.redis.name

  automatic_failover_enabled = true
  multi_az_enabled           = true
  subnet_group_name          = aws_elasticache_subnet_group.main.name
  security_group_ids         = [aws_security_group.redis.id]

  # Sharding layout (formerly the cluster_mode block).
  num_node_groups         = 2
  replicas_per_node_group = 1

  # Daily snapshots are kept for 7 days; the snapshot window ends where the
  # weekly maintenance window begins, so the two never overlap.
  snapshot_retention_limit = 7
  snapshot_window          = "00:00-05:00"
  maintenance_window       = "mon:05:00-mon:09:00"

  # Minor engine upgrades are allowed, but changes are deferred to the
  # maintenance window rather than applied immediately.
  auto_minor_version_upgrade = true
  apply_immediately          = false

  # NOTE(review): aws_sns_topic.redis is not declared in the snippets of this
  # guide (only aws_sns_topic.alerts is) - confirm it exists elsewhere in the
  # module or point this at the alerts topic.
  notification_topic_arn = aws_sns_topic.redis.arn

  tags = merge(
    var.tags,
    {
      Name = "${var.project_name}-redis-rep"
    }
  )
}

# Memcached cache cluster with its nodes spread across availability zones.
resource "aws_elasticache_cluster" "memcached" {
  engine     = "memcached"
  cluster_id = "${var.project_name}-memcached"
  port       = 11211

  # Sizing
  node_type       = var.node_type
  num_cache_nodes = var.num_cache_nodes

  # Placement
  # NOTE(review): cross-az presumably needs num_cache_nodes > 1 and one
  # preferred availability zone per node - confirm the variable values match.
  az_mode                      = "cross-az"
  preferred_availability_zones = var.availability_zones
  subnet_group_name            = aws_elasticache_subnet_group.main.name
  security_group_ids           = [aws_security_group.memcached.id]

  # Engine settings and weekly maintenance slot
  parameter_group_name = aws_elasticache_parameter_group.memcached.name
  maintenance_window   = "mon:05:00-mon:09:00"

  # NOTE(review): aws_sns_topic.memcached is not declared in the snippets of
  # this guide (only aws_sns_topic.alerts is) - confirm it exists elsewhere.
  notification_topic_arn = aws_sns_topic.memcached.arn

  tags = merge(var.tags, { Name = "${var.project_name}-memcached" })
}

# Parameter group for the Redis resources (redis6.x family).
resource "aws_elasticache_parameter_group" "redis" {
  name   = "${var.project_name}-redis-params"
  family = "redis6.x"

  # One parameter block is generated per entry in this map.
  dynamic "parameter" {
    for_each = {
      "maxmemory-policy" = "allkeys-lru"
      "timeout"          = "300"
    }

    content {
      name  = parameter.key
      value = parameter.value
    }
  }

  tags = merge(var.tags, { Name = "${var.project_name}-redis-params" })
}

# Parameter group for the Memcached cluster (memcached1.6 family).
resource "aws_elasticache_parameter_group" "memcached" {
  name   = "${var.project_name}-memcached-params"
  family = "memcached1.6"

  # One parameter block is generated per entry in this map.
  dynamic "parameter" {
    for_each = {
      "max_item_size"            = "10485760"
      "chunk_size_growth_factor" = "1.25"
    }

    content {
      name  = parameter.key
      value = parameter.value
    }
  }

  tags = merge(var.tags, { Name = "${var.project_name}-memcached-params" })
}

# Subnet group shared by the Redis and Memcached resources; the subnets are
# supplied by the caller through var.subnet_ids.
resource "aws_elasticache_subnet_group" "main" {
  subnet_ids = var.subnet_ids
  name       = "${var.project_name}-subnet-group"

  tags = merge(var.tags, { Name = "${var.project_name}-subnet-group" })
}

# Security group for Redis: inbound TCP 6379 from the allowed CIDR blocks,
# unrestricted outbound.
resource "aws_security_group" "redis" {
  vpc_id      = var.vpc_id
  name        = "${var.project_name}-redis"
  description = "Security group for Redis cluster"

  # Inbound: Redis port only, limited to var.allowed_cidr_blocks.
  ingress {
    description = "Redis from VPC"
    protocol    = "tcp"
    from_port   = 6379
    to_port     = 6379
    cidr_blocks = var.allowed_cidr_blocks
  }

  # Outbound: all protocols and ports to any destination.
  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = merge(var.tags, { Name = "${var.project_name}-redis" })
}

# Security group for Memcached: inbound TCP 11211 from the allowed CIDR blocks,
# unrestricted outbound.
resource "aws_security_group" "memcached" {
  vpc_id      = var.vpc_id
  name        = "${var.project_name}-memcached"
  description = "Security group for Memcached cluster"

  # Inbound: Memcached port only, limited to var.allowed_cidr_blocks.
  ingress {
    description = "Memcached from VPC"
    protocol    = "tcp"
    from_port   = 11211
    to_port     = 11211
    cidr_blocks = var.allowed_cidr_blocks
  }

  # Outbound: all protocols and ports to any destination.
  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = merge(var.tags, { Name = "${var.project_name}-memcached" })
}

Monitoring and Alerts

  1. CloudWatch Alarms
# Alarm: average CPU of the standalone Redis cluster above 75% for two
# consecutive 5-minute periods. Notifies the alerts SNS topic.
resource "aws_cloudwatch_metric_alarm" "cpu_utilization" {
  alarm_name        = "${var.project_name}-cpu-utilization"
  alarm_description = "CPU utilization is too high"
  alarm_actions     = [aws_sns_topic.alerts.arn]

  # Metric being watched
  namespace   = "AWS/ElastiCache"
  metric_name = "CPUUtilization"
  dimensions = {
    CacheClusterId = aws_elasticache_cluster.redis.id
  }

  # Evaluation rule
  statistic           = "Average"
  period              = 300
  evaluation_periods  = 2
  comparison_operator = "GreaterThanThreshold"
  threshold           = 75
}

# Alarm: more than 1000 evictions (summed) on the standalone Redis cluster for
# two consecutive 5-minute periods. Notifies the alerts SNS topic.
resource "aws_cloudwatch_metric_alarm" "evictions" {
  alarm_name        = "${var.project_name}-evictions"
  alarm_description = "High number of evictions"
  alarm_actions     = [aws_sns_topic.alerts.arn]

  # Metric being watched
  namespace   = "AWS/ElastiCache"
  metric_name = "Evictions"
  dimensions = {
    CacheClusterId = aws_elasticache_cluster.redis.id
  }

  # Evaluation rule
  statistic           = "Sum"
  period              = 300
  evaluation_periods  = 2
  comparison_operator = "GreaterThanThreshold"
  threshold           = 1000
}
  2. SNS Topics
# SNS topic that the CloudWatch alarms publish to.
resource "aws_sns_topic" "alerts" {
  name = "${var.project_name}-cache-alerts"
  tags = merge(var.tags, { Name = "${var.project_name}-alerts" })
}

# Delivers messages from the alerts topic to the address in var.alert_email.
resource "aws_sns_topic_subscription" "email" {
  protocol  = "email"
  endpoint  = var.alert_email
  topic_arn = aws_sns_topic.alerts.arn
}

Backup and Recovery

  1. Automatic Snapshots
# Illustrative fragment showing only the backup-related arguments of a Redis
# cache cluster; the remaining required arguments are elided below.
resource "aws_elasticache_cluster" "redis_with_backup" {
  # ... other configuration ...

  # Keep automatic snapshots for 7 days, taken inside the given UTC window.
  snapshot_retention_limit = 7
  snapshot_window         = "00:00-05:00"
  
  # Name given to the snapshot taken when the cluster is destroyed.
  final_snapshot_identifier = "${var.project_name}-final-snapshot"
}
  2. Manual Snapshot
# Manual snapshot of the standalone Redis cluster.
# NOTE(review): the Terraform AWS provider does not appear to offer an
# `aws_elasticache_snapshot` resource type - confirm against the provider
# documentation before using this block. Manual snapshots are normally taken
# outside Terraform, e.g. with `aws elasticache create-snapshot`.
resource "aws_elasticache_snapshot" "manual" {
  cache_cluster_id    = aws_elasticache_cluster.redis.id
  snapshot_name       = "${var.project_name}-manual-snapshot"
}

Advanced Features

  1. Global Datastore
# Global Datastore built on top of an existing primary replication group.
resource "aws_elasticache_global_replication_group" "example" {
  # NOTE(review): aws_elasticache_replication_group.primary is not declared in
  # the snippets of this guide - confirm which replication group is meant.
  primary_replication_group_id = aws_elasticache_replication_group.primary.id

  # Suffix appended to the generated global replication group ID.
  global_replication_group_id_suffix = "global"
}

# Secondary replication group that joins the Global Datastore. It is created
# through the aliased `aws.secondary` provider configuration.
#
# Uses `description` (AWS provider v4+); the older
# `replication_group_description` argument was removed in provider v5.
resource "aws_elasticache_replication_group" "secondary" {
  provider = aws.secondary

  replication_group_id        = "${var.project_name}-secondary"
  description                 = "Secondary replication group"
  global_replication_group_id = aws_elasticache_global_replication_group.example.global_replication_group_id
}
  2. User Management
# Redis user "administrator" (ID "admin"), authenticated with
# var.admin_password.
resource "aws_elasticache_user" "admin" {
  engine    = "REDIS"
  user_id   = "admin"
  user_name = "administrator"
  passwords = [var.admin_password]

  # ACL: enabled, restricted to keys matching app::*, starting from no
  # permissions, then granting the read/hash/bitmap/geo categories and revoking
  # the individual commands listed.
  access_string = "on ~app::* -@all +@read +@hash +@bitmap +@geo -setbit -bitfield -hset -hmset -hincrby -hincrbyfloat -hdel -hsetnx -hmget -hexists -hlen -hstrlen -hkeys -hvals -hgetall -hscan"
}

# User group "admin" containing the admin user.
# NOTE(review): ElastiCache presumably requires a Redis user group to include
# a user named "default" - confirm whether one must be added here.
resource "aws_elasticache_user_group" "admin" {
  user_group_id = "admin"
  engine        = "REDIS"
  user_ids      = [aws_elasticache_user.admin.user_id]
}

Best Practices

  1. Performance Optimization

    • Choose appropriate node type
    • Configure memory policies
    • Monitor cache hits/misses
    • Implement proper eviction
  2. High Availability

    • Use replication groups
    • Enable Multi-AZ
    • Configure automatic failover
    • Regular backups
  3. Security

    • Use security groups
    • Implement encryption
    • Configure authentication
    • Regular updates
  4. Cost Optimization

    • Use appropriate node types
    • Implement auto-scaling

Conclusion

You’ve learned how to set up and manage AWS ElastiCache using Terraform. This setup provides:

  • Redis and Memcached clusters
  • Replication and failover
  • Monitoring and alerts
  • Backup and recovery

Remember to:

  • Monitor performance
  • Implement security
  • Optimize costs
  • Maintain backups

Integration Examples

  1. Application Integration
import os

import redis

# The cache endpoint is produced by Terraform
# (aws_elasticache_cluster.redis.cache_nodes[0].address). That expression is
# not valid Python, so expose it as a Terraform output and hand it to the
# application through the REDIS_HOST environment variable.
redis_client = redis.Redis(
    host=os.environ["REDIS_HOST"],
    port=6379,
    decode_responses=True,  # return str instead of bytes
)

# Set value
redis_client.set('key', 'value')

# Get value
value = redis_client.get('key')
  2. Session Storage
import os

from flask import Flask
from flask_session import Session
from redis import Redis

# Store Flask sessions in Redis.
# The cache endpoint is produced by Terraform
# (aws_elasticache_cluster.redis.cache_nodes[0].address). That expression is
# not valid Python, so expose it as a Terraform output and hand it to the
# application through the REDIS_HOST environment variable.
app = Flask(__name__)
app.config['SESSION_TYPE'] = 'redis'
app.config['SESSION_REDIS'] = Redis(
    host=os.environ["REDIS_HOST"],
    port=6379,
)
Session(app)