Managing AWS ElastiCache with Terraform

AWS ElastiCache is a fully managed in-memory caching service supporting Redis and Memcached. This guide demonstrates how to set up and manage ElastiCache using Terraform.

Video Tutorial

Learn more about managing AWS ElastiCache with Terraform in this comprehensive video tutorial:

Prerequisites

AWS CLI configured with appropriate permissions
Terraform installed (version 1.0.0 or later)
Basic understanding of Redis/Memcached
VPC with private subnets

Project Structure

terraform-elasticache/
├── main.tf
├── variables.tf
├── outputs.tf
├── modules/
│   └── elasticache/
│       ├── main.tf
│       ├── variables.tf
│       └── outputs.tf
└── config/
    └── parameter-groups.json

ElastiCache Configuration

Create modules/elasticache/main.tf:

# Redis Cluster
# Standalone, single-node Redis cache cluster with daily snapshots.
resource "aws_elasticache_cluster" "redis" {
  cluster_id = "${var.project_name}-redis"
  engine     = "redis"
  node_type  = var.node_type

  # A Redis cache cluster that is not part of a replication group must have
  # exactly one node. var.num_cache_nodes is left to the Memcached cluster,
  # whose "cross-az" mode needs more than one node, so the two cannot share it.
  num_cache_nodes = 1

  parameter_group_name = aws_elasticache_parameter_group.redis.name
  port                 = 6379
  security_group_ids   = [aws_security_group.redis.id]
  subnet_group_name    = aws_elasticache_subnet_group.main.name

  # Backups: keep automatic snapshots for 7 days; the snapshot window ends
  # where the weekly maintenance window begins.
  snapshot_retention_limit = 7
  snapshot_window          = "00:00-05:00"
  maintenance_window       = "mon:05:00-mon:09:00"

  auto_minor_version_upgrade = true
  # Defer modifications to the next maintenance window instead of applying now.
  apply_immediately = false

  # ElastiCache events go to the alerts topic declared in this module.
  notification_topic_arn = aws_sns_topic.alerts.arn

  tags = merge(
    var.tags,
    {
      Name = "${var.project_name}-redis"
    }
  )
}

# Redis Replication Group
# Cluster-mode Redis: 2 shards (node groups), each with 1 replica, spread
# across AZs with automatic failover.
resource "aws_elasticache_replication_group" "redis" {
  replication_group_id = "${var.project_name}-redis-rep"
  # "description" replaces the removed "replication_group_description" argument.
  description = "Redis replication group"
  node_type   = var.node_type
  port        = 6379

  # NOTE(review): with num_node_groups > 1 ElastiCache expects a parameter
  # group that sets cluster-enabled = yes - confirm this group is suitable or
  # point at a dedicated cluster-mode parameter group.
  parameter_group_name = aws_elasticache_parameter_group.redis.name

  automatic_failover_enabled = true
  multi_az_enabled           = true
  subnet_group_name          = aws_elasticache_subnet_group.main.name
  security_group_ids         = [aws_security_group.redis.id]

  # Sharding layout. These top-level arguments replace the "cluster_mode"
  # block, which was deprecated in AWS provider v4 and removed in v5.
  num_node_groups         = 2
  replicas_per_node_group = 1

  snapshot_retention_limit = 7
  snapshot_window          = "00:00-05:00"
  maintenance_window       = "mon:05:00-mon:09:00"

  auto_minor_version_upgrade = true
  # Defer modifications to the next maintenance window instead of applying now.
  apply_immediately = false

  # ElastiCache events go to the alerts topic declared in this module.
  notification_topic_arn = aws_sns_topic.alerts.arn

  tags = merge(
    var.tags,
    {
      Name = "${var.project_name}-redis-rep"
    }
  )
}

# Memcached Cluster
# Multi-node Memcached cluster with nodes spread across availability zones.
resource "aws_elasticache_cluster" "memcached" {
  cluster_id           = "${var.project_name}-memcached"
  engine               = "memcached"
  node_type            = var.node_type
  num_cache_nodes      = var.num_cache_nodes
  parameter_group_name = aws_elasticache_parameter_group.memcached.name
  port                 = 11211
  security_group_ids   = [aws_security_group.memcached.id]
  subnet_group_name    = aws_elasticache_subnet_group.main.name

  # "cross-az" requires num_cache_nodes > 1; preferred_availability_zones
  # lists the zone for each node.
  az_mode                      = "cross-az"
  preferred_availability_zones = var.availability_zones

  maintenance_window = "mon:05:00-mon:09:00"

  # ElastiCache events go to the alerts topic declared in this module.
  notification_topic_arn = aws_sns_topic.alerts.arn

  tags = merge(
    var.tags,
    {
      Name = "${var.project_name}-memcached"
    }
  )
}

# Parameter Groups
# Custom Redis 6.x parameter group: evict with allkeys-lru and use a client
# "timeout" value of 300.
resource "aws_elasticache_parameter_group" "redis" {
  name   = "${var.project_name}-redis-params"
  family = "redis6.x"

  # One "parameter" block is generated per map entry (name => value).
  dynamic "parameter" {
    for_each = {
      "maxmemory-policy" = "allkeys-lru"
      "timeout"          = "300"
    }

    content {
      name  = parameter.key
      value = parameter.value
    }
  }

  tags = merge(var.tags, { Name = "${var.project_name}-redis-params" })
}

# Custom Memcached 1.6 parameter group: sets max_item_size to 10485760 and
# chunk_size_growth_factor to 1.25.
resource "aws_elasticache_parameter_group" "memcached" {
  name   = "${var.project_name}-memcached-params"
  family = "memcached1.6"

  # One "parameter" block is generated per map entry (name => value).
  dynamic "parameter" {
    for_each = {
      "max_item_size"            = "10485760"
      "chunk_size_growth_factor" = "1.25"
    }

    content {
      name  = parameter.key
      value = parameter.value
    }
  }

  tags = merge(var.tags, { Name = "${var.project_name}-memcached-params" })
}

# Subnet Group
# Subnets (passed in via var.subnet_ids) that all cache clusters in this
# module are placed into.
resource "aws_elasticache_subnet_group" "main" {
  subnet_ids = var.subnet_ids
  name       = "${var.project_name}-subnet-group"

  tags = merge(var.tags, { Name = "${var.project_name}-subnet-group" })
}

# Security Groups
# Network access for Redis: inbound TCP 6379 from the allowed CIDR blocks,
# unrestricted outbound.
resource "aws_security_group" "redis" {
  vpc_id      = var.vpc_id
  name        = "${var.project_name}-redis"
  description = "Security group for Redis cluster"

  # Inbound: Redis port only, limited to var.allowed_cidr_blocks.
  ingress {
    description = "Redis from VPC"
    protocol    = "tcp"
    from_port   = 6379
    to_port     = 6379
    cidr_blocks = var.allowed_cidr_blocks
  }

  # Outbound: all protocols and ports to any destination.
  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = merge(var.tags, { Name = "${var.project_name}-redis" })
}

# Network access for Memcached: inbound TCP 11211 from the allowed CIDR
# blocks, unrestricted outbound.
resource "aws_security_group" "memcached" {
  vpc_id      = var.vpc_id
  name        = "${var.project_name}-memcached"
  description = "Security group for Memcached cluster"

  # Inbound: Memcached port only, limited to var.allowed_cidr_blocks.
  ingress {
    description = "Memcached from VPC"
    protocol    = "tcp"
    from_port   = 11211
    to_port     = 11211
    cidr_blocks = var.allowed_cidr_blocks
  }

  # Outbound: all protocols and ports to any destination.
  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = merge(var.tags, { Name = "${var.project_name}-memcached" })
}

Monitoring and Alerts

CloudWatch Alarms

# Alarm when the Redis cluster's average CPUUtilization stays above 75 for
# two consecutive 300-second periods; notifies the alerts SNS topic.
resource "aws_cloudwatch_metric_alarm" "cpu_utilization" {
  alarm_name        = "${var.project_name}-cpu-utilization"
  alarm_description = "CPU utilization is too high"

  # Metric being watched.
  namespace   = "AWS/ElastiCache"
  metric_name = "CPUUtilization"
  dimensions = {
    CacheClusterId = aws_elasticache_cluster.redis.id
  }

  # Evaluation rule.
  statistic           = "Average"
  period              = "300"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanThreshold"
  threshold           = "75"

  alarm_actions = [aws_sns_topic.alerts.arn]
}

# Alarm when the Redis cluster's summed Evictions exceed 1000 for two
# consecutive 300-second periods; notifies the alerts SNS topic.
resource "aws_cloudwatch_metric_alarm" "evictions" {
  alarm_name        = "${var.project_name}-evictions"
  alarm_description = "High number of evictions"

  # Metric being watched.
  namespace   = "AWS/ElastiCache"
  metric_name = "Evictions"
  dimensions = {
    CacheClusterId = aws_elasticache_cluster.redis.id
  }

  # Evaluation rule.
  statistic           = "Sum"
  period              = "300"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanThreshold"
  threshold           = "1000"

  alarm_actions = [aws_sns_topic.alerts.arn]
}

SNS Topics

# SNS topic that receives the cache alarm notifications.
resource "aws_sns_topic" "alerts" {
  name = "${var.project_name}-cache-alerts"

  tags = merge(
    var.tags,
    {
      # Keep the Name tag identical to the topic name, as every other
      # resource in this module does.
      Name = "${var.project_name}-cache-alerts"
    }
  )
}

# Email subscription on the alerts topic; the address comes from
# var.alert_email.
resource "aws_sns_topic_subscription" "email" {
  protocol  = "email"
  endpoint  = var.alert_email
  topic_arn = aws_sns_topic.alerts.arn
}

Backup and Recovery

Automatic Snapshots

# Illustrative fragment: shows only the backup-related arguments of a cache
# cluster; the remaining required arguments are elided below.
resource "aws_elasticache_cluster" "redis_with_backup" {
  # ... other configuration ...

  # Automatic snapshot settings: retention limit of 7 and a 00:00-05:00 window.
  snapshot_retention_limit = 7
  snapshot_window         = "00:00-05:00"
  
  # Snapshot name derived from the project name.
  # NOTE(review): per the provider docs this snapshot is taken when the
  # cluster is deleted - confirm against the provider version in use.
  final_snapshot_identifier = "${var.project_name}-final-snapshot"
}

Manual Snapshot

# The hashicorp/aws provider does not offer an "aws_elasticache_snapshot"
# resource type, so the manual snapshot is taken through the AWS CLI (listed
# in the prerequisites). The provisioner runs once on creation and again
# whenever one of the trigger values changes.
resource "null_resource" "manual" {
  triggers = {
    cache_cluster_id = aws_elasticache_cluster.redis.id
    snapshot_name    = "${var.project_name}-manual-snapshot"
  }

  provisioner "local-exec" {
    command = "aws elasticache create-snapshot --cache-cluster-id ${self.triggers.cache_cluster_id} --snapshot-name ${self.triggers.snapshot_name}"
  }
}

Advanced Features

Global Datastore

# Global Datastore built on top of the Redis replication group declared in
# this module, which acts as the primary.
resource "aws_elasticache_global_replication_group" "example" {
  global_replication_group_id_suffix = "global"

  # The module declares its replication group as "redis"; no resource named
  # "primary" is declared here.
  primary_replication_group_id = aws_elasticache_replication_group.redis.id
}

# Secondary member of the Global Datastore, created through the aliased
# "aws.secondary" provider.
resource "aws_elasticache_replication_group" "secondary" {
  provider = aws.secondary

  replication_group_id = "${var.project_name}-secondary"
  # "description" replaces "replication_group_description", which was
  # deprecated in AWS provider v4 and removed in v5.
  description                 = "Secondary replication group"
  global_replication_group_id = aws_elasticache_global_replication_group.example.global_replication_group_id
}

User Management

# Redis RBAC user "administrator" (user id "admin"), authenticated with the
# password supplied in var.admin_password.
resource "aws_elasticache_user" "admin" {
  engine    = "REDIS"
  user_id   = "admin"
  user_name = "administrator"
  passwords = [var.admin_password]

  # Redis ACL rule string applied to this user.
  access_string = "on ~app::* -@all +@read +@hash +@bitmap +@geo -setbit -bitfield -hset -hmset -hincrby -hincrbyfloat -hdel -hsetnx -hmget -hexists -hlen -hstrlen -hkeys -hvals -hgetall -hscan"
}

# RBAC user group that can be attached to a Redis replication group.
resource "aws_elasticache_user_group" "admin" {
  engine        = "REDIS"
  user_group_id = "admin"

  # A Redis user group must contain a user whose user name is "default";
  # the built-in user with ID "default" satisfies that. Without it the group
  # creation is rejected, because the admin user is named "administrator".
  user_ids = ["default", aws_elasticache_user.admin.user_id]
}

Best Practices

Performance Optimization
- Choose appropriate node type
- Configure memory policies
- Monitor cache hits/misses
- Implement proper eviction

High Availability
- Use replication groups
- Enable Multi-AZ
- Configure automatic failover
- Regular backups

Security
- Use security groups
- Implement encryption
- Configure authentication
- Regular updates

Cost Optimization
- Use appropriate node types
- Implement auto-scaling

Conclusion

You’ve learned how to set up and manage AWS ElastiCache using Terraform. This setup provides:

Redis and Memcached clusters
Replication and failover
Monitoring and alerts
Backup and recovery

Remember to:

Monitor performance
Implement security
Optimize costs
Maintain backups

Integration Examples

Application Integration

import os

import redis

# Terraform expressions such as aws_elasticache_cluster.redis.cache_nodes[0].address
# are not valid Python. Expose the node address to the application instead
# (for example from a Terraform output) through the REDIS_HOST environment
# variable; REDIS_PORT is optional and defaults to the Redis port 6379.
redis_client = redis.Redis(
    host=os.environ["REDIS_HOST"],
    port=int(os.environ.get("REDIS_PORT", "6379")),
    decode_responses=True,
)

# Set value
redis_client.set('key', 'value')

# Get value (a str, because decode_responses=True)
value = redis_client.get('key')

Session Storage

import os

from flask import Flask
from flask_session import Session
from redis import Redis

app = Flask(__name__)

# Store Flask sessions in Redis. Terraform expressions are not valid Python,
# so the ElastiCache node address is read from the REDIS_HOST environment
# variable (for example populated from a Terraform output); REDIS_PORT is
# optional and defaults to the Redis port 6379.
app.config['SESSION_TYPE'] = 'redis'
app.config['SESSION_REDIS'] = Redis(
    host=os.environ["REDIS_HOST"],
    port=int(os.environ.get("REDIS_PORT", "6379")),
)
Session(app)