Skip to content

Commit

Permalink
Fix automatic instance termination
Browse files Browse the repository at this point in the history
  • Loading branch information
kocsismate committed Aug 30, 2024
1 parent 1463631 commit 450dc56
Show file tree
Hide file tree
Showing 9 changed files with 423 additions and 6 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ vendor/
.terraform/
/app/*/*
!/app/zend/*
/build/infrastructure/config/
!/build/infrastructure/config/aws.tfvars.dist
/config/**/*.*
!/config/**/*.dist
/tmp/
Expand Down
157 changes: 151 additions & 6 deletions build/infrastructure/aws/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,7 @@ resource "aws_instance" "host" {
iops = 8000
}

tags = {
Name = "php-benchmark-host"
}
tags = merge(var.tags, {(var.scheduler_tag["key"]) = var.scheduler_tag["value"]})

connection {
type = "ssh"
Expand Down Expand Up @@ -79,9 +77,6 @@ EOF
inline = [
"set -e",

"# Automatic termination",
"#echo 'sudo halt' | at now + ${var.termination_timeout_in_min} min",

"# Update permissions",
"sudo mkdir -p ${var.remote_project_root}",
"sudo chmod -R 775 ${var.remote_project_root}",
Expand Down Expand Up @@ -235,3 +230,153 @@ resource "aws_security_group" "security_group" {
"0.0.0.0/0"]
}
}

################################################
#
# AUTOMATIC TERMINATION
#
################################################

# Execution role of the termination Lambda; the trust policy comes from
# data.aws_iam_policy_document.this.
resource "aws_iam_role" "this" {
  name        = "php-version-benchmark-termination-scheduler-lambda"
  description = "Allows Lambda functions to stop and start ec2 and rds resources"
  tags        = var.tags

  assume_role_policy = data.aws_iam_policy_document.this.json
}

# Trust policy: lets the Lambda service principal assume the role.
data "aws_iam_policy_document" "this" {
  statement {
    principals {
      identifiers = ["lambda.amazonaws.com"]
      type        = "Service"
    }

    actions = ["sts:AssumeRole"]
  }
}

# Attaches the instance stop/start permissions to the Lambda role.
resource "aws_iam_role_policy" "termination_lambda" {
  role   = aws_iam_role.this.id
  name   = "php-version-benchmark-termination-lambda-policy"
  policy = data.aws_iam_policy_document.termination_lambda.json
}

# Permissions for looking up tagged resources, checking Auto Scaling
# membership and stopping/starting EC2 instances, on every resource.
data "aws_iam_policy_document" "termination_lambda" {
  statement {
    resources = ["*"]

    actions = [
      "tag:GetResources",
      "ec2:StopInstances",
      "ec2:StartInstances",
      "autoscaling:DescribeAutoScalingInstances",
    ]
  }
}

# Attaches the CloudWatch alarm-action permissions to the Lambda role.
resource "aws_iam_role_policy" "termination_lambda_cloudwatch_alarm" {
  role   = aws_iam_role.this.id
  name   = "php-version-benchmark-termination-cloudwatch-custom-policy-scheduler"
  policy = data.aws_iam_policy_document.termination_lambda_cloudwatch_alarm.json
}

# Permissions to enable and disable CloudWatch alarm actions, on every alarm.
data "aws_iam_policy_document" "termination_lambda_cloudwatch_alarm" {
  statement {
    resources = ["*"]

    actions = [
      "cloudwatch:DisableAlarmActions",
      "cloudwatch:EnableAlarmActions",
    ]
  }
}

# Lets the Lambda role create log streams and write log events, restricted to
# the log group declared as aws_cloudwatch_log_group.this.
resource "aws_iam_role_policy" "lambda_logging" {
  name = "php-version-benchmark-termination-lambda-logging"
  role = aws_iam_role.this.id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = [
          "logs:CreateLogStream",
          "logs:PutLogEvents",
        ]
        Resource = "${aws_cloudwatch_log_group.this.arn}:*"
      },
    ]
  })
}

# AWS Lambda takes its code as a .zip archive, so the Python package directory
# is zipped into the project's tmp/ directory.
data "archive_file" "package" {
  type        = "zip"
  output_path = "${var.local_project_root}/tmp/aws-stop-start-resources.zip"
  source_dir  = "${var.local_project_root}/build/infrastructure/package/"
}

# Lambda function for stopping or starting AWS resources. It is configured
# through environment variables; here the action is "stop" and the target
# resources are selected by the scheduler tag.
resource "aws_lambda_function" "this" {
  function_name = "php-version-benchmark-termination-lambda-function"
  role          = aws_iam_role.this.arn
  runtime       = "python3.10"
  handler       = "scheduler.main.lambda_handler"
  timeout       = "600"
  kms_key_arn   = ""

  # Deployment package built by data.archive_file.package, together with the
  # base64-encoded SHA-256 of that archive.
  filename         = data.archive_file.package.output_path
  source_code_hash = data.archive_file.package.output_base64sha256

  tags = var.tags

  environment {
    variables = {
      AWS_REGIONS     = var.region
      EC2_SCHEDULE    = "true"
      SCHEDULE_ACTION = "stop"
      TAG_KEY         = var.scheduler_tag["key"]
      TAG_VALUE       = var.scheduler_tag["value"]
    }
  }
}

locals {
  # var.now with its space separator replaced by "T" and a trailing "Z", which
  # is the RFC 3339 form timeadd() and formatdate() operate on.
  # NOTE(review): assumes var.now is a "YYYY-MM-DD hh:mm:ss" UTC timestamp — confirm with the caller.
  rfc_3339_now = "${replace(var.now, " ", "T")}Z"

  # Moment at which the stop is scheduled: now + the configured timeout in minutes.
  termination_time = timeadd(local.rfc_3339_now, "${var.termination_timeout_in_min}m")

  # Individual, non-padded date/time fields of the termination time.
  termination_minute = formatdate("m", local.termination_time)
  termination_hour   = formatdate("h", local.termination_time)
  termination_day    = formatdate("D", local.termination_time)
  termination_month  = formatdate("M", local.termination_time)
  termination_year   = formatdate("YYYY", local.termination_time)

  # Schedule expression of the form: cron(minute hour day-of-month month ? year)
  cloudwatch_schedule_expression = "cron(${join(" ", [
    local.termination_minute,
    local.termination_hour,
    local.termination_day,
    local.termination_month,
    "?",
    local.termination_year,
  ])})"
}

# Event rule that fires on the computed termination schedule.
resource "aws_cloudwatch_event_rule" "this" {
  name        = "php-version-benchmark-termination-lambda-scheduler"
  description = "Trigger lambda scheduler"
  tags        = var.tags

  schedule_expression = local.cloudwatch_schedule_expression
}

# Makes the termination Lambda the target of the scheduled event rule.
resource "aws_cloudwatch_event_target" "this" {
  rule = aws_cloudwatch_event_rule.this.name
  arn  = aws_lambda_function.this.arn
}

# Allows the events service to invoke the Lambda, but only on behalf of the
# scheduled event rule declared above.
resource "aws_lambda_permission" "this" {
  statement_id  = "AllowExecutionFromCloudWatch"
  function_name = aws_lambda_function.this.function_name
  action        = "lambda:InvokeFunction"
  principal     = "events.amazonaws.com"
  source_arn    = aws_cloudwatch_event_rule.this.arn
}

# Log group for the termination Lambda, kept for 7 days.
#
# Lambda writes its logs to "/aws/lambda/<function name>". The Lambda role is
# only granted logs:CreateLogStream / logs:PutLogEvents on this group, so the
# name must match the function_name of aws_lambda_function.this exactly;
# otherwise the function cannot log and the retention never applies.
resource "aws_cloudwatch_log_group" "this" {
  name              = "/aws/lambda/php-version-benchmark-termination-lambda-function"
  retention_in_days = 7
  tags              = var.tags
}
19 changes: 19 additions & 0 deletions build/infrastructure/aws/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,25 @@ variable "result_root_dir" {
type = string
}

# Tags applied to the AWS resources of this configuration.
variable "tags" {
  type        = map(string)
  description = "Custom tags on AWS resources"

  default = {
    Name = "php-version-benchmark"
  }
}

# Tag key/value pair marking the resources to stop; read through the
# "key" and "value" entries of the map.
variable "scheduler_tag" {
  type        = map(string)
  description = "Identifies AWS resources to stop"

  default = {
    key   = "to_stop"
    value = "true"
  }
}

# Offset, in minutes, added to var.now to compute the time of the scheduled stop.
variable "termination_timeout_in_min" {
  description = "Number of minutes after which the tagged AWS resources are stopped"
  type        = number
}
Expand Down
1 change: 1 addition & 0 deletions build/infrastructure/package/scheduler/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Module containing the logic for the lambda scheduler entry-points."""
46 changes: 46 additions & 0 deletions build/infrastructure/package/scheduler/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""Exception functions."""

import logging


def ec2_exception(resource_name: str, resource_id: str, exception) -> None:
    """Log an exception raised while scheduling an ec2 resource.

    The log level is chosen from the AWS error code found under
    ``exception.response["Error"]["Code"]``:

    * ``IncorrectInstanceState`` is logged as info,
    * ``UnsupportedOperation`` and ``InvalidParameterCombination`` are
      logged as warnings,
    * anything else, including an exception that carries no error code
      at all, is logged as an unexpected error.

    :param str resource_name:
        Aws resource name
    :param str resource_id:
        Aws resource id
    :param exception:
        The caught exception. It is rendered with ``%s`` in the log
        message, and its ``response`` mapping is read when present.
    """
    info_codes = ("IncorrectInstanceState",)
    warning_codes = ("UnsupportedOperation", "InvalidParameterCombination")

    # Never let the error handler itself raise: tolerate exceptions that have
    # no ``response`` attribute or whose response lacks the "Error"/"Code" keys.
    response = getattr(exception, "response", None) or {}
    error_code = (response.get("Error") or {}).get("Code")

    if error_code in info_codes:
        log, message = logging.info, "%s %s: %s"
    elif error_code in warning_codes:
        log, message = logging.warning, "%s %s: %s"
    else:
        log, message = logging.error, "Unexpected error on %s %s: %s"

    log(message, resource_name, resource_id, exception)
54 changes: 54 additions & 0 deletions build/infrastructure/package/scheduler/filter_resources_by_tags.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""Filter aws resources with tags."""

from typing import Iterator

import boto3


class FilterByTags:
    """Look up aws resources by tag through the resourcegroupstaggingapi."""

    def __init__(self, region_name=None) -> None:
        """Create the resourcegroupstaggingapi client.

        :param region_name:
            Region passed to the client; when falsy the client is created
            without an explicit region.
        """
        client_kwargs = {"region_name": region_name} if region_name else {}
        self.rgta = boto3.client("resourcegroupstaggingapi", **client_kwargs)

    def get_resources(self, resource_type, aws_tags) -> Iterator[str]:
        """Yield the ARNs of the resources matching a type and tag filters.

        Every page returned by the ``get_resources`` paginator is walked
        and the ``ResourceARN`` of each entry in its
        ``ResourceTagMappingList`` is yielded.

        :param str resource_type:
            The resource type to filter on, passed as the single
            ResourceTypeFilters entry. The format is
            service[:resourceType], for example ``ec2`` or
            ``ec2:instance``.
        :param list[map] aws_tags:
            A list of TagFilters (keys and values), passed through
            unchanged. For example:
            [
                {
                    'Key': 'string',
                    'Values': [
                        'string',
                    ]
                },
            ]
        :yield Iterator[str]:
            The ARNs of the matching resources
        """
        pages = self.rgta.get_paginator("get_resources").paginate(
            TagFilters=aws_tags, ResourceTypeFilters=[resource_type]
        )
        for page in pages:
            yield from (
                mapping["ResourceARN"] for mapping in page["ResourceTagMappingList"]
            )
81 changes: 81 additions & 0 deletions build/infrastructure/package/scheduler/instance_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
"""ec2 instances scheduler."""

from typing import Dict, List

import boto3

from botocore.exceptions import ClientError

from .exceptions import ec2_exception
from .filter_resources_by_tags import FilterByTags


class InstanceScheduler:
    """Stop or start tagged ec2 instances.

    Instances reported by ``describe_auto_scaling_instances`` (i.e.
    belonging to an Auto Scaling group) are left untouched.
    """

    def __init__(self, region_name=None) -> None:
        """Initialize the ec2, autoscaling and tagging clients.

        :param region_name:
            Region passed to the clients; when falsy the clients are
            created without an explicit region.
        """
        if region_name:
            self.ec2 = boto3.client("ec2", region_name=region_name)
            self.asg = boto3.client("autoscaling", region_name=region_name)
        else:
            self.ec2 = boto3.client("ec2")
            self.asg = boto3.client("autoscaling")
        self.tag_api = FilterByTags(region_name=region_name)

    def stop(self, aws_tags: List[Dict]) -> None:
        """Stop the ec2 instances matching the given tags.

        :param list[map] aws_tags:
            Aws tags to use for filter resources.
            For example:
            [
                {
                    'Key': 'string',
                    'Values': [
                        'string',
                    ]
                }
            ]
        """
        self._apply(aws_tags, "stop_instances", "Stop")

    def start(self, aws_tags: List[Dict]) -> None:
        """Start the ec2 instances matching the given tags.

        :param list[map] aws_tags:
            Aws tags to use for filter resources.
            For example:
            [
                {
                    'Key': 'string',
                    'Values': [
                        'string',
                    ]
                }
            ]
        """
        self._apply(aws_tags, "start_instances", "Start")

    def _apply(self, aws_tags: List[Dict], operation: str, verb: str) -> None:
        """Run one ec2 client operation on every matching standalone instance.

        :param list[map] aws_tags:
            Aws tags to use for filter resources.
        :param str operation:
            Name of the ec2 client method to call with ``InstanceIds``.
        :param str verb:
            Word used at the start of the printed progress message.
        """
        for instance_arn in self.tag_api.get_resources("ec2:instance", aws_tags):
            # The instance id is the last path segment of the ARN.
            instance_id = instance_arn.split("/")[-1]
            try:
                if self._in_auto_scaling_group(instance_id):
                    continue
                getattr(self.ec2, operation)(InstanceIds=[instance_id])
                print(f"{verb} instances {instance_id}")
            except ClientError as exc:
                # One failing instance must not prevent handling the others.
                ec2_exception("instance", instance_id, exc)

    def _in_auto_scaling_group(self, instance_id: str) -> bool:
        """Tell whether the autoscaling API reports the instance."""
        described = self.asg.describe_auto_scaling_instances(
            InstanceIds=[instance_id]
        )
        return bool(described["AutoScalingInstances"])
Loading

0 comments on commit 450dc56

Please sign in to comment.