From 4e7ff124fbbb673a99642b121dd23f244ba0b0e0 Mon Sep 17 00:00:00 2001 From: Mitchell Alessio <5306896+malessi@users.noreply.github.com> Date: Tue, 5 Sep 2023 14:12:42 -0400 Subject: [PATCH] BFD-2863: Introduce Launch Lifecycle Hooks to BFD Server Instances (#1916) --- .../bfd-server/templates/bfd-server.sh.j2 | 44 +++++++++++++++++++ .../server/modules/bfd_server_asg/main.tf | 18 ++++++-- .../bfd_server_asg/templates/fhir_server.tpl | 3 +- .../modules/bfd_server_iam/data-sources.tf | 2 + .../server/modules/bfd_server_iam/main.tf | 33 +++++++++++++- 5 files changed, 94 insertions(+), 6 deletions(-) diff --git a/ops/ansible/roles/bfd-server/templates/bfd-server.sh.j2 b/ops/ansible/roles/bfd-server/templates/bfd-server.sh.j2 index 77ddf8b1ec..89b4dcdd6c 100644 --- a/ops/ansible/roles/bfd-server/templates/bfd-server.sh.j2 +++ b/ops/ansible/roles/bfd-server/templates/bfd-server.sh.j2 @@ -65,6 +65,11 @@ export NEW_RELIC_METRIC_PERIOD='{{ data_server_new_relic_metric_period }}' STARTUP_TESTING_REQ_TIMEOUT='15' STARTUP_TESTING_REQ_BACKOFF_TIMEOUT='1' STARTUP_TESTING_BENE_ID='-88888888888888' +# Lifecycle hook name, used to signal to the ASG that this instance has warmed-up and is ready for +# traffic +{% if launch_lifecycle_hook is defined %} +LAUNCH_LIFECYCLE_HOOK="{{ launch_lifecycle_hook }}" +{% endif %} ## # Prints out the specified message. @@ -125,6 +130,45 @@ service_startup_check() { # Effectively allow traffic from external sources to reach service port sudo iptables -D INPUT -p tcp ! 
-i lo --dport "$BFD_PORT" -j REJECT log "Server started properly" + + if [[ -z "$LAUNCH_LIFECYCLE_HOOK" ]]; then + return 0 + fi + + log "Launch Lifecycle Hook $LAUNCH_LIFECYCLE_HOOK is enabled" + instance_id="$(ec2-metadata --instance-id | sed 's/instance-id: \(.*\)$/\1/')" + region="$(ec2-metadata --availability-zone | sed 's/placement: \(.*\).$/\1/')" + if [[ -z "$instance_id" || -z "$region" ]]; then + log "Instance ID or region not found from IMDS; is BFD Server running on an EC2 Instance?" + return 0 + fi + + # Capturing the ASG name from Terraform -> User Data Init -> Ansible is not possible, as the + # ASG name (as of writing) is based upon the Launch Template name and latest version. Trying + # to pass this data within the launch template's Terraform resource definition would result in + # a circular reference. Instead, we rely on AWS's default tagging behavior to get the ASG's + # name + asg_name="$( + aws ec2 describe-tags \ + --filters "Name=resource-id,Values=$instance_id" "Name=key,Values=aws:autoscaling:groupName" \ + --region "$region" | + jq -r '.Tags | .[] | .Value' + )" + if [[ -z "$asg_name" ]]; then + log "ASG name not found in instance tags; was instance launched within an ASG?" + return 0 + fi + + log "ASG Name: $asg_name; Instance ID: $instance_id" + log "Completing Lifecycle Action for Hook $LAUNCH_LIFECYCLE_HOOK..." + aws autoscaling complete-lifecycle-action \ + --lifecycle-action-result CONTINUE \ + --instance-id "$instance_id" \ + --lifecycle-hook-name "$LAUNCH_LIFECYCLE_HOOK" \ + --auto-scaling-group-name "$asg_name" \ + --region "$region" 1>/dev/null 2>/dev/null && + log "Lifecycle Action completed with result CONTINUE for hook $LAUNCH_LIFECYCLE_HOOK" || + log "Failed to complete Lifecycle Action for hook $LAUNCH_LIFECYCLE_HOOK" return 0 else log "Server failed to start properly, retrying..." 
diff --git a/ops/terraform/services/server/modules/bfd_server_asg/main.tf b/ops/terraform/services/server/modules/bfd_server_asg/main.tf index 0397c67732..d3258e24da 100644 --- a/ops/terraform/services/server/modules/bfd_server_asg/main.tf +++ b/ops/terraform/services/server/modules/bfd_server_asg/main.tf @@ -10,6 +10,8 @@ locals { { capacity = length(var.env_config.azs) * 3, metric_lower_bound = 2 * var.scaling_networkin_interval_mb, metric_upper_bound = 4 * var.scaling_networkin_interval_mb }, { capacity = length(var.env_config.azs) * 4, metric_lower_bound = 4 * var.scaling_networkin_interval_mb, metric_upper_bound = null } ] + + on_launch_lifecycle_hook_name = "bfd-${local.env}-${var.role}-on-launch" } ## Security groups @@ -103,10 +105,11 @@ resource "aws_launch_template" "main" { } user_data = base64encode(templatefile("${path.module}/templates/${var.launch_config.user_data_tpl}", { - env = local.env - port = var.lb_config.port - accountId = var.launch_config.account_id - data_server_db_url = "jdbc:postgresql://${local.rds_reader_endpoint}:5432/fhirdb${var.jdbc_suffix}" + env = local.env + port = var.lb_config.port + accountId = var.launch_config.account_id + data_server_db_url = "jdbc:postgresql://${local.rds_reader_endpoint}:5432/fhirdb${var.jdbc_suffix}" + launch_lifecycle_hook = local.on_launch_lifecycle_hook_name })) tag_specifications { @@ -145,6 +148,13 @@ resource "aws_autoscaling_group" "main" { version = aws_launch_template.main.latest_version } + initial_lifecycle_hook { + name = local.on_launch_lifecycle_hook_name + default_result = "ABANDON" + heartbeat_timeout = var.asg_config.instance_warmup * 3 + lifecycle_transition = "autoscaling:EC2_INSTANCE_LAUNCHING" + } + enabled_metrics = [ "GroupMinSize", "GroupMaxSize", diff --git a/ops/terraform/services/server/modules/bfd_server_asg/templates/fhir_server.tpl b/ops/terraform/services/server/modules/bfd_server_asg/templates/fhir_server.tpl index a115ff9cb7..98ffc32e37 100644 --- 
a/ops/terraform/services/server/modules/bfd_server_asg/templates/fhir_server.tpl +++ b/ops/terraform/services/server/modules/bfd_server_asg/templates/fhir_server.tpl @@ -36,7 +36,8 @@ cat <<EOF > extra_vars.json "data_server_tmp_dir": "{{ data_server_dir }}/tmp", "data_server_war": "bfd-server-war-1.0.0-SNAPSHOT.war", "data_server_db_url": "${data_server_db_url}", - "env": "${env}" + "env": "${env}", + "launch_lifecycle_hook": "${launch_lifecycle_hook}" } EOF diff --git a/ops/terraform/services/server/modules/bfd_server_iam/data-sources.tf b/ops/terraform/services/server/modules/bfd_server_iam/data-sources.tf index 61e0e82bd7..33f70e34ee 100644 --- a/ops/terraform/services/server/modules/bfd_server_iam/data-sources.tf +++ b/ops/terraform/services/server/modules/bfd_server_iam/data-sources.tf @@ -1,3 +1,5 @@ +data "aws_region" "current" {} + data "aws_caller_identity" "current" {} data "aws_kms_key" "master_key" { diff --git a/ops/terraform/services/server/modules/bfd_server_iam/main.tf b/ops/terraform/services/server/modules/bfd_server_iam/main.tf index 10a9863b75..087eea4763 100644 --- a/ops/terraform/services/server/modules/bfd_server_iam/main.tf +++ b/ops/terraform/services/server/modules/bfd_server_iam/main.tf @@ -1,5 +1,7 @@ locals { - env = terraform.workspace + region = data.aws_region.current.name + account_id = data.aws_caller_identity.current.account_id + env = terraform.workspace } resource "aws_iam_instance_profile" "instance" { @@ -144,3 +146,32 @@ resource "aws_iam_role_policy_attachment" "kms_mgmt" { role = aws_iam_role.instance.id policy_arn = aws_iam_policy.kms_mgmt.arn } + + +# allow Server instances to complete lifecycle actions on their ASG +resource "aws_iam_policy" "asg" { + description = join("", [ + "Policy granting BFD Server in ${local.env} environment access to complete Lifecycle Actions ", + "on the ${local.env} AutoScaling Group" + ]) + name = "bfd-${local.env}-${var.service}-asg" + path = "/" + policy = jsonencode( + { + Version = 
"2012-10-17" + Statement = [ + { + Effect = "Allow" + Action = "autoscaling:CompleteLifecycleAction" + Resource = ["arn:aws:autoscaling:${local.region}:${local.account_id}:autoScalingGroup:*:autoScalingGroupName/bfd-${local.env}-${var.legacy_service}*"] + } + ] + } + ) +} + +# attach policy allowing BFD Server to complete lifecycle actions on its ASG +resource "aws_iam_role_policy_attachment" "asg" { + role = aws_iam_role.instance.id + policy_arn = aws_iam_policy.asg.arn +}