converged-computing · vsoch · Dec 13, 2024
diff --git a/experiments/google/compute-engine/cpu/retest/README.md b/experiments/google/compute-engine/cpu/retest/README.md
@@ -0,0 +1,9 @@
+# Retest of Compute Engine
+
+> Size 32
+
+We are trying to diagnose why there are differences between Compute Engine and GKE. So far we have identified three variables:
+
+- MTU: set to 8896 on GKE, whereas Compute Engine used the underlying default of 1460
+- Tier 1 "PREMIUM" was set for GKE but not for Compute Engine
+- COMPACT placement: we were never able to get it for Compute Engine, at least not for more than 10 instances. I can try again, but I am not sure the result will be different.
diff --git a/experiments/google/compute-engine/cpu/retest/base/main.tf b/experiments/google/compute-engine/cpu/retest/base/main.tf
@@ -0,0 +1,44 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+module "compute_nodes" {
+    source = "./modules/compute"
+
+    # One module instance per node spec, keyed by its name_prefix.
+    for_each = { for node in var.compute_node_specs : node.name_prefix => node }
+
+    project_id = var.project_id
+    region     = var.region
+    family     = var.family
+    subnetwork = var.subnetwork
+
+    name_prefix   = each.value.name_prefix
+    machine_arch  = each.value.machine_arch
+    machine_type  = each.value.machine_type
+    num_instances = each.value.instances
+
+    # lookup() only falls back when the key is absent; coalesce() also covers an
+    # explicit null, which would otherwise break the conditional / `<=` below.
+    boot_script       = lookup(each.value, "boot_script", null)
+    compact_placement = coalesce(lookup(each.value, "compact", null), false)
+    gpu = lookup(each.value, "gpu_type", null) == null || coalesce(lookup(each.value, "gpu_count", null), 0) <= 0 ? null : {
+        type  = each.value.gpu_type
+        count = each.value.gpu_count
+    }
+    service_account = {
+        email  = var.service_account_emails["compute"]
+        scopes = var.compute_scopes
+    }
+    nfs_mounts = var.cluster_storage
+}
diff --git a/experiments/google/compute-engine/cpu/retest/base/modules/compute/main.tf b/experiments/google/compute-engine/cpu/retest/base/modules/compute/main.tf
@@ -0,0 +1,91 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+data "google_compute_image" "fluxfw_compute_x86_64_image" {
+  family  = var.family     # image family to look up; result feeds local.compute_images
+  project = var.project_id # looked up in the deployment project
+}
+
+data "google_compute_zones" "available" {
+  region  = var.region # names[0] of the result is used as the instance zone
+  project = var.project_id
+}
+
+resource "google_compute_address" "ip_address" {
+  name = "${var.name_prefix}-external-ip" # per-prefix name: this module is instantiated once per node spec, so a fixed name would collide
+}
+
+locals {
+  automatic_restart   = var.compact_placement ? false : var.automatic_restart         # compact placement forces automatic restart off
+  on_host_maintenance = var.compact_placement ? "TERMINATE" : var.on_host_maintenance # ...and forces TERMINATE on host maintenance
+  access_config = {
+    nat_ip       = google_compute_address.ip_address.address
+    network_tier = var.network_tier
+  }
+  compute_images = {
+    "x86-64" = {
+      image   = data.google_compute_image.fluxfw_compute_x86_64_image.self_link
+      project = data.google_compute_image.fluxfw_compute_x86_64_image.project
+    }
+  }
+}
+
+resource "google_compute_resource_policy" "collocated" {
+  count   = var.compact_placement ? 1 : 0 # only created when compact placement is requested
+  project = var.project_id
+  region  = var.region
+  name    = "${var.name_prefix}-collocated-policy"
+  group_placement_policy {
+    collocation = "COLLOCATED"
+    vm_count    = var.num_instances
+  }
+}
+
+module "flux_compute_instance_template" {
+    source               = "github.com/terraform-google-modules/terraform-google-vm/modules/instance_template" # NOTE(review): not pinned to a ref
+    region               = var.region
+    project_id           = var.project_id
+    name_prefix          = var.name_prefix
+    subnetwork           = var.subnetwork
+    gpu                  = var.gpu
+    service_account      = var.service_account
+    access_config        = [local.access_config]
+    tags                 = ["ssh", "flux", "compute"]
+    machine_type         = var.machine_type
+    disk_size_gb         = 256
+    # Index the image map directly; wrapping a lone reference in "${...}" is deprecated since Terraform 0.12.
+    source_image         = local.compute_images[var.machine_arch].image
+    source_image_project = local.compute_images[var.machine_arch].project
+    automatic_restart    = local.automatic_restart
+    on_host_maintenance  = local.on_host_maintenance
+    startup_script       = var.boot_script
+    metadata             = {
+        "enable-oslogin"   : "TRUE",
+        "VmDnsSetting"     : "GlobalDefault",
+        "nfs-mounts"       : jsonencode(var.nfs_mounts),
+        "gpus-attached"    : var.gpu != null ? "TRUE" : "FALSE"
+    }
+}
+
+module "flux_compute_instances" {
+  source              = "github.com/terraform-google-modules/terraform-google-vm/modules/compute_instance"
+  instance_template   = module.flux_compute_instance_template.self_link
+  num_instances       = var.num_instances
+  resource_policies   = var.compact_placement ? [google_compute_resource_policy.collocated[0].self_link] : []
+  hostname            = var.name_prefix
+  add_hostname_suffix = true
+  region              = var.region
+  zone                = data.google_compute_zones.available.names[0] # first zone listed for the region
+  subnetwork          = var.subnetwork
+}
diff --git a/experiments/google/compute-engine/cpu/retest/base/modules/compute/variables.tf b/experiments/google/compute-engine/cpu/retest/base/modules/compute/variables.tf
@@ -0,0 +1,94 @@
+variable "automatic_restart" {
+  description = "(Optional) Specifies whether the instance should be automatically restarted if it is terminated by Compute Engine (not terminated by a user)."
+  type        = bool
+  default     = true # overridden to false by the module's locals when compact_placement is true
+}
+
+variable "boot_script" {
+  description = "(Optional) the name of a file containing a script to be executed on compute nodes at boot time"
+  type        = string # NOTE(review): forwarded as-is to the instance template's startup_script; confirm file name vs. script contents
+  default     = null
+}
+
+variable "compact_placement" {
+  description = "(Optional) a boolean which determines whether a set of compute nodes has a compact placement resource policy attached to them."
+  type        = bool
+  default     = false # false => no collocated resource policy is created
+}
+
+variable "family" {
+  description = "The source X86 image family prefix to use"
+  type        = string
+  default     = "flux-framework" # handed unchanged to the google_compute_image data source
+}
+
+variable "gpu" {
+  description = "The type and count of GPU(s) to attach to a compute node"
+  type = object({
+    type  = string
+    count = number
+  })
+  default = null # null => the "gpus-attached" metadata value is "FALSE"
+}
+
+variable "machine_arch" {
+  description = "The instruction set architecture used by the compute node; must be a key of local.compute_images (currently only \"x86-64\")"
+  type        = string # used to index local.compute_images, which only defines "x86-64"
+}
+
+variable "network_tier" {
+  description = "The network tier (STANDARD or PREMIUM)"
+  type        = string
+  default     = "STANDARD" # applied to the access_config built in the module's locals
+}
+
+variable "machine_type" {
+  description = "The Compute Engine machine type to be used for the compute node"
+  type        = string # required: no default
+}
+
+variable "name_prefix" {
+  description = "The name prefix for the compute node instances, the full instances names will be this prefix followed by a node number"
+  type        = string # also prefixes the collocated resource policy name
+}
+
+variable "nfs_mounts" {
+  description = "A map with keys 'share' and 'mountpoint' describing an NFS export and its intended mount point"
+  type        = map(string)
+  default     = {} # JSON-encoded into the "nfs-mounts" instance metadata
+}
+
+variable "num_instances" {
+  description = "The number of compute node instances to create"
+  type        = number
+  default     = 1 # also used as vm_count of the compact placement policy
+}
+
+variable "on_host_maintenance" {
+  description = "Instance availability Policy"
+  type        = string
+  default     = "MIGRATE" # replaced by "TERMINATE" in the module's locals when compact_placement is true
+}
+
+variable "project_id" {
+  description = "The GCP project ID"
+  type        = string # required: no default
+}
+
+variable "region" {
+  description = "The GCP region where the cluster resides"
+  type        = string # required: no default
+}
+
+variable "service_account" {
+  description = "The GCP service account used by the compute node"
+  type = object({
+    email  = string
+    scopes = set(string)
+  }) # forwarded unchanged to the instance template module
+}
+
+variable "subnetwork" {
+  description = "Subnetwork to deploy to"
+  type        = string # used by both the instance template and the instances
+}
diff --git a/experiments/google/compute-engine/cpu/retest/base/variables.tf b/experiments/google/compute-engine/cpu/retest/base/variables.tf
@@ -0,0 +1,53 @@
+
+variable "cluster_storage" {
+  description = "A map with keys 'share' and 'mountpoint' describing an NFS export and its intended mount point"
+  type        = map(string) # required; passed to the compute module as nfs_mounts
+}
+
+variable "family" {
+  description = "The source image x86 prefix to be used by the compute node(s)"
+  type        = string
+  default     = "global" # NOTE(review): differs from the compute module's own default ("flux-framework"); confirm which is intended
+}
+
+variable "compute_node_specs" {
+  description = "A list of compute node specifications"
+  type = list(object({
+    name_prefix  = string # used as the for_each key in main.tf, so it must be unique
+    machine_arch = string
+    machine_type = string
+    gpu_type     = string
+    gpu_count    = number
+    compact      = bool
+    instances    = number
+    properties   = set(string) # NOTE(review): not referenced by main.tf in this directory
+    boot_script  = string
+  }))
+  default = []
+}
+
+variable "compute_scopes" {
+  description = "The set of access scopes for compute node instances"
+  type        = set(string)
+  default     = ["cloud-platform"]
+}
+
+variable "project_id" {
+  description = "The GCP project ID"
+  type        = string # required: no default
+}
+
+variable "region" {
+  description = "The GCP region where the cluster resides"
+  type        = string # required: no default
+}
+
+variable "service_account_emails" {
+  description = "A map with keys: 'compute', 'login', 'manager' that map to the service account to be used by the respective nodes"
+  type        = map(string) # main.tf in this directory reads only the "compute" key
+}
+
+variable "subnetwork" {
+  description = "Subnetwork to deploy to"
+  type        = string # required; forwarded to every compute module instance
+}