From e5f159762a6a6570ac5cb8529f21868800cbe7cd Mon Sep 17 00:00:00 2001 From: David Mallon Date: Tue, 21 Jan 2025 10:50:26 +0000 Subject: [PATCH 1/4] (PE-40163) automate recovery of failed postgres server --- plans/replace_failed_postgresql.pp | 59 ++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 plans/replace_failed_postgresql.pp diff --git a/plans/replace_failed_postgresql.pp b/plans/replace_failed_postgresql.pp new file mode 100644 index 00000000..0c8f35f7 --- /dev/null +++ b/plans/replace_failed_postgresql.pp @@ -0,0 +1,59 @@ +# @summary Replaces a failed PostgreSQL host +# @param primary_host - The hostname and certname of the primary Puppet server +# @param replica_host - The hostname and certname of the replica VM +# @param working_postgresql_host - The hostname and certname of the still-working PE-PostgreSQL server +# @param failed_postgresql_host - The hostname and certname of the failed PE-PostgreSQL server +# @param replacement_postgresql_host - The hostname and certname of the server being brought in to replace the failed PE-PostgreSQL server +# +plan peadm::replace_failed_postgresql( + Peadm::SingleTargetSpec $primary_host, + Peadm::SingleTargetSpec $replica_host, + Peadm::SingleTargetSpec $working_postgresql_host, + Peadm::SingleTargetSpec $failed_postgresql_host, + Peadm::SingleTargetSpec $replacement_postgresql_host, +) { + $all_hosts = peadm::flatten_compact([ + $primary_host, + $replica_host, + $working_postgresql_host, + $failed_postgresql_host, + $replacement_postgresql_host, + ]) + + # verify we can connect to targets provided before proceeding + run_command('hostname', $all_hosts) + + # Get current peadm config before making modifications + $peadm_config = run_task('peadm::get_peadm_config', $primary_host).first.value + $compilers = $peadm_config['params']['compilers'] + + # Bail if this is trying to be ran against Standard + if $compilers.empty { + fail_plan('Plan peadm::add_database is only applicable for L and 
XL deployments') + } + + $pe_hosts = peadm::flatten_compact([ + $primary_host, + $replica_host, + ]) + + # Stop puppet.service on Puppet server primary and replica + run_task('service', $pe_hosts, 'action' => 'stop', 'name' => 'puppet.service') + + # Temporarily set both primary and replica server nodes so that they use the remaining healthy PE-PostgreSQL server + run_plan('peadm::util::update_db_setting', $pe_hosts, + postgresql_host => $working_postgresql_host, + override => true, + ) + + # Restart pe-puppetdb.service on Puppet server primary and replica + run_task('service', $pe_hosts, 'action' => 'restart', 'name' => 'pe-puppetdb.service') + + # Purge failed PE-PostgreSQL node from PuppetDB + run_command("/opt/puppetlabs/bin/puppet node purge ${$failed_postgresql_host}", $primary_host) + + # Run peadm::add_database plan to deploy replacement PE-PostgreSQL server + run_plan('peadm::add_database', targets => $replacement_postgresql_host, + primary_host => $primary_host, + ) +} From 436b89eeb394fb0d63c1eb69f02f484b87b9fecb Mon Sep 17 00:00:00 2001 From: David Mallon Date: Wed, 22 Jan 2025 11:44:21 +0000 Subject: [PATCH 2/4] (PE-40163) alternate approach to running a task --- plans/replace_failed_postgresql.pp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/plans/replace_failed_postgresql.pp b/plans/replace_failed_postgresql.pp index 0c8f35f7..b84770c2 100644 --- a/plans/replace_failed_postgresql.pp +++ b/plans/replace_failed_postgresql.pp @@ -47,7 +47,8 @@ ) # Restart pe-puppetdb.service on Puppet server primary and replica - run_task('service', $pe_hosts, 'action' => 'restart', 'name' => 'pe-puppetdb.service') + # run_task('service', $pe_hosts, 'action' => 'restart', 'name' => 'pe-puppetdb.service') + run_task('service', $pe_hosts, { action => 'restart', name => 'pe-puppetdb.service' }) # Purge failed PE-PostgreSQL node from PuppetDB run_command("/opt/puppetlabs/bin/puppet node purge ${$failed_postgresql_host}", $primary_host) From 
cdae5da4c7e8a433998578321de05145d86870f7 Mon Sep 17 00:00:00 2001 From: David Mallon Date: Wed, 5 Feb 2025 16:33:04 +0000 Subject: [PATCH 3/4] (PE-40163) deal with empty csr attributes --- plans/replace_failed_postgresql.pp | 3 +-- plans/util/insert_csr_extension_requests.pp | 7 ++++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/plans/replace_failed_postgresql.pp b/plans/replace_failed_postgresql.pp index b84770c2..f1a0dc82 100644 --- a/plans/replace_failed_postgresql.pp +++ b/plans/replace_failed_postgresql.pp @@ -29,7 +29,7 @@ # Bail if this is trying to be ran against Standard if $compilers.empty { - fail_plan('Plan peadm::add_database is only applicable for L and XL deployments') + fail_plan('Plan peadm::replace_failed_postgresql is only applicable for L and XL deployments') } $pe_hosts = peadm::flatten_compact([ @@ -47,7 +47,6 @@ ) # Restart pe-puppetdb.service on Puppet server primary and replica - # run_task('service', $pe_hosts, 'action' => 'restart', 'name' => 'pe-puppetdb.service') run_task('service', $pe_hosts, { action => 'restart', name => 'pe-puppetdb.service' }) # Purge failed PE-PostgreSQL node from PuppetDB diff --git a/plans/util/insert_csr_extension_requests.pp b/plans/util/insert_csr_extension_requests.pp index df00a34e..01f4148d 100644 --- a/plans/util/insert_csr_extension_requests.pp +++ b/plans/util/insert_csr_extension_requests.pp @@ -15,9 +15,10 @@ # If we're merging extension requests, existing requests will be preserved. # If we're not merging, only ours will be used; existing requests will be # overwritten. - $csr_file_data = $merge ? 
{ - true => $csr_attributes_data.deep_merge({ 'extension_requests' => $extension_requests }), - false => ($csr_attributes_data + { 'extension_requests' => $extension_requests }), + if $merge and !$csr_attributes_data.empty { + $csr_file_data = $csr_attributes_data.deep_merge({ 'extension_requests' => $extension_requests }) + } else { + $csr_file_data = $csr_attributes_data + { 'extension_requests' => $extension_requests } } run_task('peadm::mkdir_p_file', $target, From f71ced7b361baa71bd863d418e523e9c2f81c380 Mon Sep 17 00:00:00 2001 From: David Mallon Date: Wed, 5 Feb 2025 17:15:48 +0000 Subject: [PATCH 4/4] (PE-40163) update reference.md --- REFERENCE.md | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/REFERENCE.md b/REFERENCE.md index e7563f9c..0951b740 100644 --- a/REFERENCE.md +++ b/REFERENCE.md @@ -109,6 +109,7 @@ Supported use cases: * [`peadm::convert`](#peadm--convert): Convert an existing PE cluster to a PEAdm-managed cluster * [`peadm::install`](#peadm--install): Install a new PE cluster * [`peadm::modify_certificate`](#peadm--modify_certificate): Modify the certificate of one or more targets +* [`peadm::replace_failed_postgresql`](#peadm--replace_failed_postgresql): Replaces a failed PostgreSQL host * [`peadm::restore`](#peadm--restore): Restore puppet primary configuration * [`peadm::restore_ca`](#peadm--restore_ca) * [`peadm::status`](#peadm--status): Return status information from one or more PE clusters in a table format @@ -2370,6 +2371,50 @@ Data type: `Boolean` Default value: `false` +### `peadm::replace_failed_postgresql` + +Replaces a failed PostgreSQL host + +#### Parameters + +The following parameters are available in the `peadm::replace_failed_postgresql` plan: + +* [`primary_host`](#-peadm--replace_failed_postgresql--primary_host) +* [`replica_host`](#-peadm--replace_failed_postgresql--replica_host) +* [`working_postgresql_host`](#-peadm--replace_failed_postgresql--working_postgresql_host) +* 
[`failed_postgresql_host`](#-peadm--replace_failed_postgresql--failed_postgresql_host) +* [`replacement_postgresql_host`](#-peadm--replace_failed_postgresql--replacement_postgresql_host) + +##### `primary_host` + +Data type: `Peadm::SingleTargetSpec` + +- The hostname and certname of the primary Puppet server + +##### `replica_host` + +Data type: `Peadm::SingleTargetSpec` + +- The hostname and certname of the replica VM + +##### `working_postgresql_host` + +Data type: `Peadm::SingleTargetSpec` + +- The hostname and certname of the still-working PE-PostgreSQL server + +##### `failed_postgresql_host` + +Data type: `Peadm::SingleTargetSpec` + +- The hostname and certname of the failed PE-PostgreSQL server + +##### `replacement_postgresql_host` + +Data type: `Peadm::SingleTargetSpec` + +- The hostname and certname of the server being brought in to replace the failed PE-PostgreSQL server + ### `peadm::restore` Restore puppet primary configuration