NetApp replica promote: guard source being down
Constructing a vserver client for the old active replica
may fail in a disaster scenario. Promoting the non-active
replica must still succeed.

Change-Id: Ib1a36f45ba47e378e13acd24ea16634ed96e15ca
Carthaca committed Oct 11, 2023
1 parent b6ae0d5 commit c20d39b
Showing 1 changed file with 68 additions and 53 deletions.
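The change follows a simple guard pattern: try to build a client for the (possibly unreachable) old active replica, log and swallow any failure, and only read the source settings if a client was actually obtained, otherwise fall back to empty defaults so the promotion can proceed. Below is a minimal, self-contained sketch of that pattern; SourceDownError, connect_to_source and the settings dict are hypothetical stand-ins, not the driver's real data_motion API.

class SourceDownError(Exception):
    """Stub error raised when the old active replica is unreachable."""


def connect_to_source(reachable):
    # Stand-in for building a vserver client to the old active replica;
    # in a disaster scenario this call may fail.
    if not reachable:
        raise SourceDownError("source backend unreachable")
    return {"logical_space_reporting": True}


def promote(source_reachable):
    source_client = None
    settings = {}  # safe defaults when the source cannot be queried
    try:
        source_client = connect_to_source(source_reachable)
    except Exception as exc:
        # Log and continue: promoting the non-active replica must still work.
        print(f"could not reach old active replica, promoting anyway: {exc}")
    if source_client is not None:
        settings = dict(source_client)  # read settings only with a client
    # ... promotion of the non-active replica proceeds here regardless ...
    return settings


print(promote(source_reachable=True))   # {'logical_space_reporting': True}
print(promote(source_reachable=False))  # {} and a log line, but no crash

In the actual commit the same shape is applied around data_motion.get_client_for_backend(), with logical_opts, effi_opts and orig_active_vserver_client pre-initialized to safe defaults.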
121 changes: 68 additions & 53 deletions manila/share/drivers/netapp/dataontap/cluster_mode/lib_base.py
@@ -3053,20 +3053,33 @@ def promote_replica(self, context, replica_list, replica, access_rules,
         dm_session = data_motion.DataMotionSession()
 
         # SAPCC Get space logical reporting settings from original replica.
+        logical_opts = {}
+        is_logical_space_rep = None
+        effi_opts = {}
+        orig_active_vserver_client = None
         orig_active_vserver = dm_session.get_vserver_from_share(
             orig_active_replica)
         orig_active_replica_backend = share_utils.extract_host(
             orig_active_replica['host'], level='backend_name')
         orig_active_replica_name = self._get_backend_share_name(
             orig_active_replica['id'])
-        orig_active_vserver_client = data_motion.get_client_for_backend(
-            orig_active_replica_backend, vserver_name=orig_active_vserver)
-        logical_opts = self._get_logical_space_options(
-            orig_active_vserver_client, orig_active_replica_name)
-        is_logical_space_reporting = logical_opts['logical_space_reporting']
-        effi_opts = self._get_efficiency_options(orig_active_vserver_client,
-                                                 orig_active_replica_name,
-                                                 extra_logging=True)
+        try:
+            orig_active_vserver_client = data_motion.get_client_for_backend(
+                orig_active_replica_backend, vserver_name=orig_active_vserver)
+        except Exception as e:
+            LOG.exception(
+                f"Could not create client for vserver '{orig_active_vserver}' "
+                f"on backend '{orig_active_replica_backend}' for original "
+                f"active replica '{orig_active_replica['id']}'. "
+                f"error: {e}")
+        if orig_active_vserver_client is not None:
+            logical_opts = self._get_logical_space_options(
+                orig_active_vserver_client, orig_active_replica_name)
+            is_logical_space_rep = logical_opts['logical_space_reporting']
+            effi_opts = self._get_efficiency_options(
+                orig_active_vserver_client,
+                orig_active_replica_name,
+                extra_logging=True)
 
         new_replica_list = []
 
@@ -3107,27 +3120,19 @@ def promote_replica(self, context, replica_list, replica, access_rules,
         # NOTE(felipe_rodrigues): non active DR replica does not have the
         # export location set, so during replica deletion the driver cannot
         # delete the ONTAP export. Clean up it when becoming non active.
-        orig_active_vserver = dm_session.get_vserver_from_share(
-            orig_active_replica)
-        orig_active_replica_backend = (
-            share_utils.extract_host(orig_active_replica['host'],
-                                     level='backend_name'))
-        orig_active_replica_name = self._get_backend_share_name(
-            orig_active_replica['id'])
-        orig_active_vserver_client = data_motion.get_client_for_backend(
-            orig_active_replica_backend, vserver_name=orig_active_vserver)
         orig_active_replica_helper = self._get_helper(orig_active_replica)
         orig_active_replica_helper.set_client(orig_active_vserver_client)
         try:
             orig_active_replica_helper.cleanup_demoted_replica(
                 orig_active_replica, orig_active_replica_name)
-        except exception.StorageCommunicationException:
+        except Exception as e:
             LOG.exception(
-                "Could not cleanup the original active replica export %s.",
-                orig_active_replica['id'])
+                f"Could not cleanup the original active replica export "
+                f"{orig_active_replica['id']}. error: {e}")
 
-        self._unmount_orig_active_replica(orig_active_replica,
-                                          orig_active_vserver)
+        self._unmount_orig_active_replica(orig_active_vserver_client,
+                                          orig_active_replica['id'],
+                                          orig_active_replica_name)
 
         self._handle_qos_on_replication_change(dm_session,
                                                new_active_replica,
@@ -3158,48 +3163,52 @@ def promote_replica(self, context, replica_list, replica, access_rules,
                     f"replica. {e}")
 
         # SAPCC update new replica
-        _, new_active_vserver_client = self._get_vserver(
+        _, new_active_vserver_cli = self._get_vserver(
             share_server=share_server)
         new_active_replica_name = self._get_backend_share_name(
             new_active_replica['id'])
-        try:
-            new_active_vserver_client.update_volume_space_attributes(
-                new_active_replica_name, is_logical_space_reporting)
-        except Exception as e:
-            LOG.exception(
-                f"Could not apply is_logical_space_reporting "
-                f"'{is_logical_space_reporting}' to the promoted replica. {e}")
-        if effi_opts['cross_dedup_disabled']:
+        logical_space_error_msg = (
+            f"Could not apply is_logical_space_rep "
+            f"'{is_logical_space_rep}' to the promoted replica."
+        )
+        if is_logical_space_rep is not None:
             try:
-                new_active_vserver_client.update_volume_efficiency_attributes(
-                    new_active_replica_name, True, True,
-                    cross_dedup_disabled=True)
+                new_active_vserver_cli.update_volume_space_attributes(
+                    new_active_replica_name, is_logical_space_rep)
             except Exception as e:
-                LOG.exception(
-                    f"With efficiency options '{effi_opts}'"
-                    f"could not apply cross_dedup_disabled to the promoted "
-                    f"replica. {e}")
+                LOG.exception(f"{logical_space_error_msg} {e}")
+        else:
+            LOG.exception(logical_space_error_msg)
+
+        effi_opts_error_msg = (
+            f"With efficiency options '{effi_opts}'"
+            f"could not apply cross_dedup_disabled to the promoted "
+            f"replica."
+        )
+        if effi_opts:
+            if effi_opts['cross_dedup_disabled']:
+                try:
+                    new_active_vserver_cli.update_volume_efficiency_attributes(
+                        new_active_replica_name, True, True,
+                        cross_dedup_disabled=True)
+                except Exception as e:
+                    LOG.exception(f"{effi_opts_error_msg} {e}")
+        else:
+            LOG.exception(effi_opts_error_msg)
 
         return new_replica_list
 
-    def _unmount_orig_active_replica(self, orig_active_replica,
-                                     orig_active_vserver=None):
-        orig_active_replica_backend = (
-            share_utils.extract_host(orig_active_replica['host'],
-                                     level='backend_name'))
-        orig_active_vserver_client = data_motion.get_client_for_backend(
-            orig_active_replica_backend,
-            vserver_name=orig_active_vserver)
-        share_name = self._get_backend_share_name(
-            orig_active_replica['id'])
+    def _unmount_orig_active_replica(self, orig_active_vserver_client,
+                                     replica_id, share_name):
        try:
             orig_active_vserver_client.unmount_volume(share_name,
                                                       force=True)
             LOG.info("Unmount of the original active replica %s successful.",
-                     orig_active_replica['id'])
-        except exception.StorageCommunicationException:
-            LOG.exception("Could not unmount the original active replica %s.",
-                          orig_active_replica['id'])
+                     replica_id)
+        except Exception as e:
+            LOG.exception(
+                f"Could not unmount the original active replica "
+                f"{replica_id}. error: {e}")
 
     def _handle_qos_on_replication_change(self, dm_session, new_active_replica,
                                           orig_active_replica, is_dr,
@@ -3215,7 +3224,13 @@ def _handle_qos_on_replication_change(self, dm_session, new_active_replica,
         qos_specs = self._get_normalized_qos_specs(extra_specs)
 
         if is_dr and qos_specs:
-            dm_session.remove_qos_on_old_active_replica(orig_active_replica)
+            try:
+                dm_session.remove_qos_on_old_active_replica(
+                    orig_active_replica)
+            except Exception as e:
+                LOG.exception(
+                    f"Could not remove qos on the original active replica "
+                    f"{orig_active_replica['id']}. error: {e}")
 
         if qos_specs:
             # Check if a QoS policy already exists for the promoted replica,
