From db1b4fe0f6dfac37fef45f0f9e4e16611ed0ffcb Mon Sep 17 00:00:00 2001 From: Iakov GAN <82834333+iakov-aws@users.noreply.github.com> Date: Fri, 19 Apr 2024 15:01:08 +0200 Subject: [PATCH] Better process from 0 (#782) * better choosing of name * enhancements for flawless creation of dashboard * better export --- cid/commands/init_qs.py | 13 ++++-- cid/export.py | 3 +- cid/helpers/athena.py | 65 +++++++++++++++++------------- cid/helpers/quicksight/__init__.py | 8 ++-- 4 files changed, 52 insertions(+), 37 deletions(-) diff --git a/cid/commands/init_qs.py b/cid/commands/init_qs.py index 060ebf39..4e737601 100644 --- a/cid/commands/init_qs.py +++ b/cid/commands/init_qs.py @@ -1,5 +1,6 @@ """ Command Init QuickSight """ +import re import time import logging @@ -57,6 +58,7 @@ def _create_quicksight_enterprise_subscription(self): 'AccountName': account_name, 'NotificationEmail': email, } + cid_print('Initializing Amazon QuickSight in your AWS Account') try: response = self.cid.qs.client.create_account_subscription(**params) logger.debug(f'create_account_subscription resp: {response}') @@ -76,14 +78,17 @@ def _create_quicksight_enterprise_subscription(self): def _get_account_name_for_quicksight(self): """Get the account name for quicksight""" for _ in range(MAX_ITERATIONS): + cid_print('Please, choose a descriptive name for your QuickSight account.') + cid_print('This will be used later to share it with your users. For test you can use AWS Account Id. This can NOT be changed later. If you are planning to use IdC stop now and create QuickSight Account in UI.') account_name = get_parameter( 'account-name', - message=( - '\n\tPlease, choose a descriptive name for your QuickSight account. ' - 'This will be used later to share it with your users. This can NOT be changed later.' 
- ), + message='Please enter a unique name [A-Za-z0-9-]', default=self.cid.organizations.get_account_name() ) + if not re.match('[A-Za-z0-9-]', account_name): + logger.error('account_name must match [A-Za-z0-9-]') + unset_parameter('account-name') + continue if account_name: return account_name print('\t The account name must not be empty. Please, try again.') diff --git a/cid/export.py b/cid/export.py index 10569a20..ce671200 100644 --- a/cid/export.py +++ b/cid/export.py @@ -317,10 +317,11 @@ def export_analysis(qs, athena, glue): dashboard_id = new_dashboard_id dashboard_resource = {} + print(datasets) dashboard_resource['dependsOn'] = { # Historically CID uses dataset names as dataset reference. IDs of manually created resources have uuid format. # We can potentially reconsider this and use IDs at some point - 'datasets': sorted(list(set(datasets + resources_datasets))) + 'datasets': sorted(list(set(list(datasets.keys()) + resources_datasets))) } dashboard_resource['name'] = analysis['Name'] dashboard_resource['dashboardId'] = dashboard_id diff --git a/cid/helpers/athena.py b/cid/helpers/athena.py index dfdb6bc2..27dbf825 100644 --- a/cid/helpers/athena.py +++ b/cid/helpers/athena.py @@ -122,8 +122,9 @@ def WorkGroup(self) -> str: # Select default workgroup if present if self.defaults.get('WorkGroup') not in {wgr['Name'] for wgr in workgroups}: workgroups.append({'Name': f"{self.defaults.get('WorkGroup')} (create new)"}) - default_workgroup = next(iter([wgr.get('Name') for wgr in workgroups if wgr['Name'] == self.defaults.get('WorkGroup')]), None) - if default_workgroup: logger.info(f'Found "{default_workgroup}" as a default workgroup') + default_workgroup = next(iter([wgr.get('Name') for wgr in workgroups if self.defaults.get('WorkGroup') in wgr['Name']]), None) + if default_workgroup: + logger.info(f'Found "{default_workgroup}" as a default workgroup') # Ask user selected_workgroup = get_parameter( param_name='athena-workgroup', @@ -158,38 +159,44 @@ def 
workgroup_output_location(self) -> str: def _ensure_workgroup(self, name: str) -> str: """Ensure a workgroup exists and configured with an S3 bucket""" - try: - s3 = S3(session=self.session) + s3 = S3(session=self.session) + if name == 'primary': # QuickSight manages primary wg differently, relying exclusively on bucket with a predefined name + bucket_name = f'{self.partition}-athena-query-results-{self.region}-{self.account_id}' + else: bucket_name = f'{self.partition}-athena-query-results-cid-{self.account_id}-{self.region}' + try: workgroup = self.client.get_work_group(WorkGroup=name) - if not workgroup.get('WorkGroup', {}).get('Configuration', {}).get('ResultConfiguration', {}).get('OutputLocation', None): - buckets = s3.list_buckets(region_name=self.region) - if bucket_name not in buckets: - buckets.append(f'{bucket_name} (create new)') - bucket_name = get_parameter( - param_name='athena-result-bucket', - message=f"Select S3 bucket to use with Amazon Athena Workgroup [{name}]", - choices=[bucket for bucket in buckets] - ) - if ' (create new)' in bucket_name: - bucket_name = bucket_name.replace(' (create new)', '') - s3.ensure_bucket(name=bucket_name) - self.client.update_work_group( - WorkGroup=name, - Description='string', - ConfigurationUpdates={ - 'ResultConfigurationUpdates': { - 'OutputLocation': f's3://{bucket_name}', - 'EncryptionConfiguration': { - 'EncryptionOption': 'SSE_S3', - }, - 'AclConfiguration': { - 'S3AclOption': 'BUCKET_OWNER_FULL_CONTROL' - } + if workgroup.get('WorkGroup', {}).get('Configuration', {}).get('ResultConfiguration', {}).get('OutputLocation', None): + return name # all good we have Output Bucket Configured. 
+ + # there no result bucket configured for this WG + buckets = s3.list_buckets(region_name=self.region) + if bucket_name not in buckets: + buckets.append(f'{bucket_name} (create new)') + bucket_name = get_parameter( + param_name='athena-result-bucket', + message=f"Select S3 bucket to use with Amazon Athena Workgroup [{name}]", + choices=[bucket for bucket in buckets] + ) + if ' (create new)' in bucket_name: + bucket_name = bucket_name.replace(' (create new)', '') + s3.ensure_bucket(name=bucket_name) + self.client.update_work_group( + WorkGroup=name, + Description='string', + ConfigurationUpdates={ + 'ResultConfigurationUpdates': { + 'OutputLocation': f's3://{bucket_name}', + 'EncryptionConfiguration': { + 'EncryptionOption': 'SSE_S3', + }, + 'AclConfiguration': { + 'S3AclOption': 'BUCKET_OWNER_FULL_CONTROL' } } - ) + } + ) return name except self.client.exceptions.InvalidRequestException as exc: # Workgroup does not exist diff --git a/cid/helpers/quicksight/__init__.py b/cid/helpers/quicksight/__init__.py index 4962a3b5..814662a6 100644 --- a/cid/helpers/quicksight/__init__.py +++ b/cid/helpers/quicksight/__init__.py @@ -460,7 +460,8 @@ def create_data_source(self, athena_workgroup, datasource_id: str=None, role_arn if not datasource.status.endswith('IN_PROGRESS'): break if not datasource.is_healthy: - logger.error(f'Data source creation failed: {datasource.error_info}.') + logger.error(f'DataSource parameters: {json.dumps(params, indent=2)}') + logger.error(f'DataSource creation failed: {datasource.error_info}.') if "The QuickSight service role required to access your AWS resources has not been created yet." 
in str(datasource.error_info): logger.error( 'Please check that QuickSight has a default role that can access S3 Buckets and Athena https://quicksight.aws.amazon.com/sn/admin?#aws ' @@ -468,8 +469,9 @@ def create_data_source(self, athena_workgroup, datasource_id: str=None, role_arn ) if get_parameter( param_name='quicksight-delete-failed-datasource', - message=f'Data source creation failed: {datasource.error_info}. Delete?', + message=f'Data source creation failed: {datasource.error_info}. Delete(recommended)?', choices=['yes', 'no'], + default='yes' ) == 'yes': try: self.delete_data_source(datasource.id) @@ -1116,7 +1118,7 @@ def create_dataset(self, definition: dict) -> str: dataset_id = definition.get("DataSetId") logger.info(f'Dataset {definition.get("Name")} already exists with DataSetId={dataset_id}') except self.client.exceptions.LimitExceededException as exc: - raise CidCritical('AWS QuickSight SPICE limit exceeded. Add SPICE here https://quicksight.aws.amazon.com/sn/admin#capacity .') from exc + raise CidCritical('Not enough AWS QuickSight SPICE capacity. Add SPICE here https://quicksight.aws.amazon.com/sn/admin#capacity .') from exc logger.info(f'Waiting for {definition.get("Name")} to be created') deadline = time.time() + max_timeout