Skip to content

Commit

Permalink
fix tag parameter (#1100)
Browse files (browse the repository at this point in the history)
  • Loading branch information
iakov-aws authored Jan 14, 2025
1 parent c8239c5 commit bc3a8b6
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 4 deletions.
7 changes: 4 additions & 3 deletions cid/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,11 +362,12 @@ def get_template_parameters(self, parameters: dict, param_prefix: str='', others
prefix = '' if value.get('global') else param_prefix
if isinstance(value, str):
params[key] = value
elif isinstance(value, dict) and value.get('type') == 'cur.tag_and_cost_category_fields':
elif isinstance(value, dict) and str(value.get('type')).endswith('tag_and_cost_category_fields'):
cur_version = '2' if str(value.get('type')).startswith('cur2.') else '1'
params[key] = get_parameter(
param_name=prefix + key,
message=f"Required parameter: {key} ({value.get('description')})",
choices=self.cur.tag_and_cost_category_fields + ["'none'"],
choices=self.get_cur(cur_version).tag_and_cost_category_fields + ["'none'"],
)
elif isinstance(value, dict) and value.get('type') == 'athena':
if get_parameters().get(prefix + key): # priority to user input
Expand Down Expand Up @@ -1362,7 +1363,7 @@ def create_or_update_dataset(self, dataset_definition: dict, dataset_id: str=Non
# Read dataset definition from template
data = self.get_data_from_definition('dataset', dataset_definition)
template = Template(json.dumps(data))
cur1_required = dataset_definition.get('dependsOn', dict()).get('cur') or dataset_definition.get('dependsOn', dict()).get('cur')
cur1_required = dataset_definition.get('dependsOn', dict()).get('cur')
cur2_required = dataset_definition.get('dependsOn', dict()).get('cur2')
athena_datasource = None

Expand Down
17 changes: 16 additions & 1 deletion cid/helpers/cur.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class AbstractCUR(CidBase):
]
_metadata = None
_database = None
_tag_and_cost_category = None

def __init__(self, athena, glue):
self.athena = athena
Expand Down Expand Up @@ -178,7 +179,21 @@ def tag_and_cost_category_fields(self) -> list:
if self.version == '1':
return [field for field in self.fields if field.startswith('resource_tags_user_') or field.startswith('cost_category_')]
elif self.version == '2':
raise NotImplemented('Need to run a query to get all fields of resource_tags')
if self._tag_and_cost_category is not None: # the query can take few mins so we try to cache it
logging.debug(f'Using cached tags.')
return self._tag_and_cost_category
cid_print(f'Scanning resource_tags in {self.table_name} (can take a while).')
keys = self.athena.query(sql=f'''
SELECT DISTINCT key
FROM {self.table_name}
CROSS JOIN UNNEST(map_keys(resource_tags)) AS t(key)
WHERE billing_period >= DATE_FORMAT(DATE_ADD('month', -1, CURRENT_DATE), '%Y-%m')
AND line_item_usage_start_date > DATE_ADD('day', -7, CURRENT_DATE)
''',
database=self.database,
)
self._tag_and_cost_category = sorted([f"resource_tags['{k[0]}']" for k in keys])
return self._tag_and_cost_category
else:
raise NotImplemented('cur version not known')

Expand Down

0 comments on commit bc3a8b6

Please sign in to comment.