Skip to content

Commit

Permalink
improving error catching and logging for node normalization
Browse files Browse the repository at this point in the history
  • Loading branch information
EvanDietzMorris committed Jul 23, 2024
1 parent 7731c4a commit 9f19660
Showing 1 changed file with 9 additions and 8 deletions.
17 changes: 9 additions & 8 deletions Common/normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,7 @@ def hit_node_norm_service(self, curies):
error_message = f"Node Normalization service {self.node_norm_endpoint} returned 200 " \
f"but with an empty result for (curies: {curies})"
raise NormalizationFailedError(error_message=error_message)
elif resp.status_code == 422:
# 422 unprocessable entity - we sent something bad to node norm, crash so we can diagnose
else:
error_message = f'Node norm response code: {resp.status_code} (curies: {curies})'
self.logger.error(error_message)
resp.raise_for_status()
Expand Down Expand Up @@ -166,11 +165,13 @@ def normalize_node_data(self, node_list: list, batch_size: int = 1000) -> list:
# we could try to tune max_workers for ThreadPoolExecutor more specifically;
# by default Python picks a reasonable number of workers based on os.cpu_count()
with ThreadPoolExecutor() as executor:
# casting to a list here
normalization_results = list(executor.map(self.hit_node_norm_service, chunks_of_ids))
for normalization_json in normalization_results:
# merge the normalization results into one dictionary
cached_node_norms.update(**normalization_json)
for normalization_json, ids in zip(normalization_results, chunks_of_ids):
if not normalization_json:
self.logger.error(f'Normalization json results missing for ids:{ids}')
else:
# merge the normalization results into one dictionary
cached_node_norms.update(**normalization_json)

# reset the node index
node_idx = 0
Expand Down Expand Up @@ -354,8 +355,8 @@ def get_current_node_norm_version(self):
@staticmethod
def get_normalization_requests_session():
s = requests.Session()
retries = Retry(total=3,
backoff_factor=.1,
retries = Retry(total=5,
backoff_factor=.2,
status_forcelist=[502, 503, 504, 403, 429])
s.mount('https://', HTTPAdapter(max_retries=retries))
s.mount('http://', HTTPAdapter(max_retries=retries))
Expand Down

0 comments on commit 9f19660

Please sign in to comment.