Skip to content

Commit

Permalink
#1088: Also update the md5 file
Browse files Browse the repository at this point in the history
  • Loading branch information
philipjyoon committed Feb 14, 2025
1 parent cc858d5 commit dd71e48
Showing 1 changed file with 19 additions and 2 deletions.
21 changes: 19 additions & 2 deletions tools/fix_disp_s1_iso_xml_from_s3.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3

'''
Given a S3 bucket and a prefix, fix any DISP-S1 product iso xmls if they do not validate.
Given a S3 bucket and a prefix, fix any DISP-S1 product iso xmls if they do not validate and also update the md5 file.
Usage: python fix_disp_s1_iso_xml_from_s3.py opera-int-rs-pop1 products/DISP_S1
File path looks like this: s3://opera-pst-rs-pop1/products/DISP_S1/OPERA_L3_DISP-S1_IW_F12640_VV_20220815T232903Z_20220827T232903Z_v1.0_20250211T022048Z/OPERA_L3_DISP-S1_IW_F12640_VV_20220815T232903Z_20220827T232903Z_v1.0_20250211T022048Z.iso.xml
Expand All @@ -11,16 +11,21 @@
import boto3
from collections import defaultdict
from lxml import etree
import hashlib

def fix_iso_xmls(bucket: str, prefix: str, dry_run: bool = False):

s3 = boto3.client('s3')
paginator = s3.get_paginator('list_objects_v2')
response_iterator = paginator.paginate(Bucket=bucket, Prefix=prefix)
files = defaultdict(list)

for response in response_iterator:
for obj in response['Contents']:
key = obj['Key']

if "v1.0" not in key:
continue

file_name = key.split('/')[-1]
if file_name[-4:] != '.xml':
continue
Expand All @@ -44,6 +49,18 @@ def fix_iso_xmls(bucket: str, prefix: str, dry_run: bool = False):
else:
print(f'Dry run: Would have uploaded {key}')

# Update the md5 file
md5_file = file_name + ".md5"
md5_key = key + ".md5"
md5_str = hashlib.md5(open(fixed_name).read().encode('utf-8')).hexdigest()
with open(md5_file, 'w') as f:
f.write(md5_str)
if not dry_run:
s3.upload_file(md5_file, bucket, md5_key)
print(f'Uploaded {md5_key}')
else:
print(f'Dry run: Would have uploaded {md5_key}')

if __name__ == '__main__':
if len(sys.argv) < 3:
print('Usage: fix_disp_s1_iso_xml_from_s3.py <bucket> <prefix> [<dry-run>]')
Expand Down

0 comments on commit dd71e48

Please sign in to comment.