create opening function

ASFHyP3 · Sep 30, 2024 · fcfab4b · fcfab4b
1 parent 3bd89d9
commit fcfab4b
Showing 1 changed file with 37 additions and 1 deletion.
diff --git a/src/opera_disp_tms/utils.py b/src/opera_disp_tms/utils.py
@@ -1,7 +1,26 @@
 from pathlib import Path
-from typing import Union
+from typing import Optional, Union
 
 import requests
+import s3fs
+import xarray as xr
+
+from opera_disp_tms.tmp_s3_access import get_credentials
+
+
+IO_PARAMS = {
+    'fsspec_params': {
+        # "skip_instance_cache": True
+        'cache_type': 'blockcache',  # or "first" with enough space
+        'block_size': 8 * 1024 * 1024,  # could be bigger
+    },
+    'h5py_params': {
+        'driver_kwds': {  # only recent versions of xarray and h5netcdf allow this correctly
+            'page_buf_size': 32 * 1024 * 1024,  # this one only works in repacked files
+            'rdcc_nbytes': 8 * 1024 * 1024,  # this one is to read the chunks
+        }
+    },
+}
 
 
 def download_file(
@@ -28,3 +47,20 @@ def download_file(
                 if chunk:
                     f.write(chunk)
     session.close()
+
+
+def open_opera_disp_product(s3_uri: str, dataset_path: Optional[str] = None):
+    creds = get_credentials()
+    s3_fs = s3fs.S3FileSystem(key=creds['accessKeyId'], secret=creds['secretAccessKey'], token='sessionToken')
+
+    group = dict()
+    if dataset_path:
+        group['group'] = dataset_path
+
+    ds = xr.open_dataset(
+        s3_fs.open(s3_uri, **IO_PARAMS['fsspec_params']),
+        engine='h5netcdf',
+        **group,
+        **IO_PARAMS['h5py_params'],
+    )
+    return ds