Skip to content

Commit

Permalink
put back in the option to set input/output channels
Browse files Browse the repository at this point in the history
It probably makes sense to keep the option to specify the input/output channels in case you don't want to process a certain channel like Hz.
  • Loading branch information
kujaku11 committed Aug 14, 2024
1 parent 141a10b commit 3a63d32
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 34 deletions.
77 changes: 45 additions & 32 deletions aurora/config/config_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
"""

from aurora.config.metadata.processing import Processing
from aurora.config import BANDS_DEFAULT_FILE
from aurora.sandbox.io_helpers.emtf_band_setup import EMTFBandSetupFile
from loguru import logger
from mt_metadata.transfer_functions.processing.aurora.window import Window
from mt_metadata.transfer_functions.processing.aurora.processing import (
Processing,
)
from typing import Optional, Union
import pathlib

Expand Down Expand Up @@ -39,33 +41,32 @@ def __init__(
self._band_edges = band_edges
self._band_specification_style = None

def processing_id(self, kernel_dataset):
    """
    Generate a string id label for the processing config: WIP.

    In the past, we used f"{local}-{remote}" or f"{local}-{run_id}".
    Neither of these is sufficiently unique. In fact, they only describe
    the dataset, and not the processing config. It is difficult to see how
    to make a comprehensive, unique id without it being very long or
    involving hash functions.

    NOTE: this label only describes the dataset, not the processing
    parameters; a config-derived string can be appended by callers if a
    more unique id is needed.

    Parameters
    ----------
    kernel_dataset: aurora.transfer_function.kernel_dataset.KernelDataset
        An object that defines the data to be processed.

    Returns
    -------
    str
        A label for the processing config.
    """
    # Renamed away from `id`, which shadowed the builtin of the same name.
    return f"{kernel_dataset.local_station_id}-{kernel_dataset.remote_station_id}"
# def processing_id(self, kernel_dataset):
# """
# Generates a string id label for the processing config: WIP.

# In the past, we used f"{local}-{remote}" or f"{local}-{run_id}"
# Neither of these is sufficiently unique. In fact, they only describe the
# dataset, and not the processing config. It is difficult to see how to make a
# comprehensive, unique id without it being very long or involving hash
# functions.

# For now, will try to use {local}-{local_runs}-{remote}-{remote_runs},
# which at least describes the dataset, then a string can be generated by the
# config and appended if needed.

# Parameters
# ----------
# kernel_dataset: aurora.transfer_function.kernel_dataset.KernelDataset
# An object that defines the data to be processed.

# Returns
# -------
# id: str
# A label for the processing config.
# """
# id = f"{kernel_dataset.local_station_id}-{kernel_dataset.remote_station_id}"
# return id

@property
def band_specification_style(self):
Expand Down Expand Up @@ -120,6 +121,8 @@ def determine_band_specification_style(self) -> None:
def create_from_kernel_dataset(
self,
kernel_dataset,
input_channels=None,
output_channels=None,
estimator: Optional[Union[str, None]] = None,
emtf_band_file: Optional[Union[str, pathlib.Path, None]] = None,
band_edges: Optional[Union[dict, None]] = None,
Expand Down Expand Up @@ -173,8 +176,10 @@ def create_from_kernel_dataset(
processing_obj: aurora.config.metadata.processing.Processing
Object storing the processing parameters.
"""
processing_id = self.processing_id(kernel_dataset)
processing_obj = Processing(id=processing_id) # , **kwargs)

processing_obj = Processing(
id=kernel_dataset.processing_id
) # , **kwargs)

# pack station and run info into processing object
processing_obj.stations.from_dataset_dataframe(kernel_dataset.df)
Expand Down Expand Up @@ -224,8 +229,16 @@ def create_from_kernel_dataset(
if self.band_specification_style == "EMTF":
processing_obj.band_setup_file = str(self._emtf_band_file)
for key, decimation_obj in processing_obj.decimations_dict.items():
decimation_obj.input_channels = kernel_dataset.input_channels
decimation_obj.output_channels = kernel_dataset.output_channels
if input_channels is None:
decimation_obj.input_channels = kernel_dataset.input_channels
else:
decimation_obj.input_channels = input_channels

if output_channels is None:
decimation_obj.output_channels = kernel_dataset.output_channels
else:
decimation_obj.output_channels = output_channels

if num_samples_window is not None:
decimation_obj.window.num_samples = num_samples_window[key]
# set estimator if provided as kwarg
Expand Down
11 changes: 11 additions & 0 deletions aurora/transfer_function/kernel_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,17 @@ def has_remote_mth5(self):
return False
else:
return self.remote_mth5_path.exists()

@property
def processing_id(self):
    """It is difficult to come up with unique ids without crazy long
    names, so this is a generic id of local-remote; the station metadata
    will have run information and the config parameters.
    """
    # No remote reference station: the local station id alone is the label.
    if self.remote_station_id is None:
        return self.local_station_id
    return f"{self.local_station_id}-rr_{self.remote_station_id}"

@property
def input_channels(self):
Expand Down
8 changes: 6 additions & 2 deletions tests/parkfield/test_process_parkfield_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,9 @@ def test():
test_processing(z_file_path=z_file_path, test_clock_zero="data start")

# COMPARE WITH ARCHIVED Z-FILE
auxilliary_z_file = PARKFIELD_PATHS["emtf_results"].joinpath("PKD_272_00.zrr")
auxilliary_z_file = PARKFIELD_PATHS["emtf_results"].joinpath(
"PKD_272_00.zrr"
)
if z_file_path.exists():
compare_two_z_files(
z_file_path,
Expand All @@ -89,7 +91,9 @@ def test():
xlims=[0.05, 500],
)
else:
logger.error("Z-File not found - Parkfield tests failed to generate output")
logger.error(
"Z-File not found - Parkfield tests failed to generate output"
)
logger.warning("NCEDC probably not returning data")


Expand Down
2 changes: 2 additions & 0 deletions tests/transfer_function/test_kernel_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ def test_remote_df(self):
self.assertListEqual(
list(self.kd.remote_df.station.unique()), ["test1"]
)
def test_processing_id(self):
    # The dataset fixture pairs local station "test1" with remote "test2",
    # so the generic label should be "<local>-rr_<remote>".
    result = self.kd.processing_id
    self.assertEqual(result, "test1-rr_test2")

# @classmethod
# def tearDownClass(self):
Expand Down

0 comments on commit 3a63d32

Please sign in to comment.