merge some doc from 337 branch to test
kkappler committed Jul 21, 2024
1 parent 4eca1e8 commit 13c3067
Showing 2 changed files with 146 additions and 52 deletions.
132 changes: 88 additions & 44 deletions aurora/config/config_creator.py
@@ -1,29 +1,47 @@
"""
Helper class to make config files.

Note: the config is still evolving and this class and its methods are expected to
change.
This module contains a helper class to make config files.

The processing config is still evolving and this class and its methods may change.

"""
from loguru import logger

from aurora.config.metadata.processing import Processing
from aurora.config import BANDS_DEFAULT_FILE
from mt_metadata.transfer_functions.processing.aurora.window import Window
from aurora.sandbox.io_helpers.emtf_band_setup import EMTFBandSetupFile
from loguru import logger
from mt_metadata.transfer_functions.processing.aurora.window import Window
from typing import Optional, Union
import pathlib

SUPPORTED_BAND_SPECIFICATION_STYLES = ["EMTF", "band_edges"]


class ConfigCreator:
def __init__(self, **kwargs):
self._emtf_band_file = kwargs.get("emtf_band_file", None)
self._band_edges = kwargs.get("band_edges", None)
def __init__(
self,
emtf_band_file: Optional[Union[str, pathlib.Path, None]] = None,
band_edges: Optional[Union[dict, None]] = None,
):
"""
Constructor

Parameters
----------
emtf_band_file: Optional[Union[str, pathlib.Path, None]]
Allows the specification of an EMTF "band setup file" for defining frequency bands.
band_edges: dict
Keys are integers corresponding to decimation level. Values are numpy arrays of
shape (n_bands, 2), one row per band: array[i_band, 0] is the lower edge of the
band and array[i_band, 1] is the upper edge.

"""
self._emtf_band_file = emtf_band_file
self._band_edges = band_edges
self._band_specification_style = None

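For context, a minimal sketch of how a band_edges dict of the shape described above might be built and passed to the new constructor; the decimation levels and edge values are illustrative only, not taken from this commit.

import numpy as np

from aurora.config.config_creator import ConfigCreator

# Hypothetical band edges: keys are decimation level ids, values are
# (n_bands, 2) arrays with [lower_edge, upper_edge] in Hz, one row per band.
band_edges = {
    0: np.array([[0.10, 0.20], [0.20, 0.40]]),
    1: np.array([[0.025, 0.05], [0.05, 0.10]]),
}

cc = ConfigCreator(band_edges=band_edges)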
def processing_id(self, kernel_dataset):
"""
Generates a string id label for the processing config: WIP.

In the past, we used f"{local}-{remote}" or f"{local}-{run_id}".
Neither of these is sufficiently unique. In fact, they only describe the
dataset, and not the processing config. It is difficult to see how to make a
@@ -37,31 +55,47 @@ def processing_id(self, kernel_dataset):

Parameters
----------
kernel_dataset
kernel_dataset: aurora.transfer_function.kernel_dataset.KernelDataset
An object that defines the data to be processed.

Returns
-------

id: str
A label for the processing config.
"""
id = f"{kernel_dataset.local_station_id}-{kernel_dataset.remote_station_id}"
return id

@property
def band_specification_style(self):
"""return a description of the scheme used to define the bands."""
return self._band_specification_style

@band_specification_style.setter
def band_specification_style(self, value):
def band_specification_style(self, value: str) -> None:
"""
Sets the band_specification_style

Parameters
----------
value: str
The label for the scheme used to define the bands.

Returns
-------

"""
if value not in SUPPORTED_BAND_SPECIFICATION_STYLES:
msg = f"Won't set band specification style to unrecognized value {value}"
logger.warning(msg)
raise NotImplementedError(msg)
# return
else:
self._band_specification_style = value

def determine_band_specification_style(self):
def determine_band_specification_style(self) -> None:
"""
Try to identify which scheme was used to define the bands.

TODO: Should emtf_band_file path be stored in config to support reproducibility?

"""
@@ -85,43 +119,55 @@ def create_from_kernel_dataset(
kernel_dataset,
input_channels=["hx", "hy"],
output_channels=["hz", "ex", "ey"],
estimator=None,
**kwargs,
estimator: Optional[Union[str, None]] = None,
emtf_band_file: Optional[Union[str, pathlib.Path, None]] = None,
band_edges: Optional[Union[dict, None]] = None,
decimation_factors: Optional[Union[list, None]] = None,
num_samples_window: Optional[Union[int, None]] = None,
):
"""
Hmmm, why not make this a method of kernel_dataset??
This creates a processing config from a kernel dataset.
TODO: Make this a method of kernel_dataset.

Early on we want to know how many decimation levels there will be.
This is defined either by:
1. decimation_factors argument (normally accompanied by a bands_dict)
2. number of decimations implied by EMTF band setup file.
Theoretically, you could also use the number of decimations implied by
bands_dict but this is sloppy, because it would be bad practice to assume
the decimation factor.

Notes:
1. 2022-09-10
The reading-in from EMTF band setup file used to be very terse, carried
some baked in assumptions about decimation factors, and did not acknowlege
some baked in assumptions about decimation factors, and did not acknowledge
specific frequency bands in Hz. I am adding some complexity to the method
that populates bands from EMTF band setup file but am now explicit about the
assumtion of decimation factors, and do provide the frequency bands in Hz.
assumption of decimation factors, and do provide the frequency bands in Hz.

The number of decimation levels must be defined either by:
1. decimation_factors argument (normally accompanied by a bands_dict)
2. number of decimations implied by EMTF band setup file.
Theoretically, you could also use the number of decimations implied by
bands_dict but this is sloppy, because it would assume the decimation factor.


Parameters
----------
kernel_dataset
emtf_band_file: while the default here is None, it will get assigned the
value BANDS_DEFAULT_FILE in the set_frequency_bands method if band edges is
also None.
input_channels
output_channels
estimator
band_edges
kwargs
kernel_dataset: aurora.transfer_function.kernel_dataset.KernelDataset
An object that defines the data to be processed.
input_channels: list
List of the input channels that will be used in TF estimation (usually "hx", "hy")
output_channels: list
List of the output channels that will be estimated by TF (usually "ex", "ey", "hz")
estimator: Optional[Union[str, None]] = None
The name of the regression estimator to use for TF estimation.
emtf_band_file: Optional[Union[str, pathlib.Path, None]] = None
The EMTF band setup file, if used.
band_edges: Optional[Union[dict, None]] = None
The band edges, if emtf_band_file is not used.
decimation_factors: Optional[Union[list, None]] = None
List of decimation factors, normally [1, 4, 4, 4, ..., 4]
num_samples_window: Optional[Union[int, None]] = None
The size of the window (usually for FFT).

Returns
-------
processing_obj: aurora.config.metadata.processing.Processing
Object storing the processing parameters.

"""

@@ -131,14 +177,11 @@ def create_from_kernel_dataset(
# pack station and run info into processing object
processing_obj.stations.from_dataset_dataframe(kernel_dataset.df)

# Unpack kwargs
self._emtf_band_file = kwargs.get("emtf_band_file", None)
self._band_edges = kwargs.get("band_edges", None)
decimation_factors = kwargs.get("decimation_factors", None)
num_samples_window = kwargs.get("num_samples_window", None)

# determine window parameters:
# check if they have been passed as kwargs, otherwise extract default values
# Unpack optional arguments
self._emtf_band_file = emtf_band_file
self._band_edges = band_edges
decimation_factors = decimation_factors
num_samples_window = num_samples_window

# Determine if band_setup or edges dict is to be used for bands
self.determine_band_specification_style()
@@ -150,6 +193,7 @@ def create_from_kernel_dataset(
filepath=self._emtf_band_file, sample_rate=kernel_dataset.sample_rate
)
num_decimations = emtf_band_setup_file.num_decimation_levels
# Assign optional arguments if they have not been passed
if decimation_factors is None:
# set default values to EMTF default values [1, 4, 4, 4, ..., 4]
decimation_factors = num_decimations * [4]
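Taken together, a usage sketch for the new explicit keyword arguments of create_from_kernel_dataset. It assumes a kernel_dataset object has already been built elsewhere (e.g., from an MTH5 run summary); with neither emtf_band_file nor band_edges supplied, the default EMTF band setup file (BANDS_DEFAULT_FILE) is expected to be used.

# Sketch only: `kernel_dataset` is assumed to exist already.
cc = ConfigCreator()
processing_config = cc.create_from_kernel_dataset(
    kernel_dataset,
    input_channels=["hx", "hy"],
    output_channels=["hz", "ex", "ey"],
)
# Per the docstring, the return value is an
# aurora.config.metadata.processing.Processing object.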
66 changes: 58 additions & 8 deletions aurora/config/metadata/processing.py
@@ -1,27 +1,46 @@
# -*- coding: utf-8 -*-
"""
Extend the Processing class with some aurora-specific methods
Extend the mt_metadata.transfer_functions.processing.aurora.processing.Processing class
with some aurora-specific methods.
"""
import pathlib

# =============================================================================
# Imports
# =============================================================================
import pandas as pd

from aurora.time_series.windowing_scheme import window_scheme_from_decimation
from loguru import logger
from mt_metadata.transfer_functions.processing.aurora.processing import Processing
from mt_metadata.utils.list_dict import ListDict
from loguru import logger
from typing import Optional, Union
import pandas as pd


class Processing(Processing):
def __init__(self, **kwargs):
"""
Constructor

Parameters
----------
kwargs
Keyword arguments passed through to the parent Processing class constructor.
"""
# super().__init__(attr_dict=attr_dict, **kwargs)
super().__init__(**kwargs)

def window_scheme(self, as_type="df"):
"""
Make a dataframe of processing parameters, one row per decimation level.

Parameters
----------
as_type: Optional[str]
if "df" return a dataframe, if "dict" return dict

Returns
-------
windowing: Union[dict, pd.DataFrame]
return type depends on as_type argument.

"""
window_schemes = [window_scheme_from_decimation(x) for x in self.decimations]
@@ -45,20 +64,49 @@ def window_scheme(self, as_type="df"):
raise TypeError

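A short usage sketch for window_scheme, assuming a populated Processing object named processing_obj:

# One row per decimation level, with windowing parameters as columns.
windowing_df = processing_obj.window_scheme(as_type="df")

# Per the docstring, the same information can be returned as a dict.
windowing_dict = processing_obj.window_scheme(as_type="dict")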
def decimation_info(self):
"""
Zips decimation level ids to decimation factors and returns them as a dict.

Returns
-------
decimation_info: dict
The decimation factors keyed by decimation level id.
"""
decimation_ids = [x.decimation.level for x in self.decimations]
decimation_factors = [x.decimation.factor for x in self.decimations]
decimation_info = dict(zip(decimation_ids, decimation_factors))
return decimation_info

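For illustration, with the EMTF default decimation factors mentioned in config_creator.py ([1, 4, 4, 4, ..., 4]), the returned mapping would look something like the sketch below; the exact keys and values depend on the configured decimations.

decimation_info = processing_obj.decimation_info()
# e.g. {0: 1, 1: 4, 2: 4, 3: 4} -- decimation factor keyed by decimation level id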
def save_as_json(self, filename=None, nested=True, required=False):
def save_as_json(
self,
filename: Optional[Union[str, pathlib.Path, None]] = None,
nested: Optional[bool] = True,
required: Optional[bool] = False,
) -> None:
"""
Exports self to a JSON file.

Parameters
----------
filename: Optional[Union[str, pathlib.Path, None]]
Where to write the JSON. If None, defaults to self.json_fn().
nested: Optional[bool] = True
An mt_metadata argument, passed through to to_json.
required: Optional[bool] = False
An mt_metadata argument, passed through to to_json.

"""
if filename is None:
filename = self.json_fn()
json_str = self.to_json(nested=nested, required=required)
with open(filename, "w") as f:
f.write(json_str)

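A usage sketch for the new save_as_json signature; the filename below is hypothetical, and omitting it falls back on self.json_fn() as the code above shows.

import pathlib

processing_obj.save_as_json(filename=pathlib.Path("example_processing_config.json"))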
def emtf_tf_header(self, dec_level_id):
def emtf_tf_header(self, dec_level_id: int) -> ListDict:
"""
Returns a ListDict object that has the information that was in the old EMTF TF
Header object. This may be deprecated in the future -- it is an artefact of the
old Matlab implementation.

Parameters
----------
@@ -67,7 +115,9 @@ def emtf_tf_header(self, dec_level_id):

Returns
-------
tfh: mt_metadata.transfer_functions.processing.aurora.transfer_function_header.TransferFunctionHeader
tfh: ListDict
Object with the properties of the old EMTF TransferFunctionHeader class.

"""
tfh = ListDict()
tfh.processing_scheme = self.decimations[dec_level_id].estimator.engine
@@ -80,7 +130,7 @@ def emtf_tf_header(self, dec_level_id):

return tfh

def make_tf_level(self, dec_level_id):
def make_tf_level(self, dec_level_id: int):
"""
Initialize container for a single decimation level -- "flat" transfer function.

@@ -107,7 +157,7 @@ def make_tf_level(self, dec_level_id):

class EMTFTFHeader(ListDict):
"""
Convenince class for storing metadata for a TF estimate.
Convenience class for storing metadata for a TF estimate.
Based on Gary Egbert's TFHeader.m originally in
iris_mt_scratch/egbert_codes-20210121T193218Z-001/egbert_codes/matlabPrototype_10-13-20/TF/classes

