From cf88f7c09303f7229e2f1fd67dedf2f3ae94e901 Mon Sep 17 00:00:00 2001
From: "Karl N. Kappler"
Date: Mon, 22 Jul 2024 18:11:07 -0700
Subject: [PATCH] merge from #337 branch for testing

---
 aurora/test_utils/dataset_definitions.py      | 59 ++++++------
 aurora/test_utils/mth5/fc_helpers.py          | 31 +++++--
 .../parkfield/calibration_helpers.py          | 31 ++++---
 .../parkfield/make_parkfield_mth5.py          | 26 +++++-
 aurora/test_utils/parkfield/path_helpers.py   | 14 ++-
 .../synthetic/make_mth5_from_asc.py           | 76 +++++++++++++--
 .../synthetic/make_processing_configs.py      | 52 +++++++++--
 aurora/test_utils/synthetic/paths.py          | 10 +-
 .../synthetic/processing_helpers.py           | 28 +++---
 aurora/test_utils/synthetic/rms_helpers.py    | 37 ++++++--
 aurora/test_utils/synthetic/station_config.py | 93 +++++++++++++------
 11 files changed, 335 insertions(+), 122 deletions(-)

diff --git a/aurora/test_utils/dataset_definitions.py b/aurora/test_utils/dataset_definitions.py
index fe1669de..9c184b89 100644
--- a/aurora/test_utils/dataset_definitions.py
+++ b/aurora/test_utils/dataset_definitions.py
@@ -1,10 +1,16 @@
+"""
+    This module contains methods that are used to define datasets to be built from FDSN servers.
+
+    These datasets are in turn used for testing.
+
+"""
 from obspy import UTCDateTime
 from aurora.sandbox.io_helpers.fdsn_dataset import FDSNDataset


-def make_pkdsao_test_00_config(minitest=False):
+def make_pkdsao_test_00_config(minitest=False) -> FDSNDataset:
     """
-    Populate a FDSNDataset() object for 2h of 40Hz data
+    Return a description of a 2h PKD SAO 40Hz dataset from NCEDC.

     Parameters
     ----------
@@ -30,7 +36,14 @@
     return test_data_set


-def make_cas04_nvr08_test_00_config():
+def make_cas04_nvr08_test_00_config() -> FDSNDataset:
+    """
+    Return a description of a CAS04,NVR08 dataset from IRIS.
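+
+    Usage sketch (the attribute value shown mirrors the assignment in the body below):
+
+    >>> ds = make_cas04_nvr08_test_00_config()
+    >>> ds.station
+    'CAS04,NVR08'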
+
+    Returns
+    -------
+
+    """
     test_data_set = FDSNDataset()
     test_data_set.dataset_id = "cas_nvr_test_00"
     test_data_set.network = "ZU"
@@ -52,7 +65,8 @@
     return test_data_set


-def make_iak34_test_00_config():
+def make_iak34_test_00_config() -> FDSNDataset:
+    """Return a description of an IAK34 dataset from IRIS."""
     test_data_set = FDSNDataset()
     test_data_set.dataset_id = "iak34_test_00"
     test_data_set.network = "EM"
@@ -70,7 +84,8 @@
     return test_data_set


-def make_iak34_test_01_config():
+def make_iak34_test_01_config() -> FDSNDataset:
+    """Return a description of an IAK34 dataset from IRIS."""
     test_data_set = FDSNDataset()
     test_data_set.dataset_id = "iak34_test_01_long_ss"
     test_data_set.network = "EM"
@@ -87,7 +102,8 @@
     return test_data_set


-def make_iak34_test_02_config():
+def make_iak34_test_02_config() -> FDSNDataset:
+    """Return a description of an IAK34 dataset from IRIS."""
     test_data_set = FDSNDataset()
     test_data_set.dataset_id = "iak34_test_02_long_rr"
     test_data_set.network = "EM"
@@ -104,7 +120,8 @@
     return test_data_set


-def make_iak34_test_03_config():
+def make_iak34_test_03_config() -> FDSNDataset:
+    """Return a description of an IAK34 dataset from IRIS."""
     test_data_set = FDSNDataset()
     test_data_set.dataset_id = "iak34_test_03_long_rr"
     test_data_set.network = "EM"
@@ -121,7 +138,8 @@
     return test_data_set


-def make_iak34_test_04_config():
+def make_iak34_test_04_config() -> FDSNDataset:
+    """Return a description of an IAK34 dataset from IRIS."""
     test_data_set = FDSNDataset()
     test_data_set.dataset_id = "iak34_test_04_rr"
     test_data_set.network = "EM"
@@ -138,29 +156,8 @@
     return test_data_set


-# def make_iak34_nen34_test_00_config():
-#     test_data_set = FDSNDataset()
-#     test_data_set.dataset_id = "iak34_nen34_test_00"
-#     test_data_set.network = "ZU"
-#     test_data_set.station = "IAK34,NEN34"
-#     #
-#     # test_data_set.starttime = UTCDateTime("2020-06-02T18:41:43.000000Z")
-#     # test_data_set.endtime = UTCDateTime("2020-07-13T21:46:12.000000Z")
-#     #
-#     test_data_set.starttime = UTCDateTime("2020-06-04T00:00:00.000000Z")
-#     test_data_set.endtime = UTCDateTime("2020-06-05T00:00:00.000000Z")  # minitest
-#     # test_data_set.endtime = UTCDateTime("2020-06-24T15:55:46.000000Z")
-#
-#     # test_data_set.starttime = UTCDateTime("2004-09-28T00:00:00.000000Z")
-#     # test_data_set.endtime = UTCDateTime("2004-09-28T01:59:59.975000Z")
-#     # test_data_set.endtime = UTCDateTime("2004-09-28T00:01:59.999000Z") #small test
-#     test_data_set.channel_codes = None
-#     test_data_set.description = "earthscope example dataset"
-#     test_data_set.components_list = ["hx", "hy", "ex", "ey"]
-#     return test_data_set
-
-
-def make_test_configs():
+def make_test_configs() -> dict:
+    """Make all the test dataset configs and put them in a dict."""
     test_data_set_configs = {}

     # pkd_sao_test_00 Remote Reference
diff --git a/aurora/test_utils/mth5/fc_helpers.py b/aurora/test_utils/mth5/fc_helpers.py
index 5f0ef501..96f692e3 100644
--- a/aurora/test_utils/mth5/fc_helpers.py
+++ b/aurora/test_utils/mth5/fc_helpers.py
@@ -1,22 +1,35 @@
+"""
+    This module contains functions used by tests that involve Fourier coefficients in MTH5.
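+
+    Example (a sketch -- the csv file name here is hypothetical):
+
+    >>> from aurora.test_utils.mth5.fc_helpers import read_fc_csv
+    >>> xrds = read_fc_csv("fourier_coefficients.csv")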
+""" + +from typing import Optional, Union import numpy as np import pandas as pd +import pathlib +import xarray as xr -def read_fc_csv(csv_name, as_xarray=True): +def read_fc_csv( + csv_name: Union[pathlib.Path, str], as_xarray: Optional[bool] = True +) -> Union[xr.Dataset, pd.DataFrame]: """ + + Load Fourier coefficients from a csv file and return as xarray or dataframe + Usage: xrds_obj = read_fc_csv(csv_name) df = read_fc_csv(csv_name, as_xarry=False) - xrds = - Returns a data + Parameters ---------- - csv_name: str or pathlib.Path - as_xarray: bool' + csv_name: Union[pathlib.Path, str] + Path to csv file to read + as_xarray: Optional[bool] + If true return xr.Dataset Returns ------- - + output: xr.Dataset or pd.DataFrame """ df = pd.read_csv( csv_name, @@ -29,7 +42,7 @@ def read_fc_csv(csv_name, as_xarray=True): for col in df.columns: df[col] = np.complex128(df[col]) if as_xarray: - xrds_out = df.to_xarray() - return xrds_out + output = df.to_xarray() else: - return df + output = df + return output diff --git a/aurora/test_utils/parkfield/calibration_helpers.py b/aurora/test_utils/parkfield/calibration_helpers.py index 738afc91..f0f8dc27 100644 --- a/aurora/test_utils/parkfield/calibration_helpers.py +++ b/aurora/test_utils/parkfield/calibration_helpers.py @@ -1,3 +1,6 @@ +""" + This module contains methods that are used in the Parkfield calibration tests. +""" import matplotlib.pyplot as plt import numpy as np @@ -8,25 +11,26 @@ plt.ion() -def load_bf4_fap_for_parkfield_test_using_mt_metadata(frequencies): +def load_bf4_fap_for_parkfield_test_using_mt_metadata(frequencies: np.ndarray): """ - The hardware repsonses (AAF and digitizer) are not included in this response, - but these do not make any significant difference away from the Nyquist frequecny. + Loads a csv format response file for a BF4 coil and return the calibration function. + Uses an mt_metadata filter object. - Near the Nyquist calibration is inadequate anyhow. Looking at the output plots, - which show the "full calibration" vs "response table (EMI)", neither one is - realistic at high frequency. The fap ("response table (EMI)") curve does not - compensate for AAF and plunges down at high frequency. The full calibration - from the PZ response on the other hand rises unrealistically. The PZ rising - signal amplitude at high frequency is an artefact of calibrating noise. + - Anti-alias filter and digitizer responses are not included in the csv -- it is coil only. + - We ignore the AAF, and hard-code a counts-per-volt value for now + + Development Notes: + TODO: Add doc showing where counts per volt is accessing in FDSN metadata. 

     Parameters
     ----------
-    frequencies : numpy array
-        Array of frequencies at which to evaluate the bf response function
+    frequencies: np.ndarray
+        Frequencies at which to evaluate the bf response function
+
     Returns
     -------
-
+    bf4_resp: np.ndarray
+        Complex response of the filter at the input frequencies
     """
     from aurora.general_helper_functions import DATA_PATH
     from mt_metadata.timeseries.filters.helper_functions import (
@@ -49,6 +53,7 @@
     show_response_curves,
 ):
     """
+    Makes a sanity check plot to show the response of the calibration curves

     Parameters
     ----------
@@ -99,7 +104,7 @@
     include_decimation=False,
 ):
     """
-    loop over channels in fft obj and make calibrated spectral plots
+    Loop over channels in fft obj and make calibrated spectral plots

     Parameters
     ----------
diff --git a/aurora/test_utils/parkfield/make_parkfield_mth5.py b/aurora/test_utils/parkfield/make_parkfield_mth5.py
index 55d7d1d7..80a0eb75 100644
--- a/aurora/test_utils/parkfield/make_parkfield_mth5.py
+++ b/aurora/test_utils/parkfield/make_parkfield_mth5.py
@@ -1,7 +1,10 @@
 """
-Create Parkfield / Hollister mth5 to use as test data
+    This module contains methods for building an MTH5 file from data at Parkfield (PKD) and Hollister
+    (SAO) long-term monitoring stations to use as test data.
 """

+import pathlib
+
 from aurora.test_utils.dataset_definitions import TEST_DATA_SET_CONFIGS
 from mth5.utils.helpers import read_back_data
 from mth5.helpers import close_open_files
@@ -16,6 +19,16 @@


 def select_data_source():
+    """
+    Identifies an appropriate web client to use for NCEDC data requests.
+
+    This was used for debugging data access issues in the past -- may no longer be needed.
+
+    Returns
+    -------
+    data_source: str
+        A responsive NCEDC client.
+    """
     from obspy.clients.fdsn import Client

     ok = False
@@ -35,7 +48,9 @@


 def make_pkdsao_mth5(fdsn_dataset):
-    """ """
+    """
+    Makes an MTH5 file with data from Parkfield and Hollister stations to use for testing.
+    """
     close_open_files()
     fdsn_dataset.data_source = select_data_source()
     fdsn_dataset.initialize_client()
@@ -51,12 +66,14 @@
     return h5_path


-def ensure_h5_exists():
+def ensure_h5_exists() -> pathlib.Path:
     """
+    Make sure that the PKD SAO MTH5 file exists. If it does not, build it.

     Returns
     -------
-
+    h5_path: pathlib.Path
+        The path to the PKD SAO mth5 file to be used for testing.
     """
     h5_path = PARKFIELD_PATHS["data"].joinpath(FDSN_DATASET.h5_filebase)

@@ -73,6 +90,7 @@


 def main():
+    """Allows the MTH5 file to be built by calling this module from the command line."""
     make_pkdsao_mth5(FDSN_DATASET)


diff --git a/aurora/test_utils/parkfield/path_helpers.py b/aurora/test_utils/parkfield/path_helpers.py
index 1cc73c92..b0af20d6 100644
--- a/aurora/test_utils/parkfield/path_helpers.py
+++ b/aurora/test_utils/parkfield/path_helpers.py
@@ -1,7 +1,19 @@
+"""
+    This module contains helper functions to control where the Parkfield test data
+    and test results are stored/accessed.
+"""
 from aurora.general_helper_functions import DATA_PATH


-def make_parkfield_paths():
+def make_parkfield_paths() -> dict:
+    """
+    Makes a dictionary with information about where to store/access PKD test data and results.
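+
+    Usage sketch:
+
+    >>> paths = make_parkfield_paths()
+    >>> data_dir = paths["data"]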
+
+    Returns
+    -------
+    parkfield_paths: dict
+        Dict containing paths to "data", "aurora_results", "config", "emtf_results"
+    """
     base_path = DATA_PATH.joinpath("parkfield")
     parkfield_paths = {}
     parkfield_paths["data"] = base_path
diff --git a/aurora/test_utils/synthetic/make_mth5_from_asc.py b/aurora/test_utils/synthetic/make_mth5_from_asc.py
index 5882dc28..b6c422ce 100644
--- a/aurora/test_utils/synthetic/make_mth5_from_asc.py
+++ b/aurora/test_utils/synthetic/make_mth5_from_asc.py
@@ -17,9 +17,11 @@
 data/test12rr_LEMI34.h5
 data/test1_LEMI12.h5

-- 20231103: Added an 8Hz upsampled version of test1. No spectral content was added
+- 20231103: Added an 8Hz up-sampled version of test1. No spectral content was added
 so the band between the old and new Nyquist frequencies is bogus.

+Notes: Work in progress -- this module is being migrated to MTH5.
+
 """
 # import inspect
 import numpy as np
@@ -47,11 +49,9 @@
 MTH5_PATH = synthetic_test_paths.mth5_path


-def create_run_ts_from_synthetic_run(run, df, channel_nomenclature="default"):
+def create_run_ts_from_synthetic_run(run, df, channel_nomenclature="default") -> RunTS:
     """
-    Loop over stations and make ChannelTS objects.
-    Need to add a tag in the channels
-    so that when you call a run it will get all the filters with it.
+    Loop over channels of synthetic data in df and make ChannelTS objects.

     Parameters
     ----------
@@ -67,7 +67,8 @@

     Returns
     -------
-
+    runts: RunTS
+        MTH5 run time series object, data and metadata bound into one.
     """

     channel_nomenclature_obj = ChannelNomenclature()
@@ -127,12 +128,16 @@
 def get_time_series_dataframe(run, source_folder, add_nan_values):
     """
+    Returns time series data in a dataframe with columns named for EM field component.
+
     Parameters
     ----------
     run: aurora.test_utils.synthetic.station_config.SyntheticRun
         Information needed to define/create the run
     source_folder: pathlib.Path, or null
+        Where to load the ascii time series from
+    add_nan_values: bool
+        If True, add some NaN values; if False, do not.

     Up-samples data to run.sample_rate, which is treated as an integer. Only tested
     for 8, to make 8Hz data for testing. If run.sample_rate is the default (1.0)
     no up-sampling is applied.

     Returns
     -------
     df: pandas.DataFrame
-        The time series data for the synthetic run
+        The time series data for the synthetic run
     """
     # point to the ascii time series
     if source_folder:
@@ -186,6 +191,7 @@ def create_mth5_synthetic_file(
     force_make_mth5=True,
 ):
     """
+    Creates an MTH5 from synthetic data

     Parameters
     ----------
@@ -284,6 +290,21 @@ def create_test1_h5(
     source_folder="",
     force_make_mth5=True,
 ):
+    """
+    Creates an MTH5 file for a single station named "test1".
+
+    Parameters
+    ----------
+    file_version
+    channel_nomenclature
+    target_folder
+    source_folder
+    force_make_mth5
+
+    Returns
+    -------
+
+    """
     station_01_params = make_station_01(channel_nomenclature=channel_nomenclature)
     mth5_name = station_01_params.mth5_name
     station_params = [
@@ -309,6 +330,9 @@ def create_test2_h5(
     target_folder=MTH5_PATH,
     source_folder="",
 ):
+    """
+    Creates an MTH5 file for a single station named "test2".
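+
+    A sketch of typical usage (mirrors create_test1_h5; the built file's path is returned):
+
+    >>> mth5_path = create_test2_h5()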
+    """
     station_02_params = make_station_02(channel_nomenclature=channel_nomenclature)
     mth5_name = station_02_params.mth5_name
     station_params = [
@@ -332,6 +356,9 @@ def create_test1_h5_with_nan(
     target_folder=MTH5_PATH,
     source_folder="",
 ):
+    """
+    Creates an MTH5 file for a single station named "test1" with some nan values.
+    """
     station_01_params = make_station_01(channel_nomenclature=channel_nomenclature)
     mth5_name = station_01_params.mth5_name
     station_params = [
@@ -355,6 +382,9 @@ def create_test12rr_h5(
     target_folder=MTH5_PATH,
     source_folder=None,
 ):
+    """
+    Creates an MTH5 file with data from two stations named "test1" and "test2".
+    """
     station_01_params = make_station_01(channel_nomenclature=channel_nomenclature)
     station_02_params = make_station_02(channel_nomenclature=channel_nomenclature)
     station_params = [station_01_params, station_02_params]
@@ -378,6 +408,10 @@ def create_test3_h5(
     target_folder=MTH5_PATH,
     source_folder="",
 ):
+    """
+    Creates an MTH5 file for a single station named "test3".
+    This example has several runs and can be used to test looping over runs.
+    """
     station_03_params = make_station_03(channel_nomenclature=channel_nomenclature)
     station_params = [
         station_03_params,
@@ -399,7 +433,13 @@ def create_test4_h5(
     target_folder=MTH5_PATH,
     source_folder="",
 ):
-    """8Hz data kluged from the 1Hz ... only freqs below 0.5Hz will make sense (100 Ohmm and 45deg)"""
+    """
+    Creates an MTH5 file for a single station named "test1"; data are up-sampled to 8Hz from
+    the original 1Hz.
+
+    Note: Because the 8Hz data are derived from the 1Hz, only frequencies below 0.5Hz
+    will have valid TFs that yield the apparent resistivity of the synthetic data (100 Ohm-m).
+    """
     station_04_params = make_station_04(channel_nomenclature=channel_nomenclature)
     mth5_path = create_mth5_synthetic_file(
         [
@@ -415,7 +455,22 @@
     return mth5_path


-def _get_set_survey_id(m):
+def _get_set_survey_id(m: MTH5) -> tuple:
+    """
+    Given an open mth5 file (m), set the survey ID and return it as a string,
+    as well as the (modified) mth5 object.
+
+    Parameters
+    ----------
+    m: MTH5
+        The mth5 object to set the survey ID for.
+
+    Returns
+    -------
+    (m, survey_id): tuple
+        m is the (modified) MTH5
+        survey_id is a string
+    """
     if m.file_version == "0.1.0":
         survey_id = None
     elif m.file_version == "0.2.0":
@@ -428,6 +483,7 @@


 def main(file_version="0.1.0"):
+    """Allow the module to be called from the command line"""
     file_version = "0.2.0"
     # create_test1_h5(file_version=file_version)
     # create_test1_h5_with_nan(file_version=file_version)
diff --git a/aurora/test_utils/synthetic/make_processing_configs.py b/aurora/test_utils/synthetic/make_processing_configs.py
index 222543da..c192ac34 100644
--- a/aurora/test_utils/synthetic/make_processing_configs.py
+++ b/aurora/test_utils/synthetic/make_processing_configs.py
@@ -1,3 +1,7 @@
+"""
+    This module contains methods for generating processing config objects that are
+    used in aurora's tests of processing synthetic data.
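+
+    Example (a sketch -- the second argument, a kernel dataset built from the
+    synthetic data, is assumed to exist beforehand, as in the commented calls
+    in main() below):
+
+    >>> config = create_test_run_config("test1", kernel_dataset)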
+""" from aurora.config import BANDS_DEFAULT_FILE from aurora.config import BANDS_256_26_FILE from aurora.config.config_creator import ConfigCreator @@ -128,19 +132,29 @@ def create_test_run_config( def test_to_from_json(): """ - Test related to issue #172 - This is deprecated in its current form, but should be modified to save the json - from the processing object (not the config class) - Trying to save to json and then read back a Processing object + Intended to test that processing config can be stored as a json, then + reloaded from json and is equal. - Start by manually creating the dataset_df for syntehtic test1 + WORK IN PROGRESS -- see mt_metadata Issue #222 + + Development Notes + TODO: This test should be completed and moved into tests. + - The json does not load into an mt_metadata object + - The problem seems to be that at the run-level of the processing config + there is an intention to allow for multiple time-periods. + - This is reasonable, consider a station running for several months, + we may want to only process data from certain chunks of the time series. + - However, the time period reader does not seem to work as expected. + - A partial fix is on fix_issue_222 branch of mt_metadata + + Related to issue #172 Returns ------- """ - import pandas as pd + # import pandas as pd from mt_metadata.transfer_functions.processing.aurora import Processing from aurora.pipelines.run_summary import RunSummary from aurora.transfer_function.kernel_dataset import KernelDataset @@ -164,10 +178,36 @@ def test_to_from_json(): json_fn = CONFIG_PATH.joinpath(processing_config.json_fn()) p.from_json(json_fn) logger.info("Assert equal needed here") + # This fails (July 22, 2024) + # assert p == processing_config + + # This should be true, but its false + # p.stations.local.runs == processing_config.stations.local.runs + # p.stations.local.runs[0] == processing_config.stations.local.runs[0] + + """ + Debugging Notes: + Once the updated parsing from #222 is applied, the next problem is that the object that was + read-back from json has two dicts in its time periods: + p.stations.local.runs[0].time_periods + [{'time_period': {'end': '1980-01-01T11:06:39+00:00', 'start': '1980-01-01T00:00:00+00:00'}}, + {'time_period': {'end': '1980-01-01T11:06:39+00:00', 'start': '1980-01-01T00:00:00+00:00'}}] + processing_config.stations.local.runs[0].time_periods + [{ + "time_period": { + "end": "1980-01-01T11:06:39+00:00", + "start": "1980-01-01T00:00:00+00:00" + } + }] + """ return def main(): + """Allow the module to be called from the command line""" + pass + # TODO: fix test_to_from_json and put in tests. + # - see issue #222 in mt_metadata. test_to_from_json() # create_test_run_config("test1", df) # create_test_run_config("test2") diff --git a/aurora/test_utils/synthetic/paths.py b/aurora/test_utils/synthetic/paths.py index bcf8a01c..5f67aa1a 100644 --- a/aurora/test_utils/synthetic/paths.py +++ b/aurora/test_utils/synthetic/paths.py @@ -1,8 +1,11 @@ """ -Sets up paths for synthetic data testing. +This module contains a class that helps manage data paths for testing aurora on synthetic data. -The DATA_PATH from general_helper_functions has traditionally had the -synthetic ascii data, but this is now stored in MTH5. +Development Notes: + - The DATA_PATH from general_helper_functions has traditionally had the + synthetic ascii data, but this is now stored in MTH5. + - This class was built to handle Issue #303 (installation on read-only file system). 
+   https://github.com/simpeg/aurora/issues/303
 """

 import pathlib
@@ -29,6 +32,7 @@ class SyntheticTestPaths:

     def __init__(self, sandbox_path=None, ascii_data_path=None):
         """
+        Constructor

         Parameters
         ----------
diff --git a/aurora/test_utils/synthetic/processing_helpers.py b/aurora/test_utils/synthetic/processing_helpers.py
index ddf7c7d8..84519866 100644
--- a/aurora/test_utils/synthetic/processing_helpers.py
+++ b/aurora/test_utils/synthetic/processing_helpers.py
@@ -1,11 +1,17 @@
+"""
+    This module contains some helper functions that are called during the
+    execution of aurora's tests of processing on synthetic data.
+"""
+import mt_metadata.transfer_functions
+import pathlib
 from aurora.pipelines.process_mth5 import process_mth5
 from aurora.test_utils.synthetic.make_mth5_from_asc import create_test1_h5


 def get_example_kernel_dataset():
     """
-    Some tests could benefit from having a ready-made kernel dataset object.
-    This creates one from the synthetic data.
+    Creates a kernel dataset object from the synthetic data
+    - Helper function for synthetic tests.

     Returns
     -------
@@ -31,8 +37,14 @@
     return kernel_dataset


-def tf_obj_from_synthetic_data(mth5_path):
-    """Helper function for test_issue_139"""
+def tf_obj_from_synthetic_data(
+    mth5_path: pathlib.Path,
+) -> mt_metadata.transfer_functions.TF:
+    """
+    Executes aurora processing on mth5_path, and returns mt_metadata TF object.
+    - Helper function for test_issue_139
+
+    """
     from aurora.config.config_creator import ConfigCreator
     from aurora.pipelines.run_summary import RunSummary
     from aurora.transfer_function.kernel_dataset import KernelDataset
@@ -54,11 +66,3 @@
         z_file_path="test1_RRtest2.zrr",
     )
     return tf_cls
-
-
-# def main():
-#     kd = get_example_kernel_dataset()
-#     return
-#
-# if __name__ == "__main__":
-#     main()
diff --git a/aurora/test_utils/synthetic/rms_helpers.py b/aurora/test_utils/synthetic/rms_helpers.py
index 75e35114..7a8f26b8 100644
--- a/aurora/test_utils/synthetic/rms_helpers.py
+++ b/aurora/test_utils/synthetic/rms_helpers.py
@@ -1,13 +1,20 @@
+"""
+    This module contains methods associated with RMS calculations that are used in testing
+    aurora processing on synthetic data.
+
+"""
 import numpy as np
 from loguru import logger


 def compute_rms(rho, phi, model_rho_a=100.0, model_phi=45.0, verbose=False):
     """
-    This function being used to make comparative plots for synthetic data. Could be
-    used in general to compare different processing results. For example by replacing
-    model_rho_a and model_phi with other processing results, or other (
-    non-uniform) model results.
+    Computes the RMS between processing results (rho, phi) and model (rho, phi).
+
+    It is used to make annotations for comparative plots for synthetic data. Could be
+    used in general to compare different processing results. For example by replacing
+    model_rho_a and model_phi with other processing results, or other (non-uniform)
+    model results.

     Parameters
     ----------
@@ -34,7 +41,21 @@
     return rho_rms, phi_rms


-def get_expected_rms_misfit(test_case_id, emtf_version=None):
+def get_expected_rms_misfit(test_case_id: str, emtf_version=None) -> dict:
+    """
+    Returns hard-coded expected results from synthetic data processing.
+    These results are a benchmark against which test results are compared on push to GitHub.
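+
+    Usage sketch ("test1" is assumed to be among the handled test case ids):
+
+    >>> expected = get_expected_rms_misfit("test1")
+    >>> expected_rho, expected_phi = expected["rho"], expected["phi"]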
+ + Parameters + ---------- + test_case_id + emtf_version + + Returns + ------- + + """ expected_rms_misfit = {} expected_rms_misfit["rho"] = {} expected_rms_misfit["phi"] = {} @@ -65,8 +86,10 @@ def assert_rms_misfit_ok( phi_rms_aurora, rho_tol=1e-4, phi_tol=1e-4, -): +) -> None: """ + Compares actual RMS misfit from processing against expected values. + Raises Assertion errors if test processing results different from expected. Parameters ---------- @@ -90,7 +113,7 @@ def assert_rms_misfit_ok( logger.error(rho_rms_aurora - expected_rms_rho) raise AssertionError("Expected misfit for resistivity is not correct") - if not np.isclose(phi_rms_aurora - expected_rms_phi, 0, atol=rho_tol): + if not np.isclose(phi_rms_aurora - expected_rms_phi, 0, atol=phi_tol): logger.error("==== AURORA ====\n") logger.error(phi_rms_aurora) logger.error("==== EXPECTED ====\n") diff --git a/aurora/test_utils/synthetic/station_config.py b/aurora/test_utils/synthetic/station_config.py index 9e197c53..2a36bee8 100644 --- a/aurora/test_utils/synthetic/station_config.py +++ b/aurora/test_utils/synthetic/station_config.py @@ -1,7 +1,7 @@ """ -Definitions used in the creation of synthetic mth5 files. - +This module contains helper functions for the creation of synthetic mth5 files. +Development Notes: Survey level: 'mth5_path', Path to output h5 Station level: 'station_id', name of the station Station level:'latitude':17.996 @@ -15,7 +15,10 @@ Run level: 'sample_rate', 1.0 """ -from typing import Dict, List, Union +import pathlib +from typing import Dict, List, Optional, Union + +import mt_metadata.timeseries from aurora.general_helper_functions import get_mth5_ascii_data_path from aurora.test_utils.synthetic.paths import SyntheticTestPaths @@ -28,10 +31,12 @@ synthetic_test_paths = SyntheticTestPaths() -def make_filters(as_list=False): +def make_filters(as_list: bool = False) -> Union[Dict, List]: """ + Returns some dummy, placeholder filters. + Because the data from EMTF is already in mV/km and nT these filters are just - placeholders to show where they would get assigned. + placeholders to show how to add them to the MTH5. Parameters ---------- @@ -40,7 +45,7 @@ def make_filters(as_list=False): Returns ------- - filters_list: Union[List, Dict] + filters: Union[List, Dict] filters that can be used to populate the filters lists of synthetic data """ unity_coeff_filter = make_coefficient_filter(name="1", gain=1.0) @@ -48,13 +53,13 @@ def make_filters(as_list=False): divide_by_10_filter = make_coefficient_filter(gain=0.1, name="0.1") if as_list: - return [unity_coeff_filter, multipy_by_10_filter, divide_by_10_filter] + filters = [unity_coeff_filter, multipy_by_10_filter, divide_by_10_filter] else: filters = {} filters["1x"] = unity_coeff_filter filters["10x"] = multipy_by_10_filter filters["0.1x"] = divide_by_10_filter - return filters + return filters FILTERS = make_filters() @@ -67,30 +72,51 @@ class SyntheticRun(object): Initially this class worked only with the synthetic ASCII data from legacy EMTF. 
""" - def __init__(self, id, **kwargs): + def __init__( + self, + id: str, + sample_rate: Optional[float] = 1.0, + raw_data_path: Optional[Union[str, pathlib.Path, None]] = None, + channel_nomenclature: Optional[str] = "default", + channels: Optional[Union[List, None]] = None, + noise_scalars: Optional[Union[Dict, None]] = None, + nan_indices: Optional[Union[Dict, None]] = None, + filters: Optional[Union[Dict, None]] = None, + start: Optional[Union[str, None]] = None, + ): + """ + Constructor + + Parameters + ---------- + id + + """ run_metadata = Run() run_metadata.id = id - run_metadata.sample_rate = kwargs.get("sample_rate", 1.0) + run_metadata.sample_rate = sample_rate - self.raw_data_path = kwargs.get("raw_data_path", None) + self.raw_data_path = raw_data_path # set channel names self._channel_map = None - self.channel_nomenclature_keyword = kwargs.get( - "channel_nomenclature", "default" - ) + self.channel_nomenclature_keyword = channel_nomenclature self.set_channel_map() - self.channels = kwargs.get("channels", list(self.channel_map.values())) + if channels is None: + self.channels = list(self.channel_map.values()) + else: + self.channels = channels - self.noise_scalars = kwargs.get("noise_scalars", None) - self.nan_indices = kwargs.get("nan_indices", {}) - self.filters = kwargs.get("filters", {}) - self.start = kwargs.get("start", None) + self.noise_scalars = noise_scalars + self.nan_indices = nan_indices + self.filters = filters + self.start = start if self.noise_scalars is None: self.noise_scalars = {} for channel in self.channels: self.noise_scalars[channel] = 0.0 + # run_metadata.add_base_attribute("") self.run_metadata = run_metadata @@ -143,8 +169,9 @@ def __init__(self, id, **kwargs): self.mth5_name = kwargs.get("mth5_name", None) -def make_station_01(channel_nomenclature="default"): +def make_station_01(channel_nomenclature="default") -> mt_metadata.timeseries.Station: """ + Returns mt_metadata.timeseries.Station object for synthetic MTH5 creation. Parameters ---------- @@ -204,9 +231,10 @@ def make_station_01(channel_nomenclature="default"): return station -def make_station_02(channel_nomenclature="default"): +def make_station_02(channel_nomenclature="default") -> mt_metadata.timeseries.Station: """ - Just like station 1, but the data are different + Returns mt_metadata.timeseries.Station object for synthetic MTH5 creation. + - Just like station 1, but the data are different Parameters ---------- @@ -227,9 +255,10 @@ def make_station_02(channel_nomenclature="default"): return test2 -def make_station_03(channel_nomenclature="default"): +def make_station_03(channel_nomenclature="default") -> mt_metadata.timeseries.Station: """ - Create a synthetic station with multiple runs. Rather than generate fresh + Returns mt_metadata.timeseries.Station object for synthetic MTH5 creation. + - Like 01, 02, but in this case the station has multiple runs. Rather than generate fresh synthetic data, we just reuse test1.asc for each run. Parameters @@ -319,8 +348,20 @@ def make_station_03(channel_nomenclature="default"): return station -def make_station_04(channel_nomenclature="default"): - """Just like station 01, but data are resampled to 8Hz""" +def make_station_04(channel_nomenclature="default") -> mt_metadata.timeseries.Station: + """ + Returns mt_metadata.timeseries.Station object for synthetic MTH5 creation. 
+    - Just like station 01, but data are resampled to 8Hz
+
+    Parameters
+    ----------
+    channel_nomenclature
+
+    Returns
+    -------
+
+    """
+
     station_metadata = Station()
     station_metadata.id = "test1"
     channel_nomenclature_obj = ChannelNomenclature()