From cf88f7c09303f7229e2f1fd67dedf2f3ae94e901 Mon Sep 17 00:00:00 2001
From: "Karl N. Kappler"
Date: Mon, 22 Jul 2024 18:11:07 -0700
Subject: [PATCH] merge from #337 branch for testing

---
 aurora/test_utils/dataset_definitions.py      | 59 ++++++------
 aurora/test_utils/mth5/fc_helpers.py          | 31 +++++--
 .../parkfield/calibration_helpers.py          | 31 ++++---
 .../parkfield/make_parkfield_mth5.py          | 26 +++++-
 aurora/test_utils/parkfield/path_helpers.py   | 14 ++-
 .../synthetic/make_mth5_from_asc.py           | 76 +++++++++++++--
 .../synthetic/make_processing_configs.py      | 52 +++++++++--
 aurora/test_utils/synthetic/paths.py          | 10 +-
 .../synthetic/processing_helpers.py           | 28 +++---
 aurora/test_utils/synthetic/rms_helpers.py    | 37 ++++++--
 aurora/test_utils/synthetic/station_config.py | 93 +++++++++++++------
 11 files changed, 335 insertions(+), 122 deletions(-)

diff --git a/aurora/test_utils/dataset_definitions.py b/aurora/test_utils/dataset_definitions.py
index fe1669de..9c184b89 100644
--- a/aurora/test_utils/dataset_definitions.py
+++ b/aurora/test_utils/dataset_definitions.py
@@ -1,10 +1,16 @@
+"""
+    This module contains methods that are used to define datasets to be built from FDSN servers.
+
+    These datasets are in turn used for testing.
+
+"""
 from obspy import UTCDateTime
 from aurora.sandbox.io_helpers.fdsn_dataset import FDSNDataset


-def make_pkdsao_test_00_config(minitest=False):
+def make_pkdsao_test_00_config(minitest=False) -> FDSNDataset:
     """
-    Populate a FDSNDataset() object for 2h of 40Hz data
+    Return a description of a 2h PKD SAO 40Hz dataset from NCEDC.

     Parameters
     ----------
@@ -30,7 +36,14 @@
     return test_data_set


-def make_cas04_nvr08_test_00_config():
+def make_cas04_nvr08_test_00_config() -> FDSNDataset:
+    """
+    Return a description of a CAS04,NVR08 dataset from IRIS.
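+
+    Usage sketch (the attribute value shown mirrors the assignment in the body below):
+
+    >>> ds = make_cas04_nvr08_test_00_config()
+    >>> ds.station
+    'CAS04,NVR08'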
+
+    Returns
+    -------
+
+    """
     test_data_set = FDSNDataset()
     test_data_set.dataset_id = "cas_nvr_test_00"
     test_data_set.network = "ZU"
@@ -52,7 +65,8 @@
     return test_data_set


-def make_iak34_test_00_config():
+def make_iak34_test_00_config() -> FDSNDataset:
+    """Return a description of an IAK34 dataset from IRIS."""
     test_data_set = FDSNDataset()
     test_data_set.dataset_id = "iak34_test_00"
     test_data_set.network = "EM"
@@ -70,7 +84,8 @@
     return test_data_set


-def make_iak34_test_01_config():
+def make_iak34_test_01_config() -> FDSNDataset:
+    """Return a description of an IAK34 dataset from IRIS."""
     test_data_set = FDSNDataset()
     test_data_set.dataset_id = "iak34_test_01_long_ss"
     test_data_set.network = "EM"
@@ -87,7 +102,8 @@
     return test_data_set


-def make_iak34_test_02_config():
+def make_iak34_test_02_config() -> FDSNDataset:
+    """Return a description of an IAK34 dataset from IRIS."""
     test_data_set = FDSNDataset()
     test_data_set.dataset_id = "iak34_test_02_long_rr"
     test_data_set.network = "EM"
@@ -104,7 +120,8 @@
     return test_data_set


-def make_iak34_test_03_config():
+def make_iak34_test_03_config() -> FDSNDataset:
+    """Return a description of an IAK34 dataset from IRIS."""
     test_data_set = FDSNDataset()
     test_data_set.dataset_id = "iak34_test_03_long_rr"
     test_data_set.network = "EM"
@@ -121,7 +138,8 @@
     return test_data_set


-def make_iak34_test_04_config():
+def make_iak34_test_04_config() -> FDSNDataset:
+    """Return a description of an IAK34 dataset from IRIS."""
     test_data_set = FDSNDataset()
     test_data_set.dataset_id = "iak34_test_04_rr"
     test_data_set.network = "EM"
@@ -138,29 +156,8 @@
     return test_data_set


-# def make_iak34_nen34_test_00_config():
-#     test_data_set = FDSNDataset()
-#     test_data_set.dataset_id = "iak34_nen34_test_00"
-#     test_data_set.network = "ZU"
-#     test_data_set.station = "IAK34,NEN34"
-#     #
-#     # test_data_set.starttime = UTCDateTime("2020-06-02T18:41:43.000000Z")
-#     # test_data_set.endtime = UTCDateTime("2020-07-13T21:46:12.000000Z")
-#     #
-#     test_data_set.starttime = UTCDateTime("2020-06-04T00:00:00.000000Z")
-#     test_data_set.endtime = UTCDateTime("2020-06-05T00:00:00.000000Z")  # minitest
-#     # test_data_set.endtime = UTCDateTime("2020-06-24T15:55:46.000000Z")
-#
-#     # test_data_set.starttime = UTCDateTime("2004-09-28T00:00:00.000000Z")
-#     # test_data_set.endtime = UTCDateTime("2004-09-28T01:59:59.975000Z")
-#     # test_data_set.endtime = UTCDateTime("2004-09-28T00:01:59.999000Z") #small test
-#     test_data_set.channel_codes = None
-#     test_data_set.description = "earthscope example dataset"
-#     test_data_set.components_list = ["hx", "hy", "ex", "ey"]
-#     return test_data_set
-
-
-def make_test_configs():
+def make_test_configs() -> dict:
+    """Make all the test dataset configs and put them in a dict."""
     test_data_set_configs = {}

     # pkd_sao_test_00 Remote Reference
diff --git a/aurora/test_utils/mth5/fc_helpers.py b/aurora/test_utils/mth5/fc_helpers.py
index 5f0ef501..96f692e3 100644
--- a/aurora/test_utils/mth5/fc_helpers.py
+++ b/aurora/test_utils/mth5/fc_helpers.py
@@ -1,22 +1,35 @@
+"""
+    This module contains functions used by tests that involve Fourier coefficients in MTH5.
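+
+    Example (a sketch -- the csv file name here is hypothetical):
+
+    >>> from aurora.test_utils.mth5.fc_helpers import read_fc_csv
+    >>> xrds = read_fc_csv("fourier_coefficients.csv")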
+""" + +from typing import Optional, Union import numpy as np import pandas as pd +import pathlib +import xarray as xr -def read_fc_csv(csv_name, as_xarray=True): +def read_fc_csv( + csv_name: Union[pathlib.Path, str], as_xarray: Optional[bool] = True +) -> Union[xr.Dataset, pd.DataFrame]: """ + + Load Fourier coefficients from a csv file and return as xarray or dataframe + Usage: xrds_obj = read_fc_csv(csv_name) df = read_fc_csv(csv_name, as_xarry=False) - xrds = - Returns a data + Parameters ---------- - csv_name: str or pathlib.Path - as_xarray: bool' + csv_name: Union[pathlib.Path, str] + Path to csv file to read + as_xarray: Optional[bool] + If true return xr.Dataset Returns ------- - + output: xr.Dataset or pd.DataFrame """ df = pd.read_csv( csv_name, @@ -29,7 +42,7 @@ def read_fc_csv(csv_name, as_xarray=True): for col in df.columns: df[col] = np.complex128(df[col]) if as_xarray: - xrds_out = df.to_xarray() - return xrds_out + output = df.to_xarray() else: - return df + output = df + return output diff --git a/aurora/test_utils/parkfield/calibration_helpers.py b/aurora/test_utils/parkfield/calibration_helpers.py index 738afc91..f0f8dc27 100644 --- a/aurora/test_utils/parkfield/calibration_helpers.py +++ b/aurora/test_utils/parkfield/calibration_helpers.py @@ -1,3 +1,6 @@ +""" + This module contains methods that are used in the Parkfield calibration tests. +""" import matplotlib.pyplot as plt import numpy as np @@ -8,25 +11,26 @@ plt.ion() -def load_bf4_fap_for_parkfield_test_using_mt_metadata(frequencies): +def load_bf4_fap_for_parkfield_test_using_mt_metadata(frequencies: np.ndarray): """ - The hardware repsonses (AAF and digitizer) are not included in this response, - but these do not make any significant difference away from the Nyquist frequecny. + Loads a csv format response file for a BF4 coil and return the calibration function. + Uses an mt_metadata filter object. - Near the Nyquist calibration is inadequate anyhow. Looking at the output plots, - which show the "full calibration" vs "response table (EMI)", neither one is - realistic at high frequency. The fap ("response table (EMI)") curve does not - compensate for AAF and plunges down at high frequency. The full calibration - from the PZ response on the other hand rises unrealistically. The PZ rising - signal amplitude at high frequency is an artefact of calibrating noise. + - Anti-alias filter and digitizer responses are not included in the csv -- it is coil only. + - We ignore the AAF, and hard-code a counts-per-volt value for now + + Development Notes: + TODO: Add doc showing where counts per volt is accessing in FDSN metadata. 

     Parameters
     ----------
-    frequencies : numpy array
-        Array of frequencies at which to evaluate the bf response function
+    frequencies: np.ndarray
+        Frequencies at which to evaluate the bf response function
+
     Returns
     -------
-
+    bf4_resp: np.ndarray
+        Complex response of the filter at the input frequencies
     """
     from aurora.general_helper_functions import DATA_PATH
     from mt_metadata.timeseries.filters.helper_functions import (
@@ -49,6 +53,7 @@
     show_response_curves,
 ):
     """
+    Makes a sanity check plot to show the response of the calibration curves

     Parameters
     ----------
@@ -99,7 +104,7 @@
     include_decimation=False,
 ):
     """
-    loop over channels in fft obj and make calibrated spectral plots
+    Loop over channels in fft obj and make calibrated spectral plots

     Parameters
     ----------
diff --git a/aurora/test_utils/parkfield/make_parkfield_mth5.py b/aurora/test_utils/parkfield/make_parkfield_mth5.py
index 55d7d1d7..80a0eb75 100644
--- a/aurora/test_utils/parkfield/make_parkfield_mth5.py
+++ b/aurora/test_utils/parkfield/make_parkfield_mth5.py
@@ -1,7 +1,10 @@
 """
-Create Parkfield / Hollister mth5 to use as test data
+    This module contains methods for building an MTH5 file from data at Parkfield (PKD) and Hollister
+    (SAO) long-term monitoring stations to use as test data.
 """

+import pathlib
+
 from aurora.test_utils.dataset_definitions import TEST_DATA_SET_CONFIGS
 from mth5.utils.helpers import read_back_data
 from mth5.helpers import close_open_files
@@ -16,6 +19,16 @@


 def select_data_source():
+    """
+    Identifies an appropriate web client to use for NCEDC data requests.
+
+    This was used for debugging data access issues in the past -- may no longer be needed.
+
+    Returns
+    -------
+    data_source: str
+        A responsive NCEDC client.
+    """
     from obspy.clients.fdsn import Client

     ok = False
@@ -35,7 +48,9 @@


 def make_pkdsao_mth5(fdsn_dataset):
-    """ """
+    """
+    Makes an MTH5 file with data from Parkfield and Hollister stations to use for testing.
+    """
     close_open_files()
     fdsn_dataset.data_source = select_data_source()
     fdsn_dataset.initialize_client()
@@ -51,12 +66,14 @@
     return h5_path


-def ensure_h5_exists():
+def ensure_h5_exists() -> pathlib.Path:
     """
+    Make sure that the PKD SAO MTH5 file exists. If it does not, build it.

     Returns
     -------
-
+    h5_path: pathlib.Path
+        The path to the PKD SAO mth5 file to be used for testing.
     """
     h5_path = PARKFIELD_PATHS["data"].joinpath(FDSN_DATASET.h5_filebase)

@@ -73,6 +90,7 @@


 def main():
+    """Allows the MTH5 file to be built by calling this module from the command line."""
     make_pkdsao_mth5(FDSN_DATASET)


diff --git a/aurora/test_utils/parkfield/path_helpers.py b/aurora/test_utils/parkfield/path_helpers.py
index 1cc73c92..b0af20d6 100644
--- a/aurora/test_utils/parkfield/path_helpers.py
+++ b/aurora/test_utils/parkfield/path_helpers.py
@@ -1,7 +1,19 @@
+"""
+    This module contains helper functions to control where the Parkfield test data
+    and test results are stored/accessed.
+"""
 from aurora.general_helper_functions import DATA_PATH


-def make_parkfield_paths():
+def make_parkfield_paths() -> dict:
+    """
+    Makes a dictionary with information about where to store/access PKD test data and results.
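+
+    Usage sketch:
+
+    >>> paths = make_parkfield_paths()
+    >>> data_dir = paths["data"]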
+
+    Returns
+    -------
+    parkfield_paths: dict
+        Dict containing paths to "data", "aurora_results", "config", "emtf_results"
+    """
     base_path = DATA_PATH.joinpath("parkfield")
     parkfield_paths = {}
     parkfield_paths["data"] = base_path
diff --git a/aurora/test_utils/synthetic/make_mth5_from_asc.py b/aurora/test_utils/synthetic/make_mth5_from_asc.py
index 5882dc28..b6c422ce 100644
--- a/aurora/test_utils/synthetic/make_mth5_from_asc.py
+++ b/aurora/test_utils/synthetic/make_mth5_from_asc.py
@@ -17,9 +17,11 @@
 data/test12rr_LEMI34.h5
 data/test1_LEMI12.h5

-- 20231103: Added an 8Hz upsampled version of test1. No spectral content was added
+- 20231103: Added an 8Hz up-sampled version of test1. No spectral content was added
 so the band between the old and new Nyquist frequencies is bogus.

+Notes: Work in progress -- this module is being migrated to MTH5.
+
 """
 # import inspect
 import numpy as np
@@ -47,11 +49,9 @@
 MTH5_PATH = synthetic_test_paths.mth5_path


-def create_run_ts_from_synthetic_run(run, df, channel_nomenclature="default"):
+def create_run_ts_from_synthetic_run(run, df, channel_nomenclature="default") -> RunTS:
     """
-    Loop over stations and make ChannelTS objects.
-    Need to add a tag in the channels
-    so that when you call a run it will get all the filters with it.
+    Loop over channels of synthetic data in df and make ChannelTS objects.

     Parameters
     ----------
@@ -67,7 +67,8 @@

     Returns
     -------
-
+    runts: RunTS
+        MTH5 run time series object, data and metadata bound into one.
     """

     channel_nomenclature_obj = ChannelNomenclature()
@@ -127,12 +128,16 @@
 def get_time_series_dataframe(run, source_folder, add_nan_values):
     """
+    Returns time series data in a dataframe with columns named for EM field component.
+
     Parameters
     ----------
     run: aurora.test_utils.synthetic.station_config.SyntheticRun
         Information needed to define/create the run
     source_folder: pathlib.Path, or null
+        Where to load the ascii time series from
+    add_nan_values: bool
+        If True, add some NaN values; if False, do not.

     Up-samples data to run.sample_rate, which is treated as an integer. Only tested
     for 8, to make 8Hz data for testing. If run.sample_rate is the default (1.0)
     no up-sampling is applied.

     Returns
     -------
     df: pandas.DataFrame
-        The time series data for the synthetic run
+        The time series data for the synthetic run
     """
     # point to the ascii time series
     if source_folder:
@@ -186,6 +191,7 @@ def create_mth5_synthetic_file(
     force_make_mth5=True,
 ):
     """
+    Creates an MTH5 from synthetic data

     Parameters
     ----------
@@ -284,6 +290,21 @@ def create_test1_h5(
     source_folder="",
     force_make_mth5=True,
 ):
+    """
+    Creates an MTH5 file for a single station named "test1".
+
+    Parameters
+    ----------
+    file_version
+    channel_nomenclature
+    target_folder
+    source_folder
+    force_make_mth5
+
+    Returns
+    -------
+
+    """
     station_01_params = make_station_01(channel_nomenclature=channel_nomenclature)
     mth5_name = station_01_params.mth5_name
     station_params = [
@@ -309,6 +330,9 @@ def create_test2_h5(
     target_folder=MTH5_PATH,
     source_folder="",
 ):
+    """
+    Creates an MTH5 file for a single station named "test2".
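+
+    A sketch of typical usage (mirrors create_test1_h5; the built file's path is returned):
+
+    >>> mth5_path = create_test2_h5()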
+    """
     station_02_params = make_station_02(channel_nomenclature=channel_nomenclature)
     mth5_name = station_02_params.mth5_name
     station_params = [
@@ -332,6 +356,9 @@ def create_test1_h5_with_nan(
     target_folder=MTH5_PATH,
     source_folder="",
 ):
+    """
+    Creates an MTH5 file for a single station named "test1" with some nan values.
+    """
     station_01_params = make_station_01(channel_nomenclature=channel_nomenclature)
     mth5_name = station_01_params.mth5_name
     station_params = [
@@ -355,6 +382,9 @@ def create_test12rr_h5(
     target_folder=MTH5_PATH,
     source_folder=None,
 ):
+    """
+    Creates an MTH5 file with data from two stations named "test1" and "test2".
+    """
     station_01_params = make_station_01(channel_nomenclature=channel_nomenclature)
     station_02_params = make_station_02(channel_nomenclature=channel_nomenclature)
     station_params = [station_01_params, station_02_params]
@@ -378,6 +408,10 @@ def create_test3_h5(
     target_folder=MTH5_PATH,
     source_folder="",
 ):
+    """
+    Creates an MTH5 file for a single station named "test3".
+    This example has several runs and can be used to test looping over runs.
+    """
     station_03_params = make_station_03(channel_nomenclature=channel_nomenclature)
     station_params = [
         station_03_params,
@@ -399,7 +433,13 @@ def create_test4_h5(
     target_folder=MTH5_PATH,
     source_folder="",
 ):
-    """8Hz data kluged from the 1Hz ... only freqs below 0.5Hz will make sense (100 Ohmm and 45deg)"""
+    """
+    Creates an MTH5 file for a single station named "test1"; data are up-sampled to 8Hz from
+    the original 1Hz.
+
+    Note: Because the 8Hz data are derived from the 1Hz, only frequencies below 0.5Hz
+    will have valid TFs that yield the apparent resistivity of the synthetic data (100 Ohm-m).
+    """
     station_04_params = make_station_04(channel_nomenclature=channel_nomenclature)
     mth5_path = create_mth5_synthetic_file(
         [
@@ -415,7 +455,22 @@
     return mth5_path


-def _get_set_survey_id(m):
+def _get_set_survey_id(m: MTH5) -> tuple:
+    """
+    Given an open mth5 file (m), set the survey ID and return it as a string,
+    as well as the (modified) mth5 object.
+
+    Parameters
+    ----------
+    m: MTH5
+        The mth5 object to set the survey ID for.
+
+    Returns
+    -------
+    (m, survey_id): tuple
+        m is the (modified) MTH5
+        survey_id is a string
+    """
     if m.file_version == "0.1.0":
         survey_id = None
     elif m.file_version == "0.2.0":
@@ -428,6 +483,7 @@


 def main(file_version="0.1.0"):
+    """Allow the module to be called from the command line"""
     file_version = "0.2.0"
     # create_test1_h5(file_version=file_version)
     # create_test1_h5_with_nan(file_version=file_version)
diff --git a/aurora/test_utils/synthetic/make_processing_configs.py b/aurora/test_utils/synthetic/make_processing_configs.py
index 222543da..c192ac34 100644
--- a/aurora/test_utils/synthetic/make_processing_configs.py
+++ b/aurora/test_utils/synthetic/make_processing_configs.py
@@ -1,3 +1,7 @@
+"""
+    This module contains methods for generating processing config objects that are
+    used in aurora's tests of processing synthetic data.
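+
+    Example (a sketch -- the second argument, a kernel dataset built from the
+    synthetic data, is assumed to exist beforehand, as in the commented calls
+    in main() below):
+
+    >>> config = create_test_run_config("test1", kernel_dataset)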
+""" from aurora.config import BANDS_DEFAULT_FILE from aurora.config import BANDS_256_26_FILE from aurora.config.config_creator import ConfigCreator @@ -128,19 +132,29 @@ def create_test_run_config( def test_to_from_json(): """ - Test related to issue #172 - This is deprecated in its current form, but should be modified to save the json - from the processing object (not the config class) - Trying to save to json and then read back a Processing object + Intended to test that processing config can be stored as a json, then + reloaded from json and is equal. - Start by manually creating the dataset_df for syntehtic test1 + WORK IN PROGRESS -- see mt_metadata Issue #222 + + Development Notes + TODO: This test should be completed and moved into tests. + - The json does not load into an mt_metadata object + - The problem seems to be that at the run-level of the processing config + there is an intention to allow for multiple time-periods. + - This is reasonable, consider a station running for several months, + we may want to only process data from certain chunks of the time series. + - However, the time period reader does not seem to work as expected. + - A partial fix is on fix_issue_222 branch of mt_metadata + + Related to issue #172 Returns ------- """ - import pandas as pd + # import pandas as pd from mt_metadata.transfer_functions.processing.aurora import Processing from aurora.pipelines.run_summary import RunSummary from aurora.transfer_function.kernel_dataset import KernelDataset @@ -164,10 +178,36 @@ def test_to_from_json(): json_fn = CONFIG_PATH.joinpath(processing_config.json_fn()) p.from_json(json_fn) logger.info("Assert equal needed here") + # This fails (July 22, 2024) + # assert p == processing_config + + # This should be true, but its false + # p.stations.local.runs == processing_config.stations.local.runs + # p.stations.local.runs[0] == processing_config.stations.local.runs[0] + + """ + Debugging Notes: + Once the updated parsing from #222 is applied, the next problem is that the object that was + read-back from json has two dicts in its time periods: + p.stations.local.runs[0].time_periods + [{'time_period': {'end': '1980-01-01T11:06:39+00:00', 'start': '1980-01-01T00:00:00+00:00'}}, + {'time_period': {'end': '1980-01-01T11:06:39+00:00', 'start': '1980-01-01T00:00:00+00:00'}}] + processing_config.stations.local.runs[0].time_periods + [{ + "time_period": { + "end": "1980-01-01T11:06:39+00:00", + "start": "1980-01-01T00:00:00+00:00" + } + }] + """ return def main(): + """Allow the module to be called from the command line""" + pass + # TODO: fix test_to_from_json and put in tests. + # - see issue #222 in mt_metadata. test_to_from_json() # create_test_run_config("test1", df) # create_test_run_config("test2") diff --git a/aurora/test_utils/synthetic/paths.py b/aurora/test_utils/synthetic/paths.py index bcf8a01c..5f67aa1a 100644 --- a/aurora/test_utils/synthetic/paths.py +++ b/aurora/test_utils/synthetic/paths.py @@ -1,8 +1,11 @@ """ -Sets up paths for synthetic data testing. +This module contains a class that helps manage data paths for testing aurora on synthetic data. -The DATA_PATH from general_helper_functions has traditionally had the -synthetic ascii data, but this is now stored in MTH5. +Development Notes: + - The DATA_PATH from general_helper_functions has traditionally had the + synthetic ascii data, but this is now stored in MTH5. + - This class was built to handle Issue #303 (installation on read-only file system). 
+   https://github.com/simpeg/aurora/issues/303
 """

 import pathlib
@@ -29,6 +32,7 @@ class SyntheticTestPaths:

     def __init__(self, sandbox_path=None, ascii_data_path=None):
         """
+        Constructor

         Parameters
         ----------
diff --git a/aurora/test_utils/synthetic/processing_helpers.py b/aurora/test_utils/synthetic/processing_helpers.py
index ddf7c7d8..84519866 100644
--- a/aurora/test_utils/synthetic/processing_helpers.py
+++ b/aurora/test_utils/synthetic/processing_helpers.py
@@ -1,11 +1,17 @@
+"""
+    This module contains some helper functions that are called during the
+    execution of aurora's tests of processing on synthetic data.
+"""
+import mt_metadata.transfer_functions
+import pathlib
 from aurora.pipelines.process_mth5 import process_mth5
 from aurora.test_utils.synthetic.make_mth5_from_asc import create_test1_h5


 def get_example_kernel_dataset():
     """
-    Some tests could benefit from having a ready-made kernel dataset object.
-    This creates one from the synthetic data.
+    Creates a kernel dataset object from the synthetic data
+    - Helper function for synthetic tests.

     Returns
     -------
@@ -31,8 +37,14 @@
     return kernel_dataset


-def tf_obj_from_synthetic_data(mth5_path):
-    """Helper function for test_issue_139"""
+def tf_obj_from_synthetic_data(
+    mth5_path: pathlib.Path,
+) -> mt_metadata.transfer_functions.TF:
+    """
+    Executes aurora processing on mth5_path, and returns mt_metadata TF object.
+    - Helper function for test_issue_139
+
+    """
     from aurora.config.config_creator import ConfigCreator
     from aurora.pipelines.run_summary import RunSummary
     from aurora.transfer_function.kernel_dataset import KernelDataset
@@ -54,11 +66,3 @@
         z_file_path="test1_RRtest2.zrr",
     )
     return tf_cls
-
-
-# def main():
-#     kd = get_example_kernel_dataset()
-#     return
-#
-# if __name__ == "__main__":
-#     main()
diff --git a/aurora/test_utils/synthetic/rms_helpers.py b/aurora/test_utils/synthetic/rms_helpers.py
index 75e35114..7a8f26b8 100644
--- a/aurora/test_utils/synthetic/rms_helpers.py
+++ b/aurora/test_utils/synthetic/rms_helpers.py
@@ -1,13 +1,20 @@
+"""
+    This module contains methods associated with RMS calculations that are used in testing
+    aurora processing on synthetic data.
+
+"""
 import numpy as np
 from loguru import logger


 def compute_rms(rho, phi, model_rho_a=100.0, model_phi=45.0, verbose=False):
     """
-    This function being used to make comparative plots for synthetic data. Could be
-    used in general to compare different processing results. For example by replacing
-    model_rho_a and model_phi with other processing results, or other (
-    non-uniform) model results.
+    Computes the RMS between processing results (rho, phi) and model (rho, phi).
+
+    It is used to make annotations for comparative plots for synthetic data. Could be
+    used in general to compare different processing results. For example by replacing
+    model_rho_a and model_phi with other processing results, or other (non-uniform)
+    model results.

     Parameters
     ----------
@@ -34,7 +41,21 @@
     return rho_rms, phi_rms


-def get_expected_rms_misfit(test_case_id, emtf_version=None):
+def get_expected_rms_misfit(test_case_id: str, emtf_version=None) -> dict:
+    """
+    Returns hard-coded expected results from synthetic data processing.
+    These results are a benchmark against which test results are compared on push to GitHub.
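+
+    Usage sketch ("test1" is assumed to be among the handled test case ids):
+
+    >>> expected = get_expected_rms_misfit("test1")
+    >>> expected_rho, expected_phi = expected["rho"], expected["phi"]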
+ + Parameters + ---------- + test_case_id + emtf_version + + Returns + ------- + + """ expected_rms_misfit = {} expected_rms_misfit["rho"] = {} expected_rms_misfit["phi"] = {} @@ -65,8 +86,10 @@ def assert_rms_misfit_ok( phi_rms_aurora, rho_tol=1e-4, phi_tol=1e-4, -): +) -> None: """ + Compares actual RMS misfit from processing against expected values. + Raises Assertion errors if test processing results different from expected. Parameters ---------- @@ -90,7 +113,7 @@ def assert_rms_misfit_ok( logger.error(rho_rms_aurora - expected_rms_rho) raise AssertionError("Expected misfit for resistivity is not correct") - if not np.isclose(phi_rms_aurora - expected_rms_phi, 0, atol=rho_tol): + if not np.isclose(phi_rms_aurora - expected_rms_phi, 0, atol=phi_tol): logger.error("==== AURORA ====\n") logger.error(phi_rms_aurora) logger.error("==== EXPECTED ====\n") diff --git a/aurora/test_utils/synthetic/station_config.py b/aurora/test_utils/synthetic/station_config.py index 9e197c53..2a36bee8 100644 --- a/aurora/test_utils/synthetic/station_config.py +++ b/aurora/test_utils/synthetic/station_config.py @@ -1,7 +1,7 @@ """ -Definitions used in the creation of synthetic mth5 files. - +This module contains helper functions for the creation of synthetic mth5 files. +Development Notes: Survey level: 'mth5_path', Path to output h5 Station level: 'station_id', name of the station Station level:'latitude':17.996 @@ -15,7 +15,10 @@ Run level: 'sample_rate', 1.0 """ -from typing import Dict, List, Union +import pathlib +from typing import Dict, List, Optional, Union + +import mt_metadata.timeseries from aurora.general_helper_functions import get_mth5_ascii_data_path from aurora.test_utils.synthetic.paths import SyntheticTestPaths @@ -28,10 +31,12 @@ synthetic_test_paths = SyntheticTestPaths() -def make_filters(as_list=False): +def make_filters(as_list: bool = False) -> Union[Dict, List]: """ + Returns some dummy, placeholder filters. + Because the data from EMTF is already in mV/km and nT these filters are just - placeholders to show where they would get assigned. + placeholders to show how to add them to the MTH5. Parameters ---------- @@ -40,7 +45,7 @@ def make_filters(as_list=False): Returns ------- - filters_list: Union[List, Dict] + filters: Union[List, Dict] filters that can be used to populate the filters lists of synthetic data """ unity_coeff_filter = make_coefficient_filter(name="1", gain=1.0) @@ -48,13 +53,13 @@ def make_filters(as_list=False): divide_by_10_filter = make_coefficient_filter(gain=0.1, name="0.1") if as_list: - return [unity_coeff_filter, multipy_by_10_filter, divide_by_10_filter] + filters = [unity_coeff_filter, multipy_by_10_filter, divide_by_10_filter] else: filters = {} filters["1x"] = unity_coeff_filter filters["10x"] = multipy_by_10_filter filters["0.1x"] = divide_by_10_filter - return filters + return filters FILTERS = make_filters() @@ -67,30 +72,51 @@ class SyntheticRun(object): Initially this class worked only with the synthetic ASCII data from legacy EMTF. 
""" - def __init__(self, id, **kwargs): + def __init__( + self, + id: str, + sample_rate: Optional[float] = 1.0, + raw_data_path: Optional[Union[str, pathlib.Path, None]] = None, + channel_nomenclature: Optional[str] = "default", + channels: Optional[Union[List, None]] = None, + noise_scalars: Optional[Union[Dict, None]] = None, + nan_indices: Optional[Union[Dict, None]] = None, + filters: Optional[Union[Dict, None]] = None, + start: Optional[Union[str, None]] = None, + ): + """ + Constructor + + Parameters + ---------- + id + + """ run_metadata = Run() run_metadata.id = id - run_metadata.sample_rate = kwargs.get("sample_rate", 1.0) + run_metadata.sample_rate = sample_rate - self.raw_data_path = kwargs.get("raw_data_path", None) + self.raw_data_path = raw_data_path # set channel names self._channel_map = None - self.channel_nomenclature_keyword = kwargs.get( - "channel_nomenclature", "default" - ) + self.channel_nomenclature_keyword = channel_nomenclature self.set_channel_map() - self.channels = kwargs.get("channels", list(self.channel_map.values())) + if channels is None: + self.channels = list(self.channel_map.values()) + else: + self.channels = channels - self.noise_scalars = kwargs.get("noise_scalars", None) - self.nan_indices = kwargs.get("nan_indices", {}) - self.filters = kwargs.get("filters", {}) - self.start = kwargs.get("start", None) + self.noise_scalars = noise_scalars + self.nan_indices = nan_indices + self.filters = filters + self.start = start if self.noise_scalars is None: self.noise_scalars = {} for channel in self.channels: self.noise_scalars[channel] = 0.0 + # run_metadata.add_base_attribute("") self.run_metadata = run_metadata @@ -143,8 +169,9 @@ def __init__(self, id, **kwargs): self.mth5_name = kwargs.get("mth5_name", None) -def make_station_01(channel_nomenclature="default"): +def make_station_01(channel_nomenclature="default") -> mt_metadata.timeseries.Station: """ + Returns mt_metadata.timeseries.Station object for synthetic MTH5 creation. Parameters ---------- @@ -204,9 +231,10 @@ def make_station_01(channel_nomenclature="default"): return station -def make_station_02(channel_nomenclature="default"): +def make_station_02(channel_nomenclature="default") -> mt_metadata.timeseries.Station: """ - Just like station 1, but the data are different + Returns mt_metadata.timeseries.Station object for synthetic MTH5 creation. + - Just like station 1, but the data are different Parameters ---------- @@ -227,9 +255,10 @@ def make_station_02(channel_nomenclature="default"): return test2 -def make_station_03(channel_nomenclature="default"): +def make_station_03(channel_nomenclature="default") -> mt_metadata.timeseries.Station: """ - Create a synthetic station with multiple runs. Rather than generate fresh + Returns mt_metadata.timeseries.Station object for synthetic MTH5 creation. + - Like 01, 02, but in this case the station has multiple runs. Rather than generate fresh synthetic data, we just reuse test1.asc for each run. Parameters @@ -319,8 +348,20 @@ def make_station_03(channel_nomenclature="default"): return station -def make_station_04(channel_nomenclature="default"): - """Just like station 01, but data are resampled to 8Hz""" +def make_station_04(channel_nomenclature="default") -> mt_metadata.timeseries.Station: + """ + Returns mt_metadata.timeseries.Station object for synthetic MTH5 creation. 
+    - Just like station 01, but data are resampled to 8Hz
+
+    Parameters
+    ----------
+    channel_nomenclature
+
+    Returns
+    -------
+
+    """
+
     station_metadata = Station()
     station_metadata.id = "test1"
     channel_nomenclature_obj = ChannelNomenclature()