merge some doc from 337 branch to test
kkappler committed Jul 21, 2024
1 parent 4eca1e8 commit 13c3067
Showing 2 changed files with 146 additions and 52 deletions.
132 changes: 88 additions & 44 deletions aurora/config/config_creator.py
@@ -1,29 +1,47 @@
"""
Helper class to make config files.

Note: the config is still evolving and this class and its methods are expected to
change.
This module contains a helper class to make config files.

The processing config is still evolving and this class and its methods may change.

"""
from loguru import logger

from aurora.config.metadata.processing import Processing
from aurora.config import BANDS_DEFAULT_FILE
from mt_metadata.transfer_functions.processing.aurora.window import Window
from aurora.sandbox.io_helpers.emtf_band_setup import EMTFBandSetupFile
from loguru import logger
from mt_metadata.transfer_functions.processing.aurora.window import Window
from typing import Optional, Union
import pathlib

SUPPORTED_BAND_SPECIFICATION_STYLES = ["EMTF", "band_edges"]


class ConfigCreator:
def __init__(self, **kwargs):
self._emtf_band_file = kwargs.get("emtf_band_file", None)
self._band_edges = kwargs.get("band_edges", None)
def __init__(
self,
emtf_band_file: Optional[Union[str, pathlib.Path, None]] = None,
band_edges: Optional[Union[dict, None]] = None,
):
"""
Constructor

Parameters
----------
emtf_band_file: Optional[Union[str, pathlib.Path, None]]
Allows the specification of an EMTF "band setup file" for defining frequency bands.
band_edges: dict
Keys are integers corresponding to decimation level. Values are numpy arrays of
shape (n_bands, 2), one row per band: array[i_band, 0] is the lower edge of the
band and array[i_band, 1] is the upper edge.

"""
self._emtf_band_file = emtf_band_file
self._band_edges = band_edges
self._band_specification_style = None

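For context, a minimal sketch of how a band_edges dict of the shape described above might be built and passed to the new constructor; the decimation levels and edge values are illustrative only, not taken from this commit.

import numpy as np

from aurora.config.config_creator import ConfigCreator

# Hypothetical band edges: keys are decimation level ids, values are
# (n_bands, 2) arrays with [lower_edge, upper_edge] in Hz, one row per band.
band_edges = {
    0: np.array([[0.10, 0.20], [0.20, 0.40]]),
    1: np.array([[0.025, 0.05], [0.05, 0.10]]),
}

cc = ConfigCreator(band_edges=band_edges)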
def processing_id(self, kernel_dataset):
"""
Generates a string id label for the processing config: WIP.

In the past, we used f"{local}-{remote}" or f"{local}-{run_id}".
Neither of these is sufficiently unique. In fact, they only describe the
dataset, and not the processing config. It is difficult to see how to make a
@@ -37,31 +55,47 @@ def processing_id(self, kernel_dataset):

Parameters
----------
kernel_dataset
kernel_dataset: aurora.transfer_function.kernel_dataset.KernelDataset
An object that defines the data to be processed.

Returns
-------

id: str
A label for the processing config.
"""
id = f"{kernel_dataset.local_station_id}-{kernel_dataset.remote_station_id}"
return id

@property
def band_specification_style(self):
"""return a description of the scheme used to define the bands."""
return self._band_specification_style

@band_specification_style.setter
def band_specification_style(self, value):
def band_specification_style(self, value: str) -> None:
"""
Sets the band_specification_style

Parameters
----------
value: str
The label for the scheme used to define the bands.

Returns
-------

"""
if value not in SUPPORTED_BAND_SPECIFICATION_STYLES:
msg = f"Won't set band specification style to unrecognized value {value}"
logger.warning(msg)
raise NotImplementedError(msg)
# return
else:
self._band_specification_style = value

def determine_band_specification_style(self):
def determine_band_specification_style(self) -> None:
"""
Try to identify which scheme was used to define the bands.

TODO: Should emtf_band_file path be stored in config to support reproducibility?

"""
@@ -85,43 +119,55 @@ def create_from_kernel_dataset(
kernel_dataset,
input_channels=["hx", "hy"],
output_channels=["hz", "ex", "ey"],
estimator=None,
**kwargs,
estimator: Optional[Union[str, None]] = None,
emtf_band_file: Optional[Union[str, pathlib.Path, None]] = None,
band_edges: Optional[Union[dict, None]] = None,
decimation_factors: Optional[Union[list, None]] = None,
num_samples_window: Optional[Union[int, None]] = None,
):
"""
Hmmm, why not make this a method of kernel_dataset??
This creates a processing config from a kernel dataset.
TODO: Make this a method of kernel_dataset.

Early on we want to know how many decimation levels there will be.
This is defined either by:
1. decimation_factors argument (normally accompanied by a bands_dict)
2. number of decimations implied by EMTF band setup file.
Theoretically, you could also use the number of decimations implied by
bands_dict but this is sloppy, because it would be bad practice to assume
the decimation factor.

Notes:
1. 2022-09-10
The reading-in from EMTF band setup file used to be very terse, carried
some baked in assumptions about decimation factors, and did not acknowlege
some baked in assumptions about decimation factors, and did not acknowledge
specific frequency bands in Hz. I am adding some complexity to the method
that populates bands from EMTF band setup file but am now explicit about the
assumtion of decimation factors, and do provide the frequency bands in Hz.
assumption of decimation factors, and do provide the frequency bands in Hz.

The number of decimation levels must be defined either by:
1. decimation_factors argument (normally accompanied by a bands_dict)
2. number of decimations implied by EMTF band setup file.
Theoretically, you could also use the number of decimations implied by
bands_dict but this is sloppy, because it would assume the decimation factor.


Parameters
----------
kernel_dataset
emtf_band_file: while the default here is None, it will get assigned the
value BANDS_DEFAULT_FILE in the set_frequency_bands method if band edges is
also None.
input_channels
output_channels
estimator
band_edges
kwargs
kernel_dataset: aurora.transfer_function.kernel_dataset.KernelDataset
An object that defines the data to be processed.
input_channels: list
List of the input channels that will be used in TF estimation (usually "hx", "hy")
output_channels: list
List of the output channels that will be estimated by TF (usually "ex", "ey", "hz")
estimator: Optional[Union[str, None]] = None
The name of the regression estimator to use for TF estimation.
emtf_band_file: Optional[Union[str, pathlib.Path, None]] = None
The EMTF band setup file, if used.
band_edges: Optional[Union[dict, None]] = None
The band edges, if emtf_band_file is not used.
decimation_factors: Optional[Union[list, None]] = None
List of decimation factors, normally [1, 4, 4, 4, ..., 4]
num_samples_window: Optional[Union[int, None]] = None
The size of the window (usually for FFT).

Returns
-------
processing_obj: aurora.config.metadata.processing.Processing
Object storing the processing parameters.

"""

@@ -131,14 +177,11 @@ def create_from_kernel_dataset(
# pack station and run info into processing object
processing_obj.stations.from_dataset_dataframe(kernel_dataset.df)

# Unpack kwargs
self._emtf_band_file = kwargs.get("emtf_band_file", None)
self._band_edges = kwargs.get("band_edges", None)
decimation_factors = kwargs.get("decimation_factors", None)
num_samples_window = kwargs.get("num_samples_window", None)

# determine window parameters:
# check if they have been passed as kwargs, otherwise extract default values
# Unpack optional arguments
self._emtf_band_file = emtf_band_file
self._band_edges = band_edges
decimation_factors = decimation_factors
num_samples_window = num_samples_window

# Determine if band_setup or edges dict is to be used for bands
self.determine_band_specification_style()
@@ -150,6 +193,7 @@ def create_from_kernel_dataset(
filepath=self._emtf_band_file, sample_rate=kernel_dataset.sample_rate
)
num_decimations = emtf_band_setup_file.num_decimation_levels
# Assign optional arguments if they have not been passed
if decimation_factors is None:
# set default values to EMTF default values [1, 4, 4, 4, ..., 4]
decimation_factors = num_decimations * [4]
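Taken together, a usage sketch for the new explicit keyword arguments of create_from_kernel_dataset. It assumes a kernel_dataset object has already been built elsewhere (e.g., from an MTH5 run summary); with neither emtf_band_file nor band_edges supplied, the default EMTF band setup file (BANDS_DEFAULT_FILE) is expected to be used.

# Sketch only: `kernel_dataset` is assumed to exist already.
cc = ConfigCreator()
processing_config = cc.create_from_kernel_dataset(
    kernel_dataset,
    input_channels=["hx", "hy"],
    output_channels=["hz", "ex", "ey"],
)
# Per the docstring, the return value is an
# aurora.config.metadata.processing.Processing object.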
66 changes: 58 additions & 8 deletions aurora/config/metadata/processing.py
@@ -1,27 +1,46 @@
# -*- coding: utf-8 -*-
"""
Extend the Processing class with some aurora-specific methods
Extend the mt_metadata.transfer_functions.processing.aurora.processing.Processing class
with some aurora-specific methods.
"""
import pathlib

# =============================================================================
# Imports
# =============================================================================
import pandas as pd

from aurora.time_series.windowing_scheme import window_scheme_from_decimation
from loguru import logger
from mt_metadata.transfer_functions.processing.aurora.processing import Processing
from mt_metadata.utils.list_dict import ListDict
from loguru import logger
from typing import Optional, Union
import pandas as pd


class Processing(Processing):
def __init__(self, **kwargs):
"""
Constructor

Parameters
----------
kwargs
Keyword arguments passed through to the parent Processing class constructor.
"""
# super().__init__(attr_dict=attr_dict, **kwargs)
super().__init__(**kwargs)

def window_scheme(self, as_type="df"):
"""
Make a dataframe of processing parameters, one row per decimation level.

Parameters
----------
as_type: Optional[str]
if "df" return a dataframe, if "dict" return dict

Returns
-------
windowing: Union[dict, pd.DataFrame]
return type depends on as_type argument.

"""
window_schemes = [window_scheme_from_decimation(x) for x in self.decimations]
@@ -45,20 +64,49 @@ def window_scheme(self, as_type="df"):
raise TypeError

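A short usage sketch for window_scheme, assuming a populated Processing object named processing_obj:

# One row per decimation level, with windowing parameters as columns.
windowing_df = processing_obj.window_scheme(as_type="df")

# Per the docstring, the same information can be returned as a dict.
windowing_dict = processing_obj.window_scheme(as_type="dict")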
def decimation_info(self):
"""
Zips decimation level ids to decimation factors and returns them as a dict.

Returns
-------
decimation_info: dict
The decimation factors keyed by decimation level id.
"""
decimation_ids = [x.decimation.level for x in self.decimations]
decimation_factors = [x.decimation.factor for x in self.decimations]
decimation_info = dict(zip(decimation_ids, decimation_factors))
return decimation_info

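For illustration, with the EMTF default decimation factors mentioned in config_creator.py ([1, 4, 4, 4, ..., 4]), the returned mapping would look something like the sketch below; the exact keys and values depend on the configured decimations.

decimation_info = processing_obj.decimation_info()
# e.g. {0: 1, 1: 4, 2: 4, 3: 4} -- decimation factor keyed by decimation level id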
def save_as_json(self, filename=None, nested=True, required=False):
def save_as_json(
self,
filename: Optional[Union[str, pathlib.Path, None]] = None,
nested: Optional[bool] = True,
required: Optional[bool] = False,
) -> None:
"""
Exports self to a JSON file.

Parameters
----------
filename: Optional[Union[str, pathlib.Path, None]]
Where to write the JSON. If None, defaults to self.json_fn().
nested: Optional[bool] = True
An mt_metadata argument, passed through to to_json.
required: Optional[bool] = False
An mt_metadata argument, passed through to to_json.

"""
if filename is None:
filename = self.json_fn()
json_str = self.to_json(nested=nested, required=required)
with open(filename, "w") as f:
f.write(json_str)

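A usage sketch for the new save_as_json signature; the filename below is hypothetical, and omitting it falls back on self.json_fn() as the code above shows.

import pathlib

processing_obj.save_as_json(filename=pathlib.Path("example_processing_config.json"))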
def emtf_tf_header(self, dec_level_id):
def emtf_tf_header(self, dec_level_id: int) -> ListDict:
"""
Returns a ListDict object that has the information that was in the old EMTF TF
Header object. This may be deprecated in the future -- it is an artefact of the
old Matlab implementation.

Parameters
----------
@@ -67,7 +115,9 @@ def emtf_tf_header(self, dec_level_id):

Returns
-------
tfh: mt_metadata.transfer_functions.processing.aurora.transfer_function_header.TransferFunctionHeader
tfh: ListDict
Object with the properties of the old EMTF TransferFunctionHeader class.

"""
tfh = ListDict()
tfh.processing_scheme = self.decimations[dec_level_id].estimator.engine
@@ -80,7 +130,7 @@ def emtf_tf_header(self, dec_level_id):

return tfh

def make_tf_level(self, dec_level_id):
def make_tf_level(self, dec_level_id: int):
"""
Initialize container for a single decimation level -- "flat" transfer function.

@@ -107,7 +157,7 @@ def make_tf_level(self, dec_level_id):

class EMTFTFHeader(ListDict):
"""
Convenince class for storing metadata for a TF estimate.
Convenience class for storing metadata for a TF estimate.
Based on Gary Egbert's TFHeader.m originally in
iris_mt_scratch/egbert_codes-20210121T193218Z-001/egbert_codes/matlabPrototype_10-13-20/TF/classes

