Skip to content

Commit

Permalink
put back in the option to set input/output channels
Browse files Browse the repository at this point in the history
It probably makes sense to keep the option to specify the input/output channels in case you don't want to process a certain channel like Hz.
  • Loading branch information
kujaku11 committed Aug 14, 2024
1 parent 141a10b commit 3a63d32
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 34 deletions.
77 changes: 45 additions & 32 deletions aurora/config/config_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
"""

from aurora.config.metadata.processing import Processing
from aurora.config import BANDS_DEFAULT_FILE
from aurora.sandbox.io_helpers.emtf_band_setup import EMTFBandSetupFile
from loguru import logger
from mt_metadata.transfer_functions.processing.aurora.window import Window
from mt_metadata.transfer_functions.processing.aurora.processing import (
Processing,
)
from typing import Optional, Union
import pathlib

Expand Down Expand Up @@ -39,33 +41,32 @@ def __init__(
self._band_edges = band_edges
self._band_specification_style = None

def processing_id(self, kernel_dataset):
    """
    Generate a string id label for the processing config: WIP.

    In the past, we used f"{local}-{remote}" or f"{local}-{run_id}".
    Neither of these is sufficiently unique. In fact, they only describe
    the dataset, and not the processing config. It is difficult to see how
    to make a comprehensive, unique id without it being very long or
    involving hash functions.

    NOTE: this label only describes the dataset, not the processing
    parameters; a config-derived string can be appended by callers if a
    more unique id is needed.

    Parameters
    ----------
    kernel_dataset: aurora.transfer_function.kernel_dataset.KernelDataset
        An object that defines the data to be processed.

    Returns
    -------
    str
        A label for the processing config.
    """
    # Renamed away from `id`, which shadowed the builtin of the same name.
    return f"{kernel_dataset.local_station_id}-{kernel_dataset.remote_station_id}"
# def processing_id(self, kernel_dataset):
# """
# Generates a string id label for the processing config: WIP.

# In the past, we used f"{local}-{remote}" or f"{local}-{run_id}"
# Neither of these is sufficiently unique. In fact, they only describe the
# dataset, and not the processing config. It is difficult to see how to make a
# comprehensive, unique id without it being very long or involving hash
# functions.

# For now, will try to use {local}-{local_runs}-{remote}-{remote_runs},
# which at least describes the dataset, then a string can be generated by the
# config and appended if needed.

# Parameters
# ----------
# kernel_dataset: aurora.transfer_function.kernel_dataset.KernelDataset
# An object that defines the data to be processed.

# Returns
# -------
# id: str
# A label for the processing config.
# """
# id = f"{kernel_dataset.local_station_id}-{kernel_dataset.remote_station_id}"
# return id

@property
def band_specification_style(self):
Expand Down Expand Up @@ -120,6 +121,8 @@ def determine_band_specification_style(self) -> None:
def create_from_kernel_dataset(
self,
kernel_dataset,
input_channels=None,
output_channels=None,
estimator: Optional[Union[str, None]] = None,
emtf_band_file: Optional[Union[str, pathlib.Path, None]] = None,
band_edges: Optional[Union[dict, None]] = None,
Expand Down Expand Up @@ -173,8 +176,10 @@ def create_from_kernel_dataset(
processing_obj: aurora.config.metadata.processing.Processing
Object storing the processing parameters.
"""
processing_id = self.processing_id(kernel_dataset)
processing_obj = Processing(id=processing_id) # , **kwargs)

processing_obj = Processing(
id=kernel_dataset.processing_id
) # , **kwargs)

# pack station and run info into processing object
processing_obj.stations.from_dataset_dataframe(kernel_dataset.df)
Expand Down Expand Up @@ -224,8 +229,16 @@ def create_from_kernel_dataset(
if self.band_specification_style == "EMTF":
processing_obj.band_setup_file = str(self._emtf_band_file)
for key, decimation_obj in processing_obj.decimations_dict.items():
decimation_obj.input_channels = kernel_dataset.input_channels
decimation_obj.output_channels = kernel_dataset.output_channels
if input_channels is None:
decimation_obj.input_channels = kernel_dataset.input_channels
else:
decimation_obj.input_channels = input_channels

if output_channels is None:
decimation_obj.output_channels = kernel_dataset.output_channels
else:
decimation_obj.output_channels = output_channels

if num_samples_window is not None:
decimation_obj.window.num_samples = num_samples_window[key]
# set estimator if provided as kwarg
Expand Down
11 changes: 11 additions & 0 deletions aurora/transfer_function/kernel_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,17 @@ def has_remote_mth5(self):
return False
else:
return self.remote_mth5_path.exists()

@property
def processing_id(self):
    """It is difficult to come up with unique ids without crazy long
    names, so this is a generic id of local-remote; the station metadata
    will have run information and the config parameters.
    """
    # No remote reference station: the local station id alone is the label.
    if self.remote_station_id is None:
        return self.local_station_id
    return f"{self.local_station_id}-rr_{self.remote_station_id}"

@property
def input_channels(self):
Expand Down
8 changes: 6 additions & 2 deletions tests/parkfield/test_process_parkfield_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,9 @@ def test():
test_processing(z_file_path=z_file_path, test_clock_zero="data start")

# COMPARE WITH ARCHIVED Z-FILE
auxilliary_z_file = PARKFIELD_PATHS["emtf_results"].joinpath("PKD_272_00.zrr")
auxilliary_z_file = PARKFIELD_PATHS["emtf_results"].joinpath(
"PKD_272_00.zrr"
)
if z_file_path.exists():
compare_two_z_files(
z_file_path,
Expand All @@ -89,7 +91,9 @@ def test():
xlims=[0.05, 500],
)
else:
logger.error("Z-File not found - Parkfield tests failed to generate output")
logger.error(
"Z-File not found - Parkfield tests failed to generate output"
)
logger.warning("NCEDC probably not returning data")


Expand Down
2 changes: 2 additions & 0 deletions tests/transfer_function/test_kernel_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ def test_remote_df(self):
self.assertListEqual(
list(self.kd.remote_df.station.unique()), ["test1"]
)
def test_processing_id(self):
    # The dataset fixture pairs local station "test1" with remote "test2",
    # so the generic label should be "<local>-rr_<remote>".
    result = self.kd.processing_id
    self.assertEqual(result, "test1-rr_test2")

# @classmethod
# def tearDownClass(self):
Expand Down

0 comments on commit 3a63d32

Please sign in to comment.