diff --git a/mth5/data/make_mth5_from_asc.py b/mth5/data/make_mth5_from_asc.py index c7acc8f1..b00cb658 100644 --- a/mth5/data/make_mth5_from_asc.py +++ b/mth5/data/make_mth5_from_asc.py @@ -8,17 +8,27 @@ that originally came from EMTF -- test1.asc and test2.asc. Each ascii file represents five channels of data sampled at 1Hz at a synthetic station. -Mirroring the original ascii files are: -data/test1.h5 -data/test2.h5 -data/test12rr.h5 +TODO: Separate the handling of legacy EMTF data files, such as + reading into a dataframe from oddly delimited data, as well as flipping polarities of + the electric channels (possibly due to a baked in sign convention error in the legacy + data), so that a simple dataframe can be passed. That will make the methods here more + easily generalize to work with other dataframes. That would be useful in future when + we are creating synthetic data at arbitrary sample rate. + +Development Notes: + Mirroring the original ascii files are: + data/test1.h5 + data/test2.h5 + data/test12rr.h5 + + Also created are some files with the same data but other channel_nomenclature schemes: + data/test12rr_LEMI34.h5 + data/test1_LEMI12.h5 + + - 20231103: Added an 8Hz up-sampled version of test1. No spectral content was added + so the band between the old and new Nyquist frequencies is bogus. -Also created are some files with the same data but other channel_nomenclature schemes: -data/test12rr_LEMI34.h5 -data/test1_LEMI12.h5 -- 20231103: Added an 8Hz up-sampled version of test1. No spectral content was added -so the band between the old and new Nyquist frequencies is bogus. 
""" # ============================================================================= @@ -38,17 +48,20 @@ from mth5.data.station_config import make_station_03 from mth5.data.station_config import make_station_04 from mth5.data.station_config import SyntheticRun +from mth5.data.station_config import SyntheticStation from mth5.mth5 import MTH5 from mth5.timeseries import ChannelTS, RunTS from mth5.utils.helpers import add_filters from mt_metadata.transfer_functions.processing.aurora import ( ChannelNomenclature, ) +from mt_metadata.transfer_functions.processing.aurora.channel_nomenclature import SupportedNomenclature + from mt_metadata.timeseries import Electric from mt_metadata.timeseries import Magnetic from mt_metadata.timeseries import Survey -from typing import Optional, Union +from typing import List, Literal, Optional, Union # ============================================================================= @@ -59,22 +72,24 @@ def create_run_ts_from_synthetic_run( - run: SyntheticRun, df: pd.DataFrame, channel_nomenclature: str = "default" + run: SyntheticRun, + df: pd.DataFrame, + channel_nomenclature: SupportedNomenclature = "default" ): """ Loop over channels of synthetic data in df and make ChannelTS objects. - :type run: mth5.data.station_config.SyntheticRun :param run: One-off data structure with information mth5 needs to initialize. Specifically sample_rate, filters. - :type df: pandas.DataFrame + :type run: mth5.data.station_config.SyntheticRun :param df: time series data in columns labelled from ["ex", "ey", "hx", "hy", "hz"] - :type channel_nomenclature : string + :type df: pandas.DataFrame :param channel_nomenclature : Keyword corresponding to channel nomenclature mapping in CHANNEL_MAPS variable from channel_nomenclature.py module in mt_metadata. Supported values include ['default', 'lemi12', 'lemi34', 'phoenix123'] + :type channel_nomenclature : string - :rtype runts: RunTS :return runts: MTH5 run time series object, data and metadata bound into one. 
+ :rtype runts: RunTS """ @@ -126,7 +141,7 @@ def create_run_ts_from_synthetic_run( def get_time_series_dataframe( run: SyntheticRun, - source_folder: Optional[Union[pathlib.Path, str]], + source_folder: Union[pathlib.Path, str], add_nan_values: Optional[bool] = False ) -> pd.DataFrame: """ @@ -136,10 +151,13 @@ def get_time_series_dataframe( Only tested for 8, to make 8Hz data for testing. If run.sample_rate is default (1.0) then no up-sampling takes place. + TODO: Move noise, and nan addition out of this method. + :type run: mth5.data.station_config.SyntheticRun :param run: Information needed to define/create the run :type source_folder: Optional[Union[pathlib.Path, str]] - :param source_folder: Where to load the ascii time series from + :param source_folder: Where to load the ascii time series from. This overwrites any + previous value that may have been stored in the SyntheticRun :type add_nan_values: bool :param add_nan_values: If True, add some NaN, if False, do not add Nan. :rtype df: pandas.DataFrame @@ -150,31 +168,14 @@ def get_time_series_dataframe( if source_folder: run.raw_data_path = source_folder.joinpath(run.raw_data_path.name) - # read in data - df = pd.read_csv(run.raw_data_path, names=run.channels, sep="\s+") - - # Invert electric channels to fix phase swap due to modeling coordinates. - df[df.columns[-2]] = -df[df.columns[-2]] # df["ex"] = -df["ex"] - df[df.columns[-1]] = -df[df.columns[-1]] # df["ey"] = -df["ey"] - # Column indices are used to avoid handling channel nomenclature here. 
- - # upsample data if requested, - if run.run_metadata.sample_rate != 1.0: - df_orig = df.copy(deep=True) - new_data_dict = {} - for i_ch, ch in enumerate(run.channels): - data = df_orig[ch].to_numpy() - new_data_dict[ch] = ssig.resample( - data, int(run.run_metadata.sample_rate) * len(df_orig) - ) - df = pd.DataFrame(data=new_data_dict) - - # add noise + df = run._get_timeseries_dataframe() + + # add noise if requested for col in run.channels: if run.noise_scalars[col]: df[col] += run.noise_scalars[col] * np.random.randn(len(df)) - # add nan + # add nan if requested if add_nan_values: for col in run.channels: for [ndx, num_nan] in run.nan_indices[col]: @@ -183,51 +184,55 @@ def get_time_series_dataframe( def create_mth5_synthetic_file( - station_cfgs: list, + station_cfgs: List[SyntheticStation], mth5_name: Union[pathlib.Path, str], target_folder: Optional[Union[pathlib.Path, str]] = "", - source_folder: Optional[Union[pathlib.Path, str]] = "", - plot: Optional[bool] = False, - add_nan_values: Optional[bool] = False, - file_version: Optional[str] = "0.1.0", - channel_nomenclature: Optional[str] = "default", - force_make_mth5: Optional[bool] = True, - survey_metadata: Optional[Union[Survey, None]] = None, + source_folder: Union[pathlib.Path, str] = "", + plot: bool = False, + add_nan_values: bool = False, + file_version: Literal["0.1.0", "0.2.0"] = "0.1.0", + channel_nomenclature: SupportedNomenclature = "default", + force_make_mth5: bool = True, + survey_metadata: Optional[Survey] = None, ): """ - Creates an MTH5 from synthetic data + Creates an MTH5 from synthetic data. + + Development Notes: + 20250203: This function could be made more general, so that it operates on dataframes and legacy emtf ascii files. - :type station_cfgs: list - :param station_cfgs: Elements of the list are each dicts. The dicts are one-off + :param station_cfgs: Iterable of objects of type SyntheticStation. 
These are one-off data structure used to hold information mth5 needs to initialize, specifically sample_rate, filters, etc. - :type mth5_name: Union[pathlib.Path, str] + :type station_cfgs: List[SyntheticStation] :param mth5_name: Where the mth5 will be stored. This is generated by the station_config, but may change in this method based on add_nan_values or channel_nomenclature - :type target_folder: Optional[Union[pathlib.Path, str]] + :type mth5_name: Union[pathlib.Path, str] :param target_folder: Where the mth5 file will be stored - :type source_folder: Optional[Union[pathlib.Path, str]] = "", + :type target_folder: Optional[Union[pathlib.Path, str]] :param source_folder: Where the ascii source data are stored - :type plot: bool + :type source_folder: Optional[Union[pathlib.Path, str]] = "", :param plot: Set to false unless you want to look at a plot of the time series - :type add_nan_values: bool + :type plot: bool :param add_nan_values: If true, some np.nan are sprinkled into the time series. Intended to be used for tests. - :type file_version: str - :param file_version: One of ["0.1.0", "0.2.0"], corresponding to the version of mth5 to create - :type channel_nomenclature: str - :param channel_nomenclature: Keyword corresponding to channel nomenclature mapping in CHANNEL_MAPS variable - from channel_nomenclature.py module in mt_metadata. Supported values are ['default', 'lemi12', 'lemi34', 'phoenix123'] + :type add_nan_values: bool + :param file_version: One of the supported mth5 file versions. This is the version of mth5 to create. 
+ :type file_version: Literal["0.1.0", "0.2.0"] = "0.1.0", + :param channel_nomenclature: Keyword corresponding to channel nomenclature mapping in CHANNEL_MAPS variable, + for example ['default', 'lemi12', 'lemi34', 'phoenix123'] A full list is in mt_metadata/transfer_functions/processing/aurora/standards/channel_nomenclatures.json - :type force_make_mth5: bool - :param force_make_mth5: str + :type channel_nomenclature: SupportedNomenclature :param force_make_mth5: If set to true, the file will be made, even if it already exists. If false, and file already exists, skip the make job. - :type survey_metadata: Survey + :type force_make_mth5: bool :param survey_metadata: Option to provide survey metadata, otherwise it will be created. - :rtype: mth5_path: pathlib.Path + :type survey_metadata: Survey :return: The path to the stored h5 file. + :rtype: mth5_path: pathlib.Path + """ + # Handle path and file name conventions if not target_folder: msg = f"No target folder provided for making {mth5_name}" logger.warning(msg) @@ -238,8 +243,8 @@ def create_mth5_synthetic_file( try: target_folder.mkdir(exist_ok=True, parents=True) except OSError: - msg = "Aurora maybe installed on a read-only file system" - msg = f"{msg}: try setting target_path argument when calling create_mth5_synthetic_file" + msg = "MTH5 maybe installed on a read-only file system" + msg = f"{msg}: try setting `target_folder` argument when calling create_mth5_synthetic_file" logger.error(msg) mth5_path = target_folder.joinpath(mth5_name) @@ -247,6 +252,7 @@ def create_mth5_synthetic_file( mth5_path, add_nan_values, channel_nomenclature ) + # Only create file if needed if not force_make_mth5: if mth5_path.exists(): return mth5_path @@ -266,12 +272,16 @@ def create_mth5_synthetic_file( station_group = m.add_station(station_cfg.id, survey=survey_id) for run in station_cfg.runs: + # run is object of type SyntheticRun df = get_time_series_dataframe( run=run, source_folder=source_folder, 
add_nan_values=add_nan_values ) + # TODO: Add handling for noise, nan, and upsampling here + # (They don't belong in get_time_series_dataframe()) + # cast to run_ts runts = create_run_ts_from_synthetic_run( run, df, channel_nomenclature=channel_nomenclature @@ -302,8 +312,8 @@ def create_test1_h5( """ Creates an MTH5 file for a single station named "test1". - :type file_version: str :param file_version: One of ["0.1.0", "0.2.0"], corresponding to the version of mth5 to create + :type file_version: str :type channel_nomenclature: Optional[str] :param channel_nomenclature: Keyword corresponding to channel nomenclature mapping in CHANNEL_MAPS variable from channel_nomenclature.py module in mt_metadata. Supported values are ['default', 'lemi12', 'lemi34', 'phoenix123'] @@ -584,7 +594,8 @@ def _update_mth5_path( add_nan_values: bool, channel_nomenclature: str ) -> pathlib.Path: - """set name for output h5 file""" + """ Modify the name of output h5 file based on whether or not nan-data are included + as well as channel_nomenclature if not default. 
""" path_str = mth5_path.__str__() if add_nan_values: path_str = path_str.replace(".h5", "_nan.h5") diff --git a/mth5/data/station_config.py b/mth5/data/station_config.py index 4615bfc0..467b427d 100644 --- a/mth5/data/station_config.py +++ b/mth5/data/station_config.py @@ -16,25 +16,30 @@ """ import pathlib +import scipy.signal as ssig from typing import Dict, List, Optional, Union import mt_metadata.timeseries +import pandas as pd +from loguru import logger from mt_metadata.timeseries.filters.helper_functions import make_coefficient_filter from mt_metadata.timeseries import Run from mt_metadata.timeseries import Station from mt_metadata.transfer_functions.processing.aurora import ChannelNomenclature +from mt_metadata.transfer_functions.processing.aurora.channel_nomenclature import SupportedNomenclature ASCII_DATA_PATH = pathlib.Path(__file__).parent.resolve() def make_filters(as_list: Optional[bool] = False) -> Union[dict, list]: """ - Because the data from EMTF is already in mV/km and nT these filters are just - placeholders to show where they would get assigned. + Creates a collection of filters + Because the synthetic data from EMTF are already in mV/km and nT, no calibration filters are required. + The filters here are placeholders to show where instrument response function information would get assigned. 
- :type as_list: bool :param as_list: If True we return a list, False return a dict + :type as_list: bool + :return filters_list: Filters for populating the filters lists of synthetic data :rtype filters_list: Union[List, Dict] - :return pfilters_list: Filters for populating the filters lists of synthetic data """ unity_coeff_filter = make_coefficient_filter(name="1", gain=1.0) multipy_by_10_filter = make_coefficient_filter(gain=10.0, name="10") @@ -63,36 +68,43 @@ class SyntheticRun(object): def __init__( self, id: str, - sample_rate: Optional[float] = 1.0, - raw_data_path: Optional[Union[str, pathlib.Path, None]] = None, - channel_nomenclature: Optional[str] = "default", - channels: Optional[Union[list, None]] = None, - noise_scalars: Optional[Union[dict, None]] = None, - nan_indices: Optional[Union[dict, None]] = None, - filters: Optional[Union[dict, None]] = None, - start: Optional[Union[str, None]] = None, + sample_rate: float = 1.0, + raw_data_path: Optional[Union[str, pathlib.Path]] = None, + channel_nomenclature: SupportedNomenclature = "default", + channels: Optional[list] = None, + noise_scalars: Optional[dict] = None, + nan_indices: Optional[dict] = None, + filters: Optional[dict] = None, + start: Optional[str] = None, + timeseries_dataframe: Optional[pd.DataFrame] = None, + data_source: str = "legacy emtf ascii" ) -> None: """ Constructor. - :type id: str :param id: label for the run + :type id: str + :param sample_rate: sample rate of the time series in Hz. :type sample_rate: float - :param sample_rate: sample rate of the times series - :type raw_data_path: Union[str, pathlib.Path, None] :param raw_data_path: Path to ascii data source - :type channel_nomenclature: str + :type raw_data_path: Union[str, pathlib.Path, None] :param channel_nomenclature: the keyword for the channel nomenclature - :type channels: Union[list, None] + :type channel_nomenclature: str :param channels: the channel names to include in the run. 
- :type noise_scalars: Union[dict, None] + :type channels: Union[list, None] :param noise_scalars: Keys are channels, values are scale factors for noise to add - :type nan_indices: Union[dict, None] + :type noise_scalars: Union[dict, None] :param nan_indices: Keys are channels, values lists. List elements are pairs of (index, num_nan_to_add) - :type filters: Union[dict, None] + :type nan_indices: Union[dict, None] :param filters: Keys are channels, values lists. List elements are Filter objects - :type start: Union[str, None] + :type filters: Union[dict, None] :param start: Setting the run start time. e.g. start="1980-01-01T00:00:00+00:00" + :type start: Union[str, None] + :param timeseries_dataframe: The time series data for the run. + Added 2025 to try to allow more general data to be cast to mth5 + :type timeseries_dataframe: Optional[pd.DataFrame] = None + :param data_source: Keyword to tell if data are a legacy EMTF ASCII file + :type data_source: str """ run_metadata = Run() @@ -100,7 +112,12 @@ def __init__( run_metadata.sample_rate = sample_rate run_metadata.time_period.start = start - self.raw_data_path = raw_data_path + self._timeseries_dataframe = timeseries_dataframe # normally None for legacy EMTF data + if isinstance(self._timeseries_dataframe, pd.DataFrame): + self.data_source = "dataframe" + else: + self.data_source = data_source + self.raw_data_path = raw_data_path # set channel names self._channel_map = None @@ -108,13 +125,19 @@ def __init__( self.set_channel_map() if channels is None: self.channels = list(self.channel_map.values()) + + # Set scale factors for adding noise to individual channels self.noise_scalars = noise_scalars if noise_scalars is None: self.noise_scalars = {} for channel in self.channels: self.noise_scalars[channel] = 0.0 + + # Set indices for adding nan to individual channels if nan_indices is None: self.nan_indices = {} # TODO: make this consistent with noise_scalars, None or 
def _get_timeseries_dataframe(self) -> pd.DataFrame:
    """
    Returns time series data in a dataframe with columns named for EM field component.

    If a dataframe was supplied when the run was constructed, a copy of it is
    returned. A copy is used because callers (e.g. get_time_series_dataframe)
    add noise and NaN values to the returned dataframe in place; handing out
    the stored object would alter the run's own data and make repeated calls
    accumulate noise.

    Otherwise, if self.data_source == "legacy emtf ascii", the data are read
    from self.raw_data_path via LegacyEMTFAsciiFile, which up-samples to
    self.run_metadata.sample_rate when that differs from the native rate of
    the file. If the sample rate is the default (1.0) no up-sampling takes place.

    :return df: The time series data for the synthetic run
    :rtype df: pandas.DataFrame
    :raises NotImplementedError: If the run has neither a dataframe nor a
     supported data_source.

    """
    if isinstance(self._timeseries_dataframe, pd.DataFrame):
        msg = "Run Data appear to be already set in dataframe"
        logger.info(msg)
        # Return a copy so in-place edits by the caller never reach stored data.
        return self._timeseries_dataframe.copy()

    if self.data_source == "legacy emtf ascii":
        ascii_file = LegacyEMTFAsciiFile(file_path=self.raw_data_path)
        return ascii_file.load_dataframe(
            channel_names=self.channels,
            sample_rate=self.run_metadata.sample_rate,
        )

    msg = "No dataframe associated with run, nor a legacy EMTF ASCII file"
    msg += ".. add support for your filetype or declare dataframe"
    raise NotImplementedError(msg)
:type latitude: float - :param latitude: the station latiude :type mth5_name: Union[str, pathlib.Path, None] - :param mth5_name: The name of thm mth5 the station will be written to. + :param mth5_name: The name of the h5 file to which the station data and metadata will be written. """ self.id = id @@ -177,14 +235,15 @@ def __init__( self.mth5_name = mth5_name -def make_station_01(channel_nomenclature: Optional[str] = "default") -> SyntheticStation: +def make_station_01(channel_nomenclature: SupportedNomenclature = "default") -> SyntheticStation: """ + This method prepares the metadata needed to generate an mth5 with syntheric data. + :param channel_nomenclature: Must be one of the nomenclatures defined in SupportedNomenclature :type channel_nomenclature: str - :param channel_nomenclature: Must be one of the nomenclatures defined in "channel_nomenclatures.json" - :rtype: SyntheticStation :return: Object with all info needed to generate MTH5 file from synthetic data. + :rtype: SyntheticStation """ station_metadata = Station() @@ -198,6 +257,7 @@ def make_station_01(channel_nomenclature: Optional[str] = "default") -> Syntheti run_001 = SyntheticRun( id="001", + sample_rate=1.0, raw_data_path=ASCII_DATA_PATH.joinpath("test1.asc"), channel_nomenclature=channel_nomenclature, start=None, @@ -235,14 +295,14 @@ def make_station_01(channel_nomenclature: Optional[str] = "default") -> Syntheti return station -def make_station_02(channel_nomenclature: Optional[str] = "default") -> SyntheticStation: +def make_station_02(channel_nomenclature: SupportedNomenclature = "default") -> SyntheticStation: """ Just like station 1, but the data are different - :type channel_nomenclature: str - :param channel_nomenclature: Must be one of the nomenclatures defined in "channel_nomenclatures.json" - :rtype: SyntheticStation + :param channel_nomenclature: Must be one of the nomenclatures defined in SupportedNomenclature + :type channel_nomenclature: SupportedNomenclature :return: Object with 
all info needed to generate MTH5 file from synthetic data. + :rtype: SyntheticStation """ test2 = make_station_01(channel_nomenclature=channel_nomenclature) @@ -256,14 +316,13 @@ def make_station_02(channel_nomenclature: Optional[str] = "default") -> Syntheti return test2 -def make_station_03(channel_nomenclature="default") -> SyntheticStation: +def make_station_03(channel_nomenclature: SupportedNomenclature = "default") -> SyntheticStation: """ Create a synthetic station with multiple runs. Rather than generate fresh synthetic data, we just reuse test1.asc for each run. - :type channel_nomenclature: str - :param channel_nomenclature: Must be one of the nomenclatures defined in "channel_nomenclatures.json" - Example values ["default", "lemi12", "lemi34", "phoenix123"] + :param channel_nomenclature: Literal, Must be one of the nomenclatures defined in "channel_nomenclatures.json" + :type channel_nomenclature: SupportedNomenclature :rtype: SyntheticStation :return: Object with all info needed to generate MTH5 file from synthetic data. 
@@ -288,7 +347,8 @@ def make_station_03(channel_nomenclature="default") -> SyntheticStation: filters[ch] = [FILTERS["10x"].name, FILTERS["0.1x"].name] run_001 = SyntheticRun( - "001", + id="001", + sample_rate=1.0, raw_data_path=ASCII_DATA_PATH.joinpath("test1.asc"), nan_indices=nan_indices, filters=filters, @@ -300,7 +360,8 @@ def make_station_03(channel_nomenclature="default") -> SyntheticStation: for ch in channels: noise_scalars[ch] = 2.0 run_002 = SyntheticRun( - "002", + id="002", + sample_rate=1.0, raw_data_path=ASCII_DATA_PATH.joinpath("test1.asc"), noise_scalars=noise_scalars, nan_indices=nan_indices, @@ -312,7 +373,8 @@ def make_station_03(channel_nomenclature="default") -> SyntheticStation: for ch in channels: noise_scalars[ch] = 5.0 run_003 = SyntheticRun( - "003", + id="003", + sample_rate=1.0, raw_data_path=ASCII_DATA_PATH.joinpath("test1.asc"), noise_scalars=noise_scalars, nan_indices=nan_indices, @@ -324,7 +386,8 @@ def make_station_03(channel_nomenclature="default") -> SyntheticStation: for ch in channels: noise_scalars[ch] = 10.0 run_004 = SyntheticRun( - "004", + id="004", + sample_rate=1.0, raw_data_path=ASCII_DATA_PATH.joinpath("test1.asc"), noise_scalars=noise_scalars, nan_indices=nan_indices, @@ -343,12 +406,12 @@ def make_station_03(channel_nomenclature="default") -> SyntheticStation: return station -def make_station_04(channel_nomenclature="default") -> SyntheticStation: +def make_station_04(channel_nomenclature: SupportedNomenclature = "default") -> SyntheticStation: """ Just like station 01, but data are resampled to 8Hz - :type channel_nomenclature: str - :param channel_nomenclature: Must be one of the nomenclatures defined in "channel_nomenclatures.json" + :param channel_nomenclature: Literal, Must be one of the nomenclatures defined in "channel_nomenclatures.json" + :type channel_nomenclature: SupportedNomenclature :rtype: SyntheticStation :return: Object with all info needed to generate MTH5 file from synthetic data. 
class LegacyEMTFAsciiFile:
    """
    This class can be used to interact with the legacy synthetic data files
    that were originally in EMTF.

    Development Notes:
     As of 2025-02-03 the only LegacyEMTFAsciiFile data sources are sampled at 1Hz.
     One-off resampling is handled in load_dataframe if the requested sample
     rate differs from IMPLICIT_SAMPLE_RATE.

    """
    IMPLICIT_SAMPLE_RATE = 1.0  # Hz

    def __init__(self, file_path: Union[str, pathlib.Path]) -> None:
        """
        Constructor.

        :param file_path: Path to the legacy EMTF ASCII time series file.
        :type file_path: Union[str, pathlib.Path]
        """
        self.file_path = file_path

    def load_dataframe(
        self,
        channel_names: list,
        sample_rate: float,
    ) -> pd.DataFrame:
        """
        Loads an EMTF legacy ASCII time series into a dataframe.

        These files are whitespace delimited, and the last two columns are
        sign-flipped after reading (the electric field channels, inverted to
        fix a phase swap).

        If sample_rate differs from IMPLICIT_SAMPLE_RATE, every channel is
        Fourier-resampled (scipy.signal.resample) so that the number of output
        samples is round(n_input * sample_rate / IMPLICIT_SAMPLE_RATE).
        No spectral content is added by up-sampling.

        :param channel_names: The names of the channels in the legacy EMTF file, in order.
        :type channel_names: list
        :param sample_rate: The sample rate of the output time series in Hz.
        :type sample_rate: float

        :return df: The labelled time series from the legacy EMTF file.
        :rtype df: pd.DataFrame
        :raises ValueError: If sample_rate is not positive.

        """
        if sample_rate <= 0:
            msg = f"sample_rate must be positive, got {sample_rate}"
            raise ValueError(msg)

        # read in data; raw string keeps the regex free of invalid escape sequences
        df = pd.read_csv(self.file_path, names=channel_names, sep=r"\s+")

        # Invert electric channels to fix phase swap due to modeling coordinates.
        # Column positions are used to avoid handling channel nomenclature here.
        electric_columns = df.columns[-2:]
        df[electric_columns] = -df[electric_columns]

        # Temporary kludge: One-off handling for a test case to resample data.
        # TODO: delete this once a synthetic data module can offer multiple sample rates
        if sample_rate != self.IMPLICIT_SAMPLE_RATE:
            # Scale by the ratio of rates so non-integer rates are not truncated.
            n_samples = int(round(len(df) * sample_rate / self.IMPLICIT_SAMPLE_RATE))
            df = pd.DataFrame(
                data={
                    ch: ssig.resample(df[ch].to_numpy(), n_samples)
                    for ch in df.columns
                }
            )

        return df