diff --git a/aurora/time_series/apodization_window.py b/aurora/time_series/apodization_window.py index d950e15f..7789bda5 100644 --- a/aurora/time_series/apodization_window.py +++ b/aurora/time_series/apodization_window.py @@ -53,11 +53,14 @@ import numpy as np import scipy.signal as ssig from loguru import logger +from typing import Optional, Union class ApodizationWindow(object): """ - Instantiate an apodization window object. Example usages: + Instantiate an apodization window object. + + Example usages: apod_window = ApodizationWindow() taper=ApodizationWindow(taper_family='hanning', num_samples_window=55 ) @@ -75,32 +78,42 @@ class ApodizationWindow(object): ENBW: Effective Noise BandWidth, see Equation (22) NENBW Normalized Equivalent Noise BandWidth, see Equation (21) - Parameters - ---------- - taper_family : string - Specify the taper type - boxcar, kaiser, hanning, etc - num_samples_window : int - The number of samples in the taper - taper : numpy array - The actual window coefficients themselves. This can be passed if a - particular custom window is desired. - additional_args: dictionary - These are any additional requirements scipy needs in order to - generate the window. """ - def __init__(self, **kwargs): + def __init__( + self, + taper_family: Optional[str] = "boxcar", + num_samples_window: Optional[ + int + ] = 0, # TODO: this should probably init to None + taper: Optional[np.ndarray] = np.empty(0), + taper_additional_args: Optional[Union[dict, None]] = None, + **kwargs, # needed as a passthrough argument to WindowingScheme + ): + """ + Constructor Parameters ---------- - kwargs: dict - See parameters list in class level documentation above + taper_family : string + Specify the taper type - boxcar, kaiser, hanning, etc + num_samples_window : int + The number of samples in the taper + taper : numpy array + The actual window coefficients themselves. This can be passed if a + particular custom window is desired. + additional_args: dictionary + These are any additional requirements scipy needs in order to + generate the window. + """ - self.taper_family = kwargs.get("taper_family", "boxcar") - self._num_samples_window = kwargs.get("num_samples_window", 0) - self._taper = kwargs.get("taper", np.empty(0)) - self.additional_args = kwargs.get("taper_additional_args", {}) + self.taper_family = taper_family + self._num_samples_window = num_samples_window + self._taper = taper + if taper_additional_args is None: + taper_additional_args = {} + self.additional_args = taper_additional_args self._coherent_gain = None self._nenbw = None @@ -112,8 +125,13 @@ def __init__(self, **kwargs): self.make() @property - def summary(self): + def summary(self) -> str: """ + Returns a string summarizing the window properties + + TODO: This should be the __str__(), i.e. the readable version + https://stackoverflow.com/questions/1436703/what-is-the-difference-between-str-and-repr + Returns ------- out_str: str @@ -129,19 +147,25 @@ def summary(self): def __str__(self): """ - ? __repr__? 
+        returns the taper values as a string
+
+        TODO: this should be __repr__ (unambiguous)
+        https://stackoverflow.com/questions/1436703/what-is-the-difference-between-str-and-repr
+
+        """
         return f"{self.taper}"
 
     @property
-    def num_samples_window(self):
+    def num_samples_window(self) -> int:
+        """returns the window length in samples"""
         if self._num_samples_window == 0:
             self._num_samples_window = len(self.taper)
         return self._num_samples_window
 
-    def make(self):
+    def make(self) -> None:
         """
-        this is just a wrapper call to scipy.signal
+        A wrapper call to scipy.signal
+
         Note: see scipy.signal.get_window for a description of what is expected
         in args[1:]. http://docs.scipy.org/doc/scipy/reference/
         generated/scipy.signal.get_window.html
@@ -152,55 +176,67 @@ def make(self):
         window_args = [v for k, v in self.additional_args.items()]
         window_args.insert(0, self.taper_family)
         window_args = tuple(window_args)
-        # print(f"\n\nWINDOW args {window_args}")
+
         self.taper = ssig.get_window(window_args, self.num_samples_window)
-        self.apodization_factor  # calculate
-        return
+        self.apodization_factor  # calculate -- 20240723: this should not be needed
 
     @property
-    def S1(self):
-        """sum of the window coefficients"""
+    def S1(self) -> float:
+        """Returns the sum of the window coefficients."""
         if getattr(self, "_S1", None) is None:
             self._S1 = sum(self.taper)
         return self._S1
 
     @property
-    def S2(self):
-        """sum of squares of the window coefficients"""
+    def S2(self) -> float:
+        """Returns the sum of squares of the window coefficients."""
         if getattr(self, "_S2", None) is None:
             self._S2 = sum(self.taper**2)
         return self._S2
 
     @property
-    def coherent_gain(self):
-        """DC gain of the window normalized by window length"""
+    def coherent_gain(self) -> float:
+        """Returns the DC gain of the window normalized by window length."""
         return self.S1 / self.num_samples_window
 
     @property
-    def nenbw(self):
-        """NENBW Normalized Equivalent Noise BandWidth, see Equation (21) in
-        Heinzel et al 2002"""
+    def nenbw(self) -> float:
+        """
+        Returns the Normalized Equivalent Noise BandWidth.
+
+        Notes:
+        a.k.a. NENBW, see Equation (21) in Heinzel et al. 2002.
+
+        """
         return self.num_samples_window * self.S2 / (self.S1**2)
 
-    def enbw(self, fs):
+    def enbw(self, fs) -> float:
         """
-        Notes that unlike NENBW, CG, S1, S2, this is not a pure property of the
+        Returns the Equivalent Noise Bandwidth of the window.
+
+        Notes:
+        Effective Noise BandWidth = fs*NENBW/N = fs S2/(S1**2)
+        Unlike NENBW, CG, S1, S2, this is not a pure property of the
         window -- but instead this is a property of the window combined with the
         sample rate.
+
         Parameters
         ----------
-        fs : sampling frequency (1/dt)
+        fs : float
+            The sampling frequency (1/dt)
 
         Returns
         -------
-
+        enbw: float
         """
-        """Effective Noise BandWidth = fs*NENBW/N = fs S2/(S1**2)"""
-
         return fs * self.S2 / (self.S1**2)
 
-    def test_linear_spectral_density_factor(self):
-        """This is just a test to verify some algebra
+    def test_linear_spectral_density_factor(self) -> None:
+        """
+        This is just a test to verify some algebra
+
+        TODO: Move this into tests
+
         Claim: The lsd_calibration factors
         A (1./coherent\_gain)\*np.sqrt((2\*dt)/(nenbw\*N))
@@ -216,10 +252,6 @@ def test_linear_spectral_density_factor(self):
         which I show in githib aurora issue #3 via .
(CG\*\*2) \* NENBW \*N = S2 - Returns - ------- - - """ lsd_factor1 = (1.0 / self.coherent_gain) * np.sqrt( 1.0 / (self.nenbw * self.num_samples_window) @@ -231,17 +263,20 @@ def test_linear_spectral_density_factor(self): raise Exception @property - def taper(self): + def taper(self) -> np.ndarray: + """Returns the values of the taper as a numpy array""" return self._taper @taper.setter - def taper(self, x): + def taper(self, x: np.ndarray) -> None: + """Sets the value of the taper""" self._taper = x self._S1 = None self._S2 = None @property - def apodization_factor(self): + def apodization_factor(self) -> float: + """Returns the apodization factor (computes if it is None)""" if self._apodization_factor is None: self._apodization_factor = np.sqrt(self.nenbw) * self.coherent_gain return self._apodization_factor diff --git a/aurora/time_series/decorators.py b/aurora/time_series/decorators.py index 745292aa..6f61c7df 100644 --- a/aurora/time_series/decorators.py +++ b/aurora/time_series/decorators.py @@ -1,4 +1,9 @@ """ +This module is a place to put decorators used by methods in aurora. +It is a work in progress. + +Development notes: + | Here is the decorator pattern | def decorator(func): | @functools.wraps(func) @@ -8,6 +13,7 @@ | # Do something after | return value | return wrapper_decorator + """ import functools @@ -16,7 +22,11 @@ def can_use_xr_dataarray(func): """ - Intended as a decorator. Most of the windowed time series methods are + Decorator to allow functions that operate on xr.Dataset to also operate + on xr.DataArray, by converting xr.DataArray to xr.Dataset, calling the + function, and then converting back to xr.DataArray + + Most of the windowed time series methods are intended to work with xarray.Dataset class. But I would like to be able to pass them xarray.DataArray objects. This class casts a DataArray to a Dataset, runs it through func and casts back to a DataArray. diff --git a/aurora/time_series/filters/__init__.py b/aurora/time_series/filters/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/aurora/time_series/frequency_band_helpers.py b/aurora/time_series/frequency_band_helpers.py index 4b785ede..10dc7588 100644 --- a/aurora/time_series/frequency_band_helpers.py +++ b/aurora/time_series/frequency_band_helpers.py @@ -1,4 +1,8 @@ -import numpy as np +""" + This module contains functions that are associated with time series of Fourier coefficients + +""" +# import numpy as np from loguru import logger @@ -47,11 +51,14 @@ def get_band_for_tf_estimate(band, dec_level_config, local_stft_obj, remote_stft def extract_band(frequency_band, fft_obj, channels=[], epsilon=1e-7): """ - Stand alone method that operates on an xr.DataArray, and is wrapped with Spectrogram + Extracts a frequency band from xr.DataArray representing a spectrogram. + Stand alone version of the method that is used by WIP Spectrogram class. - Note #1: 20230902 - drop=True does not play nice with h5py and Dataset, results in a type error. + Development Notes: + #1: 20230902 + TODO: Decide if base dataset object should be a xr.DataArray (not xr.Dataset) + - drop=True does not play nice with h5py and Dataset, results in a type error. 
File "stringsource", line 2, in h5py.h5r.Reference.__reduce_cython__ TypeError: no default __reduce__ due to non-trivial __cinit__ However, it works OK with DataArray, so maybe use data array in general @@ -87,35 +94,34 @@ def extract_band(frequency_band, fft_obj, channels=[], epsilon=1e-7): def check_time_axes_synched(X, Y): """ - Utility function for checking that time axes agree + Utility function for checking that time axes agree. + Raises ValueError if axes do not agree. + + It is critical that X, Y, RR have the same time axes for aurora processing. Parameters ---------- X : xarray Y : xarray - Returns - ------- - - """ - """ - It is critical that X, Y, RR have the same time axes here - - Returns - ------- """ if (X.time == Y.time).all(): pass else: - logger.warning("WARNING - NAN Handling could fail if X,Y dont share time axes") - raise Exception + msg = "Time axes of arrays not identical" + # "NAN Handling could fail if X,Y dont share time axes" + logger.warning(msg) + raise ValueError(msg) return def adjust_band_for_coherence_sorting(frequency_band, spectrogram, rule="min3"): """ + WIP: Intended to broaden band to allow more FCs for spectral features + - used in coherence sorting and general feature extraction + Parameters ---------- frequency_band @@ -250,4 +256,5 @@ def adjust_band_for_coherence_sorting(frequency_band, spectrogram, rule="min3"): def cross_spectra(X, Y): + """WIP: Returns the cross power spectra between two arrays""" return X.conj() * Y diff --git a/aurora/time_series/spectrogram.py b/aurora/time_series/spectrogram.py index 09c606c7..e79e7bde 100644 --- a/aurora/time_series/spectrogram.py +++ b/aurora/time_series/spectrogram.py @@ -1,3 +1,8 @@ +""" + WORK IN PROGRESS (WIP): This module contains a class that represents a spectrogram, + i.e. A 2D time series of Fourier coefficients with axes time and frequency. + +""" from aurora.time_series.frequency_band_helpers import extract_band @@ -11,6 +16,7 @@ class Spectrogram(object): """ def __init__(self, dataset=None): + """Constructor""" self._dataset = dataset self._frequency_increment = None @@ -20,8 +26,8 @@ def _lowest_frequency(self): def _higest_frequency(self): pass - def __str__(self): - # Description of frequency coverage + def __str__(self) -> str: + """Returns a Description of frequency coverage""" intro = "Spectrogram:" frequency_coverage = ( f"{self.dataset.dims['frequency']} harmonics, {self.frequency_increment}Hz spaced \n" @@ -49,14 +55,20 @@ def __repr__(self): @property def dataset(self): + """returns the underlying xarray data""" return self._dataset @property def time_axis(self): + """returns the time axis of the underlying xarray""" return self.dataset.time @property def frequency_increment(self): + """ + returns the "delta f" of the frequency axis + - assumes uniformly sampled in frequency domain + """ if self._frequency_increment is None: frequency_axis = self.dataset.frequency self._frequency_increment = frequency_axis.data[1] - frequency_axis.data[0] @@ -65,6 +77,8 @@ def frequency_increment(self): def num_harmonics_in_band(self, frequency_band, epsilon=1e-7): """ + Returns the number of harmonics within the frequency band in the underlying dataset + Parameters ---------- band @@ -81,7 +95,9 @@ def num_harmonics_in_band(self, frequency_band, epsilon=1e-7): def extract_band(self, frequency_band, channels=[]): """ - TODO: Check if this should be returning a copy of the data... + Returns another instance of Spectrogram, with the frequency axis reduced to the input band. 
+
+        TODO: Consider returning a copy of the data...
 
         Parameters
         ----------
@@ -103,5 +119,6 @@ def extract_band(self, frequency_band, channels=[]):
         spectrogram = Spectrogram(dataset=extracted_band_dataset)
         return spectrogram
 
-    def cross_powers(self, ch1, ch2, band=None):
-        pass
+    # TODO: Add cross power method
+    # def cross_powers(self, ch1, ch2, band=None):
+    #     pass
diff --git a/aurora/time_series/time_axis_helpers.py b/aurora/time_series/time_axis_helpers.py
index dd1a229e..3a4c95fc 100644
--- a/aurora/time_series/time_axis_helpers.py
+++ b/aurora/time_series/time_axis_helpers.py
@@ -1,11 +1,41 @@
+"""
+    This module contains functions for generating time axes.
+
+    20240723: There are two approaches used to generate time axes that
+    should be equivalent if there are integer nanoseconds per sample,
+    but otherwise they will differ.
+
+    These functions are not used outside of tests and may be removed in future.
+    For now, keep them around as they may be useful in addressing
+    mth5 issue 225 https://github.com/kujaku11/mth5/issues/225
+    which wants to characterize round-off error in timestamps.
+
+"""
 import numpy as np
 import pandas as pd
 import time
 from loguru import logger
 
 
-def fast_arange(t0, n_samples, sample_rate):
-    t0 = np.datetime64(t0)
+def fast_arange(t0: np.datetime64, n_samples: int, sample_rate: float) -> np.ndarray:
+    """
+    creates an array of (approximately) equally spaced time stamps
+
+    Parameters
+    ----------
+    t0: np.datetime64
+        The time of the first sample
+    n_samples: int
+        The number of samples on the time axis
+    sample_rate: float
+        The number of samples per second
+
+    Returns
+    -------
+    time_index: np.ndarray
+        An array of np.datetime64 objects -- the time axis.
+    """
+    # t0 = np.datetime64(t0)
     dt = 1.0 / sample_rate
     dt_nanoseconds = int(np.round(1e9 * dt))
     dt_timedelta = np.timedelta64(dt_nanoseconds, "ns")
@@ -13,8 +43,10 @@ def fast_arange(t0, n_samples, sample_rate):
     return time_index
 
 
-def slow_comprehension(t0, n_samples, sample_rate):
-    t0 = np.datetime64(t0)
+def slow_comprehension(
+    t0: np.datetime64, n_samples: int, sample_rate: float
+) -> np.ndarray:
+    # t0 = np.datetime64(t0)
     dt = 1.0 / sample_rate
     time_vector_seconds = dt * np.arange(n_samples)
     time_vector_nanoseconds = (np.round(1e9 * time_vector_seconds)).astype(int)
@@ -29,7 +61,21 @@ def slow_comprehension(t0, n_samples, sample_rate):
 TIME_AXIS_GENERATOR_FUNCTIONS["slow_comprehension"] = slow_comprehension
 
 
-def decide_time_axis_method(sample_rate):
+def decide_time_axis_method(sample_rate: float) -> str:
+    """
+    Based on sample rate, decide method of time axis generation.
+
+    Parameters
+    ----------
+    sample_rate: float
+        The sample rate of the data (assumed constant for whole time series)
+
+    Returns
+    -------
+    method: str
+        one of ["fast_arange", "slow_comprehension"]
+        must be a key in TIME_AXIS_GENERATOR_FUNCTIONS
+    """
     dt = 1.0 / sample_rate
     ns_per_sample = 1e9 * dt
     if np.floor(ns_per_sample) == np.ceil(ns_per_sample):
@@ -39,14 +85,36 @@ def decide_time_axis_method(sample_rate):
     return method
 
 
-def make_time_axis(t0, n_samples, sample_rate):
+def make_time_axis(t0: np.datetime64, n_samples: int, sample_rate: float) -> np.ndarray:
+    """
+    Passthrough method that calls a function from TIME_AXIS_GENERATOR_FUNCTIONS
+
+    Parameters
+    ----------
+    t0: np.datetime64
+        The time of the first sample
+    n_samples: int
+        The number of samples on the time axis
+    sample_rate: float
+        The number of samples per second
+
+    Returns
+    -------
+    time_index: np.ndarray
+        An array of np.datetime64 objects -- the time axis.
+    """
     method = decide_time_axis_method(sample_rate)
     time_axis = TIME_AXIS_GENERATOR_FUNCTIONS[method](t0, n_samples, sample_rate)
     return time_axis
 
 
 def test_generate_time_axis(t0, n_samples, sample_rate):
-    """Two obvious ways to generate an axis of timestamps here. One method is slow and
+    """
+
+    Method to compare different ways to generate a time axis.
+
+    Development Notes:
+    Two obvious ways to generate an axis of timestamps here. One method is slow and
     more precise, the other is fast but drops some nanoseconds due to integer
     roundoff error.
 
@@ -68,7 +136,7 @@ def test_generate_time_axis(t0, n_samples, sample_rate):
         _description_
     n_samples : _type_
         _description_
-    sample_rate : _type_ 
+    sample_rate : _type_
         _description_
 
     Returns
     -------
@@ -81,7 +149,6 @@ def test_generate_time_axis(t0, n_samples, sample_rate):
     tt = time.time()
     time_index_1 = slow_comprehension(t0, n_samples, sample_rate)
     processing_time_1 = tt - time.time()
     logger.info(f"processing_time_1 = {processing_time_1}")
-
     # FAST
     tt = time.time()
@@ -89,7 +156,7 @@ def test_generate_time_axis(t0, n_samples, sample_rate):
     processing_time_2 = tt - time.time()
     logger.info(f"processing_time_2 {processing_time_2}")
     logger.info(f"ratio of processing times {processing_time_1/processing_time_2}")
-    
+
     if (np.abs(time_index_2 - time_index_1)).sum() == 0:
         pass
     else:
@@ -97,21 +164,33 @@ def test_generate_time_axis(t0, n_samples, sample_rate):
     return time_index_1
 
 
-# def make_time_axis_method1(t0, n_samples, sample_rate):
-#     pass
+def do_some_tests() -> None:
+    """
+    Placeholder for tests.
+    Highlights the difference in time axes when there is an integer number of
+    ns per sample vs. when there is not.
 
-def do_some_tests():
+    Returns
+    -------
+
+    """
+    # Integer ns per sample
     n_samples = 1000
     sample_rate = 50.0  # Hz
     t0 = pd.Timestamp(1977, 3, 2, 6, 1, 44)
     time_axis = test_generate_time_axis(t0, n_samples, sample_rate)
     logger.info(f"{time_axis[0]} ...{time_axis[-1]}")
+
+    # Non-integer ns per sample
     sample_rate = 3.0  # Hz
     time_axis = test_generate_time_axis(t0, n_samples, sample_rate)
+    logger.info(f"{time_axis[0]} ...{time_axis[-1]}")
+
     return
 
 
 def main():
+    """Allow calling from the command line"""
     do_some_tests()
diff --git a/aurora/time_series/window_helpers.py b/aurora/time_series/window_helpers.py
index 535370f7..fb210322 100644
--- a/aurora/time_series/window_helpers.py
+++ b/aurora/time_series/window_helpers.py
@@ -1,4 +1,9 @@
 """
+
+This module contains some helper functions that are used when working with sliding windows.
+
+TODO: Not all the functions here are needed; some of them are just examples and tests.
+ Notes in google doc: https://docs.google.com/document/d/1CsRhSLXsRG8HQxM4lKNqVj-V9KA9iUQAvCOtouVzFs0/edit?usp=sharing @@ -13,8 +18,13 @@ # Window-to-timeseries relationship -def available_number_of_windows_in_array(n_samples_array, n_samples_window, n_advance): +def available_number_of_windows_in_array( + n_samples_array: int, n_samples_window: int, n_advance: int +) -> int: """ + Returns the number of whole windows that can be extracted from array of length + n_samples_array by a window of length n_samples_window, if the window advances + by n_advance samples at each step. Parameters ---------- @@ -32,7 +42,9 @@ def available_number_of_windows_in_array(n_samples_array, n_samples_window, n_ad """ stridable_samples = n_samples_array - n_samples_window if stridable_samples < 0: - logger.critical("CRITICAL Window is longer than the time series") + logger.error( + "Window is longer than the time series -- no complete windows can be returned" + ) return 0 available_number_of_strides = int(np.floor(stridable_samples / n_advance)) available_number_of_strides += 1
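
Reviewer note (not part of the patch): the short standalone sketch below illustrates the quantities documented above -- the window statistics from ApodizationWindow (S1, S2, coherent gain, NENBW per Equation (21) and ENBW per Equation (22) of Heinzel et al. 2002) and the whole-window count logic from window_helpers.available_number_of_windows_in_array. It uses only numpy and scipy; the variable names and sample values are illustrative assumptions, not aurora API.

# Standalone sketch (not part of this diff): mirrors the formulas documented in
# ApodizationWindow and window_helpers; values below are arbitrary examples.
import numpy as np
import scipy.signal as ssig

num_samples_window = 256
taper = ssig.get_window("hamming", num_samples_window)  # any scipy window name works

S1 = taper.sum()                         # sum of window coefficients
S2 = (taper**2).sum()                    # sum of squared window coefficients
coherent_gain = S1 / num_samples_window  # DC gain normalized by window length
nenbw = num_samples_window * S2 / S1**2  # Equation (21), Heinzel et al. 2002
fs = 100.0                               # sampling frequency in Hz (1/dt)
enbw = fs * S2 / S1**2                   # Equation (22): ENBW = fs * NENBW / N
apodization_factor = np.sqrt(nenbw) * coherent_gain

# Number of whole windows a sliding window can extract from an array
# (same logic as available_number_of_windows_in_array)
n_samples_array = 10000
n_advance = 64
stridable_samples = n_samples_array - num_samples_window
n_windows = 0 if stridable_samples < 0 else int(np.floor(stridable_samples / n_advance)) + 1

print(f"NENBW = {nenbw:.4f}, ENBW = {enbw:.4f} Hz, available windows = {n_windows}")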