updating print to logger
Laura Keyson committed Nov 17, 2023
1 parent c028e75 commit 2624140
Showing 43 changed files with 984 additions and 280 deletions.
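Note: every hunk below follows the same mechanical pattern, swapping a bare print() for a call on loguru's ready-made logger, which needs no handler setup (it ships with a stderr sink). A minimal sketch of the before/after (the message and variable are illustrative, not taken from this diff):

    from loguru import logger

    station = "mt01"  # illustrative value

    # before: unstructured stdout, no level, no timestamp
    print(f"processing station {station}")

    # after: a timestamped, leveled record on stderr
    logger.info(f"processing station {station}")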
6 changes: 3 additions & 3 deletions aurora/config/metadata/processing.py
@@ -10,13 +10,13 @@
from aurora.time_series.windowing_scheme import window_scheme_from_decimation
from mt_metadata.transfer_functions.processing.aurora.processing import Processing
from mt_metadata.utils.list_dict import ListDict

+ from loguru import logger

class Processing(Processing):
def __init__(self, **kwargs):

# super().__init__(attr_dict=attr_dict, **kwargs)
super().__init__(**kwargs)
+ self.logger = logger

def window_scheme(self, as_type="df"):
"""
@@ -42,7 +42,7 @@ def window_scheme(self, as_type="df"):
df = pd.DataFrame(data=data_dict)
return df
else:
print(f"unexpected rtype for window_scheme {as_type}")
self.logger.error(f"unexpected rtype for window_scheme {as_type}")
raise TypeError

def decimation_info(self):
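Note on the `self.logger = logger` idiom added above: loguru's logger is a process-wide singleton, so the attribute is the same object as the module-level import; it simply makes method call-sites read as self.logger.error(...). A sketch, under the assumption (not shown in this diff) that one might also want per-instance context, which loguru supports via bind():

    from loguru import logger

    class Processing:
        def __init__(self, config_name="default"):
            # bind() returns the same sink pipeline with extra fields
            # attached to every record emitted by this instance
            self.logger = logger.bind(config=config_name)

        def report_bad_type(self, as_type):
            self.logger.error(f"unexpected rtype for window_scheme {as_type}")

    Processing("test").report_bad_type("xarray")

The diff itself assigns the bare logger; the bind() call here is an optional extension, not aurora's code.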
4 changes: 2 additions & 2 deletions aurora/general_helper_functions.py
@@ -103,12 +103,12 @@ def execute_command(cmd, **kwargs):
"""
exec_dir = kwargs.get("exec_dir", os.path.expanduser("~/"))
allow_exception = kwargs.get("allow_exception", True)
print("executing from {}".format(exec_dir))
logger.info("executing from {}".format(exec_dir))
cwd = os.getcwd()
os.chdir(exec_dir)
exit_status = os.system(cmd)
if exit_status != 0:
print(f"exit_status of {cmd} = {exit_status}")
logger.info(f"exit_status of {cmd} = {exit_status}")
if allow_exception:
raise Exception(f"Failed to successfully execute \n {cmd}")
os.chdir(cwd)
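No loguru import is added to this file by the diff (both changes are call sites), so `logger` is assumed to be imported elsewhere in general_helper_functions.py. A sketch of the same exit-status logging built on subprocess instead of os.system + os.chdir; the function signature and the choice of error level are this sketch's, not aurora's:

    import subprocess
    from loguru import logger

    def execute_command(cmd, exec_dir="."):
        # subprocess exposes the return code directly and scopes the
        # working directory to the child process
        result = subprocess.run(cmd, shell=True, cwd=exec_dir)
        if result.returncode != 0:
            logger.error(f"exit_status of {cmd} = {result.returncode}")
            raise Exception(f"Failed to successfully execute \n {cmd}")

    execute_command("ls")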
28 changes: 15 additions & 13 deletions aurora/pipelines/fourier_coefficients.py
@@ -78,6 +78,8 @@
from mt_metadata.transfer_functions.processing.fourier_coefficients import (
Decimation as FCDecimation,
)
+ from loguru import logger



# =============================================================================
@@ -134,7 +136,7 @@ def decimation_and_stft_config_creator(
if isinstance(time_period, mt_metadata.timeseries.time_period.TimePeriod):
dd.time_period = time_period
else:
print(f"Not sure how to assign time_period with {time_period}")
logger.info(f"Not sure how to assign time_period with {time_period}")
raise NotImplementedError

decimation_and_stft_config.append(dd)
@@ -159,17 +161,17 @@ def add_fcs_to_mth5(mth5_path, decimation_and_stft_configs=None):
channel_summary_df = m.channel_summary.to_dataframe()

usssr_grouper = channel_summary_df.groupby(GROUPBY_COLUMNS)
print(f"DETECTED {len(usssr_grouper)} unique station-sample_rate instances")
logger.debug(f"DETECTED {len(usssr_grouper)} unique station-sample_rate instances")

for (survey, station, sample_rate), usssr_group in usssr_grouper:
print(f"\n\n\nsurvey: {survey}, station: {station}, sample_rate {sample_rate}")
logger.info(f"\n\n\nsurvey: {survey}, station: {station}, sample_rate {sample_rate}")
station_obj = m.get_station(station, survey)
run_summary = station_obj.run_summary

# Get the FC schemes
if not decimation_and_stft_configs:
msg = "FC config not supplied, using default, creating on the fly"
print(f"{msg}")
logger.info(f"{msg}")
decimation_and_stft_configs = decimation_and_stft_config_creator(
sample_rate, time_period=None
)
@@ -178,7 +180,7 @@ def add_fcs_to_mth5(mth5_path, decimation_and_stft_configs=None):
# I wonder if daskifiying that will cause issues with multiple threads trying to
# write to the hdf5 file -- will need testing
for i_run_row, run_row in run_summary.iterrows():
- print(
+ logger.info(
f"survey: {survey}, station: {station}, sample_rate {sample_rate}, i_run_row {i_run_row}"
)
# Access Run
@@ -208,11 +210,11 @@ def add_fcs_to_mth5(mth5_path, decimation_and_stft_configs=None):
if i_dec_level != 0:
# Apply decimation
run_xrds = prototype_decimate(decimation_stft_obj, run_xrds)
print(f"type decimation_stft_obj = {type(decimation_stft_obj)}")
logger.info(f"type decimation_stft_obj = {type(decimation_stft_obj)}")
if not decimation_stft_obj.is_valid_for_time_series_length(
run_xrds.time.shape[0]
):
- print(
+ logger.info(
f"Decimation Level {i_dec_level} invalid, TS of {run_xrds.time.shape[0]} samples too short"
)
continue
@@ -248,24 +250,24 @@ def read_back_fcs(mth5_path):
m = MTH5()
m.open_mth5(mth5_path)
channel_summary_df = m.channel_summary.to_dataframe()
- print(channel_summary_df)
+ logger.debug(channel_summary_df)
usssr_grouper = channel_summary_df.groupby(GROUPBY_COLUMNS)
for (survey, station, sample_rate), usssr_group in usssr_grouper:
print(f"survey: {survey}, station: {station}, sample_rate {sample_rate}")
logger.info(f"survey: {survey}, station: {station}, sample_rate {sample_rate}")
station_obj = m.get_station(station, survey)
fc_groups = station_obj.fourier_coefficients_group.groups_list
print(f"FC Groups: {fc_groups}")
logger.info(f"FC Groups: {fc_groups}")
for run_id in fc_groups:
fc_group = station_obj.fourier_coefficients_group.get_fc_group(run_id)
dec_level_ids = fc_group.groups_list
for dec_level_id in dec_level_ids:
dec_level = fc_group.get_decimation_level(dec_level_id)
- print(
+ logger.info(
f"dec_level {dec_level_id}"
) # channel_summary {dec_level.channel_summary}")
xrds = dec_level.to_xarray(["hx", "hy"])
print(f"Time axis shape {xrds.time.data.shape}")
print(f"Freq axis shape {xrds.frequency.data.shape}")
logger.info(f"Time axis shape {xrds.time.data.shape}")
logger.info(f"Freq axis shape {xrds.frequency.data.shape}")
return True


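This file splits levels deliberately: logger.debug for dataframe dumps, logger.info for progress. Whether the debug records actually appear depends on the sink configuration; loguru's default stderr sink shows DEBUG and up. A sketch of quieting the dumps while keeping progress lines (the level choice is illustrative):

    import sys
    from loguru import logger

    # replace the default sink with one that drops DEBUG records
    logger.remove()
    logger.add(sys.stderr, level="INFO")

    logger.debug("channel summary dump")  # now suppressed
    logger.info("survey: CAS04")          # still shown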
7 changes: 3 additions & 4 deletions aurora/pipelines/process_mth5.py
@@ -39,7 +39,6 @@

# =============================================================================


def make_stft_objects(
processing_config, i_dec_level, run_obj, run_xrds, units, station_id
):
@@ -168,7 +167,7 @@ def triage_issue_289(local_stfts, remote_stfts):
for i_chunk in range(n_chunks):
ok = local_stfts[i_chunk].time.shape == remote_stfts[i_chunk].time.shape
if not ok:
print("Mismatch in FC array lengths detected -- Issue #289")
logger.warning("Mismatch in FC array lengths detected -- Issue #289")
glb = max(
local_stfts[i_chunk].time.min(),
remote_stfts[i_chunk].time.min(),
@@ -365,8 +364,8 @@ def process_mth5(
try:
assert row.run_id == run_obj.metadata.id
except AssertionError:
print("WARNING Run ID in dataset_df does not match run_obj")
print("WARNING Forcing run metadata to match dataset_df")
logger.warning("WARNING Run ID in dataset_df does not match run_obj")
logger.warning("WARNING Forcing run metadata to match dataset_df")
run_obj.metadata.id = row.run_id

stft_obj = make_stft_objects(
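The three additions in this file are the logger.warning call sites; no loguru import is added, so `logger` is assumed to already be available in process_mth5.py. A self-contained sketch of the reconcile-and-continue pattern the last hunk implements (the function and its arguments are illustrative, not aurora's API):

    from loguru import logger

    def reconcile_run_id(dataset_df_run_id: str, run_obj_id: str) -> str:
        # same behavior as the hunk's try/assert block, written as a
        # plain comparison: warn about the mismatch, then force agreement
        if dataset_df_run_id != run_obj_id:
            logger.warning("Run ID in dataset_df does not match run_obj")
            logger.warning("Forcing run metadata to match dataset_df")
        return dataset_df_run_id

    reconcile_run_id("001", "002")  # logs two warnings, returns "001"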
10 changes: 6 additions & 4 deletions aurora/pipelines/run_summary.py
@@ -28,6 +28,7 @@
from mt_metadata.transfer_functions.processing.aurora.channel_nomenclature import ALLOWED_OUTPUT_CHANNELS
import mth5
from mth5.utils.helpers import initialize_mth5
+ from loguru import logger



@@ -70,6 +71,7 @@ def __init__(self, **kwargs):
self._input_dict = kwargs.get("input_dict", None)
self.df = kwargs.get("df", None)
self._mini_summary_columns = ["survey", "station_id", "run_id", "start", "end"]
+ self.logger = logger

def clone(self):
"""
@@ -89,7 +91,7 @@ def mini_summary(self):

@property
def print_mini_summary(self):
- print(self.mini_summary)
+ self.logger.info(self.mini_summary)

def add_duration(self, df=None):
"""
@@ -110,13 +112,13 @@ def check_runs_are_valid(self, drop=False, **kwargs):
"""kwargs can specify which conditions to check, e.g. runs that are all zero or runs containing NaNs."""
# check_for_all_zero_runs
for i_row, row in self.df.iterrows():
print(f"Checking row for zeros {row}")
self.logger.info(f"Checking row for zeros {row}")
m = mth5.mth5.MTH5()
m.open_mth5(row.mth5_path)
run_obj = m.get_run(row.station_id, row.run_id, row.survey)
runts = run_obj.to_runts()
if runts.dataset.to_array().data.__abs__().sum() == 0:
print("CRITICAL: Detected a run with all zero values")
self.logger.critical("CRITICAL: Detected a run with all zero values")
self.df["valid"].at[i_row] = False
# load each run, and take the median of the sum of the absolute values
if drop:
@@ -269,7 +271,7 @@ def extract_run_summary_from_mth5(mth5_obj, summary_type="run"):
channel_summary_df = mth5_obj.channel_summary.to_dataframe()
# check that the mth5 has been summarized already
if len(channel_summary_df) < 2:
print("Channel summary maybe not initialized yet, 3 or more channels expected.")
self.logger.info("Channel summary maybe not initialized yet, 3 or more channels expected.")
mth5_obj.channel_summary.summarize()
channel_summary_df = mth5_obj.channel_summary.to_dataframe()
if summary_type == "run":
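Two notes on this file: extract_run_summary_from_mth5 is a module-level function, so the bare `logger` (not `self.logger`) is the correct call there; and check_runs_are_valid flags a run as degenerate when the sum of absolute values over all channels is zero. A self-contained sketch of that core test (the function name and plain-array input are illustrative; aurora operates on a RunTS converted to an array):

    import numpy as np
    from loguru import logger

    def run_is_all_zero(data: np.ndarray) -> bool:
        # a run of pure zeros has zero total absolute amplitude
        if np.abs(data).sum() == 0:
            logger.critical("Detected a run with all zero values")
            return True
        return False

    run_is_all_zero(np.zeros((5, 100)))  # logs the critical record, returns True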
4 changes: 2 additions & 2 deletions aurora/pipelines/time_series_helpers.py
@@ -222,7 +222,7 @@ def nan_to_mean(xrds):
for ch in xrds.keys():
null_values_present = xrds[ch].isnull().any()
if null_values_present:
- print(
+ logger.info(
"Null values detected in xrds -- this is not expected and should be examined"
)
value = np.nan_to_num(np.nanmean(xrds[ch].data))
@@ -308,7 +308,7 @@ def calibrate_stft_obj(stft_obj, run_obj, units="MT", channel_scale_factors=None):
channel_scale_factor = 1.0
calibration_response /= channel_scale_factor
if units == "SI":
print("Warning: SI Units are not robustly supported issue #36")
logger.warning("Warning: SI Units are not robustly supported issue #36")

stft_obj[channel_id].data /= calibration_response
return stft_obj
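The nan_to_mean hunk ends before the replacement step. A sketch of the whole function as the surrounding lines imply it works: each channel's NaNs are filled with that channel's mean. The fillna step is this sketch's assumption, since the hunk truncates before it:

    import numpy as np
    import xarray as xr
    from loguru import logger

    def nan_to_mean(xrds: xr.Dataset) -> xr.Dataset:
        for ch in xrds.keys():
            if xrds[ch].isnull().any():
                logger.info(
                    "Null values detected in xrds -- this is not expected and should be examined"
                )
                # mean over the non-NaN samples, 0.0 if every sample is NaN
                value = np.nan_to_num(np.nanmean(xrds[ch].data))
                xrds[ch] = xrds[ch].fillna(value)
        return xrds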
10 changes: 6 additions & 4 deletions aurora/pipelines/transfer_function_helpers.py
@@ -10,6 +10,8 @@
from aurora.transfer_function.weights.edf_weights import (
effective_degrees_of_freedom_weights,
)
+ from loguru import logger


ESTIMATOR_LIBRARY = {"OLS": RegressionEstimator, "RME": TRME, "RME_RR": TRME_RR}

@@ -31,8 +33,8 @@ def get_estimator_class(estimation_engine):
try:
estimator_class = ESTIMATOR_LIBRARY[estimation_engine]
except KeyError:
print(f"processing_scheme {estimation_engine} not supported")
print(f"processing_scheme must be one of {list(ESTIMATOR_LIBRARY.keys())}")
logger.error(f"processing_scheme {estimation_engine} not supported")
logger.error(f"processing_scheme must be one of {list(ESTIMATOR_LIBRARY.keys())}")
raise Exception
return estimator_class

@@ -87,7 +89,7 @@ def check_time_axes_synched(X, Y):
if (X.time == Y.time).all():
pass
else:
print("WARNING - NAN Handling could fail if X,Y dont share time axes")
logger.warning("WARNING - NAN Handling could fail if X,Y dont share time axes")
raise Exception
return

@@ -121,7 +123,7 @@ def get_band_for_tf_estimate(
being within the frequency band given as an input argument.
"""
dec_level_config = config.decimations[0]
print(f"Processing band {band.center_period:.6f}s")
logger.info(f"Processing band {band.center_period:.6f}s")
band_dataset = extract_band(band, local_stft_obj)
X = band_dataset[dec_level_config.input_channels]
Y = band_dataset[dec_level_config.output_channels]
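get_estimator_class logs at error level and then raises a bare Exception, so the message reaches the log but not the traceback. A sketch of the same lookup with the message attached to the exception as well; the KeyError choice and the stand-in estimator values are this sketch's, not the diff's:

    from loguru import logger

    ESTIMATOR_LIBRARY = {"OLS": object, "RME": object, "RME_RR": object}  # stand-ins

    def get_estimator_class(estimation_engine: str):
        try:
            return ESTIMATOR_LIBRARY[estimation_engine]
        except KeyError:
            msg = (
                f"processing_scheme {estimation_engine} not supported; "
                f"must be one of {list(ESTIMATOR_LIBRARY.keys())}"
            )
            logger.error(msg)
            raise KeyError(msg)

    get_estimator_class("RME")  # returns the stand-in estimator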
25 changes: 13 additions & 12 deletions aurora/pipelines/transfer_function_kernel.py
@@ -9,6 +9,7 @@
from mth5.utils.exceptions import MTH5Error
from mth5.utils.helpers import initialize_mth5
from mt_metadata.transfer_functions.core import TF
+ from loguru import logger


class TransferFunctionKernel(object):
@@ -120,15 +121,15 @@ def update_dataset_df(self, i_dec_level):

# APPLY TIMING CORRECTIONS HERE
else:
print(f"DECIMATION LEVEL {i_dec_level}")
logger.info(f"DECIMATION LEVEL {i_dec_level}")

for i, row in self.dataset_df.iterrows():
if not self.is_valid_dataset(row, i_dec_level):
continue
if row.fc:
row_ssr_str = f"survey: {row.survey}, station_id: {row.station_id}, run_id: {row.run_id}"
msg = f"FC already exists for {row_ssr_str} -- skipping decimation"
- print(msg)
+ logger.info(msg)
continue
run_xrds = row["run_dataarray"].to_dataset("channel")
decimation = self.config.decimations[i_dec_level].decimation
@@ -244,7 +245,7 @@ def check_if_fc_levels_already_exist(self):
< self.processing_config.num_decimation_levels
):
self.dataset_df.loc[dataset_df_indices, "fc"] = False
- print(
+ logger.info(
f"Not enough FC Groups available for {row_ssr_str} -- will need to build them "
)
continue
@@ -279,7 +280,7 @@ def show_processing_summary(
columns_to_show = self.processing_summary.columns
columns_to_show = [x for x in columns_to_show if x not in omit_columns]
logger.info("Processing Summary Dataframe:")
- print(self.processing_summary[columns_to_show].to_string())
+ logger.info(self.processing_summary[columns_to_show].to_string())

def make_processing_summary(self):
"""
@@ -321,7 +322,7 @@ def make_processing_summary(self):
df.dec_level.diff()[1:] == 1
).all() # dec levels increment by 1
except AssertionError:
print(f"Skipping {group} because decimation levels are messy.")
logger.info(f"Skipping {group} because decimation levels are messy.")
continue
assert df.dec_factor.iloc[0] == 1
assert df.dec_level.iloc[0] == 0
@@ -403,13 +404,13 @@ def validate_processing(self):
if not self.config.stations.remote:
for decimation in self.config.decimations:
if decimation.estimator.engine == "RME_RR":
print("No RR station specified, switching RME_RR to RME")
logger.info("No RR station specified, switching RME_RR to RME")
decimation.estimator.engine = "RME"

# Make sure that a local station is defined
if not self.config.stations.local.id:
print("WARNING: Local station not specified")
print("Setting local station from Kernel Dataset")
logger.warning("WARNING: Local station not specified")
logger.warning("Setting local station from Kernel Dataset")
self.config.stations.from_dataset_dataframe(self.kernel_dataset.df)

def validate(self):
@@ -540,7 +541,7 @@ def make_decimation_dict_for_tf(tf_collection, processing_config):
i_dec
].num_segments.data[0, i_band]
except KeyError:
print("Possibly invalid decimation level")
logger.error("Possibly invalid decimation level")
period_value["npts"] = 0
decimation_dict[period_key] = period_value

@@ -596,14 +597,14 @@ def memory_warning(self):
total_memory = psutil.virtual_memory().total

# log the total amount of RAM in GB
- print(f"Total memory: {total_memory / (1024 ** 3):.2f} GB")
+ logger.info(f"Total memory: {total_memory / (1024 ** 3):.2f} GB")
num_samples = self.dataset_df.duration * self.dataset_df.sample_rate
total_samples = num_samples.sum()
total_bytes = total_samples * bytes_per_sample
print(f"Total Bytes of Raw Data: {total_bytes / (1024 ** 3):.3f} GB")
logger.info(f"Total Bytes of Raw Data: {total_bytes / (1024 ** 3):.3f} GB")

ram_fraction = 1.0 * total_bytes / total_memory
print(f"Raw Data will use: {100 * ram_fraction:.3f} % of memory")
logger.info(f"Raw Data will use: {100 * ram_fraction:.3f} % of memory")

# Check a condition
if total_bytes > memory_threshold * total_memory:
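memory_warning estimates the raw-data footprint as samples × bytes-per-sample and compares it to physical RAM. A sketch of the same arithmetic with scalar inputs in place of the dataset_df columns; the byte width and threshold are illustrative defaults:

    import psutil
    from loguru import logger

    def memory_check(duration_s, sample_rate_hz, bytes_per_sample=8, threshold=0.5):
        total_memory = psutil.virtual_memory().total
        logger.info(f"Total memory: {total_memory / (1024 ** 3):.2f} GB")
        total_bytes = duration_s * sample_rate_hz * bytes_per_sample
        logger.info(f"Total Bytes of Raw Data: {total_bytes / (1024 ** 3):.3f} GB")
        ram_fraction = total_bytes / total_memory
        logger.info(f"Raw Data will use: {100 * ram_fraction:.3f} % of memory")
        if total_bytes > threshold * total_memory:
            logger.warning("Raw data may not fit comfortably in memory")

    # one day of 256 Hz data: 86400 s * 256 Hz * 8 B ≈ 0.165 GB
    memory_check(duration_s=86400.0, sample_rate_hz=256.0)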
14 changes: 7 additions & 7 deletions aurora/sandbox/debug_mt_metadata_issue_85.py
@@ -1,6 +1,6 @@
from mt_metadata.timeseries.location import Location
from mth5.mth5 import MTH5

+ from loguru import logger

def test_can_add_location():
"""
@@ -29,17 +29,17 @@ def test_can_add_location():
run_group.station_group.metadata.location = location
# setting latitude as above line does not wind up in the run either

print("Why don't the following values agree??")
print(f"station group {station_group.metadata.location.latitude}")
print(f"Run Group {run_group.station_group.metadata.location.latitude}")
logger.info("Why don't the following values agree??")
logger.info(f"station group {station_group.metadata.location.latitude}")
logger.info(f"Run Group {run_group.station_group.metadata.location.latitude}")
m.close_mth5()

print("Reopen the file and check if update was done on close()")
logger.info("Reopen the file and check if update was done on close()")
m.open_mth5("location_test.h5", mode="r")
eureka = m.get_station("eureka")
print(f"station group {eureka.metadata.location.latitude}")
logger.info(f"station group {eureka.metadata.location.latitude}")
run_001 = eureka.get_run("001")
print(f"Run Group {run_001.station_group.metadata.location.latitude}")
logger.info(f"Run Group {run_001.station_group.metadata.location.latitude}")
m.close_mth5()
return
