updating print to logger
Laura Keyson committed Nov 17, 2023
1 parent c028e75 commit 2624140
Showing 43 changed files with 984 additions and 280 deletions.
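Note: every hunk below follows the same mechanical pattern, swapping a bare print() for a call on loguru's ready-made logger, which needs no handler setup (it ships with a stderr sink). A minimal sketch of the before/after (the message and variable are illustrative, not taken from this diff):

    from loguru import logger

    station = "mt01"  # illustrative value

    # before: unstructured stdout, no level, no timestamp
    print(f"processing station {station}")

    # after: a timestamped, leveled record on stderr
    logger.info(f"processing station {station}")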
6 changes: 3 additions & 3 deletions aurora/config/metadata/processing.py
@@ -10,13 +10,13 @@
from aurora.time_series.windowing_scheme import window_scheme_from_decimation
from mt_metadata.transfer_functions.processing.aurora.processing import Processing
from mt_metadata.utils.list_dict import ListDict

+ from loguru import logger

class Processing(Processing):
def __init__(self, **kwargs):

# super().__init__(attr_dict=attr_dict, **kwargs)
super().__init__(**kwargs)
+ self.logger = logger

def window_scheme(self, as_type="df"):
"""
@@ -42,7 +42,7 @@ def window_scheme(self, as_type="df"):
df = pd.DataFrame(data=data_dict)
return df
else:
print(f"unexpected rtype for window_scheme {as_type}")
self.logger.error(f"unexpected rtype for window_scheme {as_type}")
raise TypeError

def decimation_info(self):
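Note on the `self.logger = logger` idiom added above: loguru's logger is a process-wide singleton, so the attribute is the same object as the module-level import; it simply makes method call-sites read as self.logger.error(...). A sketch, under the assumption (not shown in this diff) that one might also want per-instance context, which loguru supports via bind():

    from loguru import logger

    class Processing:
        def __init__(self, config_name="default"):
            # bind() returns the same sink pipeline with extra fields
            # attached to every record emitted by this instance
            self.logger = logger.bind(config=config_name)

        def report_bad_type(self, as_type):
            self.logger.error(f"unexpected rtype for window_scheme {as_type}")

    Processing("test").report_bad_type("xarray")

The diff itself assigns the bare logger; the bind() call here is an optional extension, not aurora's code.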
4 changes: 2 additions & 2 deletions aurora/general_helper_functions.py
@@ -103,12 +103,12 @@ def execute_command(cmd, **kwargs):
"""
exec_dir = kwargs.get("exec_dir", os.path.expanduser("~/"))
allow_exception = kwargs.get("allow_exception", True)
print("executing from {}".format(exec_dir))
logger.info("executing from {}".format(exec_dir))
cwd = os.getcwd()
os.chdir(exec_dir)
exit_status = os.system(cmd)
if exit_status != 0:
print(f"exit_status of {cmd} = {exit_status}")
logger.info(f"exit_status of {cmd} = {exit_status}")
if allow_exception:
raise Exception(f"Failed to successfully execute \n {cmd}")
os.chdir(cwd)
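No loguru import is added to this file by the diff (both changes are call sites), so `logger` is assumed to be imported elsewhere in general_helper_functions.py. A sketch of the same exit-status logging built on subprocess instead of os.system + os.chdir; the function signature and the choice of error level are this sketch's, not aurora's:

    import subprocess
    from loguru import logger

    def execute_command(cmd, exec_dir="."):
        # subprocess exposes the return code directly and scopes the
        # working directory to the child process
        result = subprocess.run(cmd, shell=True, cwd=exec_dir)
        if result.returncode != 0:
            logger.error(f"exit_status of {cmd} = {result.returncode}")
            raise Exception(f"Failed to successfully execute \n {cmd}")

    execute_command("ls")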
28 changes: 15 additions & 13 deletions aurora/pipelines/fourier_coefficients.py
@@ -78,6 +78,8 @@
from mt_metadata.transfer_functions.processing.fourier_coefficients import (
Decimation as FCDecimation,
)
+ from loguru import logger



# =============================================================================
@@ -134,7 +136,7 @@ def decimation_and_stft_config_creator(
if isinstance(time_period, mt_metadata.timeseries.time_period.TimePeriod):
dd.time_period = time_period
else:
print(f"Not sure how to assign time_period with {time_period}")
logger.info(f"Not sure how to assign time_period with {time_period}")
raise NotImplementedError

decimation_and_stft_config.append(dd)
@@ -159,17 +161,17 @@ def add_fcs_to_mth5(mth5_path, decimation_and_stft_configs=None):
channel_summary_df = m.channel_summary.to_dataframe()

usssr_grouper = channel_summary_df.groupby(GROUPBY_COLUMNS)
print(f"DETECTED {len(usssr_grouper)} unique station-sample_rate instances")
logger.debug(f"DETECTED {len(usssr_grouper)} unique station-sample_rate instances")

for (survey, station, sample_rate), usssr_group in usssr_grouper:
print(f"\n\n\nsurvey: {survey}, station: {station}, sample_rate {sample_rate}")
logger.info(f"\n\n\nsurvey: {survey}, station: {station}, sample_rate {sample_rate}")
station_obj = m.get_station(station, survey)
run_summary = station_obj.run_summary

# Get the FC schemes
if not decimation_and_stft_configs:
msg = "FC config not supplied, using default, creating on the fly"
print(f"{msg}")
logger.info(f"{msg}")
decimation_and_stft_configs = decimation_and_stft_config_creator(
sample_rate, time_period=None
)
@@ -178,7 +180,7 @@ def add_fcs_to_mth5(mth5_path, decimation_and_stft_configs=None):
# I wonder if daskifiying that will cause issues with multiple threads trying to
# write to the hdf5 file -- will need testing
for i_run_row, run_row in run_summary.iterrows():
- print(
+ logger.info(
f"survey: {survey}, station: {station}, sample_rate {sample_rate}, i_run_row {i_run_row}"
)
# Access Run
@@ -208,11 +210,11 @@ def add_fcs_to_mth5(mth5_path, decimation_and_stft_configs=None):
if i_dec_level != 0:
# Apply decimation
run_xrds = prototype_decimate(decimation_stft_obj, run_xrds)
print(f"type decimation_stft_obj = {type(decimation_stft_obj)}")
logger.info(f"type decimation_stft_obj = {type(decimation_stft_obj)}")
if not decimation_stft_obj.is_valid_for_time_series_length(
run_xrds.time.shape[0]
):
- print(
+ logger.info(
f"Decimation Level {i_dec_level} invalid, TS of {run_xrds.time.shape[0]} samples too short"
)
continue
@@ -248,24 +250,24 @@ def read_back_fcs(mth5_path):
m = MTH5()
m.open_mth5(mth5_path)
channel_summary_df = m.channel_summary.to_dataframe()
- print(channel_summary_df)
+ logger.debug(channel_summary_df)
usssr_grouper = channel_summary_df.groupby(GROUPBY_COLUMNS)
for (survey, station, sample_rate), usssr_group in usssr_grouper:
print(f"survey: {survey}, station: {station}, sample_rate {sample_rate}")
logger.info(f"survey: {survey}, station: {station}, sample_rate {sample_rate}")
station_obj = m.get_station(station, survey)
fc_groups = station_obj.fourier_coefficients_group.groups_list
print(f"FC Groups: {fc_groups}")
logger.info(f"FC Groups: {fc_groups}")
for run_id in fc_groups:
fc_group = station_obj.fourier_coefficients_group.get_fc_group(run_id)
dec_level_ids = fc_group.groups_list
for dec_level_id in dec_level_ids:
dec_level = fc_group.get_decimation_level(dec_level_id)
- print(
+ logger.info(
f"dec_level {dec_level_id}"
) # channel_summary {dec_level.channel_summary}")
xrds = dec_level.to_xarray(["hx", "hy"])
print(f"Time axis shape {xrds.time.data.shape}")
print(f"Freq axis shape {xrds.frequency.data.shape}")
logger.info(f"Time axis shape {xrds.time.data.shape}")
logger.info(f"Freq axis shape {xrds.frequency.data.shape}")
return True


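This file splits levels deliberately: logger.debug for dataframe dumps, logger.info for progress. Whether the debug records actually appear depends on the sink configuration; loguru's default stderr sink shows DEBUG and up. A sketch of quieting the dumps while keeping progress lines (the level choice is illustrative):

    import sys
    from loguru import logger

    # replace the default sink with one that drops DEBUG records
    logger.remove()
    logger.add(sys.stderr, level="INFO")

    logger.debug("channel summary dump")  # now suppressed
    logger.info("survey: CAS04")          # still shown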
7 changes: 3 additions & 4 deletions aurora/pipelines/process_mth5.py
@@ -39,7 +39,6 @@

# =============================================================================


def make_stft_objects(
processing_config, i_dec_level, run_obj, run_xrds, units, station_id
):
@@ -168,7 +167,7 @@ def triage_issue_289(local_stfts, remote_stfts):
for i_chunk in range(n_chunks):
ok = local_stfts[i_chunk].time.shape == remote_stfts[i_chunk].time.shape
if not ok:
print("Mismatch in FC array lengths detected -- Issue #289")
logger.warning("Mismatch in FC array lengths detected -- Issue #289")
glb = max(
local_stfts[i_chunk].time.min(),
remote_stfts[i_chunk].time.min(),
@@ -365,8 +364,8 @@ def process_mth5(
try:
assert row.run_id == run_obj.metadata.id
except AssertionError:
print("WARNING Run ID in dataset_df does not match run_obj")
print("WARNING Forcing run metadata to match dataset_df")
logger.warning("WARNING Run ID in dataset_df does not match run_obj")
logger.warning("WARNING Forcing run metadata to match dataset_df")
run_obj.metadata.id = row.run_id

stft_obj = make_stft_objects(
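The three additions in this file are the logger.warning call sites; no loguru import is added, so `logger` is assumed to already be available in process_mth5.py. A self-contained sketch of the reconcile-and-continue pattern the last hunk implements (the function and its arguments are illustrative, not aurora's API):

    from loguru import logger

    def reconcile_run_id(dataset_df_run_id: str, run_obj_id: str) -> str:
        # same behavior as the hunk's try/assert block, written as a
        # plain comparison: warn about the mismatch, then force agreement
        if dataset_df_run_id != run_obj_id:
            logger.warning("Run ID in dataset_df does not match run_obj")
            logger.warning("Forcing run metadata to match dataset_df")
        return dataset_df_run_id

    reconcile_run_id("001", "002")  # logs two warnings, returns "001"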
10 changes: 6 additions & 4 deletions aurora/pipelines/run_summary.py
@@ -28,6 +28,7 @@
from mt_metadata.transfer_functions.processing.aurora.channel_nomenclature import ALLOWED_OUTPUT_CHANNELS
import mth5
from mth5.utils.helpers import initialize_mth5
+ from loguru import logger



@@ -70,6 +71,7 @@ def __init__(self, **kwargs):
self._input_dict = kwargs.get("input_dict", None)
self.df = kwargs.get("df", None)
self._mini_summary_columns = ["survey", "station_id", "run_id", "start", "end"]
+ self.logger = logger

def clone(self):
"""
@@ -89,7 +91,7 @@ def mini_summary(self):

@property
def print_mini_summary(self):
- print(self.mini_summary)
+ self.logger.info(self.mini_summary)

def add_duration(self, df=None):
"""
@@ -110,13 +112,13 @@ def check_runs_are_valid(self, drop=False, **kwargs):
"""kwargs can specify which conditions to check, e.g. runs that are all zero or runs containing NaNs."""
# check_for_all_zero_runs
for i_row, row in self.df.iterrows():
print(f"Checking row for zeros {row}")
self.logger.info(f"Checking row for zeros {row}")
m = mth5.mth5.MTH5()
m.open_mth5(row.mth5_path)
run_obj = m.get_run(row.station_id, row.run_id, row.survey)
runts = run_obj.to_runts()
if runts.dataset.to_array().data.__abs__().sum() == 0:
print("CRITICAL: Detected a run with all zero values")
self.logger.critical("CRITICAL: Detected a run with all zero values")
self.df["valid"].at[i_row] = False
# load each run, and take the median of the sum of the absolute values
if drop:
@@ -269,7 +271,7 @@ def extract_run_summary_from_mth5(mth5_obj, summary_type="run"):
channel_summary_df = mth5_obj.channel_summary.to_dataframe()
# check that the mth5 has been summarized already
if len(channel_summary_df) < 2:
print("Channel summary maybe not initialized yet, 3 or more channels expected.")
self.logger.info("Channel summary maybe not initialized yet, 3 or more channels expected.")
mth5_obj.channel_summary.summarize()
channel_summary_df = mth5_obj.channel_summary.to_dataframe()
if summary_type == "run":
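Two notes on this file: extract_run_summary_from_mth5 is a module-level function, so the bare `logger` (not `self.logger`) is the correct call there; and check_runs_are_valid flags a run as degenerate when the sum of absolute values over all channels is zero. A self-contained sketch of that core test (the function name and plain-array input are illustrative; aurora operates on a RunTS converted to an array):

    import numpy as np
    from loguru import logger

    def run_is_all_zero(data: np.ndarray) -> bool:
        # a run of pure zeros has zero total absolute amplitude
        if np.abs(data).sum() == 0:
            logger.critical("Detected a run with all zero values")
            return True
        return False

    run_is_all_zero(np.zeros((5, 100)))  # logs the critical record, returns True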
4 changes: 2 additions & 2 deletions aurora/pipelines/time_series_helpers.py
@@ -222,7 +222,7 @@ def nan_to_mean(xrds):
for ch in xrds.keys():
null_values_present = xrds[ch].isnull().any()
if null_values_present:
- print(
+ logger.info(
"Null values detected in xrds -- this is not expected and should be examined"
)
value = np.nan_to_num(np.nanmean(xrds[ch].data))
@@ -308,7 +308,7 @@ def calibrate_stft_obj(stft_obj, run_obj, units="MT", channel_scale_factors=None):
channel_scale_factor = 1.0
calibration_response /= channel_scale_factor
if units == "SI":
print("Warning: SI Units are not robustly supported issue #36")
logger.warning("Warning: SI Units are not robustly supported issue #36")

stft_obj[channel_id].data /= calibration_response
return stft_obj
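The nan_to_mean hunk ends before the replacement step. A sketch of the whole function as the surrounding lines imply it works: each channel's NaNs are filled with that channel's mean. The fillna step is this sketch's assumption, since the hunk truncates before it:

    import numpy as np
    import xarray as xr
    from loguru import logger

    def nan_to_mean(xrds: xr.Dataset) -> xr.Dataset:
        for ch in xrds.keys():
            if xrds[ch].isnull().any():
                logger.info(
                    "Null values detected in xrds -- this is not expected and should be examined"
                )
                # mean over the non-NaN samples, 0.0 if every sample is NaN
                value = np.nan_to_num(np.nanmean(xrds[ch].data))
                xrds[ch] = xrds[ch].fillna(value)
        return xrds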
10 changes: 6 additions & 4 deletions aurora/pipelines/transfer_function_helpers.py
@@ -10,6 +10,8 @@
from aurora.transfer_function.weights.edf_weights import (
effective_degrees_of_freedom_weights,
)
+ from loguru import logger


ESTIMATOR_LIBRARY = {"OLS": RegressionEstimator, "RME": TRME, "RME_RR": TRME_RR}

@@ -31,8 +33,8 @@ def get_estimator_class(estimation_engine):
try:
estimator_class = ESTIMATOR_LIBRARY[estimation_engine]
except KeyError:
print(f"processing_scheme {estimation_engine} not supported")
print(f"processing_scheme must be one of {list(ESTIMATOR_LIBRARY.keys())}")
logger.error(f"processing_scheme {estimation_engine} not supported")
logger.error(f"processing_scheme must be one of {list(ESTIMATOR_LIBRARY.keys())}")
raise Exception
return estimator_class

@@ -87,7 +89,7 @@ def check_time_axes_synched(X, Y):
if (X.time == Y.time).all():
pass
else:
print("WARNING - NAN Handling could fail if X,Y dont share time axes")
logger.warning("WARNING - NAN Handling could fail if X,Y dont share time axes")
raise Exception
return

@@ -121,7 +123,7 @@ def get_band_for_tf_estimate(
being within the frequency band given as an input argument.
"""
dec_level_config = config.decimations[0]
print(f"Processing band {band.center_period:.6f}s")
logger.info(f"Processing band {band.center_period:.6f}s")
band_dataset = extract_band(band, local_stft_obj)
X = band_dataset[dec_level_config.input_channels]
Y = band_dataset[dec_level_config.output_channels]
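get_estimator_class logs at error level and then raises a bare Exception, so the message reaches the log but not the traceback. A sketch of the same lookup with the message attached to the exception as well; the KeyError choice and the stand-in estimator values are this sketch's, not the diff's:

    from loguru import logger

    ESTIMATOR_LIBRARY = {"OLS": object, "RME": object, "RME_RR": object}  # stand-ins

    def get_estimator_class(estimation_engine: str):
        try:
            return ESTIMATOR_LIBRARY[estimation_engine]
        except KeyError:
            msg = (
                f"processing_scheme {estimation_engine} not supported; "
                f"must be one of {list(ESTIMATOR_LIBRARY.keys())}"
            )
            logger.error(msg)
            raise KeyError(msg)

    get_estimator_class("RME")  # returns the stand-in estimator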
25 changes: 13 additions & 12 deletions aurora/pipelines/transfer_function_kernel.py
@@ -9,6 +9,7 @@
from mth5.utils.exceptions import MTH5Error
from mth5.utils.helpers import initialize_mth5
from mt_metadata.transfer_functions.core import TF
+ from loguru import logger


class TransferFunctionKernel(object):
@@ -120,15 +121,15 @@ def update_dataset_df(self, i_dec_level):

# APPLY TIMING CORRECTIONS HERE
else:
print(f"DECIMATION LEVEL {i_dec_level}")
logger.info(f"DECIMATION LEVEL {i_dec_level}")

for i, row in self.dataset_df.iterrows():
if not self.is_valid_dataset(row, i_dec_level):
continue
if row.fc:
row_ssr_str = f"survey: {row.survey}, station_id: {row.station_id}, run_id: {row.run_id}"
msg = f"FC already exists for {row_ssr_str} -- skipping decimation"
- print(msg)
+ logger.info(msg)
continue
run_xrds = row["run_dataarray"].to_dataset("channel")
decimation = self.config.decimations[i_dec_level].decimation
@@ -244,7 +245,7 @@ def check_if_fc_levels_already_exist(self):
< self.processing_config.num_decimation_levels
):
self.dataset_df.loc[dataset_df_indices, "fc"] = False
- print(
+ logger.info(
f"Not enough FC Groups available for {row_ssr_str} -- will need to build them "
)
continue
@@ -279,7 +280,7 @@ def show_processing_summary(
columns_to_show = self.processing_summary.columns
columns_to_show = [x for x in columns_to_show if x not in omit_columns]
logger.info("Processing Summary Dataframe:")
- print(self.processing_summary[columns_to_show].to_string())
+ logger.info(self.processing_summary[columns_to_show].to_string())

def make_processing_summary(self):
"""
@@ -321,7 +322,7 @@ def make_processing_summary(self):
df.dec_level.diff()[1:] == 1
).all() # dec levels increment by 1
except AssertionError:
print(f"Skipping {group} because decimation levels are messy.")
logger.info(f"Skipping {group} because decimation levels are messy.")
continue
assert df.dec_factor.iloc[0] == 1
assert df.dec_level.iloc[0] == 0
@@ -403,13 +404,13 @@ def validate_processing(self):
if not self.config.stations.remote:
for decimation in self.config.decimations:
if decimation.estimator.engine == "RME_RR":
print("No RR station specified, switching RME_RR to RME")
logger.info("No RR station specified, switching RME_RR to RME")
decimation.estimator.engine = "RME"

# Make sure that a local station is defined
if not self.config.stations.local.id:
print("WARNING: Local station not specified")
print("Setting local station from Kernel Dataset")
logger.warning("WARNING: Local station not specified")
logger.warning("Setting local station from Kernel Dataset")
self.config.stations.from_dataset_dataframe(self.kernel_dataset.df)

def validate(self):
@@ -540,7 +541,7 @@ def make_decimation_dict_for_tf(tf_collection, processing_config):
i_dec
].num_segments.data[0, i_band]
except KeyError:
print("Possibly invalid decimation level")
logger.error("Possibly invalid decimation level")
period_value["npts"] = 0
decimation_dict[period_key] = period_value

@@ -596,14 +597,14 @@ def memory_warning(self):
total_memory = psutil.virtual_memory().total

# log the total amount of RAM in GB
- print(f"Total memory: {total_memory / (1024 ** 3):.2f} GB")
+ logger.info(f"Total memory: {total_memory / (1024 ** 3):.2f} GB")
num_samples = self.dataset_df.duration * self.dataset_df.sample_rate
total_samples = num_samples.sum()
total_bytes = total_samples * bytes_per_sample
print(f"Total Bytes of Raw Data: {total_bytes / (1024 ** 3):.3f} GB")
logger.info(f"Total Bytes of Raw Data: {total_bytes / (1024 ** 3):.3f} GB")

ram_fraction = 1.0 * total_bytes / total_memory
print(f"Raw Data will use: {100 * ram_fraction:.3f} % of memory")
logger.info(f"Raw Data will use: {100 * ram_fraction:.3f} % of memory")

# Check a condition
if total_bytes > memory_threshold * total_memory:
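memory_warning estimates the raw-data footprint as samples × bytes-per-sample and compares it to physical RAM. A sketch of the same arithmetic with scalar inputs in place of the dataset_df columns; the byte width and threshold are illustrative defaults:

    import psutil
    from loguru import logger

    def memory_check(duration_s, sample_rate_hz, bytes_per_sample=8, threshold=0.5):
        total_memory = psutil.virtual_memory().total
        logger.info(f"Total memory: {total_memory / (1024 ** 3):.2f} GB")
        total_bytes = duration_s * sample_rate_hz * bytes_per_sample
        logger.info(f"Total Bytes of Raw Data: {total_bytes / (1024 ** 3):.3f} GB")
        ram_fraction = total_bytes / total_memory
        logger.info(f"Raw Data will use: {100 * ram_fraction:.3f} % of memory")
        if total_bytes > threshold * total_memory:
            logger.warning("Raw data may not fit comfortably in memory")

    # one day of 256 Hz data: 86400 s * 256 Hz * 8 B ≈ 0.165 GB
    memory_check(duration_s=86400.0, sample_rate_hz=256.0)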
14 changes: 7 additions & 7 deletions aurora/sandbox/debug_mt_metadata_issue_85.py
@@ -1,6 +1,6 @@
from mt_metadata.timeseries.location import Location
from mth5.mth5 import MTH5

+ from loguru import logger

def test_can_add_location():
"""
@@ -29,17 +29,17 @@ def test_can_add_location():
run_group.station_group.metadata.location = location
# setting latitude as above line does not wind up in the run either

print("Why don't the following values agree??")
print(f"station group {station_group.metadata.location.latitude}")
print(f"Run Group {run_group.station_group.metadata.location.latitude}")
logger.info("Why don't the following values agree??")
logger.info(f"station group {station_group.metadata.location.latitude}")
logger.info(f"Run Group {run_group.station_group.metadata.location.latitude}")
m.close_mth5()

print("Reopen the file and check if update was done on close()")
logger.info("Reopen the file and check if update was done on close()")
m.open_mth5("location_test.h5", mode="r")
eureka = m.get_station("eureka")
print(f"station group {eureka.metadata.location.latitude}")
logger.info(f"station group {eureka.metadata.location.latitude}")
run_001 = eureka.get_run("001")
print(f"Run Group {run_001.station_group.metadata.location.latitude}")
logger.info(f"Run Group {run_001.station_group.metadata.location.latitude}")
m.close_mth5()
return
