towards issue #119 - adding placeholders for coherence sorting

simpeg · Feb 10, 2024 · f0a9d9d · f0a9d9d
1 parent c47df23
commit f0a9d9d
Show file tree

Hide file tree

Showing 3 changed files with 239 additions and 279 deletions.
diff --git a/aurora/pipelines/run_summary.py b/aurora/pipelines/run_summary.py
@@ -24,14 +24,17 @@
 import pandas as pd
 
 
-from mt_metadata.transfer_functions.processing.aurora.channel_nomenclature import ALLOWED_INPUT_CHANNELS
-from mt_metadata.transfer_functions.processing.aurora.channel_nomenclature import ALLOWED_OUTPUT_CHANNELS
+from mt_metadata.transfer_functions.processing.aurora.channel_nomenclature import (
+    ALLOWED_INPUT_CHANNELS,
+)
+from mt_metadata.transfer_functions.processing.aurora.channel_nomenclature import (
+    ALLOWED_OUTPUT_CHANNELS,
+)
 import mth5
 from mth5.utils.helpers import initialize_mth5
 from loguru import logger
 
 
-
 RUN_SUMMARY_COLUMNS = [
     "survey",
     "station_id",
@@ -270,7 +273,9 @@ def extract_run_summary_from_mth5(mth5_obj, summary_type="run"):
     channel_summary_df = mth5_obj.channel_summary.to_dataframe()
     # check that the mth5 has been summarized already
     if len(channel_summary_df) < 2:
-        logger.info("Channel summary maybe not initialized yet, 3 or more channels expected.")
+        logger.info(
+            "Channel summary maybe not initialized yet, 3 or more channels expected."
+        )
         mth5_obj.channel_summary.summarize()
         channel_summary_df = mth5_obj.channel_summary.to_dataframe()
     if summary_type == "run":
@@ -317,7 +322,7 @@ def extract_run_summaries_from_mth5s(mth5_list, summary_type="run", deduplicate=
         if isinstance(mth5_elt, mth5.mth5.MTH5):
             mth5_obj = mth5_elt
         else:  # mth5_elt is a path or a string
-            mth5_obj = initialize_mth5(mth5_elt, mode="r")
+            mth5_obj = initialize_mth5(mth5_elt, mode="a")
 
         df = extract_run_summary_from_mth5(mth5_obj, summary_type=summary_type)
 

diff --git a/aurora/pipelines/transfer_function_helpers.py b/aurora/pipelines/transfer_function_helpers.py
@@ -155,7 +155,7 @@ def process_transfer_functions(
     local_stft_obj,
     remote_stft_obj,
     transfer_function_obj,
-    # segment_weights=["jj84_coherence_weights",],
+    # segment_weights options: ["jackknife_jj84",], ["simple_coherence",], ["multiple_coherence",]
     segment_weights=[],
     channel_weights=None,
 ):
@@ -169,21 +169,30 @@ def process_transfer_functions(
     remote_stft_obj
     transfer_function_obj: aurora.transfer_function.TTFZ.TTFZ
         The transfer function container ready to receive values in this method.
-    segment_weights : numpy array or None
+    segment_weights : numpy array or list of strings
         1D array which should be of the same length as the time axis of the STFT objects
         If these weights are zero anywhere, we drop all that segment across all channels
+        If it is a list of strings, each string corresponds to a weighting
+        algorithm to be applied.
+        ["jackknife_jj84", "multiple_coherence", "simple_coherence"]
     channel_weights : numpy array or None
 
+    Note #1: Although it is advantageous to execute the regression channel-by-channel
+    rather than all-at-once, we keep the all-at-once path to get residual covariances (see issue #87)
 
-    TODO:
-    1. Review the advantages of executing the regression all at once vs
-    channel-by-channel.  If there is not disadvantage to always
-    using a channel-by-channel approach we can modify this to only support that
-    method.  However, we still need a way to get residual covariances (see issue #87)
-    2. Consider push the nan-handling into the band extraction as a
-    kwarg.
-    3. The reindexing of the band may be done in the extraction as well.  This would
-    result in an "edf-weighting-scheme-ready" format.
+    Note #2:
+    Consider placing the segment weight logic in its own module with the various functions in a dictionary.
+    It may be possible to combine (as a product) all segment weights, as in the following pseudocode:
+
+        W = ones
+        for wt_style in segment_weights:
+            fcn = wt_functions[wt_style]
+            w = fcn(X, Y, RR, )
+            W *= w
+        return W
+
+
+    TODO: Consider pushing the nan-handling into the band extraction as a kwarg.
 
     Returns
     -------
@@ -197,21 +206,30 @@ def process_transfer_functions(
             band, dec_level_config, local_stft_obj, remote_stft_obj
         )
 
-        # Apply segment weights first
-        # This could be replaced by a method that combines (product) all segment weights in a dict
-        # weights = {}
-        if "jj84_coherence_weights" in segment_weights:
+        # Apply segment weights first -- see Note #2
+
+        if "jackknife_jj84" in segment_weights:
             from aurora.transfer_function.weights.coherence_weights import (
                 coherence_weights_jj84,
             )
 
             Wjj84 = coherence_weights_jj84(band, local_stft_obj, remote_stft_obj)
             apply_weights(X, Y, RR, Wjj84, segment=True, dropna=False)
+        if "simple_coherence" in segment_weights:
+            from aurora.transfer_function.weights.coherence_weights import (
+                simple_coherence_weights,
+            )
+
+            W = simple_coherence_weights(band, local_stft_obj, remote_stft_obj)
+            apply_weights(X, Y, RR, W, segment=True, dropna=False)
+
+        if "multiple_coherence" in segment_weights:
+            from aurora.transfer_function.weights.coherence_weights import (
+                multiple_coherence_weights,
+            )
 
-        # if multiple_coherence_weights in segment_weights:
-        #     from aurora.transfer_function.weights.coherence_weights import compute_multiple_coherence_weights
-        #     Wmc = compute_multiple_coherence_weights(band, local_stft_obj, remote_stft_obj)
-        #     apply_segment_weights(X, Y, RR, Wmc)
+            W = multiple_coherence_weights(band, local_stft_obj, remote_stft_obj)
+            apply_weights(X, Y, RR, W, segment=True, dropna=False)
 
         # if there are channel weights apply them here