Skip to content

Commit

Permalink
Add option to avoid aggressive merging
Browse files Browse the repository at this point in the history
  • Loading branch information
dachengx committed Feb 6, 2025
1 parent d8297a6 commit d4858c4
Showing 1 changed file with 94 additions and 19 deletions.
113 changes: 94 additions & 19 deletions straxen/plugins/merged_s2s/merged_s2s.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,12 +191,6 @@ class MergedS2s(strax.OverlapWindowPlugin):
),
)

p_value_prioritized = straxen.URLConfig(
default=False,
type=bool,
help="Whether to prioritize p-value over area when testing the proposal",
)

use_bayesian_merging = straxen.URLConfig(default=True, type=bool, help="Use Bayesian merging")

rm_sparse_xy = straxen.URLConfig(
Expand All @@ -207,6 +201,18 @@ class MergedS2s(strax.OverlapWindowPlugin):
default=True, type=bool, help="Use uncertainty from probabilistic posrec to derive weights"
)

p_value_prioritized = straxen.URLConfig(
default=False,
type=bool,
help="Whether to prioritize p-value over area when testing the proposal",
)

withdraw_aggressive_merging = straxen.URLConfig(
default=True,
type=bool,
help="Whether to withdraw the aggressive merging of the peaklets",
)

merged_s2s_get_window_size_factor = straxen.URLConfig(
default=5, type=int, track=False, help="Factor of the window size for the merged_s2s plugin"
)
Expand Down Expand Up @@ -341,6 +347,9 @@ def merge(self, _peaklets, lone_hits, start, end):
else:
peaklets = _peaklets[is_s2]

# make sure the peaklets are not overwritten
peaklets.flags.writeable = False

max_buffer = int(self.max_duration // strax.gcd_of_array(peaklets["dt"]))

start_merge_at, end_merge_at, _merged = self.get_merge_instructions(
Expand All @@ -367,6 +376,7 @@ def merge(self, _peaklets, lone_hits, start, end):
self.rm_sparse_xy,
self.use_uncertainty_weights,
self.p_value_prioritized,
self.withdraw_aggressive_merging,
self.gap_thresholds,
disable=self.disable_progress_bar,
)
Expand Down Expand Up @@ -535,6 +545,7 @@ def get_merge_instructions(
sparse_xy=True,
uncertainty_weights=True,
p_value_prioritized=False,
withdraw_aggressive_merging=True,
gap_thresholds=None,
diagnosing=False,
disable=True,
Expand All @@ -561,6 +572,8 @@ def get_merge_instructions(
peaks[name] = strax.endtime(_peaks)
else:
peaks[name] = _peaks[name]
peaks_copy = np.copy(peaks)
peaks_copy.flags.writeable = False

n_peaks = len(peaks)
if n_peaks == 0:
Expand All @@ -569,6 +582,11 @@ def get_merge_instructions(
start_index = np.arange(n_peaks)
# exclusive end index
end_index = np.arange(n_peaks) + 1
# keep a copy of the original indices
start_index_copy = np.copy(start_index)
start_index_copy.flags.writeable = False
end_index_copy = np.copy(end_index)
end_index_copy.flags.writeable = False

# mask to help keep track of the peaklets that have been examined
unexamined = np.full(n_peaks, True)
Expand All @@ -583,12 +601,35 @@ def get_merge_instructions(
right_bounds = np.minimum(
np.hstack([core_bounds, end]), peaks["endtime"] + int(max_gap / 2)
)
# keep a copy of the original boundaries
left_bounds_copy = np.copy(left_bounds)
left_bounds_copy.flags.writeable = False
right_bounds_copy = np.copy(right_bounds)
right_bounds_copy.flags.writeable = False

if diagnosing:
merged_area = []
p_values = []
dr_avgs = []

def can_merge_left(i, peaks):
# whether peaklet i (from _peak) can be merged to its left unexamined peaklet
return (
i - 1 >= 0
and unexamined[i - 1]
and MergedS2s.get_gap(peaks[i], peaks[i - 1]) < max_gap
and MergedS2s.get_duration(peaks[i], peaks[i - 1]) <= max_duration
)

def can_merge_right(i, peaks):
# whether peaklet i (from _peak) can be merged to its right unexamined peaklet
return (
i < n_peaks
and unexamined[i]
and MergedS2s.get_gap(peaks[i - 1], peaks[i]) < max_gap
and MergedS2s.get_duration(peaks[i - 1], peaks[i]) <= max_duration
)

argsort = strax.stable_argsort(peaks["area"])
for i in tqdm(argsort[::-1], disable=disable):
if not unexamined[i]:
Expand All @@ -605,21 +646,11 @@ def get_merge_instructions(
# please mind here that the definition of gaps should
# be consistent with in the merging algorithm
# and the merged peak should not be longer than the max duration
if (
start_index[i] - 1 >= 0
and unexamined[start_index[i] - 1]
and MergedS2s.get_gap(peaks[i], peaks[start_index[i] - 1]) < max_gap
and MergedS2s.get_duration(peaks[i], peaks[start_index[i] - 1]) <= max_duration
):
if can_merge_left(start_index[i], peaks):
indices.append(start_index[i] - 1)
else:
indices.append(None)
if (
end_index[i] < n_peaks
and unexamined[end_index[i]]
and MergedS2s.get_gap(peaks[i], peaks[end_index[i]]) < max_gap
and MergedS2s.get_duration(peaks[i], peaks[end_index[i]]) <= max_duration
):
if can_merge_right(end_index[i], peaks):
indices.append(end_index[i])
else:
indices.append(None)
Expand Down Expand Up @@ -689,8 +720,52 @@ def get_merge_instructions(
merged_area.append(area[examined][merged[examined]].sum())

if np.all(p < p_threshold):
if not (
sparse_xy
and withdraw_aggressive_merging
and end_index[i] - start_index[i] > 1
):
_merging = examined
else:
revise_left = (
can_merge_left(start_index[i], peaks_copy)
and not merged[start_index[i]]
)
revise_right = (
can_merge_right(end_index[i], peaks_copy)
and not merged[end_index[i] - 1]
)
if revise_left or revise_right:
# this means the left(right) most peaklet might be merged
# by an unexamined peaklet left(right) far away in time
start_idx = start_index[i]
while not merged[start_idx] and revise_left:
start_idx += 1
end_idx = end_index[i]
while not merged[end_idx - 1] and revise_right:
end_idx -= 1
assert start_idx < end_idx
# revise the previously merged peaklets back
for revised in (
slice(start_index[i], start_idx),
slice(end_idx, end_index[i]),
):
merged[revised] = True
peaks[revised] = peaks_copy[revised]
start_index[revised] = start_index_copy[revised]
end_index[revised] = end_index_copy[revised]
left_bounds[revised] = left_bounds_copy[revised]
right_bounds[revised] = right_bounds_copy[revised]
# revise the peaklets really to be merged
_merging = slice(start_idx, end_idx)
start_index[_merging] = start_index_copy[start_idx]
end_index[_merging] = end_index_copy[end_idx - 1]
left_bounds[_merging] = left_bounds_copy[start_idx]
right_bounds[_merging] = right_bounds_copy[end_idx - 1]
else:
_merging = examined
# this will not allow merging of the already examined peaklets
unexamined[examined] = False
unexamined[_merging] = False
else:
# whether test the proposal with larger area first
if p_value_prioritized:
Expand Down

0 comments on commit d4858c4

Please sign in to comment.