Merge branch 'master' into update-weights-varlss-mod
andreicuceu committed Jan 17, 2025
2 parents 6e9ec38 + 7add2b7 commit 3a18ed2
Showing 10 changed files with 106 additions and 32 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 9.8.0
current_version = 9.11.0
commit = True

[bumpversion:file:py/picca/_version.py]
4 changes: 2 additions & 2 deletions .github/workflows/bump_version.yml
@@ -13,7 +13,7 @@ jobs:
bump_version:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
token: ${{ secrets.PAT_MW_TOKEN_ACTION }}

@@ -23,7 +23,7 @@ jobs:
git config --global user.name "Michael Walther (bot)"
- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5

- name: Install bump2version
run: pip install bump2version
7 changes: 4 additions & 3 deletions .github/workflows/publish-to-test-pypi.yml
@@ -14,9 +14,9 @@ jobs:
id-token: write # IMPORTANT: this permission is mandatory for trusted publishing

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.x"

@@ -37,7 +37,8 @@ jobs:
- name: Publish distribution 📦 to Test PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
repository-url: https://test.pypi.org/legacy/
repository_url: https://test.pypi.org/legacy/
attestations: false

- name: Publish distribution 📦 to PyPI
if: startsWith(github.ref, 'refs/tags')
4 changes: 2 additions & 2 deletions .github/workflows/pylint.yml
@@ -15,9 +15,9 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python 3.12
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: 3.12
- name: Install dependencies
4 changes: 2 additions & 2 deletions .github/workflows/python_package.yml
@@ -29,9 +29,9 @@ jobs:
# OPENBLAS_NUM_THREADS: 1
# NUMEXPR_NUM_THREADS: 1
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
60 changes: 56 additions & 4 deletions bin/picca_Pk1D.py
@@ -97,6 +97,16 @@ def process_all_files(index_file_args):
if np.sum(delta.exposures_diff)==0:
continue

if linear_binning:
max_num_pixels_forest_theoretical=(1180-1050)*(delta.z_qso+1)/pixel_step #currently no min/max restframe values defined, so hard coding for the moment
else:
max_num_pixels_forest_theoretical=(np.log10(1180)-np.log10(1050))/pixel_step
# minimum number of pixels in a part of forest
if args.nb_pixel_min is not None:
min_num_pixels = args.nb_pixel_min
else:
min_num_pixels = int(args.nb_pixel_frac_min*max_num_pixels_forest_theoretical) #this makes the minimum spectrum length scale with z, but might be problematic for SBs

if args.parts_in_redshift:
# define chunks on a fixed redshift grid
z_grid = np.array(args.z_parts)
@@ -105,11 +115,12 @@
delta.delta,
delta.exposures_diff,
delta.ivar,
min_num_pixels=args.nb_pixel_min,
min_num_pixels=min_num_pixels,
reso_matrix=(delta.resolution_matrix
if reso_correction == 'matrix' else None),
linear_binning=linear_binning)
num_parts = len(split_array[0])

if reso_correction == 'matrix':
(mean_z_array, lambda_array, delta_array, exposures_diff_array,
ivar_array, reso_matrix_array) = split_array
@@ -194,7 +205,15 @@ def process_all_files(index_file_args):
pixel_step, log_lambda_array[part_index],
delta_array[part_index], exposures_diff_array[part_index],
ivar_array[part_index], args.no_apply_filling)
if num_masked_pixels > args.nb_pixel_masked_max:

if args.nb_pixel_masked_max is not None:
max_num_masked_pixels = args.nb_pixel_masked_max
elif linear_binning:
max_num_masked_pixels = int(args.nb_pixel_masked_frac_max*(np.max(lambda_new)-np.min(lambda_new))/pixel_step) #this only accounts for masking inside the spectrum, not at the edges
else:
max_num_masked_pixels = int(args.nb_pixel_masked_frac_max*(np.max(log_lambda_new)-np.min(log_lambda_new))/pixel_step)

if num_masked_pixels > max_num_masked_pixels:
continue

# Compute pk_raw, needs uniform binning
@@ -388,16 +407,30 @@ def main(cmdargs):

parser.add_argument('--nb-pixel-min',
type=int,
default=75,
default=None,
required=False,
help='Minimal number of pixels in a part of forest')


parser.add_argument('--nb-pixel-frac-min',
type=float,
default=None,
required=False,
help='Minimal fraction of pixels in a part of forest, relative to the theoretical forest length (for reference, a z=2.5 QSO has ~577 pixels in the 1050-1180A forest, so the previous default of 75 pixels corresponds to ~0.13)')

parser.add_argument(
'--nb-pixel-masked-max',
type=int,
default=40,
default=None,  # previously 40
required=False,
help='Maximal number of masked pixels in a part of forest')

parser.add_argument(
'--nb-pixel-masked-frac-max',
type=float,
default=None,
required=False,
help='Maximal fraction of masked pixels in a part of forest (a value of 0.21 of the forest length is similar to the previous default of 40 at z=2.5 for a 3 chunk spectrum and 1050-1180A)')

parser.add_argument('--no-apply-filling',
action='store_true',
@@ -503,6 +536,25 @@ def main(cmdargs):
#create output dir if it does not exist
os.makedirs(args.out_dir, exist_ok=True)

if args.nb_pixel_min is None and args.nb_pixel_frac_min is None:
# could make this fraction a new default, but that would probably cause trouble for SBs
# args.nb_pixel_frac_min = 0.13 #this is a suggestion for a new default
args.nb_pixel_min = 75 #this is the previous default
elif not (args.nb_pixel_frac_min is None or args.nb_pixel_min is None):
print("both nb_pixel_frac_min and nb_pixel_min were set, using the latter")
args.nb_pixel_frac_min=None

if args.nb_pixel_masked_frac_max is None and args.nb_pixel_masked_max is None:
# could make this, i.e. 21% of the estimated forest length, the new default
# args.nb_pixel_masked_frac_max = 0.21 #this is a suggestion for a new default
args.nb_pixel_masked_max = 40 #this is the previous default
elif not (args.nb_pixel_masked_frac_max is None or args.nb_pixel_masked_max is None):
print("both nb_pixel_masked_frac_max and nb_pixel_masked_max were set, using the latter")
args.nb_pixel_masked_frac_max=None


print([[i, f] for i, f in enumerate(files)])

if args.num_processors > 1:
pool = Pool(args.num_processors)
index_file_args = [(i, f, args) for i, f in enumerate(files)]
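As a rough illustration of the new chunk-length cuts above, the following minimal numpy sketch mirrors how an explicit --nb-pixel-min takes precedence and how a fractional cut scales with redshift; the 0.8 A pixel step, the z=2.5 quasar and the 0.13 fraction are assumed example values here (0.13 is only the suggestion left commented out in main(), not an adopted default).

import numpy as np

def resolve_min_num_pixels(z_qso, pixel_step, linear_binning,
                           nb_pixel_min=None, nb_pixel_frac_min=0.13):
    # Theoretical forest length in pixels for the hard-coded 1050-1180 A
    # rest-frame range, as computed in process_all_files().
    if linear_binning:
        max_num_pixels = (1180 - 1050) * (z_qso + 1) / pixel_step
    else:
        max_num_pixels = (np.log10(1180) - np.log10(1050)) / pixel_step
    # An explicit absolute cut wins; otherwise the cut is a fraction of the
    # theoretical forest length and therefore grows with redshift.
    if nb_pixel_min is not None:
        return nb_pixel_min
    return int(nb_pixel_frac_min * max_num_pixels)

# A z=2.5 quasar with an assumed 0.8 A linear pixel has ~569 forest pixels,
# so a 0.13 fraction gives a ~73-pixel minimum, close to the old fixed 75.
print(resolve_min_num_pixels(2.5, 0.8, linear_binning=True))

The same precedence applies to --nb-pixel-masked-max versus --nb-pixel-masked-frac-max, except that the fractional version is evaluated against the wavelength span of the chunk actually being processed.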
2 changes: 1 addition & 1 deletion py/picca/_version.py
@@ -1 +1 @@
__version__ = '9.8.0'
__version__ = '9.11.0'
35 changes: 22 additions & 13 deletions py/picca/pk1d/postproc_pk1d.py
@@ -797,7 +797,11 @@ def compute_average_pk_redshift(
snr_bins = (snr_bin_edges[:-1] + snr_bin_edges[1:]) / 2

data_values = p1d_table[col][select]
data_values = data_values[mask_nan_p1d_values]
data_snr = p1d_table["forest_snr"][select]
data_snr, data_values = (
data_snr[mask_nan_p1d_values],
data_values[mask_nan_p1d_values],
)
# Fit function to observed dispersion in col:
standard_dev_col, _, _ = binned_statistic(
data_snr, data_values, statistic="std", bins=snr_bin_edges
@@ -820,15 +824,20 @@
variance_estimated_col = fitfunc_variance_pk1d(data_snr, *coef_col)
weights_col = 1.0 / variance_estimated_col
if apply_z_weights:
weights *= redshift_weights
mean = np.average(data_values, weights=weights)
mean = np.average(data_values, weights=weights * redshift_weights)
else:
mean = np.average(data_values, weights=weights)
if apply_z_weights:
# Analytic expression for the re-weighted average:
error = np.sqrt(np.sum(weights_col * redshift_weights)) / np.sum(
weights_col
)
else:
error = np.sqrt(1.0 / np.sum(weights_col))
# Variance estimator derived by Jean-Marc; we keep the estimated one.
# error = np.sqrt(((np.sum(weights)**2 / np.sum(weights**2)) - 1 )**(-1) * (
# ( np.sum(weights**2 * data_values**2) / np.sum(weights**2) ) - (
# np.sum(weights * data_values)/ np.sum(weights) )**2 ))
if col == "Pk":
standard_dev = np.concatenate(
[
@@ -1142,7 +1151,7 @@ def compute_groups_for_one_forest(nbins_k, p1d_los):
if number_in_bins != 0:
weight = p1d_los["weight"][mask_ikbin][0]
p1d_weights_id[ikbin] = weight
covariance_weights_id[ikbin] = np.sqrt(weight / number_in_bins)
covariance_weights_id[ikbin] = weight / number_in_bins
p1d_groups_id[ikbin] = np.nansum(
p1d_los["pk"][mask_ikbin] * covariance_weights_id[ikbin]
)
@@ -1199,11 +1208,11 @@ def compute_cov(
mean_pk_product = np.outer(mean_pk, mean_pk)

sum_p1d_weights = np.nansum(p1d_weights, axis=0)
weights_sum_of_product = np.outer(sum_p1d_weights, sum_p1d_weights)
weights_sum_product = np.outer(sum_p1d_weights, sum_p1d_weights)

p1d_groups_product_sum = np.zeros((nbins_k, nbins_k))
covariance_weights_product_of_sum = np.zeros((nbins_k, nbins_k))
weights_product_of_sum = np.zeros((nbins_k, nbins_k))
covariance_weights_product_sum = np.zeros((nbins_k, nbins_k))
weights_product_sum = np.zeros((nbins_k, nbins_k))

for i, p1d_group in enumerate(p1d_groups):
# The summation is done with np.nansum instead of simple addition to not
@@ -1212,21 +1221,21 @@
p1d_groups_product_sum = np.nansum(
[p1d_groups_product_sum, np.outer(p1d_group, p1d_group)], axis=0
)
covariance_weights_product_of_sum = np.nansum(
covariance_weights_product_sum = np.nansum(
[
covariance_weights_product_of_sum,
covariance_weights_product_sum,
np.outer(covariance_weights[i], covariance_weights[i]),
],
axis=0,
)
weights_product_of_sum = np.nansum(
[weights_product_of_sum, np.outer(p1d_weights[i], p1d_weights[i])], axis=0
weights_product_sum = np.nansum(
[weights_product_sum, np.outer(p1d_weights[i], p1d_weights[i])], axis=0
)

del p1d_groups, covariance_weights, p1d_weights

covariance_matrix = (weights_product_of_sum / weights_sum_of_product) * (
(p1d_groups_product_sum / covariance_weights_product_of_sum) - mean_pk_product
covariance_matrix = ((weights_sum_product / weights_product_sum) - 1)**(-1) * (
(p1d_groups_product_sum / covariance_weights_product_sum) - mean_pk_product
)

# For fit_snr method, due to the SNR fitting scheme used for weighting,
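The reworked compute_cov() expression has the form of an estimator for the covariance of the weighted-mean P1D over forests. The numpy sketch below shows that shape only; it assumes a single weight per forest and k-bin and leaves out the separate per-bin covariance weights (weight divided by pixel count) and the NaN handling of the real code.

import numpy as np

def covariance_of_weighted_mean(pk, weights):
    # pk, weights: arrays of shape (n_forests, nbins_k), one row per forest.
    sum_weights = weights.sum(axis=0)
    weights_sum_product = np.outer(sum_weights, sum_weights)         # S_i * S_j
    weights_product_sum = np.einsum("fi,fj->ij", weights, weights)   # sum_f w_fi * w_fj
    mean_pk = np.average(pk, axis=0, weights=weights)
    second_moment = (np.einsum("fi,fj,fi,fj->ij", weights, weights, pk, pk)
                     / weights_product_sum)
    prefactor = 1.0 / (weights_sum_product / weights_product_sum - 1.0)
    return prefactor * (second_moment - np.outer(mean_pk, mean_pk))

# For unit weights the prefactor is 1/(n_forests - 1) and the result reduces
# to the sample covariance of the forests divided by n_forests, i.e. the
# covariance of the mean rather than of an individual measurement.
rng = np.random.default_rng(0)
pk = rng.normal(size=(500, 4))
print(np.allclose(covariance_of_weighted_mean(pk, np.ones_like(pk)),
                  np.cov(pk, rowvar=False) / 500))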
20 changes: 16 additions & 4 deletions py/picca/raw_io.py
@@ -76,8 +76,14 @@ def read_transmission_file(filename, num_bins, objs_thingid, tracer='F_LYA', lam
if np.in1d(thingid, objs_thingid).sum() == 0:
hdul.close()
return
ra = hdul['METADATA']['RA'][:].astype(np.float64) * np.pi / 180.
dec = hdul['METADATA']['DEC'][:].astype(np.float64) * np.pi / 180.
if 'RA' in hdul['METADATA'].get_colnames() and 'DEC' in hdul['METADATA'].get_colnames():
ra = hdul['METADATA']['RA'][:].astype(np.float64) * np.pi / 180.
dec = hdul['METADATA']['DEC'][:].astype(np.float64) * np.pi / 180.
elif 'TARGET_RA' in hdul['METADATA'].get_colnames() and 'TARGET_DEC' in hdul['METADATA'].get_colnames():
ra = hdul['METADATA']['TARGET_RA'][:].astype(np.float64) * np.pi / 180.
dec = hdul['METADATA']['TARGET_DEC'][:].astype(np.float64) * np.pi / 180.
else:
raise ValueError('RA and DEC columns not found in transmission files.')
z = hdul['METADATA']['Z'][:]

# Use "lambda_array" to store either lambda or log lambda
@@ -314,8 +320,14 @@ def convert_transmission_to_deltas(obj_path, out_dir, in_dir=None, in_filenames=

w = hdu[z_key][:] > max(0., lambda_min / lambda_max_rest_frame - 1.)
w &= hdu[z_key][:] < max(0., lambda_max / lambda_min_rest_frame - 1.)
objs_ra = hdu['RA'][:][w].astype('float64') * np.pi / 180.
objs_dec = hdu['DEC'][:][w].astype('float64') * np.pi / 180.
if 'RA' in key_val and 'DEC' in key_val:
objs_ra = hdu['RA'][:][w].astype('float64') * np.pi / 180.
objs_dec = hdu['DEC'][:][w].astype('float64') * np.pi / 180.
elif 'TARGET_RA' in key_val and 'TARGET_DEC' in key_val:
objs_ra = hdu['TARGET_RA'][:][w].astype('float64') * np.pi / 180.
objs_dec = hdu['TARGET_DEC'][:][w].astype('float64') * np.pi / 180.
else:
raise ValueError('RA and DEC columns not found in objects catalog.')
objs_thingid = objs_thingid[w]
hdul.close()
userprint('INFO: Found {} quasars'.format(objs_ra.size))
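Both readers now fall back to TARGET_RA/TARGET_DEC when plain RA/DEC columns are missing. A minimal sketch of that lookup is shown below; it works on a plain mapping from column name to array rather than on the fitsio HDU objects used in raw_io.py, so the dictionary and its contents are illustrative only.

import numpy as np

def get_radec_radians(columns):
    # columns: mapping from column name to array of coordinates in degrees,
    # e.g. built from the METADATA HDU of a transmission file or from an
    # objects catalog.
    for ra_key, dec_key in (("RA", "DEC"), ("TARGET_RA", "TARGET_DEC")):
        if ra_key in columns and dec_key in columns:
            ra = np.asarray(columns[ra_key], dtype=np.float64) * np.pi / 180.0
            dec = np.asarray(columns[dec_key], dtype=np.float64) * np.pi / 180.0
            return ra, dec
    raise ValueError("RA and DEC columns not found.")

# Example: a mock catalog that only carries the TARGET_* variants.
catalog = {"TARGET_RA": [150.1, 210.4], "TARGET_DEC": [2.2, -0.5]}
ra, dec = get_radec_radians(catalog)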
Binary file modified py/picca/tests/data/test_Pk1D/meanPk1D_covariance.fits.gz
