diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index aa5733af3..28ff45c63 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 9.8.0
+current_version = 9.11.0
 commit = True
 
 [bumpversion:file:py/picca/_version.py]
diff --git a/.github/workflows/bump_version.yml b/.github/workflows/bump_version.yml
index 29fee4f64..3def75ac8 100644
--- a/.github/workflows/bump_version.yml
+++ b/.github/workflows/bump_version.yml
@@ -13,7 +13,7 @@ jobs:
   bump_version:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          token: ${{ secrets.PAT_MW_TOKEN_ACTION }}
 
@@ -23,7 +23,7 @@ jobs:
        git config --global user.name "Michael Walther (bot)"
 
     - name: Set up Python
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
 
     - name: Install bump2version
       run: pip install bump2version
diff --git a/.github/workflows/publish-to-test-pypi.yml b/.github/workflows/publish-to-test-pypi.yml
index 1984d920d..fcf0000f4 100644
--- a/.github/workflows/publish-to-test-pypi.yml
+++ b/.github/workflows/publish-to-test-pypi.yml
@@ -14,9 +14,9 @@ jobs:
       id-token: write  # IMPORTANT: this permission is mandatory for trusted publishing
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - name: Set up Python
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
        python-version: "3.x"
 
@@ -37,7 +37,8 @@ jobs:
     - name: Publish distribution 📦 to Test PyPI
       uses: pypa/gh-action-pypi-publish@release/v1
       with:
-        repository-url: https://test.pypi.org/legacy/
+        repository_url: https://test.pypi.org/legacy/
+        attestations: false
 
     - name: Publish distribution 📦 to PyPI
       if: startsWith(github.ref, 'refs/tags')
diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
index dfe5ee1eb..25e2a9cf4 100644
--- a/.github/workflows/pylint.yml
+++ b/.github/workflows/pylint.yml
@@ -15,9 +15,9 @@ jobs:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - name: Set up Python 3.12
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
        python-version: 3.12
     - name: Install dependencies
diff --git a/.github/workflows/python_package.yml b/.github/workflows/python_package.yml
index c751d0cbe..7bef9f300 100644
--- a/.github/workflows/python_package.yml
+++ b/.github/workflows/python_package.yml
@@ -29,9 +29,9 @@ jobs:
     #   OPENBLAS_NUM_THREADS: 1
     #   NUMEXPR_NUM_THREADS: 1
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
        python-version: ${{ matrix.python-version }}
     - name: Install dependencies
diff --git a/bin/picca_Pk1D.py b/bin/picca_Pk1D.py
index 15080bccb..65080d0a9 100755
--- a/bin/picca_Pk1D.py
+++ b/bin/picca_Pk1D.py
@@ -97,6 +97,16 @@ def process_all_files(index_file_args):
             if np.sum(delta.exposures_diff)==0:
                 continue
 
+            if linear_binning:
+                max_num_pixels_forest_theoretical = (1180 - 1050) * (delta.z_qso + 1) / pixel_step  # no min/max rest-frame range is defined yet, so the 1050-1180 A forest is hard-coded for the moment
+            else:
+                max_num_pixels_forest_theoretical = (np.log10(1180) - np.log10(1050)) / pixel_step
+            # minimum number of pixels in a part of forest
+            if args.nb_pixel_min is not None:
+                min_num_pixels = args.nb_pixel_min
+            else:
+                min_num_pixels = int(args.nb_pixel_frac_min * max_num_pixels_forest_theoretical)  # scales the minimum chunk length with z; might be problematic for the side-bands (SBs)
+
             if args.parts_in_redshift:
                 # define chunks on a fixed redshift grid
                 z_grid = np.array(args.z_parts)
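The fractional minimum-pixel cut added above ties the chunk-length requirement to the theoretical forest length (note the fix from the mixed `np.log`/`np.log10` call to `np.log10` in both terms). A minimal sketch of the arithmetic, assuming a 0.8 A linear pixel step and a 3e-4 log10(lambda) step (illustrative values, not taken from this patch):

```python
import numpy as np

LAMBDA_MIN_RF, LAMBDA_MAX_RF = 1050.0, 1180.0  # hard-coded forest range, as in the patch

def max_pixels_theoretical(z_qso, pixel_step, linear_binning):
    """Theoretical number of forest pixels for a QSO at z_qso."""
    if linear_binning:
        # observed-frame width of the forest divided by the pixel size in A
        return (LAMBDA_MAX_RF - LAMBDA_MIN_RF) * (1 + z_qso) / pixel_step
    # log10 width of the forest divided by the pixel size in log10(A); the
    # redshift cancels because log10((1+z)*l2) - log10((1+z)*l1) = log10(l2/l1)
    return (np.log10(LAMBDA_MAX_RF) - np.log10(LAMBDA_MIN_RF)) / pixel_step

for z in (2.1, 2.5, 3.0):
    n_lin = max_pixels_theoretical(z, 0.8, linear_binning=True)
    n_log = max_pixels_theoretical(z, 3e-4, linear_binning=False)
    print(f"z={z}: linear ~{n_lin:.0f} px (frac-min 0.13 -> {int(0.13 * n_lin)}), "
          f"log ~{n_log:.0f} px")
```

Note that the (1+z) factor only survives for linear binning, so only there does the minimum chunk length actually grow with redshift.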
@@ -105,11 +115,12 @@
                 delta.delta,
                 delta.exposures_diff,
                 delta.ivar,
-                min_num_pixels=args.nb_pixel_min,
+                min_num_pixels=min_num_pixels,
                 reso_matrix=(delta.resolution_matrix
                              if reso_correction == 'matrix' else None),
                 linear_binning=linear_binning)
             num_parts = len(split_array[0])
+
             if reso_correction == 'matrix':
                 (mean_z_array, lambda_array, delta_array, exposures_diff_array,
                  ivar_array, reso_matrix_array) = split_array
@@ -194,7 +205,15 @@
                     pixel_step, log_lambda_array[part_index],
                     delta_array[part_index], exposures_diff_array[part_index],
                     ivar_array[part_index], args.no_apply_filling)
-            if num_masked_pixels > args.nb_pixel_masked_max:
+
+            if args.nb_pixel_masked_max is not None:
+                max_num_masked_pixels = args.nb_pixel_masked_max
+            elif linear_binning:
+                max_num_masked_pixels = int(args.nb_pixel_masked_frac_max * (np.max(lambda_new) - np.min(lambda_new)) / pixel_step)  # this only accounts for masking inside the spectrum, not at the edges
+            else:
+                max_num_masked_pixels = int(args.nb_pixel_masked_frac_max * (np.max(log_lambda_new) - np.min(log_lambda_new)) / pixel_step)
+
+            if num_masked_pixels > max_num_masked_pixels:
                 continue
 
             # Compute pk_raw, needs uniform binning
@@ -388,16 +407,30 @@ def main(cmdargs):
     parser.add_argument('--nb-pixel-min',
                         type=int,
-                        default=75,
+                        default=None,
                         required=False,
                         help='Minimal number of pixels in a part of forest')
+
+    parser.add_argument('--nb-pixel-frac-min',
+                        type=float,
+                        default=None,
+                        required=False,
+                        help='Minimal number of pixels in a part of forest, as a fraction of the theoretical forest length (0.13 matches the previous default of 75 pixels, assuming ~577 pixels for a z=2.5 QSO in the 1050-1180A forest)')
 
     parser.add_argument(
         '--nb-pixel-masked-max',
         type=int,
-        default=40,
+        default=None,
         required=False,
         help='Maximal number of masked pixels in a part of forest')
+
+    parser.add_argument(
+        '--nb-pixel-masked-frac-max',
+        type=float,
+        default=None,
+        required=False,
+        help='Maximal number of masked pixels in a part of forest, as a fraction of the chunk length (0.21 is similar to the previous default of 40 pixels at z=2.5 for a 3-chunk spectrum and the 1050-1180A forest)')
 
     parser.add_argument('--no-apply-filling',
                         action='store_true',
@@ -503,6 +536,25 @@ def main(cmdargs):
     #create output dir if it does not exist
     os.makedirs(args.out_dir, exist_ok=True)
 
+    if args.nb_pixel_min is None and args.nb_pixel_frac_min is None:
+        # the fractional cut (e.g. args.nb_pixel_frac_min = 0.13) could become
+        # the new default, but would probably cause trouble for the side-bands
+        args.nb_pixel_min = 75  # previous default
+    elif not (args.nb_pixel_frac_min is None or args.nb_pixel_min is None):
+        print("both nb_pixel_frac_min and nb_pixel_min were set, using nb_pixel_min")
+        args.nb_pixel_frac_min = None
+
+    if args.nb_pixel_masked_frac_max is None and args.nb_pixel_masked_max is None:
+        # a fractional cut (args.nb_pixel_masked_frac_max = 0.21, i.e. 21% of the
+        # estimated forest length) could become the new default
+        args.nb_pixel_masked_max = 40  # previous default
+    elif not (args.nb_pixel_masked_frac_max is None or args.nb_pixel_masked_max is None):
+        print("both nb_pixel_masked_frac_max and nb_pixel_masked_max were set, using nb_pixel_masked_max")
+        args.nb_pixel_masked_frac_max = None
+
+    print([[i, f] for i, f in enumerate(files)])
+
     if args.num_processors > 1:
         pool = Pool(args.num_processors)
         index_file_args = [(i, f, args) for i, f in enumerate(files)]
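The two `elif` branches above resolve conflicting flags in favour of the absolute cut, with only a print as warning. A standalone sketch of the precedence rule, with `resolve_min_pixel_cut` as a hypothetical helper name (not in the patch):

```python
def resolve_min_pixel_cut(nb_pixel_min, nb_pixel_frac_min):
    """Precedence rule used in main(): the absolute cut wins when both flags
    are given, and the previous default of 75 pixels applies when neither is."""
    if nb_pixel_min is None and nb_pixel_frac_min is None:
        return 75, None  # previous default
    if nb_pixel_min is not None and nb_pixel_frac_min is not None:
        print("both nb_pixel_frac_min and nb_pixel_min were set, using nb_pixel_min")
        return nb_pixel_min, None
    return nb_pixel_min, nb_pixel_frac_min

assert resolve_min_pixel_cut(None, None) == (75, None)
assert resolve_min_pixel_cut(50, 0.13) == (50, None)
assert resolve_min_pixel_cut(None, 0.13) == (None, 0.13)
```

The same rule is applied a second time to `nb_pixel_masked_max` / `nb_pixel_masked_frac_max`, with the previous default of 40 masked pixels.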
diff --git a/py/picca/_version.py b/py/picca/_version.py
index 3cd81c284..95fcb099c 100644
--- a/py/picca/_version.py
+++ b/py/picca/_version.py
@@ -1 +1 @@
-__version__ = '9.8.0'
+__version__ = '9.11.0'
diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py
index cdd6ab6fe..4f2d71ace 100644
--- a/py/picca/pk1d/postproc_pk1d.py
+++ b/py/picca/pk1d/postproc_pk1d.py
@@ -797,7 +797,11 @@ def compute_average_pk_redshift(
             snr_bins = (snr_bin_edges[:-1] + snr_bin_edges[1:]) / 2
             data_values = p1d_table[col][select]
-            data_values = data_values[mask_nan_p1d_values]
+            data_snr = p1d_table["forest_snr"][select]
+            data_snr, data_values = (
+                data_snr[mask_nan_p1d_values],
+                data_values[mask_nan_p1d_values],
+            )
             # Fit function to observed dispersion in col:
             standard_dev_col, _, _ = binned_statistic(
                 data_snr, data_values, statistic="std", bins=snr_bin_edges
@@ -820,8 +824,9 @@
             variance_estimated_col = fitfunc_variance_pk1d(data_snr, *coef_col)
             weights_col = 1.0 / variance_estimated_col
             if apply_z_weights:
-                weights *= redshift_weights
-            mean = np.average(data_values, weights=weights)
+                mean = np.average(data_values, weights=weights * redshift_weights)
+            else:
+                mean = np.average(data_values, weights=weights)
             if apply_z_weights:  # Analytic expression for the re-weighted average:
                 error = np.sqrt(np.sum(weights_col * redshift_weights)) / np.sum(
                     weights_col * redshift_weights
@@ -829,6 +834,10 @@
                 )
             else:
                 error = np.sqrt(1.0 / np.sum(weights_col))
+            # Variance estimator derived by Jean-Marc; we keep the estimated one.
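(continued below)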
+            # error = np.sqrt(((np.sum(weights)**2 / np.sum(weights**2)) - 1)**(-1) * (
+            #     (np.sum(weights**2 * data_values**2) / np.sum(weights**2)) - (
+            #     np.sum(weights * data_values) / np.sum(weights))**2))
             if col == "Pk":
                 standard_dev = np.concatenate(
                     [
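Two things change in `compute_average_pk_redshift`: `data_snr` is now masked together with `data_values`, so the arrays passed to `binned_statistic` stay aligned, and the redshift re-weighting no longer mutates `weights` in place (the old `weights *= redshift_weights` leaked into every later use of the array). A toy illustration of the weighting itself, with made-up numbers:

```python
import numpy as np

rng = np.random.default_rng(0)
sigma = np.linspace(0.5, 2.0, 100)             # heteroscedastic toy errors
values = rng.normal(1.0, sigma)                # toy measurements around 1.0
weights = 1.0 / sigma**2                       # inverse-variance weights, as in fit_snr
redshift_weights = rng.uniform(0.5, 1.0, 100)  # stand-in for the z-overlap weights

# The patch computes the product inline instead of mutating `weights` in
# place, so the unmodified weights stay available afterwards.
mean_z = np.average(values, weights=weights * redshift_weights)
mean = np.average(values, weights=weights)

# Error of an inverse-variance weighted mean: 1/sqrt(sum of weights).
error = np.sqrt(1.0 / np.sum(weights))
print(mean, mean_z, error)
```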
@@ -1142,7 +1151,7 @@ def compute_groups_for_one_forest(nbins_k, p1d_los):
         if number_in_bins != 0:
             weight = p1d_los["weight"][mask_ikbin][0]
             p1d_weights_id[ikbin] = weight
-            covariance_weights_id[ikbin] = np.sqrt(weight / number_in_bins)
+            covariance_weights_id[ikbin] = weight / number_in_bins
             p1d_groups_id[ikbin] = np.nansum(
                 p1d_los["pk"][mask_ikbin] * covariance_weights_id[ikbin]
             )
@@ -1199,11 +1208,11 @@ def compute_cov(
     mean_pk_product = np.outer(mean_pk, mean_pk)
 
     sum_p1d_weights = np.nansum(p1d_weights, axis=0)
-    weights_sum_of_product = np.outer(sum_p1d_weights, sum_p1d_weights)
+    weights_sum_product = np.outer(sum_p1d_weights, sum_p1d_weights)
 
     p1d_groups_product_sum = np.zeros((nbins_k, nbins_k))
-    covariance_weights_product_of_sum = np.zeros((nbins_k, nbins_k))
-    weights_product_of_sum = np.zeros((nbins_k, nbins_k))
+    covariance_weights_product_sum = np.zeros((nbins_k, nbins_k))
+    weights_product_sum = np.zeros((nbins_k, nbins_k))
 
     for i, p1d_group in enumerate(p1d_groups):
         # The summation is done with np.nansum instead of simple addition to not
@@ -1212,21 +1221,21 @@ def compute_cov(
         p1d_groups_product_sum = np.nansum(
             [p1d_groups_product_sum, np.outer(p1d_group, p1d_group)], axis=0
         )
-        covariance_weights_product_of_sum = np.nansum(
+        covariance_weights_product_sum = np.nansum(
             [
-                covariance_weights_product_of_sum,
+                covariance_weights_product_sum,
                 np.outer(covariance_weights[i], covariance_weights[i]),
             ],
             axis=0,
         )
-        weights_product_of_sum = np.nansum(
-            [weights_product_of_sum, np.outer(p1d_weights[i], p1d_weights[i])], axis=0
+        weights_product_sum = np.nansum(
+            [weights_product_sum, np.outer(p1d_weights[i], p1d_weights[i])], axis=0
         )
 
     del p1d_groups, covariance_weights, p1d_weights
 
-    covariance_matrix = (weights_product_of_sum / weights_sum_of_product) * (
-        (p1d_groups_product_sum / covariance_weights_product_of_sum) - mean_pk_product
+    covariance_matrix = ((weights_sum_product / weights_product_sum) - 1) ** (-1) * (
+        (p1d_groups_product_sum / covariance_weights_product_sum) - mean_pk_product
     )
 
     # For fit_snr method, due to the SNR fitting scheme used for weighting,
diff --git a/py/picca/raw_io.py b/py/picca/raw_io.py
index e3a7866dd..26c18d7aa 100644
--- a/py/picca/raw_io.py
+++ b/py/picca/raw_io.py
@@ -76,8 +76,14 @@ def read_transmission_file(filename, num_bins, objs_thingid, tracer='F_LYA', lam
     if np.in1d(thingid, objs_thingid).sum() == 0:
         hdul.close()
         return
-    ra = hdul['METADATA']['RA'][:].astype(np.float64) * np.pi / 180.
-    dec = hdul['METADATA']['DEC'][:].astype(np.float64) * np.pi / 180.
+    if 'RA' in hdul['METADATA'].get_colnames() and 'DEC' in hdul['METADATA'].get_colnames():
+        ra = hdul['METADATA']['RA'][:].astype(np.float64) * np.pi / 180.
+        dec = hdul['METADATA']['DEC'][:].astype(np.float64) * np.pi / 180.
+    elif 'TARGET_RA' in hdul['METADATA'].get_colnames() and 'TARGET_DEC' in hdul['METADATA'].get_colnames():
+        ra = hdul['METADATA']['TARGET_RA'][:].astype(np.float64) * np.pi / 180.
+        dec = hdul['METADATA']['TARGET_DEC'][:].astype(np.float64) * np.pi / 180.
+    else:
+        raise ValueError('RA and DEC columns not found in transmission files.')
     z = hdul['METADATA']['Z'][:]
 
     # Use "lambda_array" to store either lambda or log lambda
@@ -314,8 +320,14 @@ def convert_transmission_to_deltas(obj_path, out_dir, in_dir=None, in_filenames=
         w = hdu[z_key][:] > max(0., lambda_min / lambda_max_rest_frame - 1.)
         w &= hdu[z_key][:] < max(0., lambda_max / lambda_min_rest_frame - 1.)
-        objs_ra = hdu['RA'][:][w].astype('float64') * np.pi / 180.
-        objs_dec = hdu['DEC'][:][w].astype('float64') * np.pi / 180.
+        if 'RA' in key_val and 'DEC' in key_val:
+            objs_ra = hdu['RA'][:][w].astype('float64') * np.pi / 180.
+            objs_dec = hdu['DEC'][:][w].astype('float64') * np.pi / 180.
+        elif 'TARGET_RA' in key_val and 'TARGET_DEC' in key_val:
+            objs_ra = hdu['TARGET_RA'][:][w].astype('float64') * np.pi / 180.
+            objs_dec = hdu['TARGET_DEC'][:][w].astype('float64') * np.pi / 180.
+        else:
+            raise ValueError('RA and DEC columns not found in objects catalog.')
         objs_thingid = objs_thingid[w]
         hdul.close()
         userprint('INFO: Found {} quasars'.format(objs_ra.size))
diff --git a/py/picca/tests/data/test_Pk1D/meanPk1D_covariance.fits.gz b/py/picca/tests/data/test_Pk1D/meanPk1D_covariance.fits.gz
index eb0595e03..147e80907 100644
Binary files a/py/picca/tests/data/test_Pk1D/meanPk1D_covariance.fits.gz and b/py/picca/tests/data/test_Pk1D/meanPk1D_covariance.fits.gz differ
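The new covariance normalization in `compute_cov` replaces the previous `weights_product_of_sum / weights_sum_of_product` prefactor with `((sum_i w_ia)(sum_i w_ib) / sum_i w_ia*w_ib - 1)**(-1)`, the weighted analogue of the unbiased 1/(n-1) factor; on the diagonal it is 1/(n_eff - 1) with n_eff = (sum w)**2 / sum(w**2), the same factor that appears in the commented-out error estimator above. A simplified numerical check, collapsing picca's per-los k-bin grouping to one measurement per line of sight and using equal weights so the result must match `np.cov` (all names are local to the sketch):

```python
import numpy as np

rng = np.random.default_rng(1)
n_los, nbins_k = 500, 4
pk = rng.normal(size=(n_los, nbins_k)) * np.array([1.0, 0.5, 0.5, 2.0])
w = np.ones((n_los, nbins_k))  # equal weights: the estimator must reduce to np.cov

sum_w = w.sum(axis=0)
weights_sum_product = np.outer(sum_w, sum_w)        # (sum_i w_ia)(sum_i w_ib)
weights_product_sum = np.einsum("ia,ib->ab", w, w)  # sum_i w_ia * w_ib
mean_pk = np.average(pk, weights=w, axis=0)

# weighted cross-products, normalized by the summed weight products
prod_sum = np.einsum("ia,ib->ab", w * pk, w * pk) / weights_product_sum

cov = ((weights_sum_product / weights_product_sum) - 1) ** (-1) * (
    prod_sum - np.outer(mean_pk, mean_pk)
)

# with equal weights the prefactor is 1/(n_los - 1) and cov matches np.cov
assert np.allclose(cov, np.cov(pk.T))
print(np.diag(cov))
```

Finally, the two `raw_io.py` hunks implement the same RA/DEC fallback; a self-contained sketch of that pattern using fitsio (the helper name `read_radec_radians` is mine, not the patch's; TARGET_RA/TARGET_DEC are the column names used in DESI catalogs):

```python
import numpy as np
import fitsio

def read_radec_radians(hdu):
    """Read RA/DEC in radians from a table HDU, falling back to the
    TARGET_RA/TARGET_DEC column names. Hypothetical helper mirroring the
    fallback logic in the patch."""
    colnames = hdu.get_colnames()
    if 'RA' in colnames and 'DEC' in colnames:
        ra_col, dec_col = 'RA', 'DEC'
    elif 'TARGET_RA' in colnames and 'TARGET_DEC' in colnames:
        ra_col, dec_col = 'TARGET_RA', 'TARGET_DEC'
    else:
        raise ValueError('RA and DEC columns not found')
    ra = hdu[ra_col][:].astype(np.float64) * np.pi / 180.
    dec = hdu[dec_col][:].astype(np.float64) * np.pi / 180.
    return ra, dec

# usage (path is a placeholder):
# with fitsio.FITS('transmission.fits.gz') as hdul:
#     ra, dec = read_radec_radians(hdul['METADATA'])
```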