From 5ade1ffe60d3c9a8e5a02728b691063b29b24720 Mon Sep 17 00:00:00 2001 From: Ell Bogat Date: Wed, 13 Nov 2024 12:17:30 -0500 Subject: [PATCH 01/11] add psflib functions --- spaceKLIP/__init__.py | 1 + spaceKLIP/psflib.py | 421 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 422 insertions(+) create mode 100644 spaceKLIP/psflib.py diff --git a/spaceKLIP/__init__.py b/spaceKLIP/__init__.py index dcd1595f..034a16db 100644 --- a/spaceKLIP/__init__.py +++ b/spaceKLIP/__init__.py @@ -8,6 +8,7 @@ from . import mast from . import plotting from . import psf +from . import psflib from . import pyklippipeline from . import utils diff --git a/spaceKLIP/psflib.py b/spaceKLIP/psflib.py new file mode 100644 index 00000000..b2aa2de3 --- /dev/null +++ b/spaceKLIP/psflib.py @@ -0,0 +1,421 @@ +################################################################################ +# This script takes a directory of JWST calints files as input and builds a # +# database of reference info for each file, which can then be read via the # +# pipeline script. # +# # +# Basically it will hold the target-specific info needed by the pipeline, as # +# well as info needed to choose reference observations for a given science # +# target. # +# # +# Written 2024-07-10 by Ellis Bogat # +################################################################################ + + +# imports +import os +import warnings +import glob +import re +import mocapy +import pandas as pd +from astropy.io import fits + +from astroquery.simbad import Simbad +import numpy as np + +sp_class_letters = ['O','B','A','F','G','K','M','T','Y'] + +# Helper functions for logic with database series +def isnone(series): + try: + return np.isnan(series) + except: + return (series == '') | (series == None) + + +def not_isnone(series): + + return ~ isnone(series) + + +def load_refdb(fpath): + """ + Reads the database of target- and observation-specific reference info for + each observation in the PSF library. + + Args: + fpath (str or os.path): Path to the .csv file containing the reference + database. + + Returns: + pandas.DataFrame: database containing target- and observation-specific + info for each file. + """ + + refdb = pd.read_csv(fpath) + refdb.set_index('TARGNAME',inplace=True) + + return refdb + + +def decode_simbad_sptype(input_sptypes): + + if isinstance(input_sptypes,str): + input_sptypes = [input_sptypes] + + + sp_classes = [] + sp_subclasses = [] + sp_lclasses = [] + + for simbad_spectype in input_sptypes: + + m = re.search(r'([OBAFGKMTY])(\d\.*\d*)[-+/]*(\d\.*\d*)*[-+/]*(I*V*I*)[-/]*(I*V*I*)',simbad_spectype) + + if m is None: + sp_classes.append('') + sp_subclasses.append('') + sp_lclasses.append('') + else: + res = m.group(1,2,3,4,5) + + sp_classes.append(res[0]) + sp_subclasses.append(res[1]) + if 'V' in res[3:] or res[3:] == ('',''): + sp_lclasses.append('V') + + elif res[4] != '': + sp_lclasses.append(res[4]) + + else: + sp_lclasses.append(res[3]) + + return (sp_classes,sp_subclasses,sp_lclasses) + + +def update_db_sptypes(refdb): + simbad_sptypes = refdb.SPTYPE.values + + sp_classes, sp_subclasses, sp_lclasses = decode_simbad_sptype(simbad_sptypes) + + refdb_copy = refdb.copy() + refdb_copy['SP_CLASS'] = sp_classes + refdb_copy['SP_SUBCLASS'] = sp_subclasses + refdb_copy['SP_LCLASS'] = sp_lclasses + + return refdb_copy + + +def build_refdb(idir,odir='.',uncal=False,overwrite=False): + """ + Constructs a database of target-specific reference info for each + calints file in the input directory. + + Args: + idir (path): Path to pre-processed (stage 2) JWST images to be added + to the database + odir (path, optional): Location to save the database. Defaults to '.'. + uncal (bool, optional): Toggle using uncal files as inputs instead of + calints. Defaults to None. + overwrite (bool, optional): If true, overwrite the existing caldb. + + Returns: + pandas.DataFrame: database containing target- and observation-specific + info for each file. + """ + + # TODO: + # - describe each column & its units + # - write tests for build_refdb() + # - nonexistent input directory + # - empty input directory + # - no calints files in input directory + # - header kw missing + # - duplicate science target with different program names + # - calints vs uncal toggle + # - (warning if no calints present but uncals are + # 'did you mean to set uncal = True?') + + # Check that you won't accidentally overwrite an existing csv. + outpath = os.path.join(odir,'ref_lib.csv') + if os.path.exists(outpath): + + if overwrite: + msg = f'\nThis operation will overwrite {outpath}. \nIf this is not what you want, abort now!' + warnings.warn(msg) + + else: + raise Exception(f'This operation is trying to overwrite {outpath}.\nIf this is what you want, set overwrite=True.') + + # Read in uncal files + print('Reading input files...') + suffix = 'uncal' if uncal else 'calints' + fpaths = sorted(glob.glob(os.path.join(idir,f"*_{suffix}.fits"))) + + # Start a dataframe with the header info we want from each file + csv_list = [] + fits_cols = [ + 'TARGPROP', + 'TARGNAME', # Save 2MASS ID also + 'FILENAME', + 'DATE-OBS', + 'TIME-OBS', + 'DURATION', # Total exposure time + 'TARG_RA', + 'TARG_DEC', + 'TARGURA', # RA uncertainty + 'TARGUDEC', # Dec uncertainty + 'MU_RA', # Proper motion + 'MU_DEC', + 'MU_EPOCH', + 'INSTRUME', + 'DETECTOR', + 'MODULE', + 'CHANNEL', + 'FILTER', + 'PUPIL', + ] + for fpath in fpaths: + row = [] + hdr = fits.getheader(fpath) + for col in fits_cols: + row.append(hdr[col]) + csv_list.append(row) + + df = pd.DataFrame(csv_list,columns=fits_cols) + + # Make a df with only one entry for each unique target + targnames = np.unique(df['TARGNAME']) + df_unique = pd.DataFrame(np.transpose([targnames]),columns=['TARGNAME']) + + # Get 2MASS IDs + print('Collecting 2MASS IDs...') + twomass_ids = [] + for targname in targnames: + result_table = Simbad.query_objectids(targname) + if result_table is None: + raise Exception(f'No SIMBAD object found for targname {targname}.') + tmids_found = [] + for name in list(result_table['ID']): + if name[:6] == '2MASS ': + twomass_ids.append(name) + tmids_found.append(name) + if len(tmids_found) < 1: + raise Exception(f'No 2MASS ID found for targname {targname}.') + elif len(tmids_found) > 1: + raise Exception(f'Multiple 2MASS ID found for targname {targname}: {tmids_found}') + df_unique['2MASS_ID'] = twomass_ids + df_unique.set_index('2MASS_ID',inplace=True) + + # Query SIMBAD + print('Querying SIMBAD...') + + customSimbad = Simbad() + customSimbad.add_votable_fields('sptype', + 'flux(K)', 'flux_error(K)', + 'plx', 'plx_error') + simbad_list = list(df_unique.index) + scistar_simbad_table = customSimbad.query_objects(simbad_list) + + # Convert to pandas df and make 2MASS IDs the index + df_simbad = scistar_simbad_table.to_pandas() + df_simbad['2MASS_ID'] = simbad_list + df_simbad.set_index('2MASS_ID',inplace=True) + + # Rename some columns + simbad_cols = { # Full column list here: http://simbad.u-strasbg.fr/Pages/guide/sim-fscript.htx + 'SPTYPE': 'SP_TYPE', # maybe use 'simple_spt' or 'complete_spt'? + 'KMAG': 'FLUX_K', # 'kmag' + 'KMAG_ERR': 'FLUX_ERROR_K', # 'ekmag' + 'PLX': 'PLX_VALUE', # 'plx' + 'PLX_ERR': 'PLX_ERROR', # 'eplx' + } + for col,simbad_col in simbad_cols.items(): + df_simbad[col] = list(df_simbad[simbad_col]) + + # Add the values we want to df_unique + df_unique = pd.concat([df_unique,df_simbad.loc[:,simbad_cols.keys()]],axis=1) + + # Query mocadb.ca for extra info + print('Querying MOCADB (this may take a minute)...') + names_df = pd.DataFrame(list(df_unique.index),columns=['designation']) + moca = mocapy.MocaEngine() + mdf = moca.query("SELECT tt.designation AS input_designation, sam.* FROM tmp_table AS tt LEFT JOIN mechanics_all_designations AS mad ON(mad.designation LIKE tt.designation) LEFT JOIN summary_all_objects AS sam ON(sam.moca_oid=mad.moca_oid)", tmp_table=names_df) + mdf.set_index('input_designation',inplace=True) + + moca_cols = { + 'SPTYPE': 'spt', # maybe use 'simple_spt' or 'complete_spt'? + 'PLX': 'plx', # 'plx' + 'PLX_ERR': 'eplx', # 'eplx' + 'AGE': 'age', # 'age' + 'AGE_ERR': 'eage', # 'eage' + } + + # Update the column names for consistency + for col,moca_col in moca_cols.items(): + # print(col, list(mdf[moca_col])) + mdf[col] = list(mdf[moca_col]) + + # Fill in values missing from SIMBAD with MOCA + + df_unique['COMMENTS'] = '' + + # Sort all the dfs by index so they match up + df_unique.sort_index(inplace=True) + df_simbad.sort_index(inplace=True) + mdf.sort_index(inplace=True) + + # Replace values and update comments + cols_overlap = list(set(list(simbad_cols.keys())).intersection(list(moca_cols.keys()))) + for col in cols_overlap: + df_unique.loc[isnone(df_simbad[col]) & ~isnone(mdf[col]),'COMMENTS'] += f"{col} adopted from MOCA. " + df_unique.loc[isnone(df_simbad[col]) & ~isnone(mdf[col]),col] = mdf + + for col in ['AGE','AGE_ERR']: + df_unique[col] = mdf[col] + df_unique.loc[~isnone(mdf[col]),'COMMENTS'] += f"{col} adopted from MOCA. " + + # # Replace values + # df_unique.loc[df_unique['SPTYPE']=='','SPTYPE'] = None + # df_unique_replaced = df_unique.loc[:,cols_overlap].combine_first(mdf.loc[:,cols_overlap]) + # df_unique.loc[:,cols_overlap] = df_unique_replaced.loc[:,cols_overlap] + # cols_overlap = ['SPTYPE','PLX','PLX_ERR'] + # df_unique.loc[isnone(df_unique.loc[:,cols_overlap]),cols_overlap] = mdf.loc[isnone(df_unique.loc[:,cols_overlap]),cols_overlap] + # df_unique.loc[:,cols_overlap].where(not_isnone,other=mdf,inplace=True) + + # Calculate distances from plx in mas + df_unique['DIST'] = 1. / (df_unique['PLX'] / 1000) + df_unique['DIST_ERR'] = df_unique['PLX_ERR'] / 1000 / ((df_unique['PLX'] / 1000)**2) + + # Decode spectral types + df_unique = update_db_sptypes(df_unique) + + # Add empty columns + empty_cols = [ + 'FLAGS', + 'HAS_DISK', + 'HAS_CANDS'] + for col in empty_cols: + df_unique[col] = '' + + # Apply dataframe of unique targets to the original file list + df.set_index('TARGNAME',inplace=True) + df_unique.reset_index(inplace=True) + df_unique.set_index('TARGNAME',inplace=True) + df_unique = df_unique.reindex(df.index) + df_out = pd.concat([df,df_unique],axis=1) + + # Save dataframe + df_out.to_csv(outpath) + print(f'Database saved to {outpath}') + print(f'Done.') + + return df_out + + +def get_sciref_files(sci_target, refdb, idir=None, spt_choice=None, filters=None, exclude_disks=False): + """Construct a list of science files and reference files to input to a PSF subtraction routine. + + Args: + sci_target (str): + name of the science target to be PSF subtracted. Can be the proposal target name, + JWST resolved target name, or 2MASS ID. + refdb (pandas.DataFrame or str): + pandas dataframe or filepath to csv containing the reference database generated by + the build_refdb() function. + idir (str): + path to directory of input data, to be appended to file names. + spt_choice (str, optional): + None (default): use all spectral types. + 'near' : use only references with the same spectral class letter. + 'nearer' : use only refs within +- 1 spectral type. ie. M3-5 for an M4 science target. + 'nearest' : use only refs with the exact same spectral type. + filters (str or list, optional): + None (default) : include all filters. + 'F444W' or other filter name: include only that filter. + ['filt1','filt2']: include only filt1 and filt2 + exclude_disks (bool, optional): Exclude references that are known to have disks. Defaults to False. + + Returns: + list: filenames of science observations. + list: filenames of reference observations. + """ + + # TODO: + # - filter out flags? + # - sptype filters + # - tests: + # - sci_target in index + # - sci_target in 2MASS_ID + # - sci_target in TARGPROP + # - sci_target not in refdb + # - exceptions if 0 science fnames are returned + # - warnings if 0 reference observations are returned + + if isinstance(refdb,str): + refdb = load_refdb(refdb) + + # Locate input target 2MASS ID + # (input name could be in index, TARGPROP, or 2MASS_ID column) + if sci_target in refdb['2MASS_ID'].to_list(): + targ_2mass = sci_target + + elif sci_target in refdb.index.to_list(): + targ_2mass = refdb.loc[sci_target,'2MASS_ID'].to_list()[0] + + elif sci_target in refdb['TARGPROP'].to_list(): + refdb_temp = refdb.reset_index() + refdb_temp.set_index('TARGPROP',inplace=True) + targ_2mass = refdb_temp.loc[sci_target,'2MASS_ID'].to_list()[0] + + else: + raise Exception(f'Science target {sci_target} not found in reference database.') + + refdb_temp = refdb.reset_index() + refdb_temp.set_index('FILENAME',inplace=True) + + # Collect all the science files + sci_fnames = refdb_temp.index[refdb_temp['2MASS_ID'] == targ_2mass].to_list() + + # Start list of reference files + ref_fnames = refdb_temp.index[refdb_temp['2MASS_ID'] != targ_2mass].to_list() + + # Collect the reference files + if spt_choice != None: + raise Exception('Only spt_choice=None is currently supported.') + + # Remove observations with disks flagged + if exclude_disks: + disk_fnames = refdb_temp.index[refdb_temp['HAS_DISK'] == True].to_list() + ref_fnames = list(set(ref_fnames) - set(disk_fnames)) + + if filters != None: + if isinstance('filter',str): + filters = [filters] + filter_fnames = [] + for filter in filters: + filter_fnames.extend(refdb_temp.index[refdb_temp['FILTER'] == filter].to_list()) + if len(filter_fnames) == 0: + raise Warning(f'No observations found with filters {filters}.') + + sci_fnames = list(set(sci_fnames).intersection(filter_fnames)) + ref_fnames = list(set(ref_fnames).intersection(filter_fnames)) + + # Make sure no observations are in both sci_fnames and ref_fnames + if len(set(sci_fnames).intersection(ref_fnames)) > 0: + raise Exception("One or more filenames exists in both the science and reference file list. Something is wrong.") + + if not idir is None: + sci_fpaths = [os.path.join(idir,sci_fname) for sci_fname in sci_fnames] + ref_fpaths = [os.path.join(idir,ref_fname) for ref_fname in ref_fnames] + else: + sci_fpaths = sci_fnames + ref_fpaths = ref_fnames + + return [sci_fpaths, ref_fpaths] + +# # TESTING +# idir = 'DATA/NANREPLACED_v0' +# ref_db = build_refdb(idir,overwrite=True) + +# print('Done Done.') \ No newline at end of file From 155d805b988a2207dce549ae4470af77f745e966 Mon Sep 17 00:00:00 2001 From: Ell Bogat Date: Mon, 25 Nov 2024 08:51:31 -0500 Subject: [PATCH 02/11] update build_refdb to accept generic input file suffixes --- spaceKLIP/psflib.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/spaceKLIP/psflib.py b/spaceKLIP/psflib.py index b2aa2de3..ea4643fb 100644 --- a/spaceKLIP/psflib.py +++ b/spaceKLIP/psflib.py @@ -106,7 +106,7 @@ def update_db_sptypes(refdb): return refdb_copy -def build_refdb(idir,odir='.',uncal=False,overwrite=False): +def build_refdb(idir,odir='.',suffix='calints',overwrite=False): """ Constructs a database of target-specific reference info for each calints file in the input directory. @@ -115,8 +115,7 @@ def build_refdb(idir,odir='.',uncal=False,overwrite=False): idir (path): Path to pre-processed (stage 2) JWST images to be added to the database odir (path, optional): Location to save the database. Defaults to '.'. - uncal (bool, optional): Toggle using uncal files as inputs instead of - calints. Defaults to None. + suffix (str, optional): Input filename suffix, e.g. 'uncal' or 'calints'. Defaults to 'calints'. overwrite (bool, optional): If true, overwrite the existing caldb. Returns: @@ -147,9 +146,9 @@ def build_refdb(idir,odir='.',uncal=False,overwrite=False): else: raise Exception(f'This operation is trying to overwrite {outpath}.\nIf this is what you want, set overwrite=True.') - # Read in uncal files + # Read input files print('Reading input files...') - suffix = 'uncal' if uncal else 'calints' + suffix = suffix.strip('_') fpaths = sorted(glob.glob(os.path.join(idir,f"*_{suffix}.fits"))) # Start a dataframe with the header info we want from each file @@ -175,6 +174,7 @@ def build_refdb(idir,odir='.',uncal=False,overwrite=False): 'FILTER', 'PUPIL', ] + for fpath in fpaths: row = [] hdr = fits.getheader(fpath) @@ -313,7 +313,10 @@ def build_refdb(idir,odir='.',uncal=False,overwrite=False): return df_out -def get_sciref_files(sci_target, refdb, idir=None, spt_choice=None, filters=None, exclude_disks=False): +def get_sciref_files(sci_target, refdb, idir=None, + spt_choice=None, + filters=None, + exclude_disks=False): """Construct a list of science files and reference files to input to a PSF subtraction routine. Args: From 1bdbc90fdf49963466f605927f0d8e36fe8dd7c6 Mon Sep 17 00:00:00 2001 From: Ell Bogat Date: Mon, 25 Nov 2024 09:07:09 -0500 Subject: [PATCH 03/11] set default refdb column values to 'unknown' if manual flagging is needed --- spaceKLIP/psflib.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/spaceKLIP/psflib.py b/spaceKLIP/psflib.py index ea4643fb..61ca09d9 100644 --- a/spaceKLIP/psflib.py +++ b/spaceKLIP/psflib.py @@ -131,9 +131,7 @@ def build_refdb(idir,odir='.',suffix='calints',overwrite=False): # - no calints files in input directory # - header kw missing # - duplicate science target with different program names - # - calints vs uncal toggle - # - (warning if no calints present but uncals are - # 'did you mean to set uncal = True?') + # - logic for if 'HAS_DISK','HAS_CANDS' have a mix of 'unknown' and bool values # Check that you won't accidentally overwrite an existing csv. outpath = os.path.join(odir,'ref_lib.csv') @@ -150,6 +148,8 @@ def build_refdb(idir,odir='.',suffix='calints',overwrite=False): print('Reading input files...') suffix = suffix.strip('_') fpaths = sorted(glob.glob(os.path.join(idir,f"*_{suffix}.fits"))) + if len(fpaths) == 0: + raise Exception(f'No "{suffix}" files found in input directory {idir} .') # Start a dataframe with the header info we want from each file csv_list = [] @@ -258,7 +258,7 @@ def build_refdb(idir,odir='.',suffix='calints',overwrite=False): # Fill in values missing from SIMBAD with MOCA - df_unique['COMMENTS'] = '' + df_unique['COMMENTS'] = 'unknown' # Sort all the dfs by index so they match up df_unique.sort_index(inplace=True) @@ -291,12 +291,13 @@ def build_refdb(idir,odir='.',suffix='calints',overwrite=False): df_unique = update_db_sptypes(df_unique) # Add empty columns - empty_cols = [ + manual_cols = [ 'FLAGS', 'HAS_DISK', 'HAS_CANDS'] - for col in empty_cols: - df_unique[col] = '' + + for col in manual_cols: + df_unique[col] = 'unknown' # Apply dataframe of unique targets to the original file list df.set_index('TARGNAME',inplace=True) @@ -312,7 +313,6 @@ def build_refdb(idir,odir='.',suffix='calints',overwrite=False): return df_out - def get_sciref_files(sci_target, refdb, idir=None, spt_choice=None, filters=None, From a5bf15c03d761104955b84b48a14dc652a0bc9a6 Mon Sep 17 00:00:00 2001 From: Ell Bogat Date: Tue, 26 Nov 2024 09:04:30 -0500 Subject: [PATCH 04/11] add ability to download ref_db files from MAST --- spaceKLIP/psflib.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/spaceKLIP/psflib.py b/spaceKLIP/psflib.py index 61ca09d9..0bdfc785 100644 --- a/spaceKLIP/psflib.py +++ b/spaceKLIP/psflib.py @@ -19,6 +19,7 @@ import mocapy import pandas as pd from astropy.io import fits +from spaceKLIP import mast from astroquery.simbad import Simbad import numpy as np @@ -157,6 +158,7 @@ def build_refdb(idir,odir='.',suffix='calints',overwrite=False): 'TARGPROP', 'TARGNAME', # Save 2MASS ID also 'FILENAME', + 'OBS_ID', 'DATE-OBS', 'TIME-OBS', 'DURATION', # Total exposure time @@ -313,6 +315,7 @@ def build_refdb(idir,odir='.',suffix='calints',overwrite=False): return df_out + def get_sciref_files(sci_target, refdb, idir=None, spt_choice=None, filters=None, @@ -417,8 +420,16 @@ def get_sciref_files(sci_target, refdb, idir=None, return [sci_fpaths, ref_fpaths] -# # TESTING -# idir = 'DATA/NANREPLACED_v0' -# ref_db = build_refdb(idir,overwrite=True) -# print('Done Done.') \ No newline at end of file +def download_mast(ref_db,token=None, + overwrite=False,exists_ok=True, + progress=False, verbose=False, + base_dir=os.path.join('DATA','MAST_DOWNLOAD')): + + for fname in list(ref_db.FILENAME): + mast.get_mast_filename(fname, + outputdir=base_dir, + overwrite=overwrite, exists_ok=exists_ok, + progress=progress, verbose=verbose, + mast_api_token=token) + From 02a5896f3cb6fe04bec3dfaecccdcb76e67a4b82 Mon Sep 17 00:00:00 2001 From: Ell Bogat Date: Tue, 26 Nov 2024 09:15:39 -0500 Subject: [PATCH 05/11] add mocapy to requirements --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 5e82e331..1dc5a506 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,4 @@ webbpsf_ext>=1.2.0 pyklip>=2.7.1 ipywidgets>=8.1.5 lmfit>1.2.2 - +mocapy @ git+https://github.com/jgagneastro/mocapy.git From 7fd38d58d4f51aee123987163aa32f6860968709 Mon Sep 17 00:00:00 2001 From: Ell Bogat Date: Wed, 4 Dec 2024 14:58:48 -0500 Subject: [PATCH 06/11] add tqdm import --- spaceKLIP/mast.py | 1 + 1 file changed, 1 insertion(+) diff --git a/spaceKLIP/mast.py b/spaceKLIP/mast.py index 57e3e3c6..d35bef84 100644 --- a/spaceKLIP/mast.py +++ b/spaceKLIP/mast.py @@ -9,6 +9,7 @@ import astropy, astropy.table from astroquery.mast import Mast import requests +import tqdm import logging log = logging.getLogger(__name__) From a0896655157ce21d56d42fdf96fef8f94037245c Mon Sep 17 00:00:00 2001 From: Ell Bogat Date: Wed, 4 Dec 2024 14:59:18 -0500 Subject: [PATCH 07/11] add mast download for specific file suffixes --- spaceKLIP/psflib.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/spaceKLIP/psflib.py b/spaceKLIP/psflib.py index 0bdfc785..57a76e06 100644 --- a/spaceKLIP/psflib.py +++ b/spaceKLIP/psflib.py @@ -424,9 +424,24 @@ def get_sciref_files(sci_target, refdb, idir=None, def download_mast(ref_db,token=None, overwrite=False,exists_ok=True, progress=False, verbose=False, + suffix=None, # e.g. 'calints' base_dir=os.path.join('DATA','MAST_DOWNLOAD')): - for fname in list(ref_db.FILENAME): + fnames = list(ref_db.FILENAME) + + # Update file suffix if provided + if not suffix == None: + new_suffix = suffix.strip('_') + + for ff,fname in enumerate(fnames): + fname_split = fname.split('_') + new_fname = '_'.join(fname_split[:-1]) + f'_{new_suffix}.fits' + + fnames[ff] = new_fname + + # Download each file + for fname in fnames: + mast.get_mast_filename(fname, outputdir=base_dir, overwrite=overwrite, exists_ok=exists_ok, From 34a6e71b37873eee749ecb8de42ff0cde77ba58f Mon Sep 17 00:00:00 2001 From: Ell Bogat Date: Wed, 4 Dec 2024 15:11:40 -0500 Subject: [PATCH 08/11] convert print statements to log statements --- spaceKLIP/psflib.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/spaceKLIP/psflib.py b/spaceKLIP/psflib.py index 57a76e06..13e47500 100644 --- a/spaceKLIP/psflib.py +++ b/spaceKLIP/psflib.py @@ -24,6 +24,10 @@ from astroquery.simbad import Simbad import numpy as np +import logging +log = logging.getLogger(__name__) +log.setLevel(logging.INFO) + sp_class_letters = ['O','B','A','F','G','K','M','T','Y'] # Helper functions for logic with database series @@ -146,7 +150,7 @@ def build_refdb(idir,odir='.',suffix='calints',overwrite=False): raise Exception(f'This operation is trying to overwrite {outpath}.\nIf this is what you want, set overwrite=True.') # Read input files - print('Reading input files...') + log.info('Reading input files...') suffix = suffix.strip('_') fpaths = sorted(glob.glob(os.path.join(idir,f"*_{suffix}.fits"))) if len(fpaths) == 0: @@ -191,7 +195,7 @@ def build_refdb(idir,odir='.',suffix='calints',overwrite=False): df_unique = pd.DataFrame(np.transpose([targnames]),columns=['TARGNAME']) # Get 2MASS IDs - print('Collecting 2MASS IDs...') + log.info('Collecting 2MASS IDs...') twomass_ids = [] for targname in targnames: result_table = Simbad.query_objectids(targname) @@ -210,7 +214,7 @@ def build_refdb(idir,odir='.',suffix='calints',overwrite=False): df_unique.set_index('2MASS_ID',inplace=True) # Query SIMBAD - print('Querying SIMBAD...') + log.info('Querying SIMBAD...') customSimbad = Simbad() customSimbad.add_votable_fields('sptype', @@ -239,7 +243,7 @@ def build_refdb(idir,odir='.',suffix='calints',overwrite=False): df_unique = pd.concat([df_unique,df_simbad.loc[:,simbad_cols.keys()]],axis=1) # Query mocadb.ca for extra info - print('Querying MOCADB (this may take a minute)...') + log.info('Querying MOCADB (this may take a minute)...') names_df = pd.DataFrame(list(df_unique.index),columns=['designation']) moca = mocapy.MocaEngine() mdf = moca.query("SELECT tt.designation AS input_designation, sam.* FROM tmp_table AS tt LEFT JOIN mechanics_all_designations AS mad ON(mad.designation LIKE tt.designation) LEFT JOIN summary_all_objects AS sam ON(sam.moca_oid=mad.moca_oid)", tmp_table=names_df) @@ -255,7 +259,6 @@ def build_refdb(idir,odir='.',suffix='calints',overwrite=False): # Update the column names for consistency for col,moca_col in moca_cols.items(): - # print(col, list(mdf[moca_col])) mdf[col] = list(mdf[moca_col]) # Fill in values missing from SIMBAD with MOCA @@ -310,9 +313,8 @@ def build_refdb(idir,odir='.',suffix='calints',overwrite=False): # Save dataframe df_out.to_csv(outpath) - print(f'Database saved to {outpath}') - print(f'Done.') - + log.info(f'Database saved to {outpath}') + return df_out From 6cbd7ad93127709d5d32e4caf97d3a4a5983f525 Mon Sep 17 00:00:00 2001 From: Ell Bogat Date: Wed, 4 Dec 2024 15:35:00 -0500 Subject: [PATCH 09/11] add tqdm to spaceKLIP requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 1dc5a506..f20f0a1e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,5 @@ webbpsf_ext>=1.2.0 pyklip>=2.7.1 ipywidgets>=8.1.5 lmfit>1.2.2 +tqdm mocapy @ git+https://github.com/jgagneastro/mocapy.git From 78146fc7dd0b1ac21f1ea8d808fe55f854cc36ea Mon Sep 17 00:00:00 2001 From: Ell Bogat Date: Thu, 5 Dec 2024 10:39:55 -0500 Subject: [PATCH 10/11] fix syntax warning --- spaceKLIP/mast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spaceKLIP/mast.py b/spaceKLIP/mast.py index d35bef84..222fc472 100644 --- a/spaceKLIP/mast.py +++ b/spaceKLIP/mast.py @@ -193,7 +193,7 @@ def query_coron_datasets(inst, if return_filenames: - if level is not None and level.lower() is not 'cal' and level.lower() is not '2b': + if level is not None and level.lower() != 'cal' and level.lower() != '2b': # Transform filenames to either rate or uncal files # This may not be robust to all possible scenarios yet... if level.lower()=='rate' or level.lower()=='2a': From fc67e23ba2d9a9c6db98e300a755dda56e7cc7d1 Mon Sep 17 00:00:00 2001 From: Ell Bogat Date: Thu, 5 Dec 2024 10:40:24 -0500 Subject: [PATCH 11/11] add spectral type filters --- spaceKLIP/psflib.py | 150 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 134 insertions(+), 16 deletions(-) diff --git a/spaceKLIP/psflib.py b/spaceKLIP/psflib.py index 13e47500..81f0b976 100644 --- a/spaceKLIP/psflib.py +++ b/spaceKLIP/psflib.py @@ -32,6 +32,16 @@ # Helper functions for logic with database series def isnone(series): + """Helper function to determine which elements of a series + are NaN, None, or empty strings. + + Args: + series (list-like): input data to check + + Returns: + boolean array: True values where input array is NaN/None/'' + """ + try: return np.isnan(series) except: @@ -64,6 +74,19 @@ def load_refdb(fpath): def decode_simbad_sptype(input_sptypes): + """Decodes the complicated SIMBAD spectral type string into simplified + spectral type components. + + Args: + input_sptypes (str or list): SIMBAD spectral type string (or list of strings) + + Returns: + tuple: list of spectral class letters, list of subclass numbers, + and list of luminosity class numerals for each input string. + """ + + # TODO: + # - test this! if isinstance(input_sptypes,str): input_sptypes = [input_sptypes] @@ -98,7 +121,56 @@ def decode_simbad_sptype(input_sptypes): return (sp_classes,sp_subclasses,sp_lclasses) +def adjust_spttype(spt_tup): + # TODO: + # - Tests: + # - input hotter than O0 + # - input colder than Y9 + # - subclass not int + # - known transitions: A10 -> F0, M-3 -> F7 + + spt_class, spt_subclass = spt_tup + + spt_subclass = int(spt_subclass) + + if not spt_class in sp_class_letters: + raise Exception(f'Invalid spectral class letter: {spt_class}') + + i_class = sp_class_letters.index(spt_class) + + while spt_subclass > 9: + + if spt_class == sp_class_letters[-1]: + return (spt_class,9) + + i_class += 1 + spt_class = sp_class_letters[i_class] + spt_subclass -= 10 + + while spt_subclass < 0: + + if spt_class == sp_class_letters[0]: + return (spt_class,0) + + i_class -= 1 + spt_class = sp_class_letters[i_class] + spt_subclass += 10 + + return (spt_class,spt_subclass) + + def update_db_sptypes(refdb): + """ + Separate the spectral type column into a column each for the + spectral class letter, subclass number, and luminosity class numeral. + + Args: + refdb (pandas.DataFrame): PSF reference dataframe + + Returns: + pandas.DataFrame: updated PSF reference dataframe + """ + simbad_sptypes = refdb.SPTYPE.values sp_classes, sp_subclasses, sp_lclasses = decode_simbad_sptype(simbad_sptypes) @@ -319,7 +391,7 @@ def build_refdb(idir,odir='.',suffix='calints',overwrite=False): def get_sciref_files(sci_target, refdb, idir=None, - spt_choice=None, + spt_tolerance=None, filters=None, exclude_disks=False): """Construct a list of science files and reference files to input to a PSF subtraction routine. @@ -333,11 +405,12 @@ def get_sciref_files(sci_target, refdb, idir=None, the build_refdb() function. idir (str): path to directory of input data, to be appended to file names. - spt_choice (str, optional): + spt_tolerance (str or int, optional): None (default): use all spectral types. - 'near' : use only references with the same spectral class letter. - 'nearer' : use only refs within +- 1 spectral type. ie. M3-5 for an M4 science target. - 'nearest' : use only refs with the exact same spectral type. + 'exact' : use only refs with the exact same spectral type. + 'class' : use only references with the same spectral class letter. + 'subclass' : use only references with the same spectral class letter and subclass number. (ignores luminosity class) + int : use only refs within +- N spectral subclasses, e.g. M3-5 for an M4 science target if spt_tolerance = 1. filters (str or list, optional): None (default) : include all filters. 'F444W' or other filter name: include only that filter. @@ -350,15 +423,7 @@ def get_sciref_files(sci_target, refdb, idir=None, """ # TODO: - # - filter out flags? - # - sptype filters - # - tests: - # - sci_target in index - # - sci_target in 2MASS_ID - # - sci_target in TARGPROP - # - sci_target not in refdb - # - exceptions if 0 science fnames are returned - # - warnings if 0 reference observations are returned + # - filter out manual flags if isinstance(refdb,str): refdb = load_refdb(refdb) @@ -377,6 +442,7 @@ def get_sciref_files(sci_target, refdb, idir=None, targ_2mass = refdb_temp.loc[sci_target,'2MASS_ID'].to_list()[0] else: + log.error(f'Science target {sci_target} not found in reference database.') raise Exception(f'Science target {sci_target} not found in reference database.') refdb_temp = refdb.reset_index() @@ -384,13 +450,65 @@ def get_sciref_files(sci_target, refdb, idir=None, # Collect all the science files sci_fnames = refdb_temp.index[refdb_temp['2MASS_ID'] == targ_2mass].to_list() + first_scifile = sci_fnames[0] # Start list of reference files ref_fnames = refdb_temp.index[refdb_temp['2MASS_ID'] != targ_2mass].to_list() # Collect the reference files - if spt_choice != None: - raise Exception('Only spt_choice=None is currently supported.') + if spt_tolerance != None: + + # Consider handling float subclasses (e.g. M4.5) better. Take floor for now. + refdb_temp['SP_SUBCLASS'] = np.floor(refdb_temp['SP_SUBCLASS']) + + targ_sp_class = refdb_temp.loc[first_scifile,'SP_CLASS'] + targ_sp_subclass = refdb_temp.loc[first_scifile,'SP_SUBCLASS'] + targ_sp_lclass = refdb_temp.loc[first_scifile,'SP_LCLASS'] + + if isinstance(spt_tolerance,str): + if spt_tolerance.lower() == 'exact': + + spt_fnames = refdb_temp.index[(refdb_temp['SP_CLASS'] == targ_sp_class) & + (refdb_temp['SP_SUBCLASS'] == targ_sp_subclass) & + (refdb_temp['SP_LCLASS'] == targ_sp_lclass) + ].to_list() + + elif spt_tolerance.lower() == 'class': + + spt_fnames = refdb_temp.index[(refdb_temp['SP_CLASS'] == targ_sp_class) + ].to_list() + + elif spt_tolerance.lower() == 'subclass': + + spt_fnames = refdb_temp.index[(refdb_temp['SP_CLASS'] == targ_sp_class) & + (refdb_temp['SP_SUBCLASS'] == targ_sp_subclass) + ].to_list() + + else: + raise Exception(f'spt_tolerance {spt_tolerance} not configured.') + + else: + + assert isinstance(spt_tolerance,int) + + spt_fnames = [] + + for i in range(-spt_tolerance,spt_tolerance+1): + + spt_tup = (targ_sp_class,targ_sp_subclass+i) + + # Carry over spectral classes and subclasses correctly + spt_tup = adjust_spttype(spt_tup) + + spt_fnames.extend(refdb_temp.index[(refdb_temp['SP_CLASS'] == spt_tup[0]) & + (refdb_temp['SP_SUBCLASS'] == spt_tup[1]) + ].to_list()) + + if len(spt_fnames) == 0: + raise Warning(f'No observations found with specified spectral type filter.') + + sci_fnames = list(set(sci_fnames).intersection(spt_fnames)) + ref_fnames = list(set(ref_fnames).intersection(spt_fnames)) # Remove observations with disks flagged if exclude_disks: