diff --git a/scripts/add_fiters.py b/scripts/add_fiters.py deleted file mode 100644 index e3bd537..0000000 --- a/scripts/add_fiters.py +++ /dev/null @@ -1,325 +0,0 @@ -import requests -from io import BytesIO -import logging -import sqlalchemy.exc -import astropy.units as u -from astropy.io.votable import parse -from astrodb_utils import load_astrodb, ingest_instrument, ingest_publication -from astrodb_utils import check_internet_connection as internet_connection -from schema.schema_template import * - -from astrodb_utils import AstroDBError - -logger = logging.getLogger("AstroDB") -logger.setLevel(logging.DEBUG) - -SAVE_DB = True -RECREATE_DB = True - -db = load_astrodb("astrodb-template.sqlite", recreatedb=RECREATE_DB) - -ingest_publication(db, bibcode="2003AJ....126.1090C") -ingest_publication(db, bibcode="2003tmc..book.....C") -ingest_publication(db, bibcode="2010AJ....140.1868W") -ingest_publication(db, bibcode="2016A&A...595A...1G", publication="Gaia") -ingest_publication(db, bibcode="2023A&A...674A...1G") - -ingest_instrument(db, telescope="2MASS", instrument="2MASS", mode="Imaging") -with db.engine.connect() as conn: - conn.execute( - db.Telescopes.update() - .where(db.Telescopes.c.telescope == "2MASS") - .values(reference="Cutr03", description="The Two Micron All Sky Survey") - ) - - conn.execute( - db.Instruments.update() - .where(db.Instruments.c.instrument == "2MASS") - .values(reference="Cohe03") - ) - conn.commit() - -ingest_instrument(db, telescope="Gaia", instrument="Gaia3", mode="Imaging") -with db.engine.connect() as conn: - conn.execute( - db.Telescopes.update() - .where(db.Telescopes.c.telescope == "Gaia") - .values(description="Gaia") - ) - conn.execute( - db.Instruments.update() - .where(db.Instruments.c.instrument == "Gaia3") - .values(reference="Gaia23") - ) - conn.commit() -ingest_instrument(db, telescope="SLOAN", instrument="SDSS", mode="Imaging") -with db.engine.connect() as conn: - conn.execute( - db.Telescopes.update() - .where(db.Telescopes.c.telescope == "SLOAN") - .values(description="The Sloan Digital Sky Survey") - ) - conn.commit() - -ingest_instrument(db, telescope="WISE", instrument="WISE", mode="Imaging") -with db.engine.connect() as conn: - conn.execute( - db.Telescopes.update() - .where(db.Telescopes.c.telescope == "WISE") - .values(reference="Wrig10", description="Wide-field Infrared Survey Explorer") - ) - conn.commit() - -ingest_instrument(db, telescope="Generic", instrument="Johnson", mode="Imaging") -with db.engine.connect() as conn: - conn.execute( - db.Telescopes.update() - .where(db.Telescopes.c.telescope == "Generic") - .values(description="Generic telescope") - ) - conn.commit() -ingest_instrument(db, telescope="Generic", instrument="Cousins", mode="Imaging") - -ingest_photometry_filter(db, filter_name="J", telescope="2MASS", instrument="2MASS") -ingest_photometry_filter(db, filter_name="H", telescope="2MASS", instrument="2MASS") -ingest_photometry_filter(db, filter_name="Ks", telescope="2MASS", instrument="2MASS") - -ingest_photometry_filter(db, filter_name="G", telescope="Gaia", instrument="Gaia3") -ingest_photometry_filter(db, filter_name="Gbp", telescope="Gaia", instrument="Gaia3") -ingest_photometry_filter(db, filter_name="Grp", telescope="Gaia", instrument="Gaia3") - -ingest_photometry_filter(db, filter_name="u", telescope="SLOAN", instrument="SDSS") -ingest_photometry_filter(db, filter_name="g", telescope="SLOAN", instrument="SDSS") -ingest_photometry_filter(db, filter_name="r", telescope="SLOAN", instrument="SDSS") -ingest_photometry_filter(db, filter_name="i", telescope="SLOAN", instrument="SDSS") -ingest_photometry_filter(db, filter_name="z", telescope="SLOAN", instrument="SDSS") - -ingest_photometry_filter(db, filter_name="W1", telescope="WISE", instrument="WISE") -ingest_photometry_filter(db, filter_name="W2", telescope="WISE", instrument="WISE") -ingest_photometry_filter(db, filter_name="W3", telescope="WISE", instrument="WISE") -ingest_photometry_filter(db, filter_name="W4", telescope="WISE", instrument="WISE") - -ingest_photometry_filter(db, filter_name="U", telescope="Generic", instrument="Johnson") -ingest_photometry_filter(db, filter_name="B", telescope="Generic", instrument="Johnson") -ingest_photometry_filter(db, filter_name="V", telescope="Generic", instrument="Johnson") -ingest_photometry_filter(db, filter_name="R", telescope="Generic", instrument="Johnson") -ingest_photometry_filter(db, filter_name="I", telescope="Generic", instrument="Johnson") -ingest_photometry_filter(db, filter_name="J", telescope="Generic", instrument="Johnson") -ingest_photometry_filter(db, filter_name="M", telescope="Generic", instrument="Johnson") - -ingest_photometry_filter(db, filter_name="R", telescope="Generic", instrument="Cousins") -ingest_photometry_filter(db, filter_name="I", telescope="Generic", instrument="Cousins") - -if SAVE_DB: - db.save_database("data/") - -## These scripts will eventually live in astrodb_utils package - - -def ingest_photometry_filter( - db, *, telescope=None, instrument=None, filter_name=None, ucd=None -): - """ - Add a new photometry filter to the database - """ - # Fetch existing telescopes, add if missing - existing = ( - db.query(db.Telescopes).filter(db.Telescopes.c.telescope == telescope).table() - ) - if len(existing) == 0: - with db.engine.connect() as conn: - conn.execute(db.Telescopes.insert().values({"telescope": telescope})) - conn.commit() - logger.debug(f"Added telescope {telescope}.") - else: - logger.debug(f"Telescope {telescope} already exists.") - - # Fetch existing instruments, add if missing - existing = ( - db.query(db.Instruments) - .filter(db.Instruments.c.instrument == instrument) - .table() - ) - if len(existing) == 0: - with db.engine.connect() as conn: - conn.execute(db.Instruments.insert().values({"instrument": instrument})) - conn.commit() - logger.debug(f"Added instrument {instrument}.") - else: - logger.debug(f"Instrument {instrument} already exists.") - - # Get data from SVO - filter_id, eff_wave, fwhm, width_effective = fetch_svo( - telescope, instrument, filter_name - ) - logger.debug( - f"Filter {filter_id} has effective wavelength {eff_wave} " - "and FWHM {fwhm} and width_effective {width_effective}." - ) - - if ucd is None: - ucd = assign_ucd(eff_wave) - logger.debug(f"UCD for filter {filter_id} is {ucd}") - - # Add the filter - try: - with db.engine.connect() as conn: - conn.execute( - db.PhotometryFilters.insert().values( - { - "band": filter_id, - "ucd": ucd, - "effective_wavelength_angstroms": eff_wave.to(u.Angstrom).value, - "width_angstroms": width_effective.to(u.Angstrom).value, - } - ) - ) - conn.commit() - logger.info( - f"Added filter {filter_id} with effective wavelength {eff_wave}, " - f"FWHM {fwhm}, and UCD {ucd}." - ) - except sqlalchemy.exc.IntegrityError as e: - if "UNIQUE constraint failed:" in str(e): - msg = str(e) + f"Filter {filter_id} already exists in the database." - raise AstroDBError(msg) - else: - msg = str(e) + f"Error adding filter {filter_id}." - raise AstroDBError(msg) - except Exception as e: - msg = str(e) - raise AstroDBError(msg) - - -def fetch_svo(telescope: str = None, instrument: str = None, filter_name: str = None): - """ - Fetch photometry filter information from the SVO Filter Profile Service - http://svo2.cab.inta-csic.es/theory/fps/ - - Could use better error handling when instrument name or filter name is not found - - Parameters - ---------- - telescope: str - Telescope name - instrument: str - Instrument name - filter_name: str - Filter name - - Returns - ------- - filter_id: str - Filter ID - eff_wave: Quantity - Effective wavelength - fwhm: Quantity - Full width at half maximum (FWHM) - - - Raises - ------ - AstroDBError - If the SVO URL is not reachable or the filter information is not found - KeyError - If the filter information is not found in the VOTable - """ - - if internet_connection() == False: - msg = "No internet connection. Cannot fetch photometry filter information from the SVO website." - logger.error(msg) - raise AstroDBError(msg) - - url = ( - f"http://svo2.cab.inta-csic.es/svo/theory/fps3/fps.php?ID=" - f"{telescope}/{instrument}.{filter_name}" - ) - r = requests.get(url) - - if r.status_code != 200: - msg = f"Error retrieving {url}. Status code: {r.status_code}" - logger.error(msg) - raise AstroDBError(msg) - - # Parse VOTable contents - content = BytesIO(r.content) - votable = parse(content) - - # Get Filter ID - try: - filter_id = votable.get_field_by_id("filterID").value - except KeyError: - msg = f"Filter {telescope}, {instrument}, {filter_name} not found in SVO." - raise AstroDBError(msg) - - # Get effective wavelength and FWHM - eff_wave = votable.get_field_by_id("WavelengthEff") - fwhm = votable.get_field_by_id("FWHM") - width_effective = votable.get_field_by_id("WidthEff") - - if eff_wave.unit == "AA" and fwhm.unit == "AA" and width_effective.unit == "AA": - eff_wave = eff_wave.value * u.Angstrom - fwhm = fwhm.value * u.Angstrom - width_effective = width_effective.value * u.Angstrom - else: - msg = f"Wavelengths from SVO may not be Angstroms as expected: {eff_wave.unit}, {fwhm.unit}, {width_effective.unit}." - raise AstroDBError(msg) - - logger.debug( - f"Found in SVO: " - f"Filter {filter_id} has effective wavelength {eff_wave} and " - f"FWHM {fwhm} and width_effective {width_effective}." - ) - - return filter_id, eff_wave, fwhm, width_effective - - -def assign_ucd(eff_wave_quantity: u.Quantity): - """ - Assign a Unified Content Descriptors (UCD) to a photometry filter - based on its effective wavelength - UCDs are from the UCD1+ controlled vocabulary - https://www.ivoa.net/documents/UCD1+/20200212/PEN-UCDlist-1.4-20200212.html#tth_sEcB - - Parameters - ---------- - eff_wave: Quantity - Effective wavelength in Angstroms - - Returns - ------- - ucd: str - UCD string - - """ - eff_wave_quantity.to(u.Angstrom) - eff_wave = eff_wave_quantity.value - - if 3000 < eff_wave <= 4000: - ucd = "em.opt.U" - elif 4000 < eff_wave <= 5000: - ucd = "em.opt.B" - elif 5000 < eff_wave <= 6000: - ucd = "em.opt.V" - elif 6000 < eff_wave <= 7500: - ucd = "em.opt.R" - elif 7500 < eff_wave <= 10000: - ucd = "em.opt.I" - elif 10000 < eff_wave <= 15000: - ucd = "em.IR.J" - elif 15000 < eff_wave <= 20000: - ucd = "em.IR.H" - elif 20000 < eff_wave <= 30000: - ucd = "em.IR.K" - elif 30000 < eff_wave <= 40000: - ucd = "em.IR.3-4um" - elif 40000 < eff_wave <= 80000: - ucd = "em.IR.4-8um" - elif 80000 < eff_wave <= 150000: - ucd = "em.IR.8-15um" - elif 150000 < eff_wave <= 300000: - ucd = "em.IR.15-30um" - else: - ucd = None - - return ucd diff --git a/scripts/ingest_spectra.py b/scripts/ingest_spectra.py deleted file mode 100644 index 9bc747d..0000000 --- a/scripts/ingest_spectra.py +++ /dev/null @@ -1,360 +0,0 @@ -from astrodb_utils import ( - AstroDBError, - find_source_in_db, - check_internet_connection, - check_url_valid, -) -import logging -import pandas as pd -import numpy.ma as ma -import sqlalchemy - -logger = logging.getLogger("AstroDB") - - -def ingest_spectra( - db, - sources, - spectra, - regimes, - telescopes, - instruments, - modes, - obs_dates, - references, - original_spectra=None, - wavelength_units=None, - flux_units=None, - wavelength_order=None, - comments=None, - other_references=None, - raise_error=True, -): - """ - - Parameters - ---------- - db: astrodbkit.astrodb.Database - sources: list[str] - List of source names - spectra: list[str] - List of filenames corresponding to spectra files - regimes: str or list[str] - List or string - telescopes: str or list[str] - List or string - instruments: str or list[str] - List or string - modes: str or list[str] - List or string - obs_dates: str or datetime - List of strings or datetime objects - references: list[str] - List or string - original_spectra: list[str] - List of filenames corresponding to original spectra files - wavelength_units: str or list[str] or Quantity, optional - List or string - flux_units: str or list[str] or Quantity, optional - List or string - wavelength_order: list[int], optional - comments: list[str], optional - List of strings - other_references: list[str], optional - List of strings - raise_error: bool - - """ - - # Convert single value input values to lists - if isinstance(sources, str): - sources = [sources] - - if isinstance(spectra, str): - spectra = [spectra] - - input_values = [ - regimes, - telescopes, - instruments, - modes, - obs_dates, - wavelength_order, - wavelength_units, - flux_units, - references, - comments, - other_references, - ] - for i, input_value in enumerate(input_values): - if isinstance(input_value, str): - input_values[i] = [input_value] * len(sources) - elif isinstance(input_value, type(None)): - input_values[i] = [None] * len(sources) - ( - regimes, - telescopes, - instruments, - modes, - obs_dates, - wavelength_order, - wavelength_units, - flux_units, - references, - comments, - other_references, - ) = input_values - - n_spectra = len(spectra) - n_skipped = 0 - n_dupes = 0 - n_missing_instrument = 0 - n_added = 0 - n_blank = 0 - - msg = f"Trying to add {n_spectra} spectra" - logger.info(msg) - - for i, source in enumerate(sources): - # TODO: check that spectrum can be read by astrodbkit - - # Get source name as it appears in the database - db_name = find_source_in_db(db, source) - - if len(db_name) != 1: - msg = f"No unique source match for {source} in the database" - raise AstroDBError(msg) - else: - db_name = db_name[0] - - # Check if spectrum file is accessible - # First check for internet - internet = check_internet_connection() - if internet: - status = check_url_valid(spectra[i]) - if status == "skipped": - n_skipped += 1 - if raise_error: - raise AstroDBError(msg) - else: - continue - if original_spectra: - status = check_url_valid(original_spectra[i]) - if status == "skipped": - n_skipped += 1 - if raise_error: - raise AstroDBError(msg) - else: - continue - else: - msg = "No internet connection. Internet is needed to check spectrum files." - raise AstroDBError(msg) - - # Find what spectra already exists in database for this source - source_spec_data = ( - db.query(db.Spectra).filter(db.Spectra.c.source == db_name).table() - ) - - # SKIP if observation date is blank - # TODO: try to populate obs date from meta data in spectrum file - if ma.is_masked(obs_dates[i]) or obs_dates[i] == "": - obs_date = None - missing_obs_msg = ( - f"Skipping spectrum with missing observation date: {source} \n" - ) - missing_row_spe = f"{source, obs_dates[i], references[i]} \n" - logger.info(missing_obs_msg) - logger.debug(missing_row_spe) - n_blank += 1 - continue - else: - try: - obs_date = pd.to_datetime( - obs_dates[i] - ) # TODO: Another method that doesn't require pandas? - except ValueError: - n_skipped += 1 - if raise_error: - msg = f"{source}: Can't convert obs date to Date Time object: {obs_dates[i]}" - logger.error(msg) - raise AstroDBError - except dateutil.parser._parser.ParserError: - n_skipped += 1 - if raise_error: - msg = f"{source}: Can't convert obs date to Date Time object: {obs_dates[i]}" - logger.error(msg) - raise AstroDBError - else: - msg = f"Skipping {source} Can't convert obs date to Date Time object: {obs_dates[i]}" - logger.warning(msg) - continue - - # TODO: make it possible to ingest units and order - row_data = [ - { - "source": db_name, - "spectrum": spectra[i], - "original_spectrum": None, # if ma.is_masked(original_spectra[i]) or isinstance(original_spectra,None) - # else original_spectra[i], - "local_spectrum": None, # if ma.is_masked(local_spectra[i]) else local_spectra[i], - "regime": regimes[i], - "telescope": telescopes[i], - "instrument": None if ma.is_masked(instruments[i]) else instruments[i], - "mode": None if ma.is_masked(modes[i]) else modes[i], - "observation_date": obs_date, - "wavelength_units": None - if ma.is_masked(wavelength_units[i]) - else wavelength_units[i], - "flux_units": None if ma.is_masked(flux_units[i]) else flux_units[i], - "wavelength_order": None - if ma.is_masked(wavelength_order[i]) - else wavelength_order[i], - "comments": None if ma.is_masked(comments[i]) else comments[i], - "reference": references[i], - "other_references": None - if ma.is_masked(other_references[i]) - else other_references[i], - } - ] - logger.debug(row_data) - - try: - with db.engine.connect() as conn: - conn.execute(db.Spectra.insert().values(row_data)) - conn.commit() - n_added += 1 - except sqlalchemy.exc.IntegrityError as e: - if "CHECK constraint failed: regime" in str(e): - msg = f"Regime provided is not in schema: {regimes[i]}" - logger.error(msg) - if raise_error: - raise AstroDBError(msg) - else: - continue - if ( - db.query(db.Publications) - .filter(db.Publications.c.publication == references[i]) - .count() - == 0 - ): - msg = ( - f"Spectrum for {source} could not be added to the database because the reference {references[i]} is not in Publications table. \n" - f"(Add it with ingest_publication function.) \n " - ) - logger.warning(msg) - if raise_error: - raise AstroDBError(msg) - else: - continue - # check telescope, instrument, mode exists - telescope = ( - db.query(db.Telescopes) - .filter(db.Telescopes.c.name == row_data[0]["telescope"]) - .table() - ) - instrument = ( - db.query(db.Instruments) - .filter(db.Instruments.c.name == row_data[0]["instrument"]) - .table() - ) - mode = ( - db.query(db.Modes) - .filter(db.Modes.c.name == row_data[0]["mode"]) - .table() - ) - - if len(source_spec_data) > 0: # Spectra data already exists - # check for duplicate measurement - ref_dupe_ind = source_spec_data["reference"] == references[i] - date_dupe_ind = source_spec_data["observation_date"] == obs_date - instrument_dupe_ind = source_spec_data["instrument"] == instruments[i] - mode_dupe_ind = source_spec_data["mode"] == modes[i] - if ( - sum(ref_dupe_ind) - and sum(date_dupe_ind) - and sum(instrument_dupe_ind) - and sum(mode_dupe_ind) - ): - msg = f"Skipping suspected duplicate measurement\n{source}\n" - msg2 = f"{source_spec_data[ref_dupe_ind]['source', 'instrument', 'mode', 'observation_date', 'reference']}" - msg3 = f"{instruments[i], modes[i], obs_date, references[i], spectra[i]} \n" - logger.warning(msg) - logger.debug(msg2 + msg3 + str(e)) - n_dupes += 1 - if raise_error: - raise AstroDBError - else: - continue # Skip duplicate measurement - # else: - # msg = f'Spectrum could not be added to the database (other data exist): \n ' \ - # f"{source, instruments[i], modes[i], obs_date, references[i], spectra[i]} \n" - # msg2 = f"Existing Data: \n " - # # f"{source_spec_data[ref_dupe_ind]['source', 'instrument', 'mode', 'observation_date', 'reference', 'spectrum']}" - # msg3 = f"Data not able to add: \n {row_data} \n " - # logger.warning(msg + msg2) - # source_spec_data[ref_dupe_ind][ - # 'source', 'instrument', 'mode', 'observation_date', 'reference', 'spectrum'].pprint_all() - # logger.debug(msg3) - # n_skipped += 1 - # continue - if len(instrument) == 0 or len(mode) == 0 or len(telescope) == 0: - msg = ( - f"Spectrum for {source} could not be added to the database. \n" - f" Telescope, Instrument, and/or Mode need to be added to the appropriate table. \n" - f" Trying to find telescope: {row_data[0]['telescope']}, instrument: {row_data[0]['instrument']}, " - f" mode: {row_data[0]['mode']} \n" - f" Telescope: {telescope}, Instrument: {instrument}, Mode: {mode} \n" - ) - logger.error(msg) - n_missing_instrument += 1 - if raise_error: - raise AstroDBError - else: - continue - else: - msg = f"Spectrum for {source} could not be added to the database for unknown reason: \n {row_data} \n " - logger.error(msg) - raise AstroDBError(msg) - - msg = ( - f"SPECTRA ADDED: {n_added} \n" - f" Spectra with blank obs_date: {n_blank} \n" - f" Suspected duplicates skipped: {n_dupes}\n" - f" Missing Telescope/Instrument/Mode: {n_missing_instrument} \n" - f" Spectra skipped for unknown reason: {n_skipped} \n" - ) - if n_spectra == 1: - logger.info(f"Added {source} : \n" f"{row_data}") - else: - logger.info(msg) - - if n_added + n_dupes + n_blank + n_skipped + n_missing_instrument != n_spectra: - msg = "Numbers don't add up: " - logger.error(msg) - raise AstroDBError(msg) - - spec_count = ( - db.query(Spectra.regime, func.count(Spectra.regime)) - .group_by(Spectra.regime) - .all() - ) - - spec_ref_count = ( - db.query(Spectra.reference, func.count(Spectra.reference)) - .group_by(Spectra.reference) - .order_by(func.count(Spectra.reference).desc()) - .limit(20) - .all() - ) - - telescope_spec_count = ( - db.query(Spectra.telescope, func.count(Spectra.telescope)) - .group_by(Spectra.telescope) - .order_by(func.count(Spectra.telescope).desc()) - .limit(20) - .all() - ) - - # logger.info(f'Spectra in the database: \n {spec_count} \n {spec_ref_count} \n {telescope_spec_count}') - - return