From 56a897924a2c6d0a0eb9c27c1fe70bb0b50ee0b0 Mon Sep 17 00:00:00 2001 From: Ben Young Date: Mon, 2 Dec 2024 14:05:57 -0500 Subject: [PATCH 01/10] add back develop tags --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index a576b047..113f137c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/USEPA/esupy.git#egg=esupy +git+https://github.com/USEPA/esupy.git@develop#egg=esupy pandas>=1.3 # Powerful data structures for data analysis, time series, and statistics. numpy>=1.20.1 # NumPy is the fundamental package for array computing with Python requests>=2.20 # Python HTTP for Humans; used for webservice calls diff --git a/setup.py b/setup.py index afdb46a3..2c50924b 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ include_package_data=True, python_requires=">=3.8", install_requires=[ - 'esupy @ git+https://github.com/USEPA/esupy.git#egg=esupy', + 'esupy @ git+https://github.com/USEPA/esupy.git@develop#egg=esupy', 'numpy>=1.20.1', 'pandas>=1.3', 'requests>=2.20', From 0e3c422ca86c454f1cc86fb9a901e2adde2c3c73 Mon Sep 17 00:00:00 2001 From: Ben Young Date: Mon, 2 Dec 2024 14:11:46 -0500 Subject: [PATCH 02/10] drop testing of 3.8, add 3.12 and 3.13 --- .github/workflows/generate_all_inventories.yml | 2 +- .github/workflows/generate_combined_inventory.yml | 2 +- .github/workflows/generate_select_inventories.yml | 2 +- .github/workflows/python-package.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/generate_all_inventories.yml b/.github/workflows/generate_all_inventories.yml index 2d301d9d..6e86ecc9 100644 --- a/.github/workflows/generate_all_inventories.yml +++ b/.github/workflows/generate_all_inventories.yml @@ -21,7 +21,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v3 with: - python-version: "3.10" + python-version: "3.12" - name: Update pip & install testing pkgs run: | diff 
--git a/.github/workflows/generate_combined_inventory.yml b/.github/workflows/generate_combined_inventory.yml index 02f47bd2..a745ab21 100644 --- a/.github/workflows/generate_combined_inventory.yml +++ b/.github/workflows/generate_combined_inventory.yml @@ -19,7 +19,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v3 with: - python-version: "3.10" + python-version: "3.12" - name: Update pip & install testing pkgs run: | diff --git a/.github/workflows/generate_select_inventories.yml b/.github/workflows/generate_select_inventories.yml index 533f179b..4326df9f 100644 --- a/.github/workflows/generate_select_inventories.yml +++ b/.github/workflows/generate_select_inventories.yml @@ -25,7 +25,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v3 with: - python-version: "3.10" + python-version: "3.12" - name: Update pip & install testing pkgs run: | diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 966d05cc..414b1b0e 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -31,7 +31,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] - py-version: ['3.8', '3.9', '3.10', '3.11'] + py-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] steps: - uses: actions/checkout@v3 From 975abe375d4616d520d20f70df158e37a37c199a Mon Sep 17 00:00:00 2001 From: Ben Young Date: Mon, 2 Dec 2024 14:13:25 -0500 Subject: [PATCH 03/10] resolve future warnings, #162 #166 --- stewicombo/overlaphandler.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stewicombo/overlaphandler.py b/stewicombo/overlaphandler.py index ef504a21..664e24af 100644 --- a/stewicombo/overlaphandler.py +++ b/stewicombo/overlaphandler.py @@ -40,7 +40,7 @@ def remove_flow_overlap(df, flow_cpst, flows_cntb, cmpt='air', SCC=False): df_cf = (df.query('SRS_ID in @flows_cntb and ' '_CompartmentPrimary == @cmpt') .groupby(cols_agg, as_index=False) - .agg({'FlowAmount': 
sum}) + .agg({'FlowAmount': 'sum'}) .assign(SRS_ID=flow_cpst) .rename(columns={'FlowAmount': 'ContributingAmount'})) # then remove contributing flow totals from composite flow @@ -119,8 +119,8 @@ def aggregate_and_remove_overlap(df): # functions by column for intra-inventory aggregation funcs_agg = { 'FacilityID': '_'.join, # or `set` or `'unique'` to get unique set of vals - 'FlowAmount': sum, - 'DataReliability': sum, # sums FlowAmount-weighted elements + 'FlowAmount': 'sum', + 'DataReliability': 'sum', # sums FlowAmount-weighted elements 'FlowName': 'first', # get the first element in .agg } # cols to define unique flows WITHIN inventories; using more grouping cols, @@ -147,7 +147,7 @@ def aggregate_and_remove_overlap(df): # then drop cross-inventory dups by keeping entries w/ min _SourcePref df_dup['_SourcePrefMin'] = (df_dup.groupby(cols_inter)['_SourcePref'] - .transform(min)) + .transform('min')) df_dup = df_dup.query('_SourcePref == _SourcePrefMin') log.debug('Reincorporating rows with NaN FRS_ID or SRS_ID') From a367860ab66064c0150006095bac0db54556a658 Mon Sep 17 00:00:00 2001 From: Ben Young Date: Tue, 3 Dec 2024 16:04:26 -0500 Subject: [PATCH 04/10] bump upload-artifact --- .github/workflows/conda-secondary_cntxt.yml | 2 +- .github/workflows/generate_all_inventories.yml | 2 +- .github/workflows/generate_combined_inventory.yml | 2 +- .github/workflows/generate_select_inventories.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/conda-secondary_cntxt.yml b/.github/workflows/conda-secondary_cntxt.yml index 0603711d..8434e93e 100644 --- a/.github/workflows/conda-secondary_cntxt.yml +++ b/.github/workflows/conda-secondary_cntxt.yml @@ -50,7 +50,7 @@ jobs: python scripts/generate_select_inventories.py --years $YEAR --inventory $INVENTORY - name: Upload files - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: # Artifact name name: "${{ github.event.inputs.inventory }}" diff --git 
a/.github/workflows/generate_all_inventories.yml b/.github/workflows/generate_all_inventories.yml index 6e86ecc9..a09636cd 100644 --- a/.github/workflows/generate_all_inventories.yml +++ b/.github/workflows/generate_all_inventories.yml @@ -38,7 +38,7 @@ jobs: pytest -m inventory --log-level=DEBUG - name: Upload files - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: # Artifact name name: StEWI Inventory files diff --git a/.github/workflows/generate_combined_inventory.yml b/.github/workflows/generate_combined_inventory.yml index a745ab21..d5519978 100644 --- a/.github/workflows/generate_combined_inventory.yml +++ b/.github/workflows/generate_combined_inventory.yml @@ -37,7 +37,7 @@ jobs: - name: Upload files if: always() # Upload files even if some inventories fail - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: # Artifact name name: StEWI Combined inventory files diff --git a/.github/workflows/generate_select_inventories.yml b/.github/workflows/generate_select_inventories.yml index 4326df9f..aa91cf76 100644 --- a/.github/workflows/generate_select_inventories.yml +++ b/.github/workflows/generate_select_inventories.yml @@ -46,7 +46,7 @@ jobs: python scripts/generate_select_inventories.py --years $YEAR --inventory $INVENTORY - name: Upload files - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: # Artifact name name: "${{ github.event.inputs.inventory }}" From 8e1fb144618da470de5ddedceba1697081bb0242 Mon Sep 17 00:00:00 2001 From: Ben Young Date: Wed, 8 Jan 2025 07:41:56 -0500 Subject: [PATCH 05/10] add error handling for queries that return no data --- stewi/exceptions.py | 8 ++++++++ stewicombo/__init__.py | 4 ++++ stewicombo/overlaphandler.py | 3 +++ 3 files changed, 15 insertions(+) diff --git a/stewi/exceptions.py b/stewi/exceptions.py index ad77f06d..98993400 100644 --- a/stewi/exceptions.py +++ b/stewi/exceptions.py @@ -29,3 +29,11 @@ def __init__(self, message=None): message = 
("Source data not found, download before proceeding") self.message = message super().__init__(self.message) + + +class StewiQueryError(Exception): + def __init__(self, message=None): + if message is None: + message = ("No data found for passed query.") + self.message = message + super().__init__(self.message) diff --git a/stewicombo/__init__.py b/stewicombo/__init__.py index 1e4ddb10..37e43d32 100644 --- a/stewicombo/__init__.py +++ b/stewicombo/__init__.py @@ -11,6 +11,7 @@ filter_by_primary_compartment, addChemicalMatches, addBaseInventoryIDs, \ storeCombinedInventory, write_stewicombo_metadata, compile_metadata, \ getCombinedInventory, download_stewicombo_from_remote +from stewi.exceptions import StewiQueryError def combineFullInventories(inventory_dict, @@ -124,6 +125,9 @@ def combineInventoriesforFacilityList(base_inventory, inventory_acronyms = list(inventory_dict.keys()) facilitymatches = facilitymatcher.get_matches_for_id_list( base_inventory, facility_id_list, inventory_acronyms) + if len(facilitymatches) == 0: + raise StewiQueryError( + message='No facility matches found for facility_id_list') inventories = getInventoriesforFacilityMatches(inventory_dict, facilitymatches, filter_for_LCI, diff --git a/stewicombo/overlaphandler.py b/stewicombo/overlaphandler.py index 664e24af..5e28e490 100644 --- a/stewicombo/overlaphandler.py +++ b/stewicombo/overlaphandler.py @@ -5,6 +5,7 @@ import pandas as pd from stewi.globals import log +from stewi.exceptions import StewiQueryError path_module = Path(__file__).parent @@ -86,6 +87,8 @@ def aggregate_and_remove_overlap(df): by facility and compartment :param df: pd.DataFrame, inventory df incl. 
chemical & facility matches """ + if len(df) == 0: + raise StewiQueryError(message='No data found to combine') log.info('removing overlap between inventories') ## TODO: implement args for different duplicate handling schemes # see commented-out code in commit f2fc7c2 (or earlier, uncommented) From caf0092c99ee44876dc7b9dfe72cf8e39016e9a1 Mon Sep 17 00:00:00 2001 From: dt-woods Date: Tue, 21 Jan 2025 13:51:32 -0500 Subject: [PATCH 06/10] New getAllInventoriesandYears method; addresses #150 Create new global dictionary, STEWI_DATA_VINTAGES, which needs to be manually updated when new standardized inventories become available. Create new global method, linear_search, for reverse searching a list and pulling the index of the value up to but not greater than a given value. Create new init method, getAllInventoriesandYears, to companion with getAvailableInventoriesandYears; the former returns the STEWI_DATA_VINTAGES dictionary if no year is given; otherwise, uses the linear search to get the most appropriate inventory years for a given year. --- stewi/__init__.py | 34 +++++++++++++++++++++++++++++++++- stewi/globals.py | 39 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/stewi/__init__.py b/stewi/__init__.py index 405fee60..dac88350 100644 --- a/stewi/__init__.py +++ b/stewi/__init__.py @@ -3,7 +3,7 @@ # coding=utf-8 """ Public API for stewi. Functions to return inventory data for a single -inventory in standard formats +inventory in standard formats. """ @@ -11,13 +11,45 @@ from stewi.globals import log, add_missing_fields,\ WRITE_FORMAT, read_inventory, paths,\ set_stewi_meta, aggregate +from stewi.globals import STEWI_DATA_VINTAGES +from stewi.globals import linear_search from stewi.filter import apply_filters_to_inventory, filter_config from stewi.formats import StewiFormat, ensure_format +def getAllInventoriesandYears(year=None): + """Return inventory year(s) of interest. 
+ + If no year is given, the full list of years is returned for each inventory; + otherwise, only the most recent year is provided for each inventory (i.e., + with vintage the same or older than the given year). + + :param year: An integer year of interest, defaults to None + :type year: int, optional + :return: A dictionary of inventory names (key) and list or int of year(s) + :rtype: dict + """ + r_dict = {} + if year is None: + # Don't give user access to global var: copy it! + for k,v in STEWI_DATA_VINTAGES.items(): + r_dict[k] = v + else: + for key in STEWI_DATA_VINTAGES.keys(): + avail_years = STEWI_DATA_VINTAGES[key] + y_idx = linear_search(avail_years, year) + if y_idx != -1: + r_dict[key] = STEWI_DATA_VINTAGES[key][y_idx] + return r_dict + + def getAvailableInventoriesandYears(stewiformat='flowbyfacility'): """Get available inventories and years for a given output format. + Note these inventories and years are based on the data downloaded by + the user. For the whole list of available inventories, see + :func:`getAllInventoriesandYears`. + :param stewiformat: str e.g. 'flowbyfacility' :return: existing_inventories dictionary of inventories like: {NEI: [2014], diff --git a/stewi/globals.py b/stewi/globals.py index 9dc8c337..fae652f7 100644 --- a/stewi/globals.py +++ b/stewi/globals.py @@ -2,7 +2,7 @@ # !/usr/bin/env python3 # coding=utf-8 """ -Supporting variables and functions used in stewi +Supporting variables and functions used in stewi.
""" import json @@ -70,6 +70,16 @@ "GHGRP": "air", "DMR": "water"} +STEWI_DATA_VINTAGES = { + 'DMR': [x for x in range(2011, 2023, 1)], + 'GHGRP': [x for x in range(2011, 2023, 1)], + 'eGRID': [2014, 2016, 2018, 2019, 2020, 2021], + 'NEI': [2011, 2014, 2017, 2020], + 'RCRAInfo': [x for x in range(2011, 2023, 2)], + 'TRI': [x for x in range(2011, 2023, 1)], +} +'''A dictionary of StEWI inventories and their available vintages.''' + def set_stewi_meta(file_name, stewiformat=''): """Create a class of esupy FileMeta with stewiformat assigned as category.""" @@ -114,6 +124,33 @@ def aggregate(df, grouping_vars=None): return df_agg +def linear_search(lst, target): + """Backwards search a list for index less than or equal to a given value. + + :param lst: (list) A list of numerically sorted data (lowest to highest). + :param target : (int, float) A target value (e.g., year). + :return: (int) + The index of the search list associated with the value equal to or + less than the target, else -1 for a target out-of-range (i.e., smaller than the smallest entry in the list). + + :Example: + + >>> NEI_YEARS = [2011, 2014, 2017, 2020] + >>> linear_search(NEI_YEARS, 2020) + 3 + >>> linear_search(NEI_YEARS, 2019) + 2 + >>> linear_search(NEI_YEARS, 2018) + 2 + >>> linear_search(NEI_YEARS, 2010) + -1 + """ + for i in range(len(lst) - 1, -1, -1): + if lst[i] <= target: + return i + return -1 + + def unit_convert(df, coln1, coln2, unit, conversion_factor, coln3): """Convert values in coln3 if coln2 == unit, based on the conversion factor, and assigns to coln1. 
From d04ec673a356040a80d744a328455b5f45c66a47 Mon Sep 17 00:00:00 2001 From: Ben Young Date: Tue, 21 Jan 2025 14:58:26 -0500 Subject: [PATCH 07/10] drop extraneous requirements, resolves #169 --- requirements.txt | 2 -- setup.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 113f137c..eb1cb3e9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,5 @@ git+https://github.com/USEPA/esupy.git@develop#egg=esupy pandas>=1.3 # Powerful data structures for data analysis, time series, and statistics. numpy>=1.20.1 # NumPy is the fundamental package for array computing with Python requests>=2.20 # Python HTTP for Humans; used for webservice calls -beautifulsoup4>=4.9.3 PyYAML>=5.1 openpyxl>=3.0.7 -xlrd>=2.0.0 diff --git a/setup.py b/setup.py index 2c50924b..78b637a2 100644 --- a/setup.py +++ b/setup.py @@ -19,10 +19,8 @@ 'numpy>=1.20.1', 'pandas>=1.3', 'requests>=2.20', - 'beautifulsoup4>=4.9.3', 'PyYAML>=5.1', 'openpyxl>=3.0.7', - 'xlrd>=2.0.0', ], classifiers=[ "Development Status :: 5 - Production/Stable", From 72742360a7019c9a2e55872d2c79de783da74dcf Mon Sep 17 00:00:00 2001 From: Ben Young Date: Tue, 21 Jan 2025 15:12:11 -0500 Subject: [PATCH 08/10] partial reversion of d04ec67 --- requirements.txt | 1 + setup.py | 1 + 2 files changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index eb1cb3e9..c288084f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ numpy>=1.20.1 # NumPy is the fundamental package for array comp requests>=2.20 # Python HTTP for Humans; used for webservice calls PyYAML>=5.1 openpyxl>=3.0.7 +xlrd>=2.0.0 diff --git a/setup.py b/setup.py index 78b637a2..63fd950d 100644 --- a/setup.py +++ b/setup.py @@ -21,6 +21,7 @@ 'requests>=2.20', 'PyYAML>=5.1', 'openpyxl>=3.0.7', + 'xlrd>=2.0.0', ], classifiers=[ "Development Status :: 5 - Production/Stable", From 0927f9745c36cbe90eee0b36e5307638b2dd6a20 Mon Sep 17 00:00:00 2001 From: Ben Young Date: Wed, 5 Feb 2025 
16:39:08 -0500 Subject: [PATCH 09/10] limit `STEWI_DATA_VINTAGES` to those officially supported --- stewi/globals.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stewi/globals.py b/stewi/globals.py index fae652f7..5eec7d1b 100644 --- a/stewi/globals.py +++ b/stewi/globals.py @@ -71,12 +71,12 @@ "DMR": "water"} STEWI_DATA_VINTAGES = { - 'DMR': [x for x in range(2011, 2023, 1)], - 'GHGRP': [x for x in range(2011, 2023, 1)], + 'DMR': [x for x in range(2011, 2021, 1)], + 'GHGRP': [x for x in range(2011, 2021, 1)], 'eGRID': [2014, 2016, 2018, 2019, 2020, 2021], 'NEI': [2011, 2014, 2017, 2020], - 'RCRAInfo': [x for x in range(2011, 2023, 2)], - 'TRI': [x for x in range(2011, 2023, 1)], + 'RCRAInfo': [x for x in range(2011, 2021, 2)], + 'TRI': [x for x in range(2011, 2021, 1)], } '''A dictionary of StEWI inventories and their available vintages.''' From 1c1df3860a8495db05ad73c5feb7fc246977caa4 Mon Sep 17 00:00:00 2001 From: Ben Young Date: Wed, 5 Feb 2025 16:42:23 -0500 Subject: [PATCH 10/10] :bookmark: bump to v1.1.4 --- requirements.txt | 2 +- setup.py | 4 ++-- stewi/globals.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index c288084f..0fdc174c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/USEPA/esupy.git@develop#egg=esupy +git+https://github.com/USEPA/esupy.git#egg=esupy pandas>=1.3 # Powerful data structures for data analysis, time series, and statistics. 
numpy>=1.20.1 # NumPy is the fundamental package for array computing with Python requests>=2.20 # Python HTTP for Humans; used for webservice calls diff --git a/setup.py b/setup.py index 63fd950d..c8c2a18d 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name="StEWI", - version="1.1.3", + version="1.1.4", author="Ben Young, Wesley Ingwersen, Matthew Bergmann, Jose Hernandez-Betancur, Tapajyoti Ghosh, Eric Bell", author_email="ingwersen.wesley@epa.gov", description="Standardized Emission And Waste Inventories (StEWI)" @@ -15,7 +15,7 @@ include_package_data=True, python_requires=">=3.8", install_requires=[ - 'esupy @ git+https://github.com/USEPA/esupy.git@develop#egg=esupy', + 'esupy @ git+https://github.com/USEPA/esupy.git#egg=esupy', 'numpy>=1.20.1', 'pandas>=1.3', 'requests>=2.20', diff --git a/stewi/globals.py b/stewi/globals.py index 5eec7d1b..0b5475cf 100644 --- a/stewi/globals.py +++ b/stewi/globals.py @@ -30,7 +30,7 @@ DATA_PATH = MODULEPATH / 'data' log.basicConfig(level=log.INFO, format='%(levelname)s %(message)s') -STEWI_VERSION = '1.1.3' +STEWI_VERSION = '1.1.4' # Conversion factors USton_kg = 907.18474