Merge pull request #173 from USEPA/release_v1.1.4

Release v1.1.4
USEPA · Feb 6, 2025 · 46dc046 · 46dc046
2 parents 6710a0f + 1c1df38
commit 46dc046
Show file tree

Hide file tree

Showing 12 changed files with 100 additions and 18 deletions.
diff --git a/.github/workflows/conda-secondary_cntxt.yml b/.github/workflows/conda-secondary_cntxt.yml
@@ -50,7 +50,7 @@ jobs:
         python scripts/generate_select_inventories.py --years $YEAR --inventory $INVENTORY
 
     - name: Upload files
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       with:
         # Artifact name
         name: "${{ github.event.inputs.inventory }}"

diff --git a/.github/workflows/generate_all_inventories.yml b/.github/workflows/generate_all_inventories.yml
@@ -21,7 +21,7 @@ jobs:
     - name: Set up Python
       uses: actions/setup-python@v3
       with:
-        python-version: "3.10"
+        python-version: "3.12"
 
     - name: Update pip & install testing pkgs
       run: |
@@ -38,7 +38,7 @@ jobs:
         pytest -m inventory --log-level=DEBUG
 
     - name: Upload files
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       with:
         # Artifact name
         name: StEWI Inventory files

diff --git a/.github/workflows/generate_combined_inventory.yml b/.github/workflows/generate_combined_inventory.yml
@@ -19,7 +19,7 @@ jobs:
     - name: Set up Python
       uses: actions/setup-python@v3
       with:
-        python-version: "3.10"
+        python-version: "3.12"
 
     - name: Update pip & install testing pkgs
       run: |
@@ -37,7 +37,7 @@ jobs:
 
     - name: Upload files
       if: always() # Upload files even if some inventories fail
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       with:
         # Artifact name
         name: StEWI Combined inventory files

diff --git a/.github/workflows/generate_select_inventories.yml b/.github/workflows/generate_select_inventories.yml
@@ -25,7 +25,7 @@ jobs:
     - name: Set up Python
       uses: actions/setup-python@v3
       with:
-        python-version: "3.10"
+        python-version: "3.12"
 
     - name: Update pip & install testing pkgs
       run: |
@@ -46,7 +46,7 @@ jobs:
         python scripts/generate_select_inventories.py --years $YEAR --inventory $INVENTORY
 
     - name: Upload files
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       with:
         # Artifact name
         name: "${{ github.event.inputs.inventory }}"

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -31,7 +31,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest, macos-latest]
-        py-version: ['3.8', '3.9', '3.10', '3.11']
+        py-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
 
     steps:
     - uses: actions/checkout@v3

diff --git a/requirements.txt b/requirements.txt
@@ -2,7 +2,6 @@ git+https://github.com/USEPA/esupy.git#egg=esupy
 pandas>=1.3                    # Powerful data structures for data analysis, time series, and statistics.
 numpy>=1.20.1                  # NumPy is the fundamental package for array computing with Python
 requests>=2.20                 # Python HTTP for Humans; used for webservice calls
-beautifulsoup4>=4.9.3
 PyYAML>=5.1
 openpyxl>=3.0.7
 xlrd>=2.0.0
diff --git a/setup.py b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name="StEWI",
-    version="1.1.3",
+    version="1.1.4",
     author="Ben Young, Wesley Ingwersen, Matthew Bergmann, Jose Hernandez-Betancur, Tapajyoti Ghosh, Eric Bell",
     author_email="[email protected]",
     description="Standardized Emission And Waste Inventories (StEWI)"
@@ -19,7 +19,6 @@
         'numpy>=1.20.1',
         'pandas>=1.3',
         'requests>=2.20',
-        'beautifulsoup4>=4.9.3',
         'PyYAML>=5.1',
         'openpyxl>=3.0.7',
         'xlrd>=2.0.0',

diff --git a/stewi/__init__.py b/stewi/__init__.py
@@ -3,21 +3,53 @@
 # coding=utf-8
 """
 Public API for stewi. Functions to return inventory data for a single
-inventory in standard formats
+inventory in standard formats.
 """
 
 
 from esupy.processed_data_mgmt import read_source_metadata
 from stewi.globals import log, add_missing_fields,\
     WRITE_FORMAT, read_inventory, paths,\
     set_stewi_meta, aggregate
+from stewi.globals import STEWI_DATA_VINTAGES
+from stewi.globals import linear_search
 from stewi.filter import apply_filters_to_inventory, filter_config
 from stewi.formats import StewiFormat, ensure_format
 
 
+def getAllInventoriesandYears(year=None):
+    """Return the inventory vintages known to StEWI.
+
+    If no year is given, every inventory is returned with its full list of
+    years. Otherwise each inventory is returned with the single most recent
+    vintage that is the same as or older than ``year``; inventories whose
+    earliest vintage is newer than ``year`` are omitted.
+
+    :param year: An integer year of interest, defaults to None
+    :type year: int, optional
+    :return: A dictionary of inventory names (key) and list or int of year(s)
+    :rtype: dict
+    """
+    if year is None:
+        # Copy each list as well as the dict: handing back the global's own
+        # lists would let callers mutate STEWI_DATA_VINTAGES in place.
+        return {inv: list(years) for inv, years in STEWI_DATA_VINTAGES.items()}
+    r_dict = {}
+    for inv, avail_years in STEWI_DATA_VINTAGES.items():
+        # linear_search returns -1 when every vintage is newer than year
+        y_idx = linear_search(avail_years, year)
+        if y_idx != -1:
+            r_dict[inv] = avail_years[y_idx]
+    return r_dict
+
+
 def getAvailableInventoriesandYears(stewiformat='flowbyfacility'):
     """Get available inventories and years for a given output format.
 
+    Note these inventories and years are based on the data downloaded by
+    the user. For the whole list of available inventories, see
+    :func:`getAllInventoriesandYears`.
+
     :param stewiformat: str e.g. 'flowbyfacility'
     :return: existing_inventories dictionary of inventories like:
         {NEI: [2014],

diff --git a/stewi/exceptions.py b/stewi/exceptions.py
@@ -29,3 +29,11 @@ def __init__(self, message=None):
             message = ("Source data not found, download before proceeding")
         self.message = message
         super().__init__(self.message)
+
+
+class StewiQueryError(Exception):
+    """Raised when a query returns no data."""
+    def __init__(self, message=None):
+        self.message = ("No data found for passed query."
+                        if message is None else message)
+        super().__init__(self.message)
diff --git a/stewi/globals.py b/stewi/globals.py
@@ -2,7 +2,7 @@
 # !/usr/bin/env python3
 # coding=utf-8
 """
-Supporting variables and functions used in stewi
+Supporting variables and functions used in stewi.
 """
 
 import json
@@ -30,7 +30,7 @@
 DATA_PATH = MODULEPATH / 'data'
 
 log.basicConfig(level=log.INFO, format='%(levelname)s %(message)s')
-STEWI_VERSION = '1.1.3'
+STEWI_VERSION = '1.1.4'
 
 # Conversion factors
 USton_kg = 907.18474
@@ -70,6 +70,16 @@
                                  "GHGRP": "air",
                                  "DMR": "water"}
 
+STEWI_DATA_VINTAGES = {
+    'DMR': list(range(2011, 2021)),
+    'GHGRP': list(range(2011, 2021)),
+    'eGRID': [2014, 2016, 2018, 2019, 2020, 2021],
+    'NEI': [2011, 2014, 2017, 2020],
+    'RCRAInfo': list(range(2011, 2021, 2)),  # odd years, 2011 through 2019
+    'TRI': list(range(2011, 2021)),
+}
+"""Available data vintages (years, ascending) for each StEWI inventory."""
+
 
 def set_stewi_meta(file_name, stewiformat=''):
     """Create a class of esupy FileMeta with stewiformat assigned as category."""
@@ -114,6 +124,33 @@ def aggregate(df, grouping_vars=None):
     return df_agg
 
 
+def linear_search(lst, target):
+    """Find the index of the last list entry that does not exceed a target.
+
+    :param lst: (list) A list of numerically sorted data (lowest to highest).
+    :param target: (int, float) A target value (e.g., year).
+    :return: (int)
+        The index of the right-most entry less than or equal to the target,
+        or -1 if no entry qualifies (target below the smallest entry).
+
+    :Example:
+
+    >>> NEI_YEARS = [2011, 2014, 2017, 2020]
+    >>> linear_search(NEI_YEARS, 2020)
+    3
+    >>> linear_search(NEI_YEARS, 2019)
+    2
+    >>> linear_search(NEI_YEARS, 2018)
+    2
+    >>> linear_search(NEI_YEARS, 2010)
+    -1
+    """
+    # Scan from the end so the first hit is the right-most qualifying entry.
+    backwards = reversed(range(len(lst)))
+    hits = (idx for idx in backwards if lst[idx] <= target)
+    return next(hits, -1)
+
+
 def unit_convert(df, coln1, coln2, unit, conversion_factor, coln3):
     """Convert values in coln3 if coln2 == unit, based on the conversion
     factor, and assigns to coln1.

diff --git a/stewicombo/__init__.py b/stewicombo/__init__.py
@@ -11,6 +11,7 @@
     filter_by_primary_compartment, addChemicalMatches, addBaseInventoryIDs, \
     storeCombinedInventory, write_stewicombo_metadata, compile_metadata, \
     getCombinedInventory, download_stewicombo_from_remote
+from stewi.exceptions import StewiQueryError
 
 
 def combineFullInventories(inventory_dict,
@@ -124,6 +125,9 @@ def combineInventoriesforFacilityList(base_inventory,
     inventory_acronyms = list(inventory_dict.keys())
     facilitymatches = facilitymatcher.get_matches_for_id_list(
         base_inventory, facility_id_list, inventory_acronyms)
+    if len(facilitymatches) == 0:
+        raise StewiQueryError(
+            message='No facility matches found for facility_id_list')
     inventories = getInventoriesforFacilityMatches(inventory_dict,
                                                    facilitymatches,
                                                    filter_for_LCI,

diff --git a/stewicombo/overlaphandler.py b/stewicombo/overlaphandler.py
@@ -5,6 +5,7 @@
 import pandas as pd
 
 from stewi.globals import log
+from stewi.exceptions import StewiQueryError
 
 
 path_module = Path(__file__).parent
@@ -40,7 +41,7 @@ def remove_flow_overlap(df, flow_cpst, flows_cntb, cmpt='air', SCC=False):
     df_cf = (df.query('SRS_ID in @flows_cntb and '
                       '_CompartmentPrimary == @cmpt')
                .groupby(cols_agg, as_index=False)
-               .agg({'FlowAmount': sum})
+               .agg({'FlowAmount': 'sum'})
                .assign(SRS_ID=flow_cpst)
                .rename(columns={'FlowAmount': 'ContributingAmount'}))
     # then remove contributing flow totals from composite flow
@@ -86,6 +87,8 @@ def aggregate_and_remove_overlap(df):
     by facility and compartment
     :param df: pd.DataFrame, inventory df incl. chemical & facility matches
     """
+    if len(df) == 0:
+        raise StewiQueryError(message='No data found to combine')
     log.info('removing overlap between inventories')
     ## TODO: implement args for different duplicate handling schemes
         # see commented-out code in commit f2fc7c2 (or earlier, uncommented)
@@ -119,8 +122,8 @@ def aggregate_and_remove_overlap(df):
     # functions by column for intra-inventory aggregation
     funcs_agg = {
         'FacilityID':       '_'.join, # or `set` or `'unique'` to get unique set of vals
-        'FlowAmount':       sum,
-        'DataReliability':  sum,  # sums FlowAmount-weighted elements
+        'FlowAmount':       'sum',
+        'DataReliability':  'sum',  # sums FlowAmount-weighted elements
         'FlowName':         'first', # get the first element in .agg
         }
     # cols to define unique flows WITHIN inventories; using more grouping cols,
@@ -147,7 +150,7 @@ def aggregate_and_remove_overlap(df):
 
     # then drop cross-inventory dups by keeping entries w/ min _SourcePref
     df_dup['_SourcePrefMin'] = (df_dup.groupby(cols_inter)['_SourcePref']
-                                      .transform(min))
+                                      .transform('min'))
     df_dup = df_dup.query('_SourcePref == _SourcePrefMin')
 
     log.debug('Reincorporating rows with NaN FRS_ID or SRS_ID')