Skip to content

Commit

Permalink
Merge pull request #173 from USEPA/release_v1.1.4
Browse files Browse the repository at this point in the history
Release v1.1.4
  • Loading branch information
bl-young authored Feb 6, 2025
2 parents 6710a0f + 1c1df38 commit 46dc046
Show file tree
Hide file tree
Showing 12 changed files with 100 additions and 18 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/conda-secondary_cntxt.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
python scripts/generate_select_inventories.py --years $YEAR --inventory $INVENTORY
- name: Upload files
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
# Artifact name
name: "${{ github.event.inputs.inventory }}"
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/generate_all_inventories.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: "3.10"
python-version: "3.12"

- name: Update pip & install testing pkgs
run: |
Expand All @@ -38,7 +38,7 @@ jobs:
pytest -m inventory --log-level=DEBUG
- name: Upload files
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
# Artifact name
name: StEWI Inventory files
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/generate_combined_inventory.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: "3.10"
python-version: "3.12"

- name: Update pip & install testing pkgs
run: |
Expand All @@ -37,7 +37,7 @@ jobs:
- name: Upload files
if: always() # Upload files even if some inventories fail
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
# Artifact name
name: StEWI Combined inventory files
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/generate_select_inventories.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: "3.10"
python-version: "3.12"

- name: Update pip & install testing pkgs
run: |
Expand All @@ -46,7 +46,7 @@ jobs:
python scripts/generate_select_inventories.py --years $YEAR --inventory $INVENTORY
- name: Upload files
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
# Artifact name
name: "${{ github.event.inputs.inventory }}"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
py-version: ['3.8', '3.9', '3.10', '3.11']
py-version: ['3.9', '3.10', '3.11', '3.12', '3.13']

steps:
- uses: actions/checkout@v3
Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ git+https://github.com/USEPA/esupy.git#egg=esupy
pandas>=1.3 # Powerful data structures for data analysis, time series, and statistics.
numpy>=1.20.1 # NumPy is the fundamental package for array computing with Python
requests>=2.20 # Python HTTP for Humans; used for webservice calls
beautifulsoup4>=4.9.3
PyYAML>=5.1
openpyxl>=3.0.7
xlrd>=2.0.0
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name="StEWI",
version="1.1.3",
version="1.1.4",
author="Ben Young, Wesley Ingwersen, Matthew Bergmann, Jose Hernandez-Betancur, Tapajyoti Ghosh, Eric Bell",
author_email="[email protected]",
description="Standardized Emission And Waste Inventories (StEWI)"
Expand All @@ -19,7 +19,6 @@
'numpy>=1.20.1',
'pandas>=1.3',
'requests>=2.20',
'beautifulsoup4>=4.9.3',
'PyYAML>=5.1',
'openpyxl>=3.0.7',
'xlrd>=2.0.0',
Expand Down
34 changes: 33 additions & 1 deletion stewi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,53 @@
# coding=utf-8
"""
Public API for stewi. Functions to return inventory data for a single
inventory in standard formats
inventory in standard formats.
"""


from esupy.processed_data_mgmt import read_source_metadata
from stewi.globals import log, add_missing_fields,\
WRITE_FORMAT, read_inventory, paths,\
set_stewi_meta, aggregate
from stewi.globals import STEWI_DATA_VINTAGES
from stewi.globals import linear_search
from stewi.filter import apply_filters_to_inventory, filter_config
from stewi.formats import StewiFormat, ensure_format


def getAllInventoriesandYears(year=None):
    """Return the inventory vintages known to StEWI.

    If no year is given, every inventory is mapped to the full list of its
    available years. If a year is given, every inventory is mapped to a
    single year: the most recent available vintage that is the same as or
    older than the given year. Inventories whose earliest vintage is newer
    than the given year are left out of the result.

    :param year: An integer year of interest, defaults to None
    :type year: int, optional
    :return: A dictionary of inventory names (key) and list or int of year(s)
    :rtype: dict
    """
    if year is None:
        # Copy the lists as well as the mapping: handing back the original
        # list objects would let a caller mutate STEWI_DATA_VINTAGES.
        return {k: list(v) for k, v in STEWI_DATA_VINTAGES.items()}

    r_dict = {}
    for key, avail_years in STEWI_DATA_VINTAGES.items():
        y_idx = linear_search(avail_years, year)
        # -1 means the requested year precedes the earliest vintage.
        if y_idx != -1:
            r_dict[key] = avail_years[y_idx]
    return r_dict


def getAvailableInventoriesandYears(stewiformat='flowbyfacility'):
"""Get available inventories and years for a given output format.
Note these inventories and years are based on the data downloaded by
the user. For the whole list of available inventories, see
:func:`getAllInventoriesandYears`.
:param stewiformat: str e.g. 'flowbyfacility'
:return: existing_inventories dictionary of inventories like:
{NEI: [2014],
Expand Down
8 changes: 8 additions & 0 deletions stewi/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,11 @@ def __init__(self, message=None):
message = ("Source data not found, download before proceeding")
self.message = message
super().__init__(self.message)


class StewiQueryError(Exception):
    """Exception carrying a ``message`` for a query that found no data.

    :param message: str, text of the error; a generic default is used
        when omitted or None
    """

    def __init__(self, message=None):
        self.message = ("No data found for passed query."
                        if message is None else message)
        super().__init__(self.message)
41 changes: 39 additions & 2 deletions stewi/globals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# !/usr/bin/env python3
# coding=utf-8
"""
Supporting variables and functions used in stewi
Supporting variables and functions used in stewi.
"""

import json
Expand Down Expand Up @@ -30,7 +30,7 @@
DATA_PATH = MODULEPATH / 'data'

log.basicConfig(level=log.INFO, format='%(levelname)s %(message)s')
STEWI_VERSION = '1.1.3'
STEWI_VERSION = '1.1.4'

# Conversion factors
USton_kg = 907.18474
Expand Down Expand Up @@ -70,6 +70,16 @@
"GHGRP": "air",
"DMR": "water"}

# Year lists are kept in ascending order.
STEWI_DATA_VINTAGES = {
    'DMR': list(range(2011, 2021)),
    'GHGRP': list(range(2011, 2021)),
    'eGRID': [2014, 2016, 2018, 2019, 2020, 2021],
    'NEI': [2011, 2014, 2017, 2020],
    'RCRAInfo': list(range(2011, 2021, 2)),  # odd years only
    'TRI': list(range(2011, 2021)),
}
'''A dictionary of StEWI inventories and their available vintages.'''


def set_stewi_meta(file_name, stewiformat=''):
"""Create a class of esupy FileMeta with stewiformat assigned as category."""
Expand Down Expand Up @@ -114,6 +124,33 @@ def aggregate(df, grouping_vars=None):
return df_agg


def linear_search(lst, target):
    """Find the last list position whose value does not exceed a target.

    The list is scanned from its end toward its start and the first
    position whose value is less than or equal to the target is returned.

    :param lst: (list) A list of numerically sorted data (lowest to highest).
    :param target: (int, float) A target value (e.g., year).
    :return: (int)
        The index of the search list associated with the value equal to or
        less than the target, else -1 for a target out-of-range (i.e.,
        smaller than the smallest entry in the list).

    :Example:

    >>> NEI_YEARS = [2011, 2014, 2017, 2020]
    >>> linear_search(NEI_YEARS, 2020)
    3
    >>> linear_search(NEI_YEARS, 2019)
    2
    >>> linear_search(NEI_YEARS, 2018)
    2
    >>> linear_search(NEI_YEARS, 2010)
    -1
    """
    pos = len(lst) - 1
    while pos >= 0:
        if lst[pos] <= target:
            return pos
        pos -= 1
    # Every entry is larger than the target (or the list is empty).
    return -1


def unit_convert(df, coln1, coln2, unit, conversion_factor, coln3):
"""Convert values in coln3 if coln2 == unit, based on the conversion
factor, and assigns to coln1.
Expand Down
4 changes: 4 additions & 0 deletions stewicombo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
filter_by_primary_compartment, addChemicalMatches, addBaseInventoryIDs, \
storeCombinedInventory, write_stewicombo_metadata, compile_metadata, \
getCombinedInventory, download_stewicombo_from_remote
from stewi.exceptions import StewiQueryError


def combineFullInventories(inventory_dict,
Expand Down Expand Up @@ -124,6 +125,9 @@ def combineInventoriesforFacilityList(base_inventory,
inventory_acronyms = list(inventory_dict.keys())
facilitymatches = facilitymatcher.get_matches_for_id_list(
base_inventory, facility_id_list, inventory_acronyms)
if len(facilitymatches) == 0:
raise StewiQueryError(
message='No facility matches found for facility_id_list')
inventories = getInventoriesforFacilityMatches(inventory_dict,
facilitymatches,
filter_for_LCI,
Expand Down
11 changes: 7 additions & 4 deletions stewicombo/overlaphandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pandas as pd

from stewi.globals import log
from stewi.exceptions import StewiQueryError


path_module = Path(__file__).parent
Expand Down Expand Up @@ -40,7 +41,7 @@ def remove_flow_overlap(df, flow_cpst, flows_cntb, cmpt='air', SCC=False):
df_cf = (df.query('SRS_ID in @flows_cntb and '
'_CompartmentPrimary == @cmpt')
.groupby(cols_agg, as_index=False)
.agg({'FlowAmount': sum})
.agg({'FlowAmount': 'sum'})
.assign(SRS_ID=flow_cpst)
.rename(columns={'FlowAmount': 'ContributingAmount'}))
# then remove contributing flow totals from composite flow
Expand Down Expand Up @@ -86,6 +87,8 @@ def aggregate_and_remove_overlap(df):
by facility and compartment
:param df: pd.DataFrame, inventory df incl. chemical & facility matches
"""
if len(df) == 0:
raise StewiQueryError(message='No data found to combine')
log.info('removing overlap between inventories')
## TODO: implement args for different duplicate handling schemes
# see commented-out code in commit f2fc7c2 (or earlier, uncommented)
Expand Down Expand Up @@ -119,8 +122,8 @@ def aggregate_and_remove_overlap(df):
# functions by column for intra-inventory aggregation
funcs_agg = {
'FacilityID': '_'.join, # or `set` or `'unique'` to get unique set of vals
'FlowAmount': sum,
'DataReliability': sum, # sums FlowAmount-weighted elements
'FlowAmount': 'sum',
'DataReliability': 'sum', # sums FlowAmount-weighted elements
'FlowName': 'first', # get the first element in .agg
}
# cols to define unique flows WITHIN inventories; using more grouping cols,
Expand All @@ -147,7 +150,7 @@ def aggregate_and_remove_overlap(df):

# then drop cross-inventory dups by keeping entries w/ min _SourcePref
df_dup['_SourcePrefMin'] = (df_dup.groupby(cols_inter)['_SourcePref']
.transform(min))
.transform('min'))
df_dup = df_dup.query('_SourcePref == _SourcePrefMin')

log.debug('Reincorporating rows with NaN FRS_ID or SRS_ID')
Expand Down

0 comments on commit 46dc046

Please sign in to comment.