Skip to content

Commit

Permalink
Merge pull request #177 from USEPA/nei_update
Browse files Browse the repository at this point in the history
Adds 2020 and 2021 NEI; adds validation datasets for interim years
  • Loading branch information
bl-young authored Feb 14, 2025
2 parents 9c64f93 + 915d2dc commit d47ef96
Show file tree
Hide file tree
Showing 15 changed files with 2,640 additions and 635 deletions.
22 changes: 12 additions & 10 deletions stewi/NEI.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,9 @@ def generate_national_totals(year):

# generate url based on data year
build_url = _config['national_url']
file = _config['national_version'][year]
if year in _config['national_version']:
build_url = _config['national_version'][year]
url = build_url.replace('__year__', year)
url = url.replace('__file__', file)

r = make_url_request(url, verify=False)

Expand Down Expand Up @@ -169,7 +169,7 @@ def generate_national_totals(year):

# Update validationSets_Sources.csv
validation_dict = {'Inventory': 'NEI',
'Version': file,
'Version': '',
'Year': year,
'Name': 'NEI Data',
'URL': url,
Expand Down Expand Up @@ -289,17 +289,19 @@ def main(**kwargs):

generate_metadata(year, parameters)

if year in ['2011', '2014', '2017', '2020']:
validate_national_totals(nei_flowbyfacility, year)
if int(year) >= 2022:
log.info(f'national totals do not exist for year {year}. '
'No validation available.')
else:
log.info('no validation performed')
validate_national_totals(nei_flowbyfacility, year)

elif kwargs['Option'] == 'B':
if year in ['2011', '2014', '2017', '2020']:
generate_national_totals(year)
if int(year) >= 2022:
log.info(f'national totals do not exist for year {year}. '
'No validation available.')
else:
log.info(f'national totals do not exist for year {year}')
generate_national_totals(year)


if __name__ == '__main__':
main(Year=[2019, 2020], Option='A')
main(Year=range(2011, 2023), Option='B')
23 changes: 18 additions & 5 deletions stewi/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,25 @@ databases:
- 'sppd_rtr_24507.parquet'
- 'sppd_rtr_24592.parquet'
file_version: 'v1'
national_url: 'https://gaftp.epa.gov/air/nei/__year__/data_summaries/__file__.zip'
'2021':
file_name:
- 'sppd_rtr_31440.parquet'
- 'sppd_rtr_31463.parquet'
- 'sppd_rtr_31510.parquet'
- 'sppd_rtr_31511.parquet'
file_version: 'v2'
'2022':
file_name:
- 'sppd_rtr_31281.parquet'
- 'sppd_rtr_31284.parquet'
- 'sppd_rtr_31302.parquet'
- 'sppd_rtr_31303.parquet'
file_version: 'v1'
national_url: 'https://gaftp.epa.gov/air/nei/nei_facility_summaries/__year___NEI_Facility_summary.zip'
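# ^ URL template for the national totals validation file (2012 and later); years listed under national_version use their own URL instead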
national_version:
'2020': 'Facility%20Level%20by%20Pollutant'
'2017': '2017v1/2017neiJan_facility'
'2014': '2014v2/2014neiv2_facility'
'2011': '2011v2/2011neiv2_facility'
'2011': 'https://gaftp.epa.gov/air/nei/2011/data_summaries/2011v2/2011neiv2_facility.zip'

DMR:
url: 'https://echo.epa.gov/trends/loading-tool/water-pollution-search'
base_url: 'https://echodata.epa.gov/echo/dmr_rest_services.get_custom_data_annual?'
Expand Down
42 changes: 21 additions & 21 deletions stewi/data/NEI/NEI_required_fields.csv
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
StandardizedEPA,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011
FacilityID,sppd_facility_identifier,sppd_facility_identifier,sppd_facility_identifier,sppd_facility_identifier,sppd_facility_identifier,sppd_facility_identifier,sppd_facility_identifier,sppd_facility_identifier,sppd_facility_identifier,sppd_facility_identifier
FacilityName,facility_name,facility_name,facility_name,facility_name,facility_name,facility_name,facility_name,facility_name,facility_name,facility_name
CompanyName,,,,,,,,,,
Address,location_address,location_address,location_address,location_address,location_address,location_address,location_address,location_address,location_address,location_address
City,city,city,city,city,city,city,city,city,city,city
State,state_abbr,state_abbr,state_abbr,state_abbr,state_abbr,state_abbr,state_abbr,state_abbr,state_abbr,state_abbr
Zip,zipcode,zipcode,zipcode,zipcode,zipcode,zipcode,zipcode,zipcode,zipcode,zipcode
Latitude,y_coordinate,y_coordinate,y_coordinate,y_coordinate,y_coordinate,y_coordinate,y_coordinate,y_coordinate,y_coordinate,y_coordinate
Longitude,x_coordinate,x_coordinate,x_coordinate,x_coordinate,x_coordinate,x_coordinate,x_coordinate,x_coordinate,x_coordinate,x_coordinate
NAICS,naics_primary,naics_primary,naics_primary,naics_primary,naics_primary,naics_primary,naics_primary,naics_primary,naics_primary,naics_primary
County,county_name,county_name,county_name,county_name,county_name,county_name,county_name,county_name,county_name,county_name
FlowName,pollutant_description,pollutant_description,pollutant_description,pollutant_description,pollutant_description,pollutant_description,pollutant_description,pollutant_description,pollutant_description,pollutant_description
FlowID,pollutant_code,pollutant_code,pollutant_code,pollutant_code,pollutant_code,pollutant_code,pollutant_code,pollutant_code,pollutant_code,pollutant_code
FlowAmount,actual_emissions_tpy,actual_emissions_tpy,actual_emissions_tpy,actual_emissions_tpy,actual_emissions_tpy,actual_emissions_tpy,actual_emissions_tpy,actual_emissions_tpy,actual_emissions_tpy,actual_emissions_tpy
UOM,,,,,,,,,,
ReliabilityScore,emission_calc_method_code,emission_calc_method_code,emission_calc_method_code,emission_calc_method_code,emission_calc_method_code,emission_calc_method_code,emission_calc_method_code,emission_calc_method_code,emission_calc_method_code,emission_calc_method_code
Process,scc,scc,scc,scc,scc,scc,scc,scc,scc,scc
UnitID,emission_unit_id,emission_unit_id,emission_unit_id,emission_unit_id,emission_unit_id,emission_unit_id,emission_unit_id,emission_unit_id,emission_unit_id,emission_unit_id
UnitType,unit_type_code,unit_type_code,unit_type_code,unit_type_code,unit_type_code,unit_type_code,,unit_type_code,unit_type_code,
StackHeight,stack_height (ft),stack_height (ft),stack_height (ft),stack_height (ft),stack_height (ft),stack_height (ft),stack_height_ft,stack_height (ft),stack_height (ft),stack_height_ft
StandardizedEPA,2022,2021,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011
FacilityID,sppd_facility_identifier,sppd_facility_identifier,sppd_facility_identifier,sppd_facility_identifier,sppd_facility_identifier,sppd_facility_identifier,sppd_facility_identifier,sppd_facility_identifier,sppd_facility_identifier,sppd_facility_identifier,sppd_facility_identifier,sppd_facility_identifier
FacilityName,facility_name,facility_name,facility_name,facility_name,facility_name,facility_name,facility_name,facility_name,facility_name,facility_name,facility_name,facility_name
CompanyName,,,,,,,,,,,,
Address,location_address,location_address,location_address,location_address,location_address,location_address,location_address,location_address,location_address,location_address,location_address,location_address
City,city,city,city,city,city,city,city,city,city,city,city,city
State,state_abbr,state_abbr,state_abbr,state_abbr,state_abbr,state_abbr,state_abbr,state_abbr,state_abbr,state_abbr,state_abbr,state_abbr
Zip,zipcode,zipcode,zipcode,zipcode,zipcode,zipcode,zipcode,zipcode,zipcode,zipcode,zipcode,zipcode
Latitude,y_coordinate,y_coordinate,y_coordinate,y_coordinate,y_coordinate,y_coordinate,y_coordinate,y_coordinate,y_coordinate,y_coordinate,y_coordinate,y_coordinate
Longitude,x_coordinate,x_coordinate,x_coordinate,x_coordinate,x_coordinate,x_coordinate,x_coordinate,x_coordinate,x_coordinate,x_coordinate,x_coordinate,x_coordinate
NAICS,naics_primary,naics_primary,naics_primary,naics_primary,naics_primary,naics_primary,naics_primary,naics_primary,naics_primary,naics_primary,naics_primary,naics_primary
County,county_name,county_name,county_name,county_name,county_name,county_name,county_name,county_name,county_name,county_name,county_name,county_name
FlowName,pollutant_description,pollutant_description,pollutant_description,pollutant_description,pollutant_description,pollutant_description,pollutant_description,pollutant_description,pollutant_description,pollutant_description,pollutant_description,pollutant_description
FlowID,pollutant_code,pollutant_code,pollutant_code,pollutant_code,pollutant_code,pollutant_code,pollutant_code,pollutant_code,pollutant_code,pollutant_code,pollutant_code,pollutant_code
FlowAmount,actual_emissions_tpy,actual_emissions_tpy,actual_emissions_tpy,actual_emissions_tpy,actual_emissions_tpy,actual_emissions_tpy,actual_emissions_tpy,actual_emissions_tpy,actual_emissions_tpy,actual_emissions_tpy,actual_emissions_tpy,actual_emissions_tpy
UOM,,,,,,,,,,,,
ReliabilityScore,emission_calc_method_code,emission_calc_method_code,emission_calc_method_code,emission_calc_method_code,emission_calc_method_code,emission_calc_method_code,emission_calc_method_code,emission_calc_method_code,emission_calc_method_code,emission_calc_method_code,emission_calc_method_code,emission_calc_method_code
Process,scc,scc,scc,scc,scc,scc,scc,scc,scc,scc,scc,scc
UnitID,emission_unit_id,emission_unit_id,emission_unit_id,emission_unit_id,emission_unit_id,emission_unit_id,emission_unit_id,emission_unit_id,emission_unit_id,emission_unit_id,emission_unit_id,emission_unit_id
UnitType,unit_type_code,unit_type_code,unit_type_code,unit_type_code,unit_type_code,unit_type_code,unit_type_code,unit_type_code,,unit_type_code,unit_type_code,
StackHeight,stack_height (ft),stack_height (ft),stack_height (ft),stack_height (ft),stack_height (ft),stack_height (ft),stack_height (ft),stack_height (ft),stack_height_ft,stack_height (ft),stack_height (ft),stack_height_ft
Loading

0 comments on commit d47ef96

Please sign in to comment.