Skip to content


Update to AEO 2021
Browse files Browse the repository at this point in the history
Update input data files to AEO 2021 data. Update code that generates input files to accommodate changes made by EIA to the AEO data files.

EIA split out onsite electricity generation from building energy use at the census division and building type level. Add a passthrough of onsite generation data to the ecm_results.json output file. Data included are based on the building types and climate zones of the measures included in a given run.

Add detailed onsite generation outputs, including breakouts by applicable region and AEO building type, and calculation of avoided electricity costs and CO2 emissions from onsite generation. Improve handling of other fuel, and specify energy costs and CO2 emissions intensities for technologies listed under "other fuel": "furnace (kerosene)", "furnace (LPG)", and "stove (wood)". "furnace (kerosene)" CO2 emissions and energy costs are tied to distillate, "furnace (LPG)" CO2 emissions and energy costs are tied to propane, and "stove (wood)" CO2 emissions and costs are zero.
  • Loading branch information
JLReyna authored and trynthink committed Sep 30, 2021
1 parent 21a4abd commit 323b3f7
Show file tree
Hide file tree
Showing 33 changed files with 3,643,638 additions and 3,799,677 deletions.
138 changes: 128 additions & 10 deletions
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class EIAData(object):
def __init__(self):
self.serv_dmd = 'KSDOUT.txt'
self.catg_dmd = 'KDBOUT.txt'
self.com_generation = 'KDGENOUT.txt'

class UsefulVars(object):
Expand Down Expand Up @@ -297,7 +298,9 @@ def sd_mseg_percent(sd_array, sel, yrs):
# Also check the special case where the technology name is so
# long that the year number is partially truncated at the end
# of the string
exc_tech_name ='.+?(?=\s+2[0-9]{1,2}$)', row['Description'])
exc_tech_name =

# If the regex matched, overwrite the original description with
# the matching text, which describes the technology without
Expand Down Expand Up @@ -371,7 +374,7 @@ def sd_mseg_percent(sd_array, sel, yrs):
# a measure of absolute energy use)
if tval.any():
with np.errstate(divide='ignore', invalid='ignore'):
tval = tval/np.sum(tval, axis=0)
tval = tval / np.sum(tval, axis=0)
tval = np.nan_to_num(tval) # Replace nan from 0/0 with 0

return (tval, trunc_technames)
Expand Down Expand Up @@ -480,6 +483,10 @@ def data_handler(db_array, sd_array, load_array, key_series, sd_end_uses, yrs):
JSON specified by 'key_series'.

def array_mult(dct, factor):
    """Scale every value in a dict by a common factor.

    Args:
        dct: Dict whose values support multiplication by ``factor``.
        factor: Multiplier applied to each value.

    Returns:
        A new dict with the same keys as ``dct`` and each value
        multiplied by ``factor``; ``dct`` itself is not modified.
    """
    return {key: val * factor for key, val in dct.items()}

# Convert the list of keys into a list of numeric indices that can
# be used to select the appropriate data
idx_series = json_interpreter(key_series)
Expand Down Expand Up @@ -517,13 +524,13 @@ def data_handler(db_array, sd_array, load_array, key_series, sd_end_uses, yrs):
load_array['CDIV'] == idx_series[0],
load_array['BLDG'] == idx_series[1],
load_array['ENDUSE'] == idx_series[2]],
# N.B. tl_multiplier is a 1x1 numpy array

# Multiply together the thermal load multiplier and energy use
# data and construct the dict with years as keys
final_dict = {'energy': dict(zip(
subset['Year'], subset['Amount']*tl_multiplier*to_mmbtu)),
subset['Year'], subset['Amount'] * tl_multiplier * to_mmbtu)),
'stock': 'NA'}
elif 'MELs' in key_series:
# Miscellaneous Electric Loads (MELs) energy use data are
Expand All @@ -540,7 +547,7 @@ def data_handler(db_array, sd_array, load_array, key_series, sd_end_uses, yrs):

# Convert into dict with years as keys and energy as values
final_dict = {'energy': dict(zip(subset['Year'],
subset['Amount'] * to_mmbtu)),
'stock': 'NA'}
elif 'new square footage' in key_series:
# Extract the relevant data from KDBOUT
Expand Down Expand Up @@ -579,7 +586,7 @@ def data_handler(db_array, sd_array, load_array, key_series, sd_end_uses, yrs):
for technology in tech_pct:
{'energy': dict(zip(subset['Year'],
technology * subset['Amount'] * to_mmbtu)),
'stock': 'NA'})

# The final dict should be {technology: {year: data, ...}, ...}
Expand All @@ -592,7 +599,7 @@ def data_handler(db_array, sd_array, load_array, key_series, sd_end_uses, yrs):

# Convert into dict with years as keys and energy as values
final_dict = {'energy': dict(zip(subset['Year'],
subset['Amount'] * to_mmbtu)),
'stock': 'NA'}

# Return the dict that should end up at the leaf node in the exported JSON
Expand Down Expand Up @@ -787,7 +794,7 @@ def data_import(data_file_path, dtype_list, delim_char=',', hl=None, cols=[]):
# row of data in the ktek file (which is the intended
# target for these lines of code).
if hl:
for i in range(0, hl+1):
for i in range(0, hl + 1):

# Import the data, skipping lines that are not the correct length
Expand Down Expand Up @@ -956,6 +963,112 @@ def special_character_handler(text_string):
return data_array

def onsite_prep(generation_file):
""" Preps the onsite generation file for commercial
by adding together all technologies by segment and
converting building and census division names to
strings for easier querying"""

bldgtypedict = {'Assembly': 'assembly',
'Education': 'education',
'Food Sales': 'food sales',
'Food Service': 'food service',
'Health Care': 'health care',
'Lodging': 'lodging',
'Office-Large': 'large office',
'Office-Small': 'small office',
'Merc/Service': 'mercantile/service',
'Warehouse': 'warehouse',
'Other': 'other'

# Read in AEO's onsite generation file
gen_dtypes = dtype_array(generation_file)
gen_dtypes[1] = ('Year', '<U50')
gen_data = data_import(generation_file, gen_dtypes)

# Pull all the unique microsegment combinations
years = np.unique(gen_data['Year'])
div = np.unique(gen_data['Division'])
bld = np.unique(gen_data['BldgType'])

# Define datatypes of OwnUse aggregated
gen_dtypes = [('Year', '<U50'),
('Division', '<i4'),
('BldgType', '<U50'),
('OwnUse', '<f8')]

# Sum all onsite generation by microsegment
gen_data = np.array([(i, j, k, gen_data[(gen_data['Year'] == i) &
(gen_data['Division'] == j) &
(gen_data['BldgType'] == k)][
for i in years for j in div
for k in bld], dtype=gen_dtypes)

# Factor to convert commercial energy data from TBTU to MMBTU
to_mmbtu = 1000000 # 1e6

# Convert cdivision to names
cdiv_dct = {str(v): k for k, v in

gen_dtypes = [('Year', '<U50'), ('Division', '<U50'),
('BldgType', '<U50'),
('OwnUse', '<f8')]
gen_data = gen_data.astype(gen_dtypes)

# Unit conversion of TBTU to MMBTU
gen_data['OwnUse'] = gen_data['OwnUse'] * to_mmbtu

def name_map(data_array, trans_dict):
    """Translate the values of an array using a lookup dict.

    Args:
        data_array: numpy array whose elements are compared against
            the keys of ``trans_dict``.
        trans_dict: Dict mapping an original value to its replacement.

    Returns:
        A copy of ``data_array`` in which every element equal to a key
        of ``trans_dict`` is replaced by the corresponding value.
        Elements that match no key are left as they were, and the
        input array is not modified.
    """
    mapped = np.copy(data_array)
    for old_val, new_val in trans_dict.items():
        # Build each mask from the untouched input (not from the copy)
        # so that a replacement value that happens to equal another key
        # is not translated a second time
        mapped[data_array == old_val] = new_val
    return mapped

gen_data['Division'] = name_map(gen_data['Division'], cdiv_dct)
gen_data['BldgType'] = np.char.rstrip(gen_data['BldgType'])
gen_data['BldgType'] = name_map(gen_data['BldgType'], bldgtypedict)

return gen_data

def onsite_calc(generation_file, json_results):
    """Add onsite generation to the results as a new electricity end use.

    For every combination of census division and building type found in
    the generation data, the 'OwnUse' values are summed by year, negated,
    and stored under
    json_results[division][building type]['electricity']['onsite generation'].

    Args:
        generation_file: Despite the name, this is indexed as a numpy
            structured array with the fields 'Year', 'Division',
            'BldgType', and 'OwnUse' (the array returned by onsite_prep
            is what main() passes in).
        json_results: Nested dict keyed by division, then building type,
            then fuel type; it must already contain an 'electricity'
            entry for every division/building type combination formed
            from the generation data, otherwise a KeyError is raised.

    Returns:
        The same json_results object, updated in place, where each new
        'onsite generation' entry is a dict with the keys 'energy'
        ({year: negated OwnUse total}) and 'stock' ('NA').
    """

    def onsite_pull(cdiv, bld):
        """Return {year: -sum(OwnUse)} for one division/building type."""
        # Combine the two field comparisons row by row (axis=0) to get
        # a boolean mask with one entry per row of the generation data
        row_mask = np.all([generation_file['Division'] == cdiv,
                           generation_file['BldgType'] == bld],
                          axis=0)
        rows = generation_file[row_mask]
        # Generation is stored with the opposite sign of the summed
        # 'OwnUse' values, hence the multiplication by -1
        return {yr: rows[rows['Year'] == yr]['OwnUse'].sum() * -1
                for yr in np.unique(rows['Year'])}

    # Pull the onsite generation by census division and building type
    cdiv = np.unique(generation_file['Division'])
    bldtype = np.unique(generation_file['BldgType'])

    for div in cdiv:
        for bld in bldtype:
            # Add onsite generation as new end use
            json_results[div][bld]['electricity']['onsite generation'] = {
                'energy': onsite_pull(div, bld),
                'stock': 'NA'}

    return json_results

def main():
""" Import input data files and do other things """

Expand All @@ -977,6 +1090,9 @@ def main():
load_dtypes = dtype_array(handyvars.com_tloads, '\t')
load_data = data_import(handyvars.com_tloads, load_dtypes, '\t')

# Import and process onsite generation from KDGENOUT.txt
onsite_gen = onsite_prep(eiadata.com_generation)

# Not all end uses are broken down by equipment type and vintage in
# KSDOUT; determine which end uses are present so that the service
# demand data are not explored unnecessarily when they are not even
Expand All @@ -992,14 +1108,16 @@ def main():

# Import empty microsegments JSON file and traverse database structure
with open(handyvars.json_in, 'r') as jsi, open(
handyvars.json_out, 'w') as jso:
with open(handyvars.json_in, 'r') as jsi, open(handyvars.json_out,
'w') as jso:
msjson = json.load(jsi)

# Proceed recursively through database structure
result = walk(catg_data, serv_data, load_data,
serv_data_end_uses, msjson, years)

# Add in onsite generation
result = onsite_calc(onsite_gen, result)
# Write the updated dict of data to a new JSON file
json.dump(result, jso, indent=2)

Expand Down
5 changes: 3 additions & 2 deletions
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,9 @@ def setUpClass(self):
with open(self.eiafiles.cpl_data, 'r') as tech:
tech_fl = csv.reader(tech)

# Skip content preceding header row
for i in range(0, self.usefulvars.cpl_data_skip_lines):
# Skip content preceding header row, adjust skip
# lines to stop at first of two header rows
for i in range(0, self.usefulvars.cpl_data_skip_lines - 1):

self.tech_head = [entry.strip() for entry in next(tech_fl)]
Expand Down
86 changes: 53 additions & 33 deletions
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,9 @@ def updater(conv, api_key, aeo_yr, scen, captured_energy_method):
Using data from the AEO year and specified NEMS modeling scenario,
calculate revised site-source conversion factors, CO2 emissions
rates, and energy prices. In the case of the "other" fuel types,
rates, and energy prices.
In the case of the "other" fuel types,
energy prices are based on a energy use by fuel type-weighted
Expand Down Expand Up @@ -533,26 +535,37 @@ def updater(conv, api_key, aeo_yr, scen, captured_energy_method):
print('\nDue to failed data retrieval from the API, commercial '
'natural gas CO2 emissions intensities were not updated.')

# Residential other fuel CO2 intensities [Mt CO2/quads]
# Residential propane CO2 intensities [Mt CO2/quads]
co2_res_ot_ints = z['petro_res_co2']/z['petro_res_energy']
for idx, year in enumerate(yrs):
conv['other']['CO2 intensity']['data']['residential'][year] = (
round(co2_res_ot_ints[idx], 6))
conv['propane']['CO2 intensity']['data']['residential'][year] = (
62.88) # hard coded CO2 intensity of propane
except KeyError:
print('\nDue to failed data retrieval from the API, residential '
'"other fuel" CO2 emissions intensities were not updated.')
print('\nError updating residential propane CO2 emissions intensities.')

# Commercial other fuel CO2 intensities [Mt CO2/quads]
# Commercial propane CO2 intensities [Mt CO2/quads]
co2_com_ot_ints = ((z['petro_com_co2'] + z['coal_com_co2']) /
(z['petro_com_energy'] + z['coal_com_energy']))
for idx, year in enumerate(yrs):
conv['other']['CO2 intensity']['data']['commercial'][year] = (
round(co2_com_ot_ints[idx], 6))
conv['propane']['CO2 intensity']['data']['commercial'][year] = (
62.88) # hard coded CO2 intensity of propane
except KeyError:
print('\nDue to failed data retrieval from the API, commercial '
'"other fuel" CO2 emissions intensities were not updated.')
print('\nError updating commercial propane CO2 emissions intensities.')

# Residential distillate CO2 intensities [Mt CO2/quads]
for idx, year in enumerate(yrs):
conv['distillate']['CO2 intensity']['data']['residential'][year] = (
74.14) # hard coded CO2 intensity of distillate
except KeyError:
print('\nError updating residential distillate CO2 emissions intensities.')

# Commercial distillate CO2 intensities [Mt CO2/quads]
for idx, year in enumerate(yrs):
conv['distillate']['CO2 intensity']['data']['commercial'][year] = (
74.14) # hard coded CO2 intensity of distillate
except KeyError:
print('\nError updating commercial distillate CO2 emissions intensities.')

# Residential electricity prices [$/MMBtu source]
Expand Down Expand Up @@ -590,34 +603,41 @@ def updater(conv, api_key, aeo_yr, scen, captured_energy_method):
print('\nDue to failed data retrieval from the API, commercial '
'natural gas prices were not updated.')

# Residential other fuel price as energy use-weighted average
# of propane and distillate (fuel oil) prices [$/MMBtu source]
# Residential propane prices [$/MMBtu source]
for idx, year in enumerate(yrs):
conv['propane']['price']['data']['residential'][year] = (
round(z['lpg_res_price'][idx], 6))
except KeyError:
print('\nDue to failed data retrieval from the API, residential '
'propane prices were not updated.')

# Commercial propane prices [$/MMBtu source]
for idx, year in enumerate(yrs):
conv['propane']['price']['data']['commercial'][year] = (
round(z['lpg_com_price'][idx], 6))
except KeyError:
print('\nDue to failed data retrieval from the API, commercial '
'propane prices were not updated.')

# Residential distillate prices [$/MMBtu source]
res_other_price = (z['lpg_res_price']*z['lpg_res_energy']/(
z['lpg_res_energy'] + z['distl_res_energy']) +
z['lpg_res_energy'] + z['distl_res_energy']))
for idx, year in enumerate(yrs):
conv['other']['price']['data']['residential'][year] = (
round(res_other_price[idx], 6))
conv['distillate']['price']['data']['residential'][year] = (
round(z['distl_res_price'][idx], 6))
except KeyError:
print('\nDue to failed data retrieval from the API, residential '
'"other fuel" prices were not updated.')
'distillate prices were not updated.')

# Commercial other fuel price as energy use-weighted average of
# propane, distillate (fuel oil), and residual (fuel oil) prices
# [$/MMBtu source]
# Commercial distillate prices [$/MMBtu source]
denom = z['lpg_com_energy']+z['distl_com_energy']+z['rsid_com_energy']
com_other_price = (z['lpg_com_price']*z['lpg_com_energy']/denom +
z['distl_com_price']*z['distl_com_energy']/denom +
for idx, year in enumerate(yrs):
conv['other']['price']['data']['commercial'][year] = (
round(com_other_price[idx], 6))
conv['distillate']['price']['data']['commercial'][year] = (
round(z['distl_com_price'][idx], 6))
except KeyError:
print('\nDue to failed data retrieval from the API, commercial '
'"other fuel" prices were not updated.')
'distillate prices were not updated.')

return conv

Expand Down

0 comments on commit 323b3f7

Please sign in to comment.