Skip to content

Commit

Permalink
Merge branch 'dev' into features/#525-loadareas-to-datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
IlkaCu authored Dec 3, 2021
2 parents c8cbdba + 21f7722 commit e2bf407
Show file tree
Hide file tree
Showing 33 changed files with 1,469 additions and 534 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ name: "Tests, code style & coverage"
on:
push:
branches:
- dev
- main
- '**'
pull_request:
branches:
- dev
Expand Down
16 changes: 16 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,14 @@ Added
`#405 <https://github.com/openego/eGon-data/issues/405>`_
* Include allocation of conventional (non CHP) power plants
`#392 <https://github.com/openego/eGon-data/issues/392>`_
* Fill egon-etrago-generators table
`#485 <https://github.com/openego/eGon-data/issues/485>`_
* Include time-dependent coefficient of performance for heat pumps
`#532 <https://github.com/openego/eGon-data/issues/532>`_
* Limit number of parallel processes per task
`#265 <https://github.com/openego/eGon-data/issues/265>`_
* Include biomass CHP plants to eTraGo tables
`#498 <https://github.com/openego/eGon-data/issues/498>`_

.. _PR #159: https://github.com/openego/eGon-data/pull/159

Expand Down Expand Up @@ -236,10 +244,16 @@ Changed
`#463 <https://github.com/openego/eGon-data/issues/463>`_
* Update deposit id for zenodo download
`#397 <https://github.com/openego/eGon-data/issues/498>`_
* Add to etrago.setug.py the busmap table
`#484 <https://github.com/openego/eGon-data/issues/484>`_
* Migrate dlr script to datasets
`#508 <https://github.com/openego/eGon-data/issues/508>`_
* Migrate loadarea scripts to datasets
`#525 <https://github.com/openego/eGon-data/issues/525>`_
* Migrate plot.py to dataset of district heating areas
`#527 <https://github.com/openego/eGon-data/issues/527>`_
* Migrate substation scripts to datasets
`#304 <https://github.com/openego/eGon-data/issues/304>`_


Bug fixes
Expand Down Expand Up @@ -311,3 +325,5 @@ Bug fixes
`#504 <https://github.com/openego/eGon-data/issues/504>`_
* Use inbuilt `datetime` package instead of `pandas.datetime`
`#516 <https://github.com/openego/eGon-data/issues/516>`_
* Delete only AC loads for eTraGo in electricity_demand_etrago
`#535 <https://github.com/openego/eGon-data/issues/535>`_
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ to it.
Run the workflow
================

The :py:mod:`egon.data` package installs a command line application
The :code:`egon.data` package installs a command line application
called :code:`egon-data` with which you can control the workflow so once
the installation is successful, you can explore the command line
interface starting with :code:`egon-data --help`.
Expand All @@ -191,7 +191,7 @@ solution.
.. warning::

A complete run of the workflow might require much computing power and
can't be run on laptop. Use the :ref:`test mode <Test mode>` for
can't be run on laptop. Use the `test mode <#test-mode>`_ for
experimenting.

.. warning::
Expand Down
67 changes: 25 additions & 42 deletions src/egon/data/airflow/dags/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import importlib_resources as resources

from egon.data import db
from egon.data.config import set_numexpr_threads
from egon.data.datasets import database
from egon.data.datasets.calculate_dlr import Calculate_dlr
from egon.data.datasets.ch4_storages import CH4Storages
Expand All @@ -23,6 +24,7 @@
from egon.data.datasets.electricity_demand_etrago import ElectricalLoadEtrago
from egon.data.datasets.era5 import WeatherData
from egon.data.datasets.etrago_setup import EtragoSetup
from egon.data.datasets.fill_etrago_gen import Egon_etrago_gen
from egon.data.datasets.gas_grid import GasNodesandPipes
from egon.data.datasets.gas_prod import CH4Production
from egon.data.datasets.heat_demand import HeatDemandImport
Expand Down Expand Up @@ -50,6 +52,8 @@
from egon.data.datasets.scenario_parameters import ScenarioParameters
from egon.data.datasets.society_prognosis import SocietyPrognosis
from egon.data.datasets.storages import PumpedHydro
from egon.data.datasets.substation import SubstationExtraction
from egon.data.datasets.substation_voronoi import SubstationVoronoi
from egon.data.datasets.vg250 import Vg250
from egon.data.datasets.vg250_mv_grid_districts import Vg250MvGridDistricts
from egon.data.datasets.zensus_mv_grid_districts import ZensusMvGridDistricts
Expand All @@ -60,7 +64,10 @@
import egon.data.importing.zensus as import_zs
import egon.data.processing.gas_areas as gas_areas
import egon.data.processing.power_to_h2 as power_to_h2
import egon.data.processing.substation as substation


# Set number of threads used by numpy and pandas
set_numexpr_threads()

with airflow.DAG(
"egon-data-processing-pipeline",
Expand Down Expand Up @@ -185,60 +192,33 @@
mastr_data.insert_into(pipeline)
retrieve_mastr_data = tasks["mastr.download-mastr-data"]

# Substation extraction
substation_tables = PythonOperator(
task_id="create_substation_tables",
python_callable=substation.create_tables,
)

substation_functions = PythonOperator(
task_id="substation_functions",
python_callable=substation.create_sql_functions,
substation_extraction = SubstationExtraction(
dependencies=[osm_add_metadata, vg250_clean_and_prepare]
)

hvmv_substation_extraction = PostgresOperator(
task_id="hvmv_substation_extraction",
sql=resources.read_text(substation, "hvmv_substation.sql"),
postgres_conn_id="egon_data",
autocommit=True,
)

ehv_substation_extraction = PostgresOperator(
task_id="ehv_substation_extraction",
sql=resources.read_text(substation, "ehv_substation.sql"),
postgres_conn_id="egon_data",
autocommit=True,
)

osm_add_metadata >> substation_tables >> substation_functions
substation_functions >> hvmv_substation_extraction
substation_functions >> ehv_substation_extraction
vg250_clean_and_prepare >> hvmv_substation_extraction
vg250_clean_and_prepare >> ehv_substation_extraction

# osmTGmod ehv/hv grid model generation
osmtgmod = Osmtgmod(
dependencies=[
osm_download,
ehv_substation_extraction,
hvmv_substation_extraction,
substation_extraction,
setup_etrago,
]
)
osmtgmod.insert_into(pipeline)
osmtgmod_pypsa = tasks["osmtgmod.to-pypsa"]
osmtgmod_substation = tasks["osmtgmod_substation"]

# create Voronoi for MV grid districts
create_voronoi_substation = PythonOperator(
task_id="create-voronoi-substations",
python_callable=substation.create_voronoi,
# create Voronoi polygons
substation_voronoi = SubstationVoronoi(
dependencies=[
osmtgmod_substation,
vg250,
]
)
osmtgmod_substation >> create_voronoi_substation

# MV grid districts
mv_grid_districts = mv_grid_districts_setup(
dependencies=[create_voronoi_substation]
dependencies=[substation_voronoi]
)
mv_grid_districts.insert_into(pipeline)
define_mv_grid_districts = tasks[
Expand Down Expand Up @@ -296,7 +276,7 @@
feedin_wind_onshore = tasks["renewable_feedin.wind"]
feedin_pv = tasks["renewable_feedin.pv"]
feedin_solar_thermal = tasks["renewable_feedin.solar-thermal"]

# District heating areas demarcation
district_heating_areas = DistrictHeatingAreas(
dependencies=[heat_demand_Germany, scenario_parameters]
Expand Down Expand Up @@ -419,6 +399,7 @@
dependencies=[
setup,
renewable_feedin,
substation_extraction,
mv_grid_districts,
mastr_data,
re_potential_areas,
Expand All @@ -436,14 +417,16 @@
"power_plants.pv_rooftop.pv-rooftop-per-mv-grid"
]

hvmv_substation_extraction >> generate_wind_farms
hvmv_substation_extraction >> generate_pv_ground_mounted
feedin_pv >> solar_rooftop_etrago
elec_cts_demands_zensus >> solar_rooftop_etrago
elec_household_demands_zensus >> solar_rooftop_etrago
etrago_input_data >> solar_rooftop_etrago
map_zensus_grid_districts >> solar_rooftop_etrago

# Fill eTraGo Generators tables
fill_etrago_generators = Egon_etrago_gen(
dependencies=[power_plants, weather_data])

# Heat supply
heat_supply = HeatSupply(
dependencies=[
Expand All @@ -457,7 +440,7 @@

# Heat to eTraGo
heat_etrago = HeatEtrago(
dependencies=[heat_supply, mv_grid_districts, setup_etrago]
dependencies=[heat_supply, mv_grid_districts, setup_etrago, renewable_feedin]
)

heat_etrago_buses = tasks["heat_etrago.buses"]
Expand Down
11 changes: 11 additions & 0 deletions src/egon/data/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,17 @@
),
show_default=True,
)
@click.option(
"--processes-per-task",
default=1,
metavar="N_PROCESS",
help=(
"Each task can use at maximum N_PROCESS parallel processes. Remember"
" that in addition to that, tasks can run in parallel (see N) and"
" there's always the scheduler and probably the serverrunning."
),
show_default=True,
)
@click.option(
"--docker-container-name",
default="egon-data-local-database-container",
Expand Down
41 changes: 35 additions & 6 deletions src/egon/data/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,28 @@ def settings() -> dict[str, dict[str, str]]:
"""
files = paths(pid="*") + paths()
if not files[0].exists():
# TODO: Fatal errors should be raised as exceptions, so one can figure
# out where they are coming from without having to debug.
logger.error(
f"Unable to determine settings.\nConfiguration file:"
f"\n\n{files[0]}\n\nnot found.\nExiting."
logger.warning(
f"Configuration file:"
f"\n\n{files[0]}\n\nnot found.\nUsing defaults."
)
sys.exit(-1)
return {
"egon-data": {
"--airflow-database-name": "airflow",
"--airflow-port": 8080,
"--compose-project-name": "egon-data",
"--database-host": "127.0.0.1",
"--database-name": "egon-data",
"--database-password": "data",
"--database-port": "59734",
"--database-user": "egon",
"--dataset-boundary": "Everything",
"--docker-container-name":
"egon-data-local-database-container",
"--jobs": 1,
"--random-seed": 42,
"--processes-per-task": 1,
}
}
with open(files[0]) as f:
return yaml.safe_load(f)

Expand Down Expand Up @@ -87,3 +102,17 @@ def datasets(config_file=None):
config_file = os.path.join(package_path, "datasets.yml")

return yaml.load(open(config_file), Loader=yaml.SafeLoader)

def set_numexpr_threads():
"""Sets maximum threads used by NumExpr
Returns
-------
None
"""
# Read maximum number of threads per task from egon-data.configuration.yaml
num_processes = settings()["egon-data"]["--processes-per-task"]

os.environ['NUMEXPR_MAX_THREADS'] = str(num_processes)
os.environ['NUMEXPR_NUM_THREADS'] = str(num_processes)
Loading

0 comments on commit e2bf407

Please sign in to comment.