Skip to content

Commit

Permalink
Merge pull request #3 from primap-community/read-dataset
Browse files Browse the repository at this point in the history
Read dataset
  • Loading branch information
crdanielbusch authored Feb 3, 2025
2 parents 0e2b5dc + 0254c48 commit ce608c5
Show file tree
Hide file tree
Showing 21 changed files with 1,298 additions and 507 deletions.
1 change: 1 addition & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ jobs:
shell: bash
run: |
TEMP_FILE=$(mktemp)
poetry self add poetry-plugin-export
poetry export --without=tests --without=docs --without=dev > $TEMP_FILE
poetry run liccheck -r $TEMP_FILE -R licence-check.txt
cat licence-check.txt
5 changes: 0 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,3 @@ virtual-environment: ## update virtual environment, create a new one if it does
poetry config virtualenvs.in-project true
poetry install --all-extras
poetry run pre-commit install

.PHONY: download_all_domains-environment
download_all_domains:
# downloads and stages (datalad save) all available data
datalad run poetry run python3 scripts/download_all_domains.py
1 change: 1 addition & 0 deletions changelog/2.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add a script that converts the downloaded data into the interchange format (IF) and the primap2 native format
58 changes: 58 additions & 0 deletions dodo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""
Define tasks to download and read the FAO data set.
"""
import datalad.api


def task_download():
    """
    Define the doit task that downloads the latest data.

    The single action wraps ``scripts/download_all_domains.py`` in a
    ``datalad.api.run`` call and declares ``downloaded_data`` as the
    command's output.

    Returns
    -------
    dict
        Task definition containing only the ``actions`` list.
    """

    def datalad_run_download():
        # Collect the arguments first so the call itself stays a one-liner.
        run_kwargs = {
            "cmd": "python3 scripts/download_all_domains.py",
            "outputs": "downloaded_data",
        }
        datalad.api.run(**run_kwargs)

    task_definition = {"actions": [datalad_run_download]}
    return task_definition


def task_read():
    """
    Define the doit task that reads the data set.

    The action runs ``scripts/read_data_set.py`` through ``datalad.api.run``
    and declares the save path as the command's output. Two command-line
    parameters are forwarded to the script:

    * ``save_path`` (``-s`` / ``--save_path``, default ``"extracted_data"``)
    * ``run_id`` (``-r`` / ``--run_id``, default ``"2024"``)

    Returns
    -------
    dict
        Task definition with the ``actions``, ``params`` and ``verbosity``
        entries.
    """

    def read_dataset(save_path, run_id):
        # Function-scope import keeps the file's import section untouched.
        import shlex

        print(f"Reading dataset for {save_path=} and {run_id=}")
        # The command is passed to datalad as one string, so each value is
        # quoted to stay a single argument even when it contains spaces or
        # shell metacharacters. Plain values such as the defaults are left
        # unchanged by shlex.quote.
        # NOTE(review): assumes datalad executes string commands via a POSIX
        # shell - confirm against the datalad run documentation.
        cmd = (
            "python3 scripts/read_data_set.py "
            f"--save_path {shlex.quote(str(save_path))} "
            f"--run_id {shlex.quote(str(run_id))}"
        )

        datalad.api.run(
            cmd=cmd,
            message="Read data set",
            outputs=str(save_path),
        )

    return {
        "actions": [read_dataset],
        "params": [
            {
                "name": "save_path",
                "short": "s",
                "long": "save_path",
                "default": "extracted_data",
                "help": "Path to save the data.",
            },
            {
                "name": "run_id",
                "long": "run_id",
                "short": "r",
                "default": "2024",
                "help": "Run identifier.",
            },
        ],
        "verbosity": 2,
    }
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
attrs:
references: https://www.fao.org/faostat
rights: Creative Commons Attribution-4.0 International licence (CC BY 4.0)
contact: [email protected]
title: Agrifood systems emissions
comment: Published by Food and Agriculture Organization of the United Nations (FAO),
converted to PRIMAP2 format by Daniel Busch
institution: Food and Agriculture Organization of the United Nations
area: area (ISO3)
cat: category (FAOSTAT)
scen: scenario (FAO)
time_format: '%Y'
dimensions:
'*':
- time
- source
- category (FAOSTAT)
- area (ISO3)
- scenario (FAO)
- entity
- unit
data_file: FAOSTAT_Agrifood_system_emissions_v2024-11-14.csv
111 changes: 55 additions & 56 deletions poetry.lock

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,9 @@ authorized_licenses = [
"python software foundation license",
"zpl 2.1",
'CMU License (MIT-CMU)',
'GNU General Public License v3 (GPLv3)',
'GNU Lesser General Public License v3 (LGPLv3)',

]
# This starting list is relatively conservative. Depending on the project, it
# may make sense to move some of these into the authorized list
Expand Down
101 changes: 50 additions & 51 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -577,48 +577,47 @@ loguru==0.7.2 ; python_version >= "3.10" and python_version < "3.13" \
looseversion==1.3.0 ; python_version >= "3.10" and python_version < "3.13" \
--hash=sha256:781ef477b45946fc03dd4c84ea87734b21137ecda0e1e122bcb3c8d16d2a56e0 \
--hash=sha256:ebde65f3f6bb9531a81016c6fef3eb95a61181adc47b7f949e9c0ea47911669e
matplotlib==3.9.3 ; python_version >= "3.10" and python_version < "3.13" \
--hash=sha256:026bdf3137ab6022c866efa4813b6bbeddc2ed4c9e7e02f0e323a7bca380dfa0 \
--hash=sha256:031b7f5b8e595cc07def77ec5b58464e9bb67dc5760be5d6f26d9da24892481d \
--hash=sha256:0a0a63cb8404d1d1f94968ef35738900038137dab8af836b6c21bb6f03d75465 \
--hash=sha256:0a361bd5583bf0bcc08841df3c10269617ee2a36b99ac39d455a767da908bbbc \
--hash=sha256:10d3e5c7a99bd28afb957e1ae661323b0800d75b419f24d041ed1cc5d844a764 \
--hash=sha256:1c40c244221a1adbb1256692b1133c6fb89418df27bf759a31a333e7912a4010 \
--hash=sha256:203d18df84f5288973b2d56de63d4678cc748250026ca9e1ad8f8a0fd8a75d83 \
--hash=sha256:213d6dc25ce686516208d8a3e91120c6a4fdae4a3e06b8505ced5b716b50cc04 \
--hash=sha256:3119b2f16de7f7b9212ba76d8fe6a0e9f90b27a1e04683cd89833a991682f639 \
--hash=sha256:3fb0b37c896172899a4a93d9442ffdc6f870165f59e05ce2e07c6fded1c15749 \
--hash=sha256:41b016e3be4e740b66c79a031a0a6e145728dbc248142e751e8dab4f3188ca1d \
--hash=sha256:4a8d279f78844aad213c4935c18f8292a9432d51af2d88bca99072c903948045 \
--hash=sha256:4e6eefae6effa0c35bbbc18c25ee6e0b1da44d2359c3cd526eb0c9e703cf055d \
--hash=sha256:5f2a4ea08e6876206d511365b0bc234edc813d90b930be72c3011bbd7898796f \
--hash=sha256:66d7b171fecf96940ce069923a08ba3df33ef542de82c2ff4fe8caa8346fa95a \
--hash=sha256:687df7ceff57b8f070d02b4db66f75566370e7ae182a0782b6d3d21b0d6917dc \
--hash=sha256:6be0ba61f6ff2e6b68e4270fb63b6813c9e7dec3d15fc3a93f47480444fd72f0 \
--hash=sha256:6e9de2b390d253a508dd497e9b5579f3a851f208763ed67fdca5dc0c3ea6849c \
--hash=sha256:760a5e89ebbb172989e8273024a1024b0f084510b9105261b3b00c15e9c9f006 \
--hash=sha256:816a966d5d376bf24c92af8f379e78e67278833e4c7cbc9fa41872eec629a060 \
--hash=sha256:87ad73763d93add1b6c1f9fcd33af662fd62ed70e620c52fcb79f3ac427cf3a6 \
--hash=sha256:896774766fd6be4571a43bc2fcbcb1dcca0807e53cab4a5bf88c4aa861a08e12 \
--hash=sha256:8e0143975fc2a6d7136c97e19c637321288371e8f09cff2564ecd73e865ea0b9 \
--hash=sha256:90a85a004fefed9e583597478420bf904bb1a065b0b0ee5b9d8d31b04b0f3f70 \
--hash=sha256:9b081dac96ab19c54fd8558fac17c9d2c9cb5cc4656e7ed3261ddc927ba3e2c5 \
--hash=sha256:9d6b2e8856dec3a6db1ae51aec85c82223e834b228c1d3228aede87eee2b34f9 \
--hash=sha256:9f459c8ee2c086455744723628264e43c884be0c7d7b45d84b8cd981310b4815 \
--hash=sha256:9fa6e193c14d6944e0685cdb527cb6b38b0e4a518043e7212f214113af7391da \
--hash=sha256:a42b9dc42de2cfe357efa27d9c50c7833fc5ab9b2eb7252ccd5d5f836a84e1e4 \
--hash=sha256:b651b0d3642991259109dc0351fc33ad44c624801367bb8307be9bfc35e427ad \
--hash=sha256:b6c12514329ac0d03128cf1dcceb335f4fbf7c11da98bca68dca8dcb983153a9 \
--hash=sha256:c52f48eb75fcc119a4fdb68ba83eb5f71656999420375df7c94cc68e0e14686e \
--hash=sha256:c96eeeb8c68b662c7747f91a385688d4b449687d29b691eff7068a4602fe6dc4 \
--hash=sha256:cd1077b9a09b16d8c3c7075a8add5ffbfe6a69156a57e290c800ed4d435bef1d \
--hash=sha256:cd5dbbc8e25cad5f706845c4d100e2c8b34691b412b93717ce38d8ae803bcfa5 \
--hash=sha256:cf2a60daf6cecff6828bc608df00dbc794380e7234d2411c0ec612811f01969d \
--hash=sha256:d3c93796b44fa111049b88a24105e947f03c01966b5c0cc782e2ee3887b790a3 \
--hash=sha256:d796272408f8567ff7eaa00eb2856b3a00524490e47ad505b0b4ca6bb8a7411f \
--hash=sha256:e0fcb7da73fbf67b5f4bdaa57d85bb585a4e913d4a10f3e15b32baea56a67f0a \
--hash=sha256:e14485bb1b83eeb3d55b6878f9560240981e7bbc7a8d4e1e8c38b9bd6ec8d2de \
--hash=sha256:edd14cf733fdc4f6e6fe3f705af97676a7e52859bf0044aa2c84e55be739241c
matplotlib==3.9.2 ; python_version >= "3.10" and python_version < "3.13" \
--hash=sha256:039082812cacd6c6bec8e17a9c1e6baca230d4116d522e81e1f63a74d01d2e21 \
--hash=sha256:03ba9c1299c920964e8d3857ba27173b4dbb51ca4bab47ffc2c2ba0eb5e2cbc5 \
--hash=sha256:050598c2b29e0b9832cde72bcf97627bf00262adbc4a54e2b856426bb2ef0697 \
--hash=sha256:18128cc08f0d3cfff10b76baa2f296fc28c4607368a8402de61bb3f2eb33c7d9 \
--hash=sha256:1cd93b91ab47a3616b4d3c42b52f8363b88ca021e340804c6ab2536344fad9ca \
--hash=sha256:1d94ff717eb2bd0b58fe66380bd8b14ac35f48a98e7c6765117fe67fb7684e64 \
--hash=sha256:306c8dfc73239f0e72ac50e5a9cf19cc4e8e331dd0c54f5e69ca8758550f1e1e \
--hash=sha256:37e51dd1c2db16ede9cfd7b5cabdfc818b2c6397c83f8b10e0e797501c963a03 \
--hash=sha256:3fd595f34aa8a55b7fc8bf9ebea8aa665a84c82d275190a61118d33fbc82ccae \
--hash=sha256:4876d7d40219e8ae8bb70f9263bcbe5714415acfdf781086601211335e24f8aa \
--hash=sha256:5413401594cfaff0052f9d8b1aafc6d305b4bd7c4331dccd18f561ff7e1d3bd3 \
--hash=sha256:5816b1e1fe8c192cbc013f8f3e3368ac56fbecf02fb41b8f8559303f24c5015e \
--hash=sha256:65aacf95b62272d568044531e41de26285d54aec8cb859031f511f84bd8b495a \
--hash=sha256:6758baae2ed64f2331d4fd19be38b7b4eae3ecec210049a26b6a4f3ae1c85dcc \
--hash=sha256:6d1ce5ed2aefcdce11904fc5bbea7d9c21fff3d5f543841edf3dea84451a09ea \
--hash=sha256:6d9f07a80deab4bb0b82858a9e9ad53d1382fd122be8cde11080f4e7dfedb38b \
--hash=sha256:7741f26a58a240f43bee74965c4882b6c93df3e7eb3de160126d8c8f53a6ae6e \
--hash=sha256:8912ef7c2362f7193b5819d17dae8629b34a95c58603d781329712ada83f9447 \
--hash=sha256:909645cce2dc28b735674ce0931a4ac94e12f5b13f6bb0b5a5e65e7cea2c192b \
--hash=sha256:96ab43906269ca64a6366934106fa01534454a69e471b7bf3d79083981aaab92 \
--hash=sha256:9d78bbc0cbc891ad55b4f39a48c22182e9bdaea7fc0e5dbd364f49f729ca1bbb \
--hash=sha256:ab68d50c06938ef28681073327795c5db99bb4666214d2d5f880ed11aeaded66 \
--hash=sha256:ac43031375a65c3196bee99f6001e7fa5bdfb00ddf43379d3c0609bdca042df9 \
--hash=sha256:ae82a14dab96fbfad7965403c643cafe6515e386de723e498cf3eeb1e0b70cc7 \
--hash=sha256:b2696efdc08648536efd4e1601b5fd491fd47f4db97a5fbfd175549a7365c1b2 \
--hash=sha256:b82c5045cebcecd8496a4d694d43f9cc84aeeb49fe2133e036b207abe73f4d30 \
--hash=sha256:be0fc24a5e4531ae4d8e858a1a548c1fe33b176bb13eff7f9d0d38ce5112a27d \
--hash=sha256:bf81de2926c2db243c9b2cbc3917619a0fc85796c6ba4e58f541df814bbf83c7 \
--hash=sha256:c375cc72229614632c87355366bdf2570c2dac01ac66b8ad048d2dabadf2d0d4 \
--hash=sha256:c797dac8bb9c7a3fd3382b16fe8f215b4cf0f22adccea36f1545a6d7be310b41 \
--hash=sha256:cef2a73d06601437be399908cf13aee74e86932a5ccc6ccdf173408ebc5f6bb2 \
--hash=sha256:d52a3b618cb1cbb769ce2ee1dcdb333c3ab6e823944e9a2d36e37253815f9556 \
--hash=sha256:d719465db13267bcef19ea8954a971db03b9f48b4647e3860e4bc8e6ed86610f \
--hash=sha256:d8dd059447824eec055e829258ab092b56bb0579fc3164fa09c64f3acd478772 \
--hash=sha256:dbe196377a8248972f5cede786d4c5508ed5f5ca4a1e09b44bda889958b33f8c \
--hash=sha256:e0830e188029c14e891fadd99702fd90d317df294c3298aad682739c5533721a \
--hash=sha256:f053c40f94bc51bc03832a41b4f153d83f2062d88c72b5e79997072594e97e51 \
--hash=sha256:f32c7410c7f246838a77d6d1eff0c0f87f3cb0e7c4247aebea71a6d5a68cab49 \
--hash=sha256:f6ee45bc4245533111ced13f1f2cace1e7f89d1c793390392a80c139d6cf0e6c \
--hash=sha256:f7c0410f181a531ec4e93bbc27692f2c71a15c2da16766f5ba9761e7ae518413
more-itertools==10.5.0 ; python_version >= "3.10" and python_version < "3.13" \
--hash=sha256:037b0d3203ce90cca8ab1defbbdac29d5f993fc20131f3664dc8d6acfa872aef \
--hash=sha256:5482bfef7849c25dc3c6dd53a6173ae4795da2a41a80faea6700d9f5846c5da6
Expand Down Expand Up @@ -761,9 +760,9 @@ outcome==1.3.0.post0 ; python_version >= "3.10" and python_version < "3.13" \
packaging==24.2 ; python_version >= "3.10" and python_version < "3.13" \
--hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \
--hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f
pandas-stubs==2.2.3.241126 ; python_version >= "3.10" and python_version < "3.13" \
--hash=sha256:74aa79c167af374fe97068acc90776c0ebec5266a6e5c69fe11e9c2cf51f2267 \
--hash=sha256:cf819383c6d9ae7d4dabf34cd47e1e45525bb2f312e6ad2939c2c204cb708acd
pandas-stubs==2.2.3.241009 ; python_version >= "3.10" and python_version < "3.13" \
--hash=sha256:3a6f8f142105a42550be677ba741ba532621f4e0acad2155c0e7b2450f114cfa \
--hash=sha256:d4ab618253f0acf78a5d0d2bfd6dffdd92d91a56a69bdc8144e5a5c6d25be3b5
pandas==2.2.3 ; python_version >= "3.10" and python_version < "3.13" \
--hash=sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a \
--hash=sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d \
Expand Down Expand Up @@ -1035,9 +1034,9 @@ soupsieve==2.6 ; python_version >= "3.10" and python_version < "3.13" \
strictyaml==1.7.3 ; python_version >= "3.10" and python_version < "3.13" \
--hash=sha256:22f854a5fcab42b5ddba8030a0e4be51ca89af0267961c8d6cfa86395586c407 \
--hash=sha256:fb5c8a4edb43bebb765959e420f9b3978d7f1af88c80606c03fb420888f5d1c7
tqdm==4.67.1 ; python_version >= "3.10" and python_version < "3.13" \
--hash=sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2 \
--hash=sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2
tqdm==4.67.0 ; python_version >= "3.10" and python_version < "3.13" \
--hash=sha256:0cd8af9d56911acab92182e88d763100d4788bdf421d251616040cc4d44863be \
--hash=sha256:fe5a6f95e6fe0b9755e9469b77b9c3cf850048224ecaa8293d7d2d31f97d869a
trio-websocket==0.11.1 ; python_version >= "3.10" and python_version < "3.13" \
--hash=sha256:18c11793647703c158b1f6e62de638acada927344d534e3c7628eedcb746839f \
--hash=sha256:520d046b0d030cf970b8b2b2e00c4c2245b3807853ecd44214acd33d74581638
Expand Down Expand Up @@ -1080,9 +1079,9 @@ win32-setctime==1.1.0 ; python_version >= "3.10" and python_version < "3.13" and
wsproto==1.2.0 ; python_version >= "3.10" and python_version < "3.13" \
--hash=sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065 \
--hash=sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736
xarray==2024.11.0 ; python_version >= "3.10" and python_version < "3.13" \
--hash=sha256:1ccace44573ddb862e210ad3ec204210654d2c750bec11bbe7d842dfc298591f \
--hash=sha256:6ee94f63ddcbdd0cf3909d1177f78cdac756640279c0e32ae36819a89cdaba37
xarray==2024.10.0 ; python_version >= "3.10" and python_version < "3.13" \
--hash=sha256:ae1d38cb44a0324dfb61e492394158ae22389bf7de9f3c174309c17376df63a0 \
--hash=sha256:e369e2bac430e418c2448e5b96f07da4635f98c1319aa23cfeb3fbcb9a01d2e0
zipp==3.21.0 ; python_version >= "3.10" and python_version < "3.13" \
--hash=sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4 \
--hash=sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931
8 changes: 8 additions & 0 deletions scripts/read_all_domains.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Read the latest release of all available domains."""

from faostat_data_primap.read import (
read_latest_data,
)

# Only trigger the read when this file is executed as a script, not on import.
if __name__ == "__main__":
    read_latest_data()
31 changes: 31 additions & 0 deletions scripts/read_data_set.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""Read selected domains and versions."""
from pathlib import Path

import click

from faostat_data_primap.helper.definitions import domains_and_releases_to_read
from faostat_data_primap.helper.paths import (
extracted_data_path,
)
from faostat_data_primap.read import (
read_data,
)


@click.command()
@click.option("--run_id", default="2024", help="Configuration to run")
@click.option("--save_path", default=None, help="Where to save data in root directory.")
def run(run_id, save_path):
    """Prepare and run read data function"""
    # NOTE: the docstring above doubles as the command's --help text, so it is
    # kept short; details live in these comments.
    # A missing or empty --save_path falls back to the package default
    # location; anything else is turned into a Path.
    target_dir = Path(save_path) if save_path else extracted_data_path
    # run_id selects which domains/releases configuration is read.
    selection = domains_and_releases_to_read[run_id]
    read_data(domains_and_releases_to_read=selection, save_path=target_dir)


# Script entry point: invoke the click command defined above, but only when
# this file is executed directly rather than imported.
if __name__ == "__main__":
    run()
Loading

0 comments on commit ce608c5

Please sign in to comment.