Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DAG IRVE] Outsource geoloc test to frformat #308

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions data_processing/irve/DAG.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
# Path of the consolidation config file, resolved under TMP_FOLDER.
TMP_CONFIG_FILE = TMP_FOLDER / "schema.data.gouv.fr/config_consolidation.yml"
SCHEMA_CATALOG = "https://schema.data.gouv.fr/schemas/schemas.json"
# SSH remote of the schema repository (same repository as the HTTPS URL below).
GIT_REPO = "git@github.com:etalab/schema.data.gouv.fr.git"
# DEV : for local dev without SSH enabled
# GIT_REPO = "https://github.com/etalab/schema.data.gouv.fr.git"
output_data_folder = f"{TMP_FOLDER}/output/"

default_args = {
Expand All @@ -59,8 +61,6 @@
# Shallow-clone (--depth 1) the repository at GIT_REPO into TMP_FOLDER.
clone_dag_schema_repo = BashOperator(
task_id="clone_dag_schema_repo",
bash_command=f"cd {TMP_FOLDER} && git clone {GIT_REPO} --depth 1 ",
# DEV : for local dev without SSH enabled
# bash_command=f"cd {TMP_FOLDER} && git clone https://github.com/etalab/schema.data.gouv.fr.git --depth 1 ",
)

get_all_irve_resources = PythonOperator(
Expand Down
12 changes: 0 additions & 12 deletions data_processing/irve/geo_utils/france_bbox.geojson

This file was deleted.

33 changes: 5 additions & 28 deletions data_processing/irve/geo_utils/geo.py
Original file line number Diff line number Diff line change
@@ -1,47 +1,24 @@
from typing import Dict, List
import geojson
from typing import Dict
import json
import os
import pandas as pd
import requests
from shapely.geometry import Point, shape
from shapely.geometry.polygon import Polygon
from datagouvfr_data_pipelines.config import AIRFLOW_DAG_HOME

# Read the GeoJSON document stored at schema/utils/france_bbox.geojson
# under AIRFLOW_DAG_HOME.
with open(
    f"{AIRFLOW_DAG_HOME}/datagouvfr_data_pipelines/schema/utils/france_bbox.geojson"
) as f:
    FRANCE_BBOXES = geojson.load(f)

# Extract the geometry of every feature, then turn each one into a shapely
# shape so it can be used for point-in-geometry tests.
geoms = [feature["geometry"] for feature in FRANCE_BBOXES.get("features")]
polys = list(map(shape, geoms))


def is_point_in_polygon(x: float, y: float, polygon: List[List[float]]) -> bool:
    """Tell whether the point (x, y) is contained in the given polygon.

    Args:
        x: First coordinate of the point.
        y: Second coordinate of the point.
        polygon: Sequence of coordinate pairs passed to shapely's ``Polygon``.

    Returns:
        The result of shapely's ``Polygon.contains`` for that point.
    """
    # Build the point first, then the polygon, as two separate steps.
    candidate = Point(x, y)
    return Polygon(polygon).contains(candidate)


def is_point_in_france(coordonnees_xy: List[float]) -> bool:
    """Tell whether a point lies within at least one of the module-level ``polys``.

    Args:
        coordonnees_xy: Coordinates unpacked positionally into shapely's ``Point``.

    Returns:
        True as soon as one geometry of ``polys`` contains the point, else False.
    """
    location = Point(*coordonnees_xy)
    for candidate_area in polys:
        # Stop at the first geometry that contains the point.
        if location.within(candidate_area):
            return True
    return False
from frformat.geo.coordonnees_gps_francaises import CoordonneesGPSFrancaises


def fix_coordinates_order(
df: pd.DataFrame, coordinates_column: str = "coordonneesXY"
) -> pd.DataFrame:
"""
Cette fonction modifie une dataframe pour placer la longitude avant la latitude
dans la colonne qui contient les deux au format "[lon, lat]".
Cette fonction modifie un dataframe pour placer la longitude avant la latitude
dans la colonne qui contient les deux au format "[lat, lon]".
"""

def fix_coordinates(row: pd.Series) -> pd.Series:
coordonnees_xy = json.loads(row[coordinates_column])
reversed_coordonnees = list(reversed(coordonnees_xy))
row["consolidated_coordinates_reordered"] = False
if is_point_in_france(reversed_coordonnees):
if CoordonneesGPSFrancaises.is_valid(*reversed_coordonnees):
# Coordinates are inverted with lat before lon
row[coordinates_column] = json.dumps(reversed_coordonnees)
row["consolidated_coordinates_reordered"] = True
Expand Down
12 changes: 0 additions & 12 deletions schema/utils/france_bbox.geojson

This file was deleted.

268 changes: 0 additions & 268 deletions schema/utils/geo.py

This file was deleted.

Loading