Download the table of contents of the AIP IFR
see #1
hamarituc committed Aug 28, 2022
1 parent 9f69ba0 commit 64dc8a7
Showing 1 changed file with 188 additions and 0 deletions.
ifr_fetch.py
@@ -0,0 +1,188 @@
#!/usr/bin/env python3

#
# Copyright (C) 2022 Mario Haustein, [email protected]
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 3 of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#

import argparse
from bs4 import BeautifulSoup
import datetime
import json
import re
import requests



parser = argparse.ArgumentParser(
    description = "Download AIP IFR data"
)

parser.add_argument(
    '--url',
    type = str,
    default = "https://aip.dfs.de/basicIFR",
    help = 'base URL')

parser.add_argument(
    '--debug',
    action = 'store_true',
    help = 'generate debug output')
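
# Typical invocation (defaults shown above):
#   ./ifr_fetch.py
#   ./ifr_fetch.py --url https://aip.dfs.de/basicIFR --debug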

args = parser.parse_args()


AIP_BASEURL = args.url


# Fetch the start page
response = requests.get(AIP_BASEURL)
response.raise_for_status()


# The start page uses a meta redirect which we have to follow manually.
soup = BeautifulSoup(response.content, 'html.parser')
result = soup.find('meta', attrs = { 'http-equiv': 'Refresh' })
if not result:
    raise AssertionError("A redirect is expected on the start page. Presumably the DFS has changed the layout of the start page, so this script will need to be adapted.")

url = result['content'].split(';')[1].strip()
if not url.lower().startswith('url='):
    raise AssertionError("The start page contains a broken redirect. Presumably the DFS has changed the layout of the start page, so this script will need to be adapted.")
url = url[4:]
url = AIP_BASEURL + '/' + url
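
# Illustrative example (assumed markup, not checked against the live page):
# a tag such as <meta http-equiv="Refresh" content="0; URL=pages/XYZ.html">
# would resolve to https://aip.dfs.de/basicIFR/pages/XYZ.html at this point.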

response = requests.get(url)
response.raise_for_status()


# Parse the start page
soup = BeautifulSoup(response.content, 'html.parser')


# Determine the publication date
aip_airac = soup.find('span', class_ = 'updated-time').text.strip()
aip_airac = re.fullmatch(r'Effective: ([0-9]{2}) ([A-Z]{3}) ([0-9]{4})', aip_airac)
if not aip_airac:
    raise AssertionError("Unable to determine the publication date of the AIP.")

MONTHS = \
{
    'JAN': 1,
    'FEB': 2,
    'MAR': 3,
    'APR': 4,
    'MAY': 5,
    'JUN': 6,
    'JUL': 7,
    'AUG': 8,
    'SEP': 9,
    'OCT': 10,
    'NOV': 11,
    'DEC': 12,
}
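
# Note: an explicit month table sidesteps datetime.strptime with '%b', whose
# abbreviated month names depend on the active locale.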

airac_month = aip_airac[2].upper()
if airac_month not in MONTHS:
raise AssertionError("Ungültiger Monat '%s'", airac_month)
airac_month = MONTHS[airac_month]

airac_day = int(aip_airac[1])
airac_year = int(aip_airac[3])

AIRAC_DATE = datetime.date(airac_year, airac_month, airac_day)
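
# For example (hypothetical value): a banner reading 'Effective: 25 AUG 2022'
# parses to datetime.date(2022, 8, 25).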


#
# Build the table of contents
#
def fetch_folder(url):
    baseurl = '/'.join(url.split('/')[:-1])

    result = []

    response = requests.get(url)
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    soup = soup.find('div', class_ = 'pageContent')

    # Only evaluate the first list. The start page contains several lists,
    # but only the first one is of interest there.
    soup = soup.find('ul')

    soup = soup.find_all('a')
    for e in soup:
        cls = e['class'][0]
        url = baseurl + '/' + e['href']

        entry = {}
        entry['href'] = url

        # Folders and documents are marked up with different CSS classes.
        if cls == 'folder-link':
            entry['prefix'] = e.find('span', class_ = 'folder-prefix').text.strip()
            entry['name'] = e.find('span', class_ = 'folder-name', lang = 'de').text.strip()

        elif cls == 'document-link':
            entry['prefix'] = e.find('span', class_ = 'document-prefix').text.strip()
            entry['name'] = e.find('span', class_ = 'document-name', lang = 'de').text.strip()

        if not entry['name']:
            del entry['name']

        if args.debug:
            print(entry['prefix'])

        # Descend recursively into subfolders.
        if cls == 'folder-link':
            entry['folder'] = fetch_folder(url)

        result.append(entry)

    return result
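
# The return value is a nested list of dicts, e.g. (made-up values):
#
#   [ { 'prefix': 'GEN', 'name': '...', 'href': '...', 'folder': [ ... ] },
#     { 'prefix': 'AD 2', 'href': '...' } ]
#
# 'folder' is only present for folder links; 'name' is dropped when empty.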


airac_string = AIRAC_DATE.isoformat()
toc_file = '%s.json' % airac_string

try:
    with open(toc_file) as f:
        toc = json.load(f)

    if toc['airac'] != airac_string:
        raise AssertionError("The AIRAC date '%s' of the table of contents '%s' does not match the date '%s' from the website." % ( toc['airac'], toc_file, airac_string ))

except FileNotFoundError:
    toc = {}
    toc['airac'] = airac_string
    toc['folder'] = fetch_folder(url)

    with open(toc_file, 'w') as f:
        json.dump(toc, f, indent = 2)
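
# The JSON file named after the AIRAC effective date acts as a cache:
# re-running the script within the same AIRAC cycle loads the stored table
# of contents instead of crawling the site again.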


#
# TODO: download the pages
#

# def fetch_docurl(url):
#     response = requests.get(url)
#     response.raise_for_status()
#
#     soup = BeautifulSoup(response.content, 'html.parser')
#     soup = soup.find('header')
#     soup = soup.find('a', target = '_blank')
#
#     return soup['href']
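#
# A possible continuation (untested sketch): walk the nested 'folder' lists
# of the saved table of contents, call fetch_docurl() on every entry without
# a 'folder' key, and download the document behind the returned link.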
