Commit
Download the AIP IFR table of contents
see #1
Showing 1 changed file with 188 additions and 0 deletions.
@@ -0,0 +1,188 @@
#!/usr/bin/env python3

#
# Copyright (C) 2022 Mario Haustein, [email protected]
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 3 of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#

import argparse
from bs4 import BeautifulSoup
import datetime
import json
import re
import requests


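# Command line interface: only the base URL of the IFR AIP and an optional
# debug switch are configurable.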
parser = argparse.ArgumentParser(
    description = "Download AIP IFR data"
)

parser.add_argument(
    '--url',
    type = str,
    default = "https://aip.dfs.de/basicIFR",
    help = 'base URL')

parser.add_argument(
    '--debug',
    action = 'store_true',
    help = 'generate debug output')

args = parser.parse_args()


AIP_BASEURL = args.url


# Fetch the start page
response = requests.get(AIP_BASEURL)
response.raise_for_status()


# The start page uses a meta redirect which has to be followed manually.
soup = BeautifulSoup(response.content, 'html.parser')
result = soup.find('meta', attrs = { 'http-equiv': 'Refresh' })
if not result:
    raise AssertionError("A redirect is expected on the start page. The DFS has probably changed the structure of the start page, so this script needs to be adapted.")

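# The refresh target is expected in the form "<seconds>; URL=<relative path>",
# so the part after the semicolon carries the link to the actual start page.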
url = result['content'].split(';')[1].strip()
if not url.lower().startswith('url='):
    raise AssertionError("The start page contains a broken redirect. The DFS has probably changed the structure of the start page, so this script needs to be adapted.")
url = url[4:]
url = AIP_BASEURL + '/' + url

response = requests.get(url)
response.raise_for_status()


# Parse the start page
soup = BeautifulSoup(response.content, 'html.parser')


# Determine the effective date
aip_airac = soup.find('span', class_ = 'updated-time').text.strip()
aip_airac = re.fullmatch(r'Effective: ([0-9]{2}) ([A-Z]{3}) ([0-9]{4})', aip_airac)
if not aip_airac:
    raise AssertionError("Cannot determine the effective date of the AIP.")

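# The month abbreviations are mapped by hand; this keeps the parsing
# independent of the current locale (strptime() with %b is locale dependent).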
MONTHS = \
{
    'JAN': 1,
    'FEB': 2,
    'MAR': 3,
    'APR': 4,
    'MAY': 5,
    'JUN': 6,
    'JUL': 7,
    'AUG': 8,
    'SEP': 9,
    'OCT': 10,
    'NOV': 11,
    'DEC': 12,
}

airac_month = aip_airac[2].upper()
if airac_month not in MONTHS:
    raise AssertionError("Invalid month '%s'" % airac_month)
airac_month = MONTHS[airac_month]

airac_day = int(aip_airac[1])
airac_year = int(aip_airac[3])

AIRAC_DATE = datetime.date(airac_year, airac_month, airac_day)


#
# Build the table of contents
#
def fetch_folder(url):
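    # Recursively collect the entries of an AIP folder page. Each entry is a
    # dict with 'href', 'prefix' and (if present) 'name'; folder entries
    # additionally carry their children in a nested 'folder' list.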
    baseurl = '/'.join(url.split('/')[:-1])

    result = []

    response = requests.get(url)
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    soup = soup.find('div', class_ = 'pageContent')

    # Only evaluate the first list. The start page contains several lists,
    # but only the first one is of interest there.
    soup = soup.find('ul')

    soup = soup.find_all('a')
    for e in soup:
        cls = e['class'][0]
        url = baseurl + '/' + e['href']

        entry = {}
        entry['href'] = url

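        # Folder and document entries use different CSS classes; in both cases
        # the German variant (lang="de") of the displayed name is taken.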
        if cls == 'folder-link':
            entry['prefix'] = e.find('span', class_ = 'folder-prefix').text.strip()
            entry['name'] = e.find('span', class_ = 'folder-name', lang = 'de').text.strip()

        elif cls == 'document-link':
            entry['prefix'] = e.find('span', class_ = 'document-prefix').text.strip()
            entry['name'] = e.find('span', class_ = 'document-name', lang = 'de').text.strip()

        if not entry['name']:
            del entry['name']

        if args.debug:
            print(entry['prefix'])

        if cls == 'folder-link':
            entry['folder'] = fetch_folder(url)

        result.append(entry)

    return result


airac_string = AIRAC_DATE.isoformat()
toc_file = '%s.json' % airac_string

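# Reuse a previously written table of contents for this AIRAC date; the folder
# tree is only fetched when no matching JSON file exists yet.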
try:
    with open(toc_file) as f:
        toc = json.load(f)

    if toc['airac'] != airac_string:
        raise AssertionError("AIRAC date '%s' of the table of contents '%s' does not match the date '%s' on the website." % ( toc['airac'], toc_file, airac_string ))

except FileNotFoundError:
    toc = {}
    toc['airac'] = airac_string
    toc['folder'] = fetch_folder(url)

    with open(toc_file, 'w') as f:
        json.dump(toc, f, indent = 2)
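
# Illustrative shape of the resulting <AIRAC date>.json file (all values are
# placeholders, not actual data):
#
# {
#   "airac": "YYYY-MM-DD",
#   "folder": [
#     {
#       "href": "https://aip.dfs.de/basicIFR/...",
#       "prefix": "...",
#       "name": "...",
#       "folder": [ ... ]
#     }
#   ]
# }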


#
# TODO: download the pages
#

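# The commented-out helper below sketches how the direct document URL could be
# extracted from a document page (the link in the page header that opens in a
# new tab); it is not used yet.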
# def fetch_docurl(url):
#     response = requests.get(url)
#     response.raise_for_status()
#
#     soup = BeautifulSoup(response.content, 'html.parser')
#     soup = soup.find('header')
#     soup = soup.find('a', target = '_blank')
#
#     return soup['href']