Download the table of contents of the AIP IFR
see #1
hamarituc committed Aug 28, 2022
1 parent 9f69ba0 commit 64dc8a7
Showing 1 changed file with 188 additions and 0 deletions.
ifr_fetch.py
@@ -0,0 +1,188 @@
#!/usr/bin/env python3

#
# Copyright (C) 2022 Mario Haustein, [email protected]
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 3 of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#

import argparse
from bs4 import BeautifulSoup
import datetime
import json
import re
import requests



parser = argparse.ArgumentParser(
    description = "Download AIP IFR data"
)

parser.add_argument(
    '--url',
    type = str,
    default = "https://aip.dfs.de/basicIFR",
    help = 'base URL')

parser.add_argument(
    '--debug',
    action = 'store_true',
    help = 'generate debug output')
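
# Typical invocation (defaults shown above):
#   ./ifr_fetch.py
#   ./ifr_fetch.py --url https://aip.dfs.de/basicIFR --debug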

args = parser.parse_args()


AIP_BASEURL = args.url


# Fetch the start page
response = requests.get(AIP_BASEURL)
response.raise_for_status()


# The start page uses a meta redirect which we have to follow manually.
soup = BeautifulSoup(response.content, 'html.parser')
result = soup.find('meta', attrs = { 'http-equiv': 'Refresh' })
if not result:
    raise AssertionError("A redirect is expected on the start page. Presumably the DFS has changed the layout of the start page, so this script will need to be adapted.")

url = result['content'].split(';')[1].strip()
if not url.lower().startswith('url='):
    raise AssertionError("The start page contains a broken redirect. Presumably the DFS has changed the layout of the start page, so this script will need to be adapted.")
url = url[4:]
url = AIP_BASEURL + '/' + url
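
# Illustrative example (assumed markup, not checked against the live page):
# a tag such as <meta http-equiv="Refresh" content="0; URL=pages/XYZ.html">
# would resolve to https://aip.dfs.de/basicIFR/pages/XYZ.html at this point.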

response = requests.get(url)
response.raise_for_status()


# Parse the start page
soup = BeautifulSoup(response.content, 'html.parser')


# Determine the publication date
aip_airac = soup.find('span', class_ = 'updated-time').text.strip()
aip_airac = re.fullmatch(r'Effective: ([0-9]{2}) ([A-Z]{3}) ([0-9]{4})', aip_airac)
if not aip_airac:
    raise AssertionError("Unable to determine the publication date of the AIP.")

MONTHS = \
{
    'JAN': 1,
    'FEB': 2,
    'MAR': 3,
    'APR': 4,
    'MAY': 5,
    'JUN': 6,
    'JUL': 7,
    'AUG': 8,
    'SEP': 9,
    'OCT': 10,
    'NOV': 11,
    'DEC': 12,
}
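
# Note: an explicit month table sidesteps datetime.strptime with '%b', whose
# abbreviated month names depend on the active locale.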

airac_month = aip_airac[2].upper()
if airac_month not in MONTHS:
raise AssertionError("Ungültiger Monat '%s'", airac_month)
airac_month = MONTHS[airac_month]

airac_day = int(aip_airac[1])
airac_year = int(aip_airac[3])

AIRAC_DATE = datetime.date(airac_year, airac_month, airac_day)
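
# For example (hypothetical value): a banner reading 'Effective: 25 AUG 2022'
# parses to datetime.date(2022, 8, 25).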


#
# Build the table of contents
#
def fetch_folder(url):
    baseurl = '/'.join(url.split('/')[:-1])

    result = []

    response = requests.get(url)
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    soup = soup.find('div', class_ = 'pageContent')

    # Only evaluate the first list. The start page contains several lists,
    # but only the first one is of interest there.
    soup = soup.find('ul')

    soup = soup.find_all('a')
    for e in soup:
        cls = e['class'][0]
        url = baseurl + '/' + e['href']

        entry = {}
        entry['href'] = url

        # Folders and documents are marked up with different CSS classes.
        if cls == 'folder-link':
            entry['prefix'] = e.find('span', class_ = 'folder-prefix').text.strip()
            entry['name'] = e.find('span', class_ = 'folder-name', lang = 'de').text.strip()

        elif cls == 'document-link':
            entry['prefix'] = e.find('span', class_ = 'document-prefix').text.strip()
            entry['name'] = e.find('span', class_ = 'document-name', lang = 'de').text.strip()

        if not entry['name']:
            del entry['name']

        if args.debug:
            print(entry['prefix'])

        # Descend recursively into subfolders.
        if cls == 'folder-link':
            entry['folder'] = fetch_folder(url)

        result.append(entry)

    return result
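
# The return value is a nested list of dicts, e.g. (made-up values):
#
#   [ { 'prefix': 'GEN', 'name': '...', 'href': '...', 'folder': [ ... ] },
#     { 'prefix': 'AD 2', 'href': '...' } ]
#
# 'folder' is only present for folder links; 'name' is dropped when empty.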


airac_string = AIRAC_DATE.isoformat()
toc_file = '%s.json' % airac_string

try:
    with open(toc_file) as f:
        toc = json.load(f)

    if toc['airac'] != airac_string:
        raise AssertionError("The AIRAC date '%s' of the table of contents '%s' does not match the date '%s' from the website." % ( toc['airac'], toc_file, airac_string ))

except FileNotFoundError:
    toc = {}
    toc['airac'] = airac_string
    toc['folder'] = fetch_folder(url)

    with open(toc_file, 'w') as f:
        json.dump(toc, f, indent = 2)
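
# The JSON file named after the AIRAC effective date acts as a cache:
# re-running the script within the same AIRAC cycle loads the stored table
# of contents instead of crawling the site again.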


#
# TODO: download the pages
#

# def fetch_docurl(url):
#     response = requests.get(url)
#     response.raise_for_status()
#
#     soup = BeautifulSoup(response.content, 'html.parser')
#     soup = soup.find('header')
#     soup = soup.find('a', target = '_blank')
#
#     return soup['href']
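#
# A possible continuation (untested sketch): walk the nested 'folder' lists
# of the saved table of contents, call fetch_docurl() on every entry without
# a 'folder' key, and download the document behind the returned link.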
