Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewritten parser for Dresden from Unlifate/master #124

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
228 changes: 130 additions & 98 deletions parsers/dresden.py
Original file line number Diff line number Diff line change
@@ -1,107 +1,139 @@
from urllib.request import urlopen
from bs4 import BeautifulSoup as parse
import re

import json
from utils import Parser

from pyopenmensa.feed import LazyBuilder, extractDate

price_regex = re.compile(r'(?P<price>\d+[,.]\d{2}) ?€')
speiseplan_regex = re.compile(r'^Speiseplan\s+')
kein_angebot_regex = re.compile(r'kein Angebot')

roles = ('student', 'employee')


def parse_week(url, canteen):
data = urlopen(url).read().decode('utf-8')
document = parse(data, 'lxml')

# The day plans are in a div with no special class or id. Thus
# we try to find a div with a heading "Speiseplan "
for week_heading in document(class_='swdd-ueberschrift',
text=speiseplan_regex):
week_div = week_heading.parent

# The meals for each day a in card. Again there is no class or id to
# select the meal cards. Thus we lookung for all card with a card-header
# which stores the date
for card_header in week_div.find_all(class_='card-header'):
day_card = card_header.parent

try:
date = extractDate(card_header.text)
except ValueError:
# There was no valid date in the table header, which happens eg
# for special "Aktionswoche" cards.
# TODO: check if this card contains any meals, which was not the
# case when it was used for the first time.
continue

# Check if there is a "kein Angebot" item
if day_card.find(class_='list-group-item', text=kein_angebot_regex):
canteen.setDayClosed(date)
continue

# Iterate over the list-group-item within the card which are used
# for individual meals
for meal in day_card.find_all(class_='swdd-link-list-item'):

name = meal.find(name='span')
if name is not None:
name = name.text
else:
continue

if ': ' in name:
category, name = name.split(': ', 1)
else:
category = 'Angebote'

notes = [img['alt'] for img in meal.find_all(class_='swdd-spl-symbol')]

if '* ' in name:
name, note = name.split('* ', 1)
notes.append(note)

if meal.strong is not None:
prices = price_regex.findall(meal.strong.text)
else:
prices = []

canteen.addMeal(date, category, name, notes,
prices, roles)
from datetime import datetime, timedelta
from pyopenmensa.feed import LazyBuilder

# Grab all dresden canteen information from its public original legit API
API_VERSION = '2'
URL_BASE = 'https://api.studentenwerk-dresden.de/openmensa/v' + API_VERSION + '/'

# Which canteens do we want?
LOCATION = {
'dresden-reichenbachstrasse': 'Mensa Reichenbachstraße',
'dresden-zeltschloesschen': 'Zeltschlösschen',
'dresden-alte-mensa': 'Alte Mensa',
'dresden-mensologie': 'Mensologie',
'dresden-siedepunkt': 'Mensa Siedepunkt',
'dresden-johannstadt': 'Mensa Johannstadt',
'tharandt-tellerrandt': 'Mensa TellerRandt',
'dresden-palucca-hochschule': 'Mensa Palucca Hochschule',
'dresden-wueins': 'Mensa WUeins',
'dresden-bruehl': 'Mensa Brühl',
'dresden-stimm-gabel': 'Mensa Stimm-Gabel',
'dresden-u-boot': 'BioMensa U-Boot',
'dresden-mensa-sport': 'Mensa Sport',
'dresden-cafe-cube': 'Grill Cube',
'dresden-cafe-mobil': 'Pasta-Mobil',
'zittau-kraatschn': 'Mensa Kraatschn',
'zittau-mahlwerk': 'Mensa Mahlwerk',
'goerlitz-mio': 'MiO - Mensa im Osten',
'rothenburg-mensa': 'Mensa Rothenburg',
'bautzen-polizeihochschule': 'Mensa Bautzen Polizeihochschule',
'bautzen-oberschmausitz': 'Mensa Oberschmausitz'
}

# Create a dict where we save the original ids
LOCATION_ID = {}

# Which canteens does studentenwerk dresden have?
with urlopen(URL_BASE + 'canteens') as response:
canteens = json.loads(response.read().decode())

# Copy the original ids into our dict
for item in canteens:
for key in LOCATION:
if LOCATION[key] == item["name"]:
LOCATION_ID.update({key: item["id"]})


# Returns a list of days that the canteen supports (Format YYYY-MM-DD)
# based on https://doc.openmensa.org/api/v2/canteens/days/ >> "List days of a canteen"
def get_accepted_days(url):
with urlopen(url + '/days') as response:
result = json.loads(response.read().decode())
return result

def validate_prices(prices=None):
# @Studentenwerk Dresden: it would be nice to remove the two following lines. :-)
# If it's possible: update your roles
if prices is None:
prices = {}
for key in prices:
if prices[key] is None:
prices[key] = float(0)
else:
prices[key] = float(prices[key])
newprices = prices.copy()
for key in prices:
if key not in ['student', 'employee', 'pupil', 'other', 'Studierende', 'Bedienstete', 'Schüler']:
newprices['other'] = prices[key]
del newprices[key]
prices = newprices.copy()
if 'Studierende' in prices:
prices['student'] = prices['Studierende']
del prices['Studierende']
if 'Bedienstete' in prices:
prices['employee'] = prices['Bedienstete']
del prices['Bedienstete']
if 'Schüler' in prices:
prices['pupil'] = prices['Schüler']
del prices['Schüler']
return prices


def parse_today(url, canteen):
days = get_accepted_days(url)
# check if today is within the accepted days
today_time_string = datetime.today().strftime("%Y-%m-%d")
for day in days:
if day["date"] == today_time_string:
if day["closed"]:
canteen.setDayClosed(today_time_string)
else:
with urlopen(url + '/days/' + today_time_string + '/meals') as response:
meals = json.loads(response.read().decode())
for meal in meals:
canteen_prices = validate_prices(meal["prices"])
canteen.addMeal(today_time_string, meal["category"], meal["name"], meal["notes"],
canteen_prices)


def parse_full(url, canteen):
all_days = get_accepted_days(url)
result_days = {}
# get first day of timespan - e.g. the first day of this week
day_begin = (datetime.today() - timedelta(days=datetime.today().weekday())).date()
# add 14 days, as we need this week + next week
for i in range(0, 14):
for day in all_days:
day_iteration = (day_begin + timedelta(i)).isoformat() # (str) "2021-08-20"
if day["date"] == day_iteration:
# save all 14 days, that we want and their closed-state
result_days[day["date"]] = day["closed"]
# for each day set a meal or set closed
for day in result_days:
if result_days[day] is True:
canteen.setDayClosed(day)
else:
with urlopen(url + '/days/' + day + '/meals') as response:
meals = json.loads(response.read().decode())
for meal in meals:
canteen_prices = validate_prices(meal["prices"])
canteen.addMeal(day, meal["category"], meal["name"], meal["notes"], canteen_prices)


def parse_url(url, today=False):
canteen = LazyBuilder()
parse_week(url + '.html?view=list', canteen)
if not today:
parse_week(url + '-w1.html?view=list', canteen)
parse_week(url + '-w2.html?view=list', canteen)
if today:
parse_today(url, canteen)
else:
parse_full(url, canteen)
return canteen.toXMLFeed()


parser = Parser('dresden', handler=parse_url,
shared_prefix='https://www.studentenwerk-dresden.de/mensen/speiseplan/')
parser.define('dresden-reichenbachstrasse', suffix='mensa-reichenbachstrasse')
parser.define('dresden-zeltschloesschen', suffix='zeltschloesschen')
parser.define('dresden-alte-mensa', suffix='alte-mensa')
parser.define('dresden-mensologie', suffix='mensologie')
parser.define('dresden-siedepunkt', suffix='mensa-siedepunkt')
parser.define('dresden-johannstadt', suffix='mensa-johannstadt')
parser.define('tharandt-tellerrandt', suffix='mensa-tellerrandt')
parser.define('dresden-palucca-hochschule', suffix='mensa-palucca-hochschule')
parser.define('dresden-wueins', suffix='mensa-wueins')
parser.define('dresden-bruehl', suffix='mensa-bruehl')
parser.define('dresden-stimm-gabel', suffix='mensa-stimm-gabel')
parser.define('dresden-u-boot', suffix='u-boot')
parser.define('dresden-mensa-sport', suffix='mensa-sport')
parser.define('dresden-cafe-cube', suffix='grill-cube')
parser.define('dresden-cafe-mobil', suffix='pasta-mobil')
parser.define('zittau-kraatschn', suffix='mensa-kraatschn')
parser.define('zittau-mahlwerk', suffix='mensa-mahlwerk')
parser.define('goerlitz-mio', suffix='mio-mensa-im-osten')
parser.define('rothenburg-mensa', suffix='mensa-rothenburg')
parser.define('bautzen-polizeihochschule', suffix='mensa-bautzen')
parser.define('bautzen-oberschmausitz', suffix='mensa-oberschmausitz')
parser = Parser('dresden', handler=parse_url, shared_prefix=URL_BASE + 'canteens/')

# each canteen needs to be defined
for key in LOCATION_ID:
parser.define(key, suffix=str(LOCATION_ID[key]))