-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscript.py
34 lines (29 loc) · 1.13 KB
/
script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import requests
from tqdm import tqdm
from bs4 import BeautifulSoup
from run import app, db, Course, Review, Professor
# Backfill ``Course.description`` for every course that does not have one yet.
#
# For each such course the script queries the Columbia search endpoint
# (restricted to the "Directory_of_Classes" site), follows the first result,
# and copies the text of the table row labelled "Course Description" onto the
# course. Lookups are best-effort: a failure for one course prints that
# course's id and the run moves on to the next course.

# Seconds to wait on each HTTP request, so that a single stalled connection
# cannot hang the whole run.
REQUEST_TIMEOUT = 10

# Parser is named explicitly so the parse result does not depend on which
# optional parsers happen to be installed (and to avoid the bs4 warning).
HTML_PARSER = "html.parser"

# Number of processed courses between intermediate commits.
COMMIT_EVERY = 100

with app.app_context():
    db.create_all()

    processed = 0
    for course in tqdm(Course.query.all()):
        # Courses that already have a description are left untouched.
        if course.description:
            continue

        try:
            payload = {
                "site": "Directory_of_Classes",
                "num": 1,
                "q": "{} {}".format(course.name, course.number).lower(),
            }
            search = requests.get(
                "http://search.columbia.edu/search",
                params=payload,
                timeout=REQUEST_TIMEOUT,
            )

            # The text of the first <u> element on the results page is used
            # as the URL of the course's detail page.
            hit = BeautifulSoup(search.text, HTML_PARSER).find("u")
            if hit is not None:
                detail = requests.get(hit.text.strip(), timeout=REQUEST_TIMEOUT)
                soup = BeautifulSoup(detail.text, HTML_PARSER)
                for tr in soup.find_all("tr"):
                    tds = tr.find_all("td")
                    # Two-cell rows are treated as (label, value) pairs; if
                    # several rows match, the last one wins.
                    if len(tds) == 2 and tds[0].text.strip() == "Course Description":
                        course.description = tds[1].text.strip()
        except Exception:
            # Best-effort: report the failing course and keep going.
            # KeyboardInterrupt and SystemExit are not subclasses of
            # Exception, so Ctrl-C still aborts the run.
            print(course.id)

        # Commit in regular batches so an interrupted run keeps most of its
        # progress. Counting processed courses (instead of testing the
        # course id) keeps batch sizes even when ids are sparse or skipped.
        processed += 1
        if processed % COMMIT_EVERY == 0:
            db.session.commit()

    # Persist whatever is left over from the final partial batch.
    db.session.commit()