-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathyahoo.py
38 lines (29 loc) · 956 Bytes
/
yahoo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env python
"""
Scrape historical seasons of the PGA that are only available on Yahoo -- and,
sadly, only have the player's total for a given round.
"""
import os
import re
import lxml.html as lh
import requests as req
# HTTP headers sent with every schedule request: a browser-like User-Agent
# and a Referer pointing at Yahoo's own PGA schedule page.
headers = {
    "User-Agent": "Mozilla/5.0 (ESPN) AppleWebKit/535.30 (KHTML, like Gecko)",
    "Referer": "http://sports.yahoo.com/golf/pga/schedule",
}
def season(year):
    """Grab a Yahoo golf season's HTML schedule and save it to disk.

    Downloads the PGA schedule page for ``year``, extracts the element whose
    id is ``schedule`` and writes its serialized markup to
    ``ysports/<year>.html``. The ``ysports`` directory is created if it does
    not exist yet.

    Args:
        year: Season to fetch; interpolated into both the request URL and
            the output file name.

    Raises:
        IndexError: If the downloaded page contains no ``#schedule`` element.
    """
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print(year)
    url = "http://sports.yahoo.com/golf/pga/schedule?season={0}".format(year)
    text = req.get(url, headers=headers).text
    html = lh.fromstring(text)

    # Grab the schedule. Fail with an explicit message (same exception type
    # as the bare ``[0]`` lookup would raise) when the element is missing.
    matches = html.cssselect("#schedule")
    if not matches:
        raise IndexError(
            "no #schedule element found for season {0} ({1})".format(year, url)
        )
    schedule = lh.tostring(matches[0])

    file_name = "ysports/{0}.html".format(year)
    out_dir = os.path.dirname(file_name)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # lh.tostring() returns a byte string by default, so write in binary mode.
    with open(file_name, "wb") as f:
        f.write(schedule)
    # Report success only after the file has actually been written.
    print("\t Saved.")
if __name__ == "__main__":
    # Fetch seasons 1977 through 2000; the stop value of range() is exclusive.
    first_season, stop_season = 1977, 2001
    for year in range(first_season, stop_season):
        season(year)