forked from stanfordjournalism/search-script-scrape
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path71.py
32 lines (28 loc) · 1.01 KB
/
71.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# The percent increase in number of babies named Archer nationwide in 2010 compared to 2000, according to the Social Security Administration
# landing page:
# http://www.ssa.gov/oact/babynames/limits.html
import csv
import os
import requests
from shutil import unpack_archive
LOCAL_DATADIR = "/tmp/babynames"
url = 'http://www.ssa.gov/oact/babynames/names.zip'
zname = os.path.join(LOCAL_DATADIR, 'names.zip')
NAME = 'Archer'
BASE_YEAR = 2000
TARGET_YEAR = 2010
DOWNLOAD_TIMEOUT = 60  # seconds; keeps a stalled connection from hanging the script


def fetch_names(source_url=url, archive=zname, datadir=LOCAL_DATADIR):
    """Download the names archive (if not already cached) and unpack it.

    Args:
        source_url: URL of the zip archive to download.
        archive: Local path where the zip archive is cached.
        datadir: Directory the archive is unpacked into; created if missing.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    os.makedirs(datadir, exist_ok=True)
    # download the file
    if not os.path.exists(archive):
        print("Downloading", source_url, 'to', archive)
        resp = requests.get(source_url, timeout=DOWNLOAD_TIMEOUT)
        # Fail before writing: otherwise an error page would be cached as
        # names.zip and every later run would skip the download and break.
        resp.raise_for_status()
        with open(archive, 'wb') as f:
            f.write(resp.content)
    # Unzip the data
    print('Unzipping', archive, 'to', datadir)
    unpack_archive(archive, datadir)


def count_name(year, name=NAME, datadir=LOCAL_DATADIR):
    """Return the total count recorded for `name` in the file for `year`.

    Reads `yob<year>.txt` inside `datadir`. Every row whose first column
    equals `name` contributes its third column to the total, so a name that
    appears on several rows is summed across them.

    Args:
        year: Year used to build the file name.
        name: Exact (case-sensitive) value to match in the first column.
        datadir: Directory containing the `yob<year>.txt` files.

    Returns:
        The summed count as an int; 0 when no row matches.
    """
    fname = os.path.join(datadir, "yob%d.txt" % year)
    # newline='' is what the csv module asks for; the `with` closes the file.
    with open(fname, newline='', encoding='utf-8') as f:
        # each row looks like this:
        # Pamela,F,258
        # `row and ...` skips blank lines, which csv yields as empty lists.
        return sum(int(row[2]) for row in csv.reader(f)
                   if row and row[0] == name)


def percent_change(old, new):
    """Return the percent change going from `old` to `new`.

    Raises:
        ZeroDivisionError: If `old` is 0, since the change is then undefined.
    """
    if old == 0:
        raise ZeroDivisionError(
            "baseline count is 0; percent change is undefined")
    return 100 * (new - old) / old


def main():
    """Fetch the data and print the percent change between the two years."""
    fetch_names()
    totals = {year: count_name(year) for year in (BASE_YEAR, TARGET_YEAR)}
    print(percent_change(totals[BASE_YEAR], totals[TARGET_YEAR]))


if __name__ == "__main__":
    main()