-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbillboard_biz.py
135 lines (121 loc) · 4.16 KB
/
billboard_biz.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
__author__ = 'jpatdalton'
'''
This file scrapes the producers, writers, and label of each song on the Billboard Biz Hot 100 chart.
'''
import urllib2
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import columns
def get_details(worksheet, indices, end):
    """Scrape Billboard Biz Hot 100 credits and write them to the worksheet.

    Opens the chart page in Firefox, scrolls to the bottom, waits for the
    page to settle, and parses every ``.details`` element into a producers
    string, a writers string and a label string. The values are then written
    into the writers / producers / label columns named by the ``columns``
    module.

    Every parsed entry contributes exactly one value to each of the three
    lists, so the n-th entry always lines up with ``indices[n]``. A field
    that cannot be found in the entry text is stored as ``'None Listed'``.

    Args:
        worksheet: the google drive worksheet; must provide ``range`` and
            ``update_cells``.
        indices: the spreadsheet index for each top 100 element.
            ``indices[n]`` is the position, inside the fetched column ranges
            (which start at row 2), that receives the n-th parsed entry.
        end: Last index of spreadsheet (last row of the fetched ranges).
    """
    url = 'http://www.billboard.com/biz/charts/the-billboard-hot-100'
    driver = webdriver.Firefox()
    try:
        driver.get(url)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        # Give the page time to finish loading after the scroll.
        time.sleep(10)
        html = driver.page_source
    finally:
        # Always end the browser session, even if loading the page failed.
        driver.quit()

    soup = BeautifulSoup(html)
    titles = soup.select(".details")

    # presumably column letters such as 'A' -- verify against the columns module
    col_writers = columns.writers
    col_producers = columns.producers
    col_label = columns.label
    cell_list_writers = worksheet.range(col_writers + '2:' + col_writers + str(end))
    cell_list_producers = worksheet.range(col_producers + '2:' + col_producers + str(end))
    cell_list_labels = worksheet.range(col_label + '2:' + col_label + str(end))

    missing = 'None Listed'
    writers = []
    producers = []
    labels = []
    for position, title in enumerate(titles):
        lines = title.get_text().split('\n')
        # The credits are on the second-to-last line; a single-line entry
        # falls back to that only line.
        text = (lines[-2] if len(lines) > 1 else lines[0]).strip()

        # Expected shape: "<producers> (<writers>) <label>".
        before_paren = text.split('(')
        producer = before_paren[0].strip()
        writer = None
        label = None
        if len(before_paren) > 1:
            after_paren = before_paren[1].split(')')
            writer = after_paren[0]
            if len(after_paren) > 1:
                label = ' '.join(after_paren[1].split())

        if writer is None or label is None:
            print('entry %d has no writers or label listed Billboard Biz' % position)

        # Append to all three lists together so they can never drift apart.
        producers.append(producer)
        writers.append(missing if writer is None else writer)
        labels.append(missing if label is None else label)

    print('%d %d %d' % (len(writers), len(producers), len(labels)))

    for n, ind in enumerate(indices):
        try:
            values = (writers[n], producers[n], labels[n])
            cells = (cell_list_writers[ind],
                     cell_list_producers[ind],
                     cell_list_labels[ind])
        except (IndexError, TypeError) as e:
            # Best effort: report the row that could not be matched and
            # carry on with the remaining ones.
            print('%s Billboard Biz' % (e,))
            continue
        for cell, value in zip(cells, values):
            cell.value = value

    worksheet.update_cells(cell_list_writers)
    worksheet.update_cells(cell_list_producers)
    worksheet.update_cells(cell_list_labels)
def get_writers_producers_labels(username='', password=''):
    """Log in to Billboard Biz and return the Hot 100 writers, producers and labels.

    Opens the chart page in Firefox, follows the 'Log In' link, submits the
    given credentials, scrolls to the bottom of the chart and parses every
    ``.details`` element. The three returned lists always have the same
    length: one item per parsed element, in page order. A field that cannot
    be found in an element's text is reported on stdout and stored as
    ``'None Listed'``.

    Args:
        username: text typed into the login form's name field. Defaults to
            an empty string.
        password: text typed into the login form's password field. Defaults
            to an empty string.

    Returns:
        writers: list of the top 100 writers
        producers: list of the top 100 producers
        labels: list of the top 100 labels
    """
    url = 'http://www.billboard.com/biz/charts/the-billboard-hot-100'
    driver = webdriver.Firefox()
    try:
        driver.get(url)
        time.sleep(11)
        driver.find_element_by_link_text('Log In').click()
        time.sleep(5)
        name_field = driver.find_element_by_id('edit-name')
        name_field.send_keys(username)
        time.sleep(1)
        password_field = driver.find_element_by_id('edit-pass')
        password_field.send_keys(password)
        time.sleep(1)
        password_field.send_keys(Keys.ENTER)
        time.sleep(11)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(4)
        html = driver.page_source
    finally:
        # Always end the browser session, even if the login flow failed.
        driver.quit()

    soup = BeautifulSoup(html)
    titles = soup.select(".details")

    missing = 'None Listed'
    writers = []
    producers = []
    labels = []
    for title in titles:
        lines = title.get_text().split('\n')
        # The credits are on the second-to-last line; a single-line entry
        # falls back to that only line.
        text = (lines[-2] if len(lines) > 1 else lines[0]).strip()

        # Expected shape: "<producers> (<writers>) <label>". Each field is
        # derived from this entry's text only, so nothing can leak in from
        # the previously parsed entry.
        before_paren = text.split('(')
        # Everything before the first '(' (possibly an empty string).
        producers.append(before_paren[0].strip())

        after_paren = None
        if len(before_paren) > 1:
            after_paren = before_paren[1].split(')')
            writers.append(after_paren[0])
        else:
            print(title)
            print('The above element has no writers')
            writers.append(missing)

        if after_paren is not None and len(after_paren) > 1:
            labels.append(' '.join(after_paren[1].split()))
        else:
            print(title)
            print('The above element has no label')
            labels.append(missing)

    print('%d %d %d' % (len(writers), len(producers), len(labels)))
    return writers, producers, labels