-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathget_links.py
48 lines (34 loc) · 1.14 KB
/
get_links.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/env python
# get_links.py
import re
import sys
import urllib
import urlparse
from bs4 import BeautifulSoup
from os.path import basename
class MyOpener(urllib.FancyURLopener):
    """URL opener that presents a desktop-browser user agent.

    The opener classes in ``urllib`` use the class-level ``version``
    attribute as their user-agent string, so overriding it here replaces
    the library default for every request made through this opener.
    """

    # Firefox 3.6 user-agent string, split only for line length; the
    # adjacent literals concatenate to the original single value.
    version = (
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.15) '
        'Gecko/20110303 Firefox/3.6.15'
    )
def process(url):
    """Fetch *url*, collect every hyperlink on the page and print the result.

    Each ``<a>`` element that carries an ``href`` is resolved against *url*
    and turned into a dict with these keys:

    * ``name``  -- last path component of the resolved link
    * ``thumb`` -- a fixed placeholder thumbnail URL
    * ``video`` -- the resolved (absolute) link
    * ``genre`` -- always ``'2017'``

    The entries are stored as a list under the ``'2017'`` key of a dict,
    and that dict is printed to stdout.

    Args:
        url: Address of the page to scrape; also used as the base when
            resolving relative links.
    """
    page = MyOpener().open(url)
    try:
        text = page.read()
    finally:
        # Release the connection even when reading the response fails.
        page.close()

    soup = BeautifulSoup(text, "html.parser")

    entries = []
    for tag in soup.find_all('a', href=True):
        # Resolve into a local rather than writing the absolute URL back
        # into the parse tree, which nothing reads afterwards.
        link = urlparse.urljoin(url, tag['href'])
        entries.append({
            'name': basename(link),
            'thumb': 'http://www.vidsplay.com/wp-content/uploads/2017/04/crab-screenshot.jpg',
            'video': link,
            'genre': '2017',
        })

    VIDEOS = {'2017': entries}
    # Single-argument call form: identical output under the Python 2 print
    # statement, and still valid syntax on Python 3.
    print(VIDEOS)
# process(url)
def main(url):
    """Script entry point: hand *url* to ``process``.

    Args:
        url: Address of the page whose links should be scraped and printed.
    """
    process(url)
# main()
if __name__ == "__main__":
    # Scrape the page given as the first command-line argument; with no
    # argument, fall back to the original hard-coded 2017 video listing so
    # a bare invocation behaves exactly as before.
    main(sys.argv[1] if len(sys.argv) > 1
         else 'http://www.stocktonsda.org/Videos/2017')