-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpyputteer.py
78 lines (56 loc) · 2.5 KB
/
pyputteer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import asyncio
from pyppeteer import launch
from bs4 import BeautifulSoup
from mdate import convertdate,convertepoch,today,tillmaturity,daystillmaturity
_DEFAULT_CHROME_PATH = "/usr/bin/google-chrome-stable"
_SEARCH_URL = "https://www.bondsupermart.com/bsm/general-search/"
_FACTSHEET_PREFIX = "/bsm/bond-factsheet/"
# Every result row contributes this many matching cells, consumed in order:
# sector, coupon rate, maturity date, ask price, yield to maturity.
_CELLS_PER_ROW = 5


async def scrape_website(term, executable_path=_DEFAULT_CHROME_PATH):
    """Search bondsupermart.com for *term* and return the bonds it lists.

    A headless Chrome is launched, the general-search page for ``term`` is
    loaded, and the first ``.ant-table`` element on the page is parsed.

    Args:
        term: Search term. It is converted with ``str()`` and appended to the
            search URL as-is (no URL-encoding is applied here).
        executable_path: Chrome binary to launch. Defaults to the path this
            function has always used.

    Returns:
        A dict keyed by the identifier found in each bond's factsheet link.
        Each value is a dict with the keys ``bondname``, ``sector``,
        ``couponrate``, ``maturitydate``, ``maturitydateepoch``,
        ``tillmaturityepoch``, ``daystillmaturity``, ``askprice`` and ``ytm``.
        The same dict is also printed to stdout.

    Raises:
        Whatever pyppeteer raises when the browser cannot be launched, the
        page cannot be loaded, or the page has no ``.ant-table`` element.
    """
    browser = await launch(
        headless=True,
        executablePath=executable_path,
        args=['--no-sandbox', '--disable-gpu'],
    )
    try:
        # The page is opened inside the ``try`` so that the browser process is
        # closed even when opening the page itself fails.
        page = await browser.newPage()
        await page.setViewport({'width': 1920, 'height': 1080})
        await page.setExtraHTTPHeaders({'Accept-Encoding': 'gzip, deflate, br'})
        await page.goto(_SEARCH_URL + str(term), {'waitUntil': 'networkidle0'})
        content = await page.evaluate("document.querySelector('.ant-table').outerHTML")
    finally:
        await browser.close()

    # Parsing only needs the captured HTML, so it runs after the browser has
    # been released.
    store = _parse_bond_table(content)
    print(store)
    return store


def _parse_bond_table(content):
    """Build the ISIN-keyed result dict from the results table's HTML."""
    soup = BeautifulSoup(content, "html.parser")
    links = soup.find_all(class_="link-primary")
    cells = soup.find_all(class_="text-black-7 font-semibold ant-table-cell")

    store = {}
    for index, link in enumerate(links):
        # Rows are laid out back to back in ``cells``; slicing by offset
        # avoids repeatedly popping from the front of the list.
        start = index * _CELLS_PER_ROW
        parsed = _parse_bond_row(link, cells[start:start + _CELLS_PER_ROW])
        if parsed is None:
            # Links and cells are matched purely by position, so once one row
            # cannot be read the remaining pairing is not trustworthy: stop.
            break
        isin, details = parsed
        store[isin] = details
    return store


def _parse_bond_row(link, row):
    """Return ``(isin, details)`` for one result row, or ``None`` if unusable."""
    # Read the attribute from the parsed tag instead of splitting the tag's
    # serialised HTML, which depended on attribute order.
    href = link.get("href") or ""
    _, prefix_found, isin = href.partition(_FACTSHEET_PREFIX)
    title = link.find("strong")
    if not prefix_found or title is None or len(row) < _CELLS_PER_ROW:
        return None

    sector, coupon_rate, maturity_date, ask_price, ytm = (cell.text for cell in row)
    try:
        epoch = convertepoch(convertdate(maturity_date))
        till_maturity = tillmaturity(epoch, today())
        days_left = daystillmaturity(till_maturity)
    except Exception:
        # The mdate helpers' failure modes are not visible from here; any
        # failure marks the row as unreadable. Unlike a bare ``except`` this
        # lets KeyboardInterrupt and SystemExit propagate.
        return None

    return isin, {
        # get_text() drops the search-highlight <span> markup and decodes
        # HTML entities such as &amp; in the bond name.
        "bondname": title.get_text(),
        "sector": sector,
        "couponrate": coupon_rate,
        "maturitydate": maturity_date,
        "maturitydateepoch": epoch,
        "tillmaturityepoch": till_maturity,
        "daystillmaturity": days_left,
        "askprice": ask_price,
        "ytm": ytm,
    }
async def intercept_request(req):
    """Abort image and stylesheet requests; let every other request continue.

    ``req`` must expose a ``resourceType`` attribute and awaitable ``abort()``
    and ``continue_()`` methods.
    NOTE(review): nothing visible in this file registers this handler on a
    page - confirm whether it is still meant to be used.
    """
    is_blocked = req.resourceType in ('image', 'stylesheet')
    if is_blocked:
        await req.abort()
        return
    await req.continue_()
#asyncio.run(scrape_website("Apple"))