Skip to content

Commit

Permalink
code is added to populate database from two different sources
Browse files Browse the repository at this point in the history
  • Loading branch information
Sohan Islam committed Sep 18, 2019
1 parent d432ef4 commit 34f2ca8
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 16 deletions.
42 changes: 36 additions & 6 deletions parser/scholarshipscanadaParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from urllib.request import urlopen
from bs4 import BeautifulSoup
from selenium import webdriver
import pymysql


BASE_URL = 'https://www.scholarshipscanada.com'
Expand All @@ -29,16 +30,45 @@ def parse(soup):

for i in range(len(awardNames)):
awardInfo = awardNames[i].text.strip().split('\n')
print("award name: {}".format(awardInfo[0]))
print("school: {}".format(awardInfo[1][8:]))
print("field of study: {}".format(awardInfo[2][16:]))
print("amount: {}".format(amounts[i].text.strip()))
print("deadline: {}".format(deadlines[i].text.strip()))
print("link: {}".format(links[i]))
title = awardInfo[0]
provider = awardInfo[1][8:]
field = awardInfo[2][16:]
value = amounts[i].text.strip()
deadline = deadlines[i].text.strip()
link = links[i]
print("award name: {}".format(title))
print("school: {}".format(provider))
print("field of study: {}".format(field))
print("amount: {}".format(value))
print("deadline: {}".format(deadline))
print("link: {}".format(link))
print()

# Insert the record with a parameterized query: the driver escapes each
# scraped value, so a quote inside an award name or school can no longer
# break the statement (or inject SQL), which previously failed silently.
sql = (
    "INSERT INTO scholarship "
    "(link, school, award_name, amount, deadline, field_of_study) "
    "VALUES (%s, %s, %s, %s, %s, %s)"
)
# str(link) mirrors the old '%s' formatting in case links[i] is not
# already a plain string -- TODO confirm what `links` holds.
params = (str(link), provider, title, value, deadline, field)
try:
    # Execute the SQL command and commit the new row.
    cursor.execute(sql, params)
    db.commit()
except pymysql.MySQLError as err:
    # Roll back the failed insert and report it instead of hiding it.
    db.rollback()
    print("insert failed for {!r}: {}".format(title, err))

print("len: {}".format(len(awardNames)))


# Open the database connection. Keyword arguments are used because
# PyMySQL 1.0+ no longer accepts positional connection parameters.
# NOTE(review): credentials are hard-coded; consider loading them from
# the environment or a config file.
db = pymysql.connect(
    host='10.5.29.7',
    user='user',
    password='password',
    database='ixhack',
)

try:
    # parse() reads the module-level `cursor` and `db` to insert rows.
    cursor = db.cursor()

    soup = getSoup('https://www.scholarshipscanada.com/Scholarships/ScholarshipSearch.aspx?type=ScholarshipName&s=toronto')
    parse(soup)
finally:
    # Always disconnect from the server, even if scraping or parsing fails.
    db.close()
27 changes: 17 additions & 10 deletions parser/universitystudyParser.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,19 @@ def getSoup(link):
return soup

# Open database connection
db = pymysql.connect('10.5.29.7', 'ixhack', 'user', 'password')
db = pymysql.connect('10.5.29.7', 'user', 'password', 'ixhack')

# prepare a cursor object using cursor() method
cursor = db.cursor()

# Remove all entries from the table before the statements below run.
try:
    cursor.execute("DELETE FROM scholarship")
    db.commit()
except pymysql.MySQLError as err:
    # Undo the failed delete and report it rather than claiming success.
    db.rollback()
    print("failed to clear scholarship table: {}".format(err))
else:
    # Only announce success when the delete was actually committed.
    print("deleted everything")

# Parse html into scholarship info
j = 0
total = 0
Expand All @@ -35,13 +43,15 @@ def getSoup(link):
total += 1
link = award.find('a')['href']
awardSoup = getSoup(link)

sidebar = awardSoup.find('div',attrs={'class': 'box'})
sidebarContent = sidebar.find_all('p')
if(deadLine != "Closed"):
provider = sidebarContent[0].text.strip()
value = sidebarContent[1].text.strip()
deadLine = sidebarContent[3].text.strip()
applyNow = sidebarContent[-2]
link = applyNow.find('a')
if link:
link = link['href']
provider = sidebarContent[-5].text.strip()
value = sidebarContent[-4].text.strip()
deadLine = sidebarContent[-2].text.strip()[:-9]
title = awardSoup.find('h1', attrs={'class': 'entry-title'}).text.strip()
print("title: {}".format(title))
print("link: {}".format(link))
Expand All @@ -55,11 +65,8 @@ def getSoup(link):
# Prepare SQL query to INSERT a record into the database.
sql = "INSERT INTO scholarship \
(link, school, award_name, amount, deadline, field_of_study) \
VALUES ('%s', '%s', '%s', '%d', '%s', '%s')" % \
VALUES ('%s', '%s', '%s', '%s', '%s', '%s')" % \
(link, provider, title, value, deadLine, field)
# VALUES ('%s', '%s', '%s', '%s', '%d', \
# '%s', '%s', '%s', '%s', '%s', '%s', \
# '%d', '%d', '%s' )" % \
try:
# Execute the SQL command
cursor.execute(sql)
Expand Down

0 comments on commit 34f2ca8

Please sign in to comment.