-
Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathwebsite_checker.py
166 lines (153 loc) · 4.02 KB
/
website_checker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import pytz
import os
import shutil
import difflib
def check_now(a, urls):
    """Fetch one monitored site and report whether its watched content changed.

    The page at ``urls[a]["url"]`` is downloaded, parsed with BeautifulSoup and
    saved to ``./contents/new/<name>``. If a previous snapshot exists at
    ``./contents/old/<name>``, the two files are diffed line by line and the
    site counts as updated when any diff line contains ``urls[a]["check"]``.

    Args:
        a: Index into ``urls`` of the site to check.
        urls: List of dicts, each with the keys ``"name"``, ``"url"`` and
            ``"check"``.

    Returns:
        ``"Updated"`` or ``"Same"`` when a comparison was made; ``None`` when
        the page could not be fetched or no old snapshot could be read.
    """
    url = urls[a]

    # Make sure the folder for the fresh snapshots exists.
    os.makedirs('./contents/new/', exist_ok=True)

    old_file_path = "./contents/old/" + url["name"]
    new_file_path = "./contents/new/" + url["name"]

    # Fetch and parse the latest version of the site. This is a best-effort
    # boundary: any failure is reported and this site is skipped so the
    # remaining sites are still checked.
    try:
        res_new = requests.get(url["url"], timeout=10).text
        soup_new = BeautifulSoup(res_new, 'html.parser')
    except Exception as e:
        print(e)
        return None

    # Save the new version. An explicit UTF-8 encoding keeps pages with
    # non-ASCII text writable regardless of the platform's default encoding.
    with open(new_file_path, "w", encoding="utf-8") as new:
        new.write(str(soup_new))

    # Load the previous snapshot, if there is one. errors="replace" tolerates
    # snapshots that an earlier run wrote with a different encoding.
    try:
        with open(old_file_path, "r", encoding="utf-8", errors="replace") as old:
            old_lines = old.readlines()
    except OSError:
        print("Old copy not found. Continue")
        return None

    # Re-read the new snapshot from disk so both sides of the diff go through
    # the same newline handling.
    with open(new_file_path, "r", encoding="utf-8") as new:
        new_lines = new.readlines()

    # n=0 drops context lines, so only changed lines (plus the diff headers)
    # are produced.
    diff = difflib.unified_diff(
        old_lines,
        new_lines,
        fromfile=old_file_path,
        tofile=new_file_path,
        n=0,
    )

    # The site is "Updated" as soon as one diff line contains the marker.
    status = "Same"
    for line in diff:
        if url['check'] in line:
            print("Content changed for: " + url["name"])
            print(line)
            status = "Updated"
            break

    print("Status [" + url["name"] + "] : " + status)
    return status
if __name__ == "__main__":
    # Check every configured site for changes, send a Telegram message for
    # each one that was updated, then promote the fresh snapshots to be the
    # baseline for the next run.

    # Take "now" directly in the target timezone so the date strings do not
    # depend on the local timezone of the machine running the script.
    tz = pytz.timezone('Asia/Kolkata')
    your_now = datetime.now(tz)

    # For KL
    today_1 = your_now.strftime('%Y/%m/%d/%d-%m-%Y')
    print(today_1)

    # For PY
    today_2 = your_now.strftime('%d-%m-%Y')
    print(today_2)

    # Setup URLS: "check" is the marker text that must appear in a changed
    # line for the site to count as updated.
    urls = [
        {
            "name": "AndamanandNicobarIslands",
            "url": "https://dhs.andaman.gov.in",
            "check": 'ISLANDS HEALTH BULLETIN FOR CONTAINMENT OF COVID-19',
        },
        # Disabled entry (no marker configured):
        # {
        #     "name": "Assam",
        #     "url": "https://covid19.assam.gov.in/all-districts/",
        #     "check": ''
        # },
        {
            "name": "Chandigarh",
            "url": "http://chdcovid19.in/",
            "check": '<div class="col-md-3 form-group">',
        },
        {
            "name": "DadraandNagarHaveliandDamanandDiu",
            "url": "https://dddcovid19.in/index",
            "check": 'ContentPlaceHolder1_lbldate',
        },
        {
            "name": "Haryana",
            "url": "http://www.nhmharyana.gov.in/page.aspx?id=208",
            "check": '<a href="/WriteReadData/userfiles/file/',
        },
        {
            "name": "Kerala",
            "url": "https://dhs.kerala.gov.in/%E0%B4%A1%E0%B5%86%E0%B4%AF%E0%B4%BF%E0%B4%B2%E0%B4%BF-%E0%B4%AC%E0%B5%81%E0%B4%B3%E0%B5%8D%E0%B4%B3%E0%B4%B1%E0%B5%8D%E0%B4%B1%E0%B4%BF%E0%B4%A8%E0%B5%8D%E2%80%8D/",
            "check": "https://dhs.kerala.gov.in/" + today_1 + "/",
        },
        {
            "name": "Puducherry",
            "url": "https://health.py.gov.in/",
            "check": "https://health.py.gov.in/bulletin-" + today_2 + "-1000am",
        },
        {
            "name": "Tripura",
            "url": "https://covid19.tripura.gov.in",
            "check": 'lblActiceCases',
        },
        {
            "name": "WestBengal",
            "url": "https://www.wbhealth.gov.in/pages/corona/bulletin",
            "check": 'href="https://www.wbhealth.gov.in/uploaded_files/corona/',
        },
        {
            "name": "Gujarat",
            "url": "https://gujcovid19.gujarat.gov.in/DrillDownCharts.aspx",
            "check": 'ctl00_body_lblDate',
        },
        {
            "name": "AndraPradesh",
            "url": "http://covid19.ap.gov.in/Covid19_Admin/index.html",
            "check": 'id="ActiveCases"',
        },
    ]

    # Setup BOT TOKEN and CHAT ID from environment variables (a missing
    # variable raises KeyError before any site is fetched).
    MONITOR_BOT_TOKEN = os.environ['MONITOR_BOT_TOKEN']
    CHAT_ID = os.environ['DATA_OPS_CHAT_ID']

    for a, site in enumerate(urls):
        status = check_now(a, urls)
        if status == "Updated":
            # Pass chat_id/text through `params` so requests percent-encodes
            # them; the text contains newlines and a URL whose own "?", "="
            # or "&" must not be interpreted as part of the API query string.
            requests.get(
                "https://api.telegram.org/bot"
                + MONITOR_BOT_TOKEN
                + "/sendMessage",
                params={
                    "chat_id": CHAT_ID,
                    "text": (
                        site['name']
                        + " site got updated!\n\n"
                        + "URL: "
                        + site['url']
                    ),
                },
                # Without a timeout a stalled connection would hang the run.
                timeout=10,
            )

    # Make the new contents as old. Only rotate when a new snapshot folder
    # actually exists, so the previous baseline is never deleted with nothing
    # to replace it.
    # NOTE(review): a site whose fetch failed in this run has no file in
    # ./contents/new/, so its old snapshot is dropped by this rotation and the
    # next run cannot compare it - confirm whether that is intended.
    if os.path.isdir('./contents/new/'):
        try:
            shutil.rmtree('./contents/old/')
        except FileNotFoundError:
            pass
        os.rename('./contents/new/', './contents/old/')