-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck_file.py
40 lines (30 loc) · 1.46 KB
/
check_file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Checks whether the records file for a place exists and returns the record number from which scraping should start for that file/place.
import codecs
import pathlib
import re
# Base directory of the project; the record CSVs and the logs directory
# are located beneath it. The trailing backslash is required because paths
# are built by plain string concatenation.
project_path = 'C:\\Users\\username\\PycharmProjects\\TNregi_Scrape\\'
# Directory (with trailing backslash) in which the "LastLines_<year>.txt"
# log files are written.
logs_path = project_path + 'logs\\'
def check_file(mar_type, mar_place, mar_year) -> int:
    """Return the record number at which scraping should start or resume.

    The records file is expected at
    ``{project_path}{mar_type}\\{mar_year}\\RECORDS_{mar_type}_{mar_place}_{mar_year}.csv``.

    * If that file does not exist, this is a fresh run and ``1`` is returned.
    * If it exists, the last non-blank line is searched for a
      ``<name>/<number>/<number>`` reference (preceded by ``.../`` and
      followed by a comma). The reference is appended to
      ``{logs_path}LastLines_{mar_year}.txt`` and ``<first number> + 1`` is
      returned.
    * If the file exists but is empty, or its last non-blank line holds no
      such reference (e.g. a header-only file), ``1`` is returned instead of
      failing with an ``IndexError``.

    Args:
        mar_type: Record-type identifier used as directory and file-name
            component (e.g. ``'TMR1'``).
        mar_place: Place name used in the file name (e.g. ``'ambattur'``).
        mar_year: Year used as directory and file-name component
            (e.g. ``2015``).

    Returns:
        The 1-based record number from which to continue scraping.
    """
    place_name = mar_place
    file_path = f'{project_path}{mar_type}\\{mar_year}\\'
    new_file_name = f'{file_path}RECORDS_{mar_type}_{place_name}_{mar_year}.csv'

    if not pathlib.Path(new_file_name).is_file():
        print(f'{place_name} is fresh run')
        return 1

    print('file exist')
    print('extracting last lines...')

    # Stream the file instead of loading every line into memory, and keep
    # the last line that has content so trailing blank lines are ignored.
    temp_last_line = ''
    with open(new_file_name, 'r', encoding="utf-8") as f:
        for line in f:
            if line.strip():
                temp_last_line = line
    print(f'temp last line {temp_last_line}')

    last_line = re.findall(
        r"(?<=.../)[A-Za-z0-9 \-()_.]+/\d+/\d+(?=,)", temp_last_line
    )
    print(f"this is {last_line}")

    if not last_line:
        # Empty or header-only file: there is no record to resume from.
        print(f'{place_name} has no parsable last record, starting from 1')
        return 1

    reference = last_line[0]
    # The name part of the pattern cannot contain '/', so the reference
    # always splits into exactly: name, record number, trailing number.
    final_out = reference.split("/")

    # The reference is ASCII-only (enforced by the pattern), so an explicit
    # UTF-8 encoding writes the same bytes as the locale default did.
    with open(f"{logs_path}LastLines_{mar_year}.txt",
              mode='a', encoding="utf-8") as lastLine_file:
        lastLine_file.write(f"{reference}\n")
    print('Write last line to file success !')
    print(f' {final_out[0]} has details till {final_out[1]} ')
    return int(final_out[1]) + 1
#print(check_file('TMR1', 'ambattur', 2015))