-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmapillary_downloader.py
55 lines (46 loc) · 2.02 KB
/
mapillary_downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import os
from pathlib import Path
import geopandas as gpd
import wget
from mapillary_crawler import crawl_multipolygon
import time
import pandas as pd
import urllib
import urllib.request
from PIL import Image
from tqdm import tqdm
def _split_dir_for(train_count, val_count, district_total):
    """Return the split root directory for the next image of a district.

    The training split is filled first until it holds 80% of the district's
    images, then validation until it holds 10%; everything else goes to test.

    Args:
        train_count: Number of files already in the district's train folder.
        val_count: Number of files already in the district's val folder.
        district_total: Total number of rows for the district in the CSV.
    """
    if train_count < 0.8 * district_total:
        return 'mapillary/train'
    if val_count < 0.1 * district_total:
        return 'mapillary/val'
    return 'mapillary/test'


def _count_files(directory):
    """Return the number of non-directory entries directly inside *directory*."""
    _, _, files = next(os.walk(directory))
    return len(files)


def _download_resized(url, target):
    """Download *url* and store a 256x256 copy of the image at *target*.

    The raw download goes to a temporary ``<target>.part`` file so that
    *target* is only written once the image has been opened and resized
    successfully; a failed attempt therefore leaves no broken image behind.
    Any exception from the download, decode or save step propagates.
    """
    partial = f'{target}.part'
    try:
        urllib.request.urlretrieve(url, partial)
        with Image.open(partial) as image:
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
            # filter under its current name.
            resized = image.resize((256, 256), Image.LANCZOS)
        resized.save(target)
    finally:
        if os.path.exists(partial):
            os.remove(partial)


def main():
    """Deduplicate the crawled URL list and download every image.

    Reads ``bcn/mapillary_raw.csv``, drops rows with a duplicate ``url``,
    writes the result to ``bcn/mapillary.csv`` and then downloads each URL,
    resized to 256x256, to
    ``mapillary/{train,val,test}/<district>/<row index>.jpeg``.

    The split is decided per image from the number of files currently on
    disk for the row's district (80% train, 10% val, remainder test).
    Images that cannot be downloaded or processed are reported and skipped.
    """
    df = pd.read_csv('bcn/mapillary_raw.csv')
    df = df.drop_duplicates(subset=['url']).reset_index(drop=True)
    df.to_csv('bcn/mapillary.csv', index=False)

    # Rows per district never change during the run, so count them once
    # instead of filtering the whole frame for every image.
    district_totals = df['district'].value_counts()

    # download images into mapillary directory
    with tqdm(total=df.shape[0]) as pbar:
        for index, row in df.iterrows():
            district = row.district
            for split in ('train', 'val', 'test'):
                os.makedirs(f'mapillary/{split}/{district}', exist_ok=True)

            # split into 80% training 10% validation 10% test
            split_dir = _split_dir_for(
                _count_files(f'mapillary/train/{district}'),
                _count_files(f'mapillary/val/{district}'),
                district_totals.get(district, 0),
            )
            target = f'{split_dir}/{district}/{index}.jpeg'

            try:
                _download_resized(row.url, target)
            except Exception as err:
                # Best effort: one bad URL or image must not abort the run,
                # but Ctrl-C still works and the reason is shown.
                tqdm.write(f'Image not available. ({row.url}: {err})')
            pbar.update(1)
    print('Download finished!')
# Start the download only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()