-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgdrive.py
61 lines (45 loc) · 1.71 KB
/
gdrive.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python
import typing as ty
from .modules import install as install_package
# gdown is an optional dependency: install it on demand when it is missing.
try:
    import gdown
except ImportError:
    import importlib

    install_package('gdown')
    # The package was installed after interpreter start-up, so refresh the
    # import system's caches before importing it.
    importlib.invalidate_caches()
    # Without this import the name `gdown` would stay unbound and the first
    # call to gdown.download() below would raise NameError.
    import gdown
from .common import resolve_path
from .console import stderr, stdout
# TODO(has): add all datasets from signal-public to this dictionary
# Maps a dataset name to the Google Drive file id it is downloaded from.
SIGNAL_DATASETS: ty.Dict[str, str] = {
    "activity_triplets_V1_02182022": "1BUd2sAUP04Jf1Qnin0uhMeAXaN3gCyTy",
    "maintainers": "1g9hEzRp-EMOqr5AcoDJ8pdUjiwuWPn_2",
    "linux-kernel-data": "1h1AGfQkOhvgtcCR8tWVzObBSpBTURWM2",
    "lkml_developer_from0820_0121": "19sEwMOZkbW8pnZOdTonImEwtYYsPyynJ",
}
def download_data_from_google_drive(google_file_id: str, output_file_name: str, quiet_download: bool) -> str:
    """Return the resolved path of ``output_file_name``, downloading it if needed.

    The file is looked up as ``./<output_file_name>`` via ``resolve_path``.
    A ``ValueError`` from that lookup is treated as "not downloaded yet" and
    triggers a download from Google Drive through ``gdown``.

    Args:
        google_file_id: Google Drive id passed to ``gdown.download``.
        output_file_name: Name of the file to reuse or to write the download to.
        quiet_download: Forwarded to ``gdown.download`` as ``quiet``.

    Returns:
        Whatever ``resolve_path`` returns for ``./<output_file_name>``.
    """
    local_target = f'./{output_file_name}'
    try:
        resolved = resolve_path(local_target)
        stdout.print(f"{output_file_name} already exists!")
        return resolved
    except ValueError:
        # Lookup failed: fetch the file, then resolve it again. A second
        # failure (e.g. the download produced no file) propagates to the caller.
        gdown.download(id=google_file_id, output=output_file_name, quiet=quiet_download)
        return resolve_path(local_target)
def get_dataset(dataset_name: str, is_local_file: bool, name2gdid: ty.Dict[str, str] = SIGNAL_DATASETS) -> ty.Optional[str]:
    """Return the resolved path of a dataset, downloading it when required.

    Args:
        dataset_name: Name of the dataset file; it is resolved as
            ``./<dataset_name>``.
        is_local_file: When true, the path is only resolved locally and no
            download is attempted.
        name2gdid: Mapping from dataset name to Google Drive file id, used
            when ``is_local_file`` is false.

    Returns:
        The resolved path, or ``None`` when ``is_local_file`` is false and
        ``dataset_name`` is not a key of ``name2gdid`` (a message is printed
        to stderr in that case).
    """
    if is_local_file:
        # Any error raised by resolve_path propagates to the caller.
        return resolve_path(f'./{dataset_name}')

    if dataset_name not in name2gdid:
        stderr.print(f"Dataset {dataset_name} not found!")
        return None

    return download_data_from_google_drive(
        name2gdid[dataset_name],
        dataset_name,
        quiet_download=True,
    )
def get_dataset_files(
        files: ty.List[str],
        is_local_file: bool,
        name2gdid: ty.Dict[str, str] = SIGNAL_DATASETS) -> ty.Dict[str, ty.Optional[str]]:
    """Resolve several datasets at once.

    Args:
        files: Dataset names to look up; each one is passed to ``get_dataset``.
        is_local_file: Forwarded unchanged to ``get_dataset`` for every name.
        name2gdid: Mapping from dataset name to Google Drive file id,
            forwarded unchanged to ``get_dataset``.

    Returns:
        A dict mapping each name in ``files`` to the value ``get_dataset``
        returned for it, which is ``None`` for datasets it could not find.
    """
    # The comprehension already builds a fresh dict; no extra dict() copy needed.
    return {name: get_dataset(name, is_local_file, name2gdid) for name in files}
if __name__ == "__main__":
    # This module only provides helpers; running it directly does nothing.
    pass