-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
80 lines (66 loc) · 2.94 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from nasa_api import NASA_API
import asyncio
import json
import os
import aiohttp
import glob
import tqdm.asyncio
from aiolimiter import AsyncLimiter
import aioboto3
# Directory that receives the downloaded images and their label metadata;
# it is also scanned at startup to find images written by earlier runs.
OUTPUT_PATH = "/home/black/TSL/nasa_images"
async def rekognize(session, rk, pbar, limiter, item):
    """Fetch one image, label it with AWS Rekognition, and save both to disk.

    Args:
        session: HTTP client whose ``get(url)`` returns an async context
            manager yielding the response (an ``aiohttp.ClientSession``
            when called from ``main``).
        rk: Rekognition client exposing an awaitable ``detect_labels``.
        pbar: Progress bar; ``update(1)`` is called after both files are
            written.
        limiter: Async context manager used to rate-limit the work.
        item: Search result holding a ``"nasa_id"`` and an ``"image_urls"``
            mapping from size name to URL.

    Raises:
        ValueError: If ``item["image_urls"]`` contains none of the known
            size names.
    """
    async with limiter:
        # use the first available image size in the following order of priority
        keys = ["medium", "small", "large", "orig", "thumb"]
        key = next((k for k in keys if k in item["image_urls"]), None)
        if key is None:
            # A bare next() would raise StopIteration here, which a coroutine
            # re-raises as an uninformative RuntimeError.
            raise ValueError(
                f"none of {keys} found in image_urls: {list(item['image_urls'])}"
            )
        url = item["image_urls"][key]

        # read the image data; the body must be consumed while the response
        # context is still open, because leaving it releases the connection
        async with session.get(url) as resp:
            resp.raise_for_status()
            img = await resp.read()

        # run AWS rekognition
        labels = (
            await rk.detect_labels(Image={"Bytes": img}, MaxLabels=100, MinConfidence=0)
        )["Labels"]

        # write image and labels to output path; the metadata goes first so
        # that an existing image file implies its metadata was written too
        meta = dict(item, labels=labels)
        img_id = item["nasa_id"]
        meta_fn = f"meta_{img_id}.json"
        image_fn = f"image_{img_id}.{url.split('.')[-1]}"
        with open(os.path.join(OUTPUT_PATH, meta_fn), "w") as f:
            json.dump(meta, f)
        with open(os.path.join(OUTPUT_PATH, image_fn), "wb") as f:
            f.write(img)
        pbar.update(1)
async def main():
    """Search the NASA image API and run ``rekognize`` on every new result.

    Results whose ``image_<nasa_id>.<ext>`` file already exists under
    ``OUTPUT_PATH`` are skipped, so an interrupted run can be resumed.
    One task is created per new result; all of them are awaited before the
    HTTP session and Rekognition client are closed.
    """
    limiter = AsyncLimiter(40, 1)
    tasks = set()

    # load the image IDs we've already processed from previous runs.
    # Only the final extension is stripped, so an ID that itself contains
    # a "." is recovered intact instead of being cut at its first dot.
    img_ids = {
        os.path.splitext(os.path.basename(s))[0][len("image_"):]
        for s in glob.glob(os.path.join(OUTPUT_PATH, "image_*"))
    }

    async with NASA_API as napi:
        async with aiohttp.ClientSession() as session:
            async with aioboto3.client("rekognition") as rk:
                # the lower progress bar represents the progress of the `rekognize` tasks
                pbar = tqdm.tqdm(position=1, total=0)

                # these search parameters can be changed to get a variety of images
                search = await napi.search(center="JSC", media_type="image", q="dock")

                # the upper progress bar represents the progress of the NASA API search
                async for item in tqdm.asyncio.tqdm(search, position=0):
                    # skip images that have already been processed
                    if item["nasa_id"] not in img_ids:
                        img_ids.add(item["nasa_id"])
                        # enqueue a task that will fetch the image data, run AWS
                        # Rekognition, and then write the output to disk
                        tasks.add(
                            asyncio.create_task(
                                rekognize(session, rk, pbar, limiter, item)
                            )
                        )
                        pbar.total += 1

                # wait for all rekognition tasks to finish while the session
                # and client they use are still open
                await asyncio.gather(*tasks)
# Script entry point: run the whole search-and-label pipeline on a fresh
# asyncio event loop when this file is executed directly.
if __name__ == "__main__":
    asyncio.run(main())