Skip to content

Commit

Permalink
First commit
Browse files Browse the repository at this point in the history
Add Python scripts and trained YOLOv5s model (.blob format) with config .json to repository.
  • Loading branch information
maxsitt committed Dec 21, 2022
1 parent cf49a9b commit 78be505
Show file tree
Hide file tree
Showing 8 changed files with 1,191 additions and 0 deletions.
45 changes: 45 additions & 0 deletions cam_preview.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/usr/bin/env python3

'''
Author: Maximilian Sittinger (https://github.com/maxsitt)
Website: https://maxsitt.github.io/insect-detect-docs/
License: GNU GPLv3 (https://choosealicense.com/licenses/gpl-3.0/)

This Python script does the following:
- show a preview of full FOV 4K frames downscaled to LQ frames (e.g. 416x416)

compiled with open source scripts available at https://github.com/luxonis
'''

import cv2
import depthai as dai

# Create depthai pipeline
pipeline = dai.Pipeline()

# Define camera source and output
cam_rgb = pipeline.create(dai.node.ColorCamera)
#cam_rgb.setImageOrientation(dai.CameraImageOrientation.ROTATE_180_DEG)
cam_rgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_4_K)
cam_rgb.setPreviewSize(416, 416)         # downscaled LQ frames
cam_rgb.setInterleaved(False)
cam_rgb.setPreviewKeepAspectRatio(False) # squash full FOV frames to square
cam_rgb.setFps(20)                       # frames per second available for focus/exposure

xout_rgb = pipeline.create(dai.node.XLinkOut)
xout_rgb.setStreamName("frame")
cam_rgb.preview.link(xout_rgb.input)

# Connect to OAK device and start pipeline
with dai.Device(pipeline, usb2Mode=True) as device:

    # Create output queue to get the frames from the output defined above
    q_frame = device.getOutputQueue(name="frame", maxSize=4, blocking=False)

    # Get LQ preview frames and show in window (e.g. via X11 forwarding)
    while True:
        frame = q_frame.get().getCvFrame()
        cv2.imshow("cam_preview", frame)

        # press "q" (with the preview window focused) to stop the stream
        if cv2.waitKey(1) == ord("q"):
            break

# Fix: release the HighGUI window on exit instead of leaving it orphaned
cv2.destroyAllWindows()
55 changes: 55 additions & 0 deletions models/json/yolov5s_416.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{
"nn_config": {
"output_format": "detection",
"NN_family": "YOLO",
"input_size": "416x416",
"NN_specific_metadata": {
"classes": 1,
"coordinates": 4,
"anchors": [
10,
13,
16,
30,
33,
23,
30,
61,
62,
45,
59,
119,
116,
90,
156,
198,
373,
326
],
"anchor_masks": {
"side52": [
0,
1,
2
],
"side26": [
3,
4,
5
],
"side13": [
6,
7,
8
]
},
"iou_threshold": 0.5,
"confidence_threshold": 0.5
}
},
"mappings": {
"labels": [
"insect"
]
}
}
Binary file added models/yolov5s_416_openvino_2022.1_9shave.blob
Binary file not shown.
74 changes: 74 additions & 0 deletions still_capture_timelapse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/usr/bin/env python3

'''
Author: Maximilian Sittinger (https://github.com/maxsitt)
Website: https://maxsitt.github.io/insect-detect-docs/
License: GNU GPLv3 (https://choosealicense.com/licenses/gpl-3.0/)

This Python script does the following:
- save still images in highest possible resolution to .jpg at specified time interval

includes segments from open source scripts available at https://github.com/luxonis
'''

from datetime import datetime
from pathlib import Path

import cv2
import depthai as dai

# Create depthai pipeline
pipeline = dai.Pipeline()

# Define camera source
cam_rgb = pipeline.create(dai.node.ColorCamera)
#cam_rgb.setImageOrientation(dai.CameraImageOrientation.ROTATE_180_DEG)
cam_rgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_12_MP) # OAK-1 (IMX378)
#cam_rgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_13_MP) # OAK-1 Lite (IMX214)
#cam_rgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_5312X6000) # OAK-1 MAX (LCM48)
cam_rgb.setNumFramesPool(2, 2, 2, 2, 2) # small frame pools to save on-device memory
cam_rgb.setFps(10) # frames per second available for focus/exposure

# Define MJPEG encoder
still_enc = pipeline.create(dai.node.VideoEncoder)
still_enc.setDefaultProfilePreset(1, dai.VideoEncoderProperties.Profile.MJPEG)
still_enc.setNumFramesPool(1)

# Define script node
script = pipeline.create(dai.node.Script)

# Set script that will be run on-device (Luxonis OAK)
script.setScript('''
import time
ctrl = CameraControl()
ctrl.setCaptureStill(True)
while True:
    node.io["capture_still"].send(ctrl)
    time.sleep(3) # capture still image every 3 seconds
''')

# Send capture command to camera and still image to the MJPEG encoder
script.outputs["capture_still"].link(cam_rgb.inputControl)
cam_rgb.still.link(still_enc.input)

xout_still = pipeline.create(dai.node.XLinkOut)
xout_still.setStreamName("still")
still_enc.bitstream.link(xout_still.input)

# Connect to OAK device and start pipeline
with dai.Device(pipeline, usb2Mode=True) as device:

    # Create output queue to get the encoded still images
    q_still = device.getOutputQueue(name="still", maxSize=1, blocking=False)

    # Build the save directory with pathlib instead of string concatenation
    rec_start = datetime.now().strftime("%Y%m%d_%H-%M")
    save_path = Path("insect-detect/still") / rec_start[:8] / rec_start
    save_path.mkdir(parents=True, exist_ok=True)

    # Write each encoded still to a timestamped .jpg file
    while True:
        enc_still = q_still.get()
        timestamp = datetime.now().strftime("%Y%m%d_%H-%M-%S.%f")
        with open(save_path / f"{timestamp}_still.jpg", "wb") as still_jpg:
            still_jpg.write(bytearray(enc_still.getData()))
138 changes: 138 additions & 0 deletions yolov5_preview.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#!/usr/bin/env python3

'''
Author: Maximilian Sittinger (https://github.com/maxsitt)
Website: https://maxsitt.github.io/insect-detect-docs/
License: GNU GPLv3 (https://choosealicense.com/licenses/gpl-3.0/)

This Python script does the following:
- run a custom YOLOv5 object detection model (.blob format) on-device (Luxonis OAK)
- use full FOV 4K frames downscaled to LQ frames (e.g. 416x416) as model input
- show a preview of full FOV 4K frames downscaled to LQ frames (e.g. 416x416) + model output
- optional: print available Raspberry Pi memory (MB) and RPi CPU utilization (percent)
  -> "-log" to print RPi info to console

compiled with open source scripts available at https://github.com/luxonis
'''

import argparse
import json
import sys
import time
from pathlib import Path

import cv2
import depthai as dai
import numpy as np

# Define optional arguments
parser = argparse.ArgumentParser()
parser.add_argument("-log", "--print-log", action="store_true",
    help="print RPi available memory (MB) + CPU utilization (percent)")
args = parser.parse_args()

# psutil is only required for the optional logging -> import lazily
if args.print_log:
    import psutil

# Set file paths to the detection model and config JSON
MODEL_PATH = Path("./insect-detect/models/yolov5s_416_openvino_2022.1_9shave.blob")
CONFIG_PATH = Path("./insect-detect/models/json/yolov5s_416.json")

# Extract detection model metadata from config JSON
# Fix: fall back to sensible YOLOv5 defaults instead of an empty dict ({}),
# which would otherwise be passed to the YoloDetectionNetwork setters below
# and fail with a confusing error if a key is missing from the config
with CONFIG_PATH.open(encoding="utf-8") as f:
    config = json.load(f)
nn_config = config.get("nn_config", {})
nn_metadata = nn_config.get("NN_specific_metadata", {})
classes = nn_metadata.get("classes", 1)
coordinates = nn_metadata.get("coordinates", 4)
anchors = nn_metadata.get("anchors", [])
anchor_masks = nn_metadata.get("anchor_masks", {})
iou_threshold = nn_metadata.get("iou_threshold", 0.5)
confidence_threshold = nn_metadata.get("confidence_threshold", 0.5)
nn_mappings = config.get("mappings", {})
labels = nn_mappings.get("labels", [])

# Create depthai pipeline
pipeline = dai.Pipeline()

# Define camera source
cam_rgb = pipeline.create(dai.node.ColorCamera)
#cam_rgb.setImageOrientation(dai.CameraImageOrientation.ROTATE_180_DEG)
cam_rgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_4_K)
cam_rgb.setPreviewSize(416, 416) # downscaled LQ frames for model input
cam_rgb.setInterleaved(False)
cam_rgb.setPreviewKeepAspectRatio(False) # squash full FOV frames to square
cam_rgb.setFps(20) # frames per second available for focus/exposure/model input

# Define detection model source and input + output
nn = pipeline.create(dai.node.YoloDetectionNetwork)
cam_rgb.preview.link(nn.input) # downscaled LQ frames as model input
nn.input.setBlocking(False)

xout_nn = pipeline.create(dai.node.XLinkOut)
xout_nn.setStreamName("nn")
nn.out.link(xout_nn.input)

xout_rgb = pipeline.create(dai.node.XLinkOut)
xout_rgb.setStreamName("frame")
nn.passthrough.link(xout_rgb.input) # model passthrough frames, synced with detections

# Set detection model specific settings
nn.setBlobPath(MODEL_PATH)
nn.setNumClasses(classes)
nn.setCoordinateSize(coordinates)
nn.setAnchors(anchors)
nn.setAnchorMasks(anchor_masks)
nn.setIouThreshold(iou_threshold)
nn.setConfidenceThreshold(confidence_threshold)
nn.setNumInferenceThreads(2)
# Helper to map model output coordinates onto the preview frame
def frame_norm(frame, bbox):
    """Convert relative bounding box coordinates (0-1) to pixel coordinates.

    Even indices of *bbox* (xmin, xmax) are scaled by the frame width,
    odd indices (ymin, ymax) by the frame height. Values are clipped to
    the 0-1 range first, then truncated to integer pixel positions.
    """
    height, width = frame.shape[0], frame.shape[1]
    pixel_scale = np.full(len(bbox), height)
    pixel_scale[::2] = width  # even indices hold x-coordinates
    clipped = np.clip(np.array(bbox), 0, 1)
    return (clipped * pixel_scale).astype(int)

# Connect to OAK device and start pipeline
with dai.Device(pipeline, usb2Mode=True) as device:

    # Create output queues to get the frames and detections from the outputs defined above
    q_frame = device.getOutputQueue(name="frame", maxSize=4, blocking=False)
    q_nn = device.getOutputQueue(name="nn", maxSize=4, blocking=False)

    # Create start_time and counter variables to measure fps of the detection model
    start_time = time.monotonic()
    counter = 0
    # Fix: initialize dets so the drawing branch below cannot raise a
    # NameError before the first model output has been received
    dets = []

    # Get LQ preview frames and model output (detections) and show in window
    while True:
        if args.print_log:
            print(f"Available RPi memory: {round(psutil.virtual_memory().available / 1048576)} MB")
            print(f"RPi CPU utilization: {psutil.cpu_percent(interval=None)}%")
            print("\n")

        frame = q_frame.get().getCvFrame()
        nn_out = q_nn.get()

        if nn_out is not None:
            dets = nn_out.detections
            counter += 1 # count model outputs for the fps overlay

        if frame is not None:
            # Draw label, confidence and bounding box for every detection
            for detection in dets:
                bbox = frame_norm(frame, (detection.xmin, detection.ymin,
                                          detection.xmax, detection.ymax))
                cv2.putText(frame, labels[detection.label], (bbox[0], bbox[3] + 20),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
                cv2.putText(frame, f"{round(detection.confidence, 2)}", (bbox[0], bbox[3] + 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
                cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 0, 255), 2)

            # Overlay the measured model fps in the bottom left corner
            cv2.putText(frame, "NN fps: {:.2f}".format(counter / (time.monotonic() - start_time)),
                        (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)

            cv2.imshow("yolov5_preview", frame)

        # press "q" (with the preview window focused) to stop the stream
        if cv2.waitKey(1) == ord("q"):
            break
Loading

0 comments on commit 78be505

Please sign in to comment.