#!/usr/bin/python3
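"""TFLite object detection on a video file.

Reads frames from a video, runs them through a TFLite SSD-style detection
model, and displays each frame with bounding boxes, class labels, and an
FPS counter. Press 'q' in the display window to quit.

Example invocation (paths are illustrative):
    python3 video.py --model_dir Sample_TFLite_model --video test.mp4
"""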
import os
import cv2
import argparse
import importlib.util
import numpy as np
def detect(video, frame_rate_calc):
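    """Run detection on every frame of `video` and display annotated output.

    Relies on module-level globals defined below (interpreter, labels,
    input/output details, frame dimensions, normalization constants, and
    min_conf_threshold); `frame_rate_calc` seeds the FPS overlay.
    """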
    while video.isOpened():
        ret, frame1 = video.read()
        if not ret:
            print('Reached the end of the video!')
            break
        # Start timer (for calculating frame rate)
        t1 = cv2.getTickCount()
        # Acquire frame and resize to the model's expected input shape
        frame = frame1.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (width, height))
        input_data = np.expand_dims(frame_resized, axis=0)
        # Normalize pixel values if the model expects floating-point input
        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std
        # Pass the input image to the pre-trained model and run inference
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()
        # Get the detection outputs
        boxes = interpreter.get_tensor(output_details[0]['index'])[0]
        classes = interpreter.get_tensor(output_details[1]['index'])[0]
        scores = interpreter.get_tensor(output_details[2]['index'])[0]
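        # NOTE: this indexing assumes the common TFLite SSD output order
        # (boxes, then classes, then scores); some exported models emit these
        # tensors in a different order, so adjust the indices if results look wrong.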
# Draw bounding boxes for every prediction
for i, (box, cls, score) in enumerate(zip(boxes, classes, scores)):
if score > min_conf_threshold and score <= 1.0:
# Getting bounding box dimensions
ymin = int(max(1, (box[0] * imH)))
xmin = int(max(1, (box[1] * imW)))
ymax = int(min(imH, (box[2] * imH)))
xmax = int(min(imW, (box[3] * imW)))
cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)
# Draw Label
object_name = labels[int(cls)]
label = f"{object_name} {round(score*100, 2)}%"
labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
label_ymin = max(ymin, labelSize[1] + 10)
cv2.rectangle(frame, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in
cv2.putText(frame, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text
cv2.putText(frame, f"FPS: {round(frame_rate_calc, 2)}", (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA)
cv2.imshow('Object Detection (q to close)', frame)
t2 = cv2.getTickCount()
time1 = (t2 - t1)/freq
frame_rate_calc = 1/time1
if cv2.waitKey(1) == ord('q'):
break
cv2.destroyAllWindows()
video.release()
# Define and parse input arguments
parser = argparse.ArgumentParser()
parser.add_argument('--model_dir', help='Path to the directory containing the .tflite file', required=True)
parser.add_argument('--tflite_file', help='Name of the .tflite file, if different from the default', default='detect.tflite')
parser.add_argument('--labels', help='Name of the labelmap file, if different from the default', default='labelmap.txt')
parser.add_argument('--video', help='Path to the video file', default='video.mp4')
args = parser.parse_args()
MODEL_NAME = args.model_dir
GRAPH_NAME = args.tflite_file
LABELMAP_NAME = args.labels
VIDEO_NAME = args.video
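# Prefer the lightweight tflite_runtime package if it is installed
# (e.g. on a Raspberry Pi); otherwise fall back to full TensorFlow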
pkg = importlib.util.find_spec('tflite_runtime')
if pkg:
from tflite_runtime.interpreter import Interpreter
else:
from tensorflow.lite.python.interpreter import Interpreter
CWD_PATH = os.getcwd()
VIDEO_PATH = os.path.join(CWD_PATH, VIDEO_NAME)
PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)
PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)
# Load the label map (one class name per line, in class-index order)
with open(PATH_TO_LABELS) as fh:
    labels = [line.strip() for line in fh]
# Some labelmaps (e.g. the COCO starter model's) have a '???' placeholder
# as the first line; drop it so indices line up with class names
if labels[0] == '???':
    del labels[0]
# Load the TFLite model and allocate tensors
interpreter = Interpreter(model_path=PATH_TO_CKPT)
interpreter.allocate_tensors()
# Get model input/output details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]
floating_model = (input_details[0]['dtype'] == np.float32)
# Minimum confidence required to keep a detection
min_conf_threshold = 0.5
# Normalization constants: map uint8 pixels [0, 255] to floats in [-1, 1]
input_mean = 127.5
input_std = 127.5
# Initialize the frame-rate estimate and timer frequency (ticks per second)
frame_rate_calc = 1
freq = cv2.getTickFrequency()
# Open video file
video = cv2.VideoCapture(VIDEO_PATH)
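# Fail fast with a clear message if OpenCV cannot open the video file
if not video.isOpened():
    raise SystemExit(f'Error: could not open video file at {VIDEO_PATH}')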
imW = video.get(cv2.CAP_PROP_FRAME_WIDTH)
imH = video.get(cv2.CAP_PROP_FRAME_HEIGHT)
if __name__ == "__main__":
    detect(video, frame_rate_calc)