# HandDetector.py

import numpy as np
import tensorflow as tf
import cv2 as cv

# Initialize the camera
vc = cv.VideoCapture(0)

# Grab a single frame up front purely to learn the frame dimensions, so the
# whiteboard canvas can be created with the same shape as the webcam image.
b, img = vc.read()
if not b or img is None:
    # Fail with a clear message instead of an AttributeError on None.shape,
    # and give the device back before bailing out.
    vc.release()
    raise RuntimeError("Could not read a frame from camera 0")
shape = img.shape

# All-white canvas (every byte 255) matching the frame's shape.
whiteboard = np.full(shape, 255, dtype=np.uint8)


# Read the graph.
# GFile is the supported replacement for the deprecated FastGFile wrapper.
# The file is opened in binary mode because its bytes are parsed directly
# into a GraphDef message below.
with tf.gfile.GFile('frozen_inference_graph.pb', 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

# Tunables for detection and drawing.
SCORE_THRESHOLD = 0.5          # detections scoring at or below this are ignored
MODEL_INPUT_SIZE = (300, 300)  # frames are resized to this before inference
DRAW_COLOR = (125, 255, 51)    # colour passed to OpenCV for boxes and strokes
QUIT_KEYS = (27, ord('q'))     # Esc or 'q' ends the program

with tf.Session() as sess:
    # Import the frozen graph into the graph this session executes. The
    # context manager is actually entered here; calling as_default() without
    # `with` has no effect.
    with sess.graph.as_default():
        tf.import_graph_def(graph_def, name='')

    # Resolve the output tensors once; they do not change between frames.
    fetches = [sess.graph.get_tensor_by_name('num_detections:0'),
               sess.graph.get_tensor_by_name('detection_scores:0'),
               sess.graph.get_tensor_by_name('detection_boxes:0')]

    # End point of the previous stroke segment; None until a box is first seen.
    # NOTE(review): this is never cleared, so if the detection disappears and
    # later reappears the stroke connects the two positions - confirm that is
    # the intended pen behaviour.
    lastPoint = None

    try:
        while True:
            b, img = vc.read()
            if b:
                # Read and preprocess an image.
                img = cv.flip(img, 1)  # mirror horizontally
                rows, cols = img.shape[:2]
                inp = cv.resize(img, MODEL_INPUT_SIZE)
                inp = inp[:, :, [2, 1, 0]]  # BGR2RGB

                # Run the model on a batch containing just this frame.
                num, scores, boxes = sess.run(
                    fetches,
                    feed_dict={'image_tensor:0': np.expand_dims(inp, axis=0)})

                # Visualize detected bounding boxes and remember the centre
                # of the largest one; that centre acts as the pen position.
                num_detections = int(num[0])
                biggestArea = 0
                biggestPoint = None
                for score, box in zip(scores[0][:num_detections],
                                      boxes[0][:num_detections]):
                    if float(score) <= SCORE_THRESHOLD:
                        continue

                    # Box values are used as (top, left, bottom, right)
                    # fractions of the frame and scaled to pixel coordinates.
                    top, left, bottom, right = (float(v) for v in box)
                    x = left * cols
                    y = top * rows
                    right *= cols
                    bottom *= rows

                    currentArea = (right - x) * (bottom - y)
                    if currentArea > biggestArea:
                        biggestArea = currentArea
                        biggestPoint = (int((x + right) / 2),
                                        int((y + bottom) / 2))

                    cv.rectangle(img, (int(x), int(y)),
                                 (int(right), int(bottom)),
                                 DRAW_COLOR, thickness=2)

                # Paint: the first sighting only records the start point;
                # every later one extends the stroke from the previous point.
                if biggestPoint is not None:
                    if lastPoint is not None:
                        cv.line(whiteboard, lastPoint, biggestPoint,
                                DRAW_COLOR, thickness=2)
                    lastPoint = biggestPoint

                # Display image and whiteboard
                cv.imshow("Webcam", img)
                cv.imshow("Whiteboard", whiteboard)

            # waitKey runs on every iteration, including after a failed read,
            # so the windows stay responsive and the loop does not spin at
            # full CPU. It also gives the user a way to quit.
            if cv.waitKey(1) & 0xFF in QUIT_KEYS:
                break
    finally:
        # Release the camera and close the windows on any exit path,
        # including Ctrl+C.
        vc.release()
        cv.destroyAllWindows()