custom facial expression example
Rassibassi committed Mar 15, 2022
1 parent 228b10b commit 55ae570
Showing 2 changed files with 124 additions and 0 deletions.
40 changes: 40 additions & 0 deletions custom/core.py
@@ -166,3 +166,43 @@ def slice_from_roi(roi, image_size, horizontal_side=True):
    second_id = int((center + norm_side / 2) * image_side)

    return (first_id, second_id)


def extract_faces(raw_frame, results, x_scale=1.0, y_scale=1.0):
    """Crop detected faces out of raw_frame, scaling each bounding box
    about its center by (x_scale, y_scale)."""
    frames = []
    if results.detections is None:
        return frames
    for detection in results.detections:
        image_size = raw_frame.shape[1::-1]  # (width, height)
        box = detection.location_data.relative_bounding_box

        # Convert the relative bounding box to absolute pixel coordinates.
        x_min = image_size[0] * box.xmin
        y_min = image_size[1] * box.ymin
        width = image_size[0] * box.width
        height = image_size[1] * box.height
        x_max = x_min + width
        y_max = y_min + height

        # Scale the box about its center.
        x_center = (x_min + x_max) / 2
        y_center = (y_min + y_max) / 2

        width = x_scale * width
        height = y_scale * height

        x_min = x_center - width / 2
        y_min = y_center - height / 2

        x_max = x_min + width
        y_max = y_min + height

        # Clamp to the image so a scaled box near the border cannot produce
        # negative indices, which would wrap around in the slice below.
        x_min, y_min = max(x_min, 0), max(y_min, 0)

        x_min, x_max, y_min, y_max = map(int, [x_min, x_max, y_min, y_max])

        frame = raw_frame[y_min:y_max, x_min:x_max]

        if frame.any():
            frames.append(frame)

    return frames
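
facial_expression.py below imports tflite_inference from custom.core, but that helper is outside this diff. What follows is a minimal sketch of what it plausibly looks like, assuming the standard tf.lite.Interpreter API and a list-of-outputs return value; both are assumptions, since the commit does not show the function.

# Hedged sketch of the imported helper; not part of this commit.
import numpy as np
import tensorflow as tf


def tflite_inference(inputs, model_path):
    """Run a single forward pass of a .tflite model (assumed signature)."""
    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # Add a batch dimension and match the model's expected dtype.
    inputs = np.expand_dims(inputs, axis=0).astype(input_details[0]["dtype"])

    interpreter.set_tensor(input_details[0]["index"], inputs)
    interpreter.invoke()

    return [interpreter.get_tensor(out["index"]) for out in output_details]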
84 changes: 84 additions & 0 deletions facial_expression.py
@@ -0,0 +1,84 @@
import cv2
import mediapipe as mp
import numpy as np

from custom.core import extract_faces, tflite_inference
from videosource import WebcamSource

mp_drawing = mp.solutions.drawing_utils
mp_face_detection = mp.solutions.face_detection

labels = ["Neutral", "Happiness", "Sadness", "Surprise", "Fear", "Disgust", "Anger"]

# Run the fast model on every frame, or the slower one every x-th frame.
fast_model = True
slow_model_every_x = 5


def main():
    source = WebcamSource()

    # Per-channel RGB normalization statistics used during model training.
    mean = np.array([0.57535914, 0.44928582, 0.40079932])
    std = np.array([0.20735591, 0.18981615, 0.18132027])

    if fast_model:
        # from https://github.com/zengqunzhao/EfficientFace
        model_path = "models/efficient_face_model.tflite"
    else:
        # from https://github.com/zengqunzhao/EfficientFace
        model_path = "models/dlg_model.tflite"

    with mp_face_detection.FaceDetection(
        model_selection=0, min_detection_confidence=0.5
    ) as face_detection:

        # Last predicted class, reused between slow-model frames.
        expression_id = None

        for idx, (frame, frame_rgb) in enumerate(source):

            results = face_detection.process(frame_rgb)

            face_frames = extract_faces(frame_rgb, results, x_scale=1.2, y_scale=1.2)

            if face_frames:
                # Show the first face crop in the top-left corner of the output.
                face_frame = cv2.resize(face_frames[0], (224, 224))
                frame[0:224, 0:224, :] = cv2.cvtColor(face_frame, cv2.COLOR_RGB2BGR)

                if fast_model or expression_id is None or (
                    idx % slow_model_every_x == 0
                ):
                    # Normalize to the training distribution and switch to
                    # channels-first layout before inference.
                    face_frame = face_frame / 255
                    face_frame -= mean
                    face_frame /= std
                    face_frame = np.moveaxis(face_frame, -1, 0)

                    outputs = tflite_inference(face_frame, model_path)
                    outputs = outputs[0]
                    expression_id = np.argmax(outputs)

                # write expression over head
                detection = results.detections[0]
                relative_keypoints = detection.location_data.relative_keypoints
                landmarks = np.stack([(rk.x, rk.y) for rk in relative_keypoints])

                # Anchor the label at the nose-tip keypoint (index 2),
                # centered horizontally and shifted above the head.
                image_size = frame_rgb.shape[1::-1]
                pos = landmarks[2, :]
                pos = image_size * pos
                text_size = cv2.getTextSize(
                    labels[expression_id], cv2.FONT_HERSHEY_SIMPLEX, 1.2, 2
                )
                text_size = text_size[0]
                pos[0] -= text_size[0] / 2
                pos[1] -= 150

                pos = tuple(pos.astype(np.int32).tolist())
                cv2.putText(
                    frame,
                    labels[expression_id],
                    pos,
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1.2,
                    (57, 255, 20),
                    2,
                )

            source.show(frame)


if __name__ == "__main__":
    main()
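
The demo also depends on WebcamSource from videosource.py, which predates this commit. For context, here is a hedged sketch of the assumed interface: iteration yields (frame, frame_rgb) pairs and show() displays a frame. Names and defaults are illustrative, not the repository's actual implementation.

# Hedged sketch of the videosource.WebcamSource dependency; not part of
# this commit.
import cv2


class WebcamSource:
    def __init__(self, camera_id=0):
        self.capture = cv2.VideoCapture(camera_id)

    def __iter__(self):
        # Yield (BGR frame, RGB frame) pairs until the camera stops.
        while self.capture.isOpened():
            ok, frame = self.capture.read()
            if not ok:
                break
            yield frame, cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        self.capture.release()

    def show(self, frame, window_name="facial_expression"):
        cv2.imshow(window_name, frame)
        cv2.waitKey(1)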
