Merge remote-tracking branch 'origin/master'

sign-language-processing · Jun 16, 2024 · 2df25f5 · 2df25f5
2 parents e8ca608 + 4a6ecab
commit 2df25f5
Show file tree

Hide file tree

Showing 6 changed files with 203 additions and 40 deletions.
diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
@@ -12,7 +12,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.9"]
+        python-version: ["3.8"]
 
     steps:
       - uses: actions/checkout@v3
@@ -30,4 +30,4 @@ jobs:
 
       - name: Run additional tests
         working-directory: src/python
-        run: pytest tests -s
+        run: pytest tests
diff --git a/src/python/pose_format/bin/directory.py b/src/python/pose_format/bin/directory.py
@@ -1,20 +1,25 @@
-#!/usr/bin/env python
-
 import argparse
 import os
 
 from pose_format.bin.pose_estimation import pose_video
 from tqdm import tqdm
 
 
+def removesuffix(text: str, suffix: str) -> str:
+    """Return text without a trailing suffix; backport of str.removesuffix for Python 3.8."""
+    if suffix and text.endswith(suffix):  # an empty suffix would otherwise slice text[:-0] == ""
+        return text[:-len(suffix)]
+    return text
+
+
 def find_missing_pose_files(directory: str):
     all_files = os.listdir(directory)
     mp4_files = [f for f in all_files if f.endswith(".mp4")]
-    pose_files = {f.removesuffix(".pose") for f in all_files if f.endswith(".pose")}
+    pose_files = {removesuffix(f, ".pose") for f in all_files if f.endswith(".pose")}
     missing_pose_files = []
 
     for mp4_file in mp4_files:
-        base_name = mp4_file.removesuffix(".mp4")
+        base_name = removesuffix(mp4_file, ".mp4")
         if base_name not in pose_files:
             missing_pose_files.append(os.path.join(directory, mp4_file))
 
@@ -34,5 +39,5 @@ def main():
     missing_pose_files = find_missing_pose_files(args.directory)
 
     for mp4_path in tqdm(missing_pose_files):
-        pose_file_name = mp4_path.removesuffix(".mp4") + ".pose"
+        pose_file_name = removesuffix(mp4_path, ".mp4") + ".pose"
         pose_video(mp4_path, pose_file_name, 'mediapipe')
diff --git a/src/python/pose_format/pose_visualizer.py b/src/python/pose_format/pose_visualizer.py
@@ -2,7 +2,8 @@
 import logging
 import math
 from functools import lru_cache
-from typing import Iterable, Tuple
+from typing import Iterable, Tuple, Union
+from io import BytesIO
 
 import numpy as np
 import numpy.ma as ma
@@ -209,14 +210,14 @@ def save_frame(self, f_name: str, frame: np.ndarray):
         """
         self.cv2.imwrite(f_name, frame)
 
-    def _save_image(self, f_name: str, frames: Iterable[np.ndarray], format: str = "GIF", transparency: bool = False):
+    def _save_image(self, f_name: Union[str, None], frames: Iterable[np.ndarray], format: str = "GIF", transparency: bool = False) -> Union[None, bytes]:
         """
         Save pose frames as Image (GIF or PNG).
 
         Parameters
         ----------
-        f_name : str
-            filename to save Image to.
+        f_name : Union[str, None]
+            Filename to save Image to. If None, image will be saved to memory and returned as bytes.
         frames : Iterable[np.ndarray]
             Series of pose frames to be included in Image.
         format : str
@@ -226,7 +227,8 @@ def _save_image(self, f_name: str, frames: Iterable[np.ndarray], format: str = "
 
         Returns
         -------
-        None
+        Union[None, bytes]
+            If f_name is None, returns the image data as bytes. Otherwise, returns None.
 
         Raises
         ------
@@ -244,59 +246,70 @@ def _save_image(self, f_name: str, frames: Iterable[np.ndarray], format: str = "
             cv_code = self.cv2.COLOR_BGR2RGB
 
         images = [Image.fromarray(self.cv2.cvtColor(frame, cv_code)) for frame in frames]
-        images[0].save(f_name,
-                       format=format,
-                       append_images=images[1:],
-                       save_all=True,
-                       duration=1000 / self.pose.body.fps,
-                       loop=0,
-                       disposal=2 if transparency else 0)
-
-    def save_gif(self, f_name: str, frames: Iterable[np.ndarray]):
+
+        def save_to(obj: Union[str, BytesIO]):  # obj: target file path or in-memory buffer
+            images[0].save(obj,
+                           format=format,
+                           append_images=images[1:],
+                           save_all=True,
+                           duration=1000 / self.pose.body.fps,
+                           loop=0,
+                           disposal=2 if transparency else 0)
+
+        if f_name:
+            save_to(f_name)
+        else:
+            with BytesIO() as mem:
+                save_to(mem)
+                return mem.getvalue()
+
+    def save_gif(self, f_name: Union[str, None], frames: Iterable[np.ndarray]) -> Union[None, bytes]:
         """
         Save pose frames as GIF.
 
         Parameters
         ----------
-        f_name : str
-            filename to save GIF to.
+        f_name : Union[str, None]
+            Filename to save GIF to. If None, image will be saved to memory and returned as bytes.
         frames : Iterable[np.ndarray]
             Series of pose frames to be included in GIF.
 
         Returns
         -------
-        None
+        Union[None, bytes]
+            If f_name is None, returns the GIF image data as bytes. Otherwise, returns None.
 
         Raises
         ------
         ImportError 
             If Pillow is not installed.
         """
-        self._save_image(f_name, frames, "GIF", False)
+        return self._save_image(f_name, frames, "GIF", False)
 
-    def save_png(self, f_name: str, frames: Iterable[np.ndarray], transparency: bool = True):
+    def save_png(self, f_name: Union[str, None], frames: Iterable[np.ndarray], transparency: bool = True) -> Union[None, bytes]:
         """
         Save pose frames as PNG.
 
         Parameters
         ----------
-        f_name : str
-            filename to save PNG to.
+        f_name : Union[str, None]
+            Filename to save PNG to. If None, image will be saved to memory and returned as bytes.
         frames : Iterable[np.ndarray]
             Series of pose frames to be included in PNG.
         transparency : bool
             transparency decides opacity of background color.
 
         Returns
         -------
-        None
+        Union[None, bytes]
+            If f_name is None, returns the PNG image data as bytes. Otherwise, returns None.
 
         Raises
         ------
         ImportError 
             If Pillow is not installed.
         """        
-        self._save_image(f_name, frames, "PNG", transparency)
+        return self._save_image(f_name, frames, "PNG", transparency)
 
     def save_video(self, f_name: str, frames: Iterable[np.ndarray], custom_ffmpeg=None):
         """

diff --git a/src/python/pose_format/utils/pose_converter.py b/src/python/pose_format/utils/pose_converter.py
@@ -38,7 +38,144 @@
     {("pose_keypoints_2d", "RHeel"), ("POSE_LANDMARKS", "RIGHT_HEEL")},
 ]
 
-POSES_MAP = BODY_MAP + LEFT_HAND_MAP + RIGHT_HAND_MAP
+FACE_MAP = [
+    # Face border mappings and interpolations (sets pair one landmark with two keypoints):
+    (("face_keypoints_2d", "FB_0"), ("FACE_LANDMARKS", "127")),
+    (("face_keypoints_2d", "FB_1"), ("FACE_LANDMARKS", "234")),
+    (("face_keypoints_2d", "FB_2"), ("FACE_LANDMARKS", "93")),
+    (("face_keypoints_2d", "FB_3"), ("FACE_LANDMARKS", "132")),
+    (("face_keypoints_2d", "FB_4"), ("FACE_LANDMARKS", "58")),
+    (("face_keypoints_2d", "FB_5"), ("FACE_LANDMARKS", "172")),
+    (("face_keypoints_2d", "FB_6"), ("FACE_LANDMARKS", "136")),
+    (("face_keypoints_2d", "FB_7"), ("FACE_LANDMARKS", "149")),
+    (("face_keypoints_2d", "FB_8"), ("FACE_LANDMARKS", "152")),
+    (("face_keypoints_2d", "FB_9"), ("FACE_LANDMARKS", "378")),
+    (("face_keypoints_2d", "FB_10"), ("FACE_LANDMARKS", "365")),
+    (("face_keypoints_2d", "FB_11"), ("FACE_LANDMARKS", "397")),
+    (("face_keypoints_2d", "FB_12"), ("FACE_LANDMARKS", "288")),
+    (("face_keypoints_2d", "FB_13"), ("FACE_LANDMARKS", "361")),
+    (("face_keypoints_2d", "FB_14"), ("FACE_LANDMARKS", "323")),
+    (("face_keypoints_2d", "FB_15"), ("FACE_LANDMARKS", "454")),
+    (("face_keypoints_2d", "FB_16"), ("FACE_LANDMARKS", "356")),
+    {("face_keypoints_2d", ("FB_6", "FB_7")), ("FACE_LANDMARKS", "150")},
+    {("face_keypoints_2d", ("FB_7", "FB_8")), ("FACE_LANDMARKS", "176")},
+    {("face_keypoints_2d", ("FB_7", "FB_8")), ("FACE_LANDMARKS", "148")},
+    {("face_keypoints_2d", ("FB_8", "FB_9")), ("FACE_LANDMARKS", "377")},
+    {("face_keypoints_2d", ("FB_8", "FB_9")), ("FACE_LANDMARKS", "400")},
+    {("face_keypoints_2d", ("FB_9", "FB_10")), ("FACE_LANDMARKS", "379")},
+
+    # Right eye mappings and interpolations:
+    (("face_keypoints_2d", "FE_42"), ("FACE_LANDMARKS", "362")),
+    (("face_keypoints_2d", "FE_43"), ("FACE_LANDMARKS", "385")),
+    (("face_keypoints_2d", "FE_44"), ("FACE_LANDMARKS", "387")),
+    (("face_keypoints_2d", "FE_45"), ("FACE_LANDMARKS", "263")),
+    (("face_keypoints_2d", "FE_46"), ("FACE_LANDMARKS", "373")),
+    (("face_keypoints_2d", "FE_47"), ("FACE_LANDMARKS", "380")),
+    {("face_keypoints_2d", ("FE_42", "FE_43")), ("FACE_LANDMARKS", "398")},
+    {("face_keypoints_2d", ("FE_42", "FE_43")), ("FACE_LANDMARKS", "384")},
+    {("face_keypoints_2d", ("FE_43", "FE_44")), ("FACE_LANDMARKS", "386")},
+    {("face_keypoints_2d", ("FE_44", "FE_45")), ("FACE_LANDMARKS", "388")},
+    {("face_keypoints_2d", ("FE_44", "FE_45")), ("FACE_LANDMARKS", "466")},
+    {("face_keypoints_2d", ("FE_45", "FE_46")), ("FACE_LANDMARKS", "249")},
+    {("face_keypoints_2d", ("FE_45", "FE_46")), ("FACE_LANDMARKS", "390")},
+    {("face_keypoints_2d", ("FE_46", "FE_47")), ("FACE_LANDMARKS", "374")},
+    {("face_keypoints_2d", ("FE_47", "FE_42")), ("FACE_LANDMARKS", "381")},
+    {("face_keypoints_2d", ("FE_47", "FE_42")), ("FACE_LANDMARKS", "382")},
+
+    # Left eye mappings and interpolations:
+    (("face_keypoints_2d", "FE_36"), ("FACE_LANDMARKS", "33")),
+    (("face_keypoints_2d", "FE_37"), ("FACE_LANDMARKS", "160")),
+    (("face_keypoints_2d", "FE_38"), ("FACE_LANDMARKS", "158")),
+    (("face_keypoints_2d", "FE_39"), ("FACE_LANDMARKS", "133")),
+    (("face_keypoints_2d", "FE_40"), ("FACE_LANDMARKS", "153")),
+    (("face_keypoints_2d", "FE_41"), ("FACE_LANDMARKS", "144")),
+    {("face_keypoints_2d", ("FE_36", "FE_37")), ("FACE_LANDMARKS", "246")},
+    {("face_keypoints_2d", ("FE_36", "FE_37")), ("FACE_LANDMARKS", "161")},
+    {("face_keypoints_2d", ("FE_37", "FE_38")), ("FACE_LANDMARKS", "159")},
+    {("face_keypoints_2d", ("FE_38", "FE_39")), ("FACE_LANDMARKS", "157")},
+    {("face_keypoints_2d", ("FE_38", "FE_39")), ("FACE_LANDMARKS", "173")},
+    {("face_keypoints_2d", ("FE_39", "FE_40")), ("FACE_LANDMARKS", "155")},
+    {("face_keypoints_2d", ("FE_39", "FE_40")), ("FACE_LANDMARKS", "154")},
+    {("face_keypoints_2d", ("FE_40", "FE_41")), ("FACE_LANDMARKS", "145")},
+    {("face_keypoints_2d", ("FE_41", "FE_36")), ("FACE_LANDMARKS", "163")},
+    {("face_keypoints_2d", ("FE_41", "FE_36")), ("FACE_LANDMARKS", "7")},
+
+    # Nose mappings and interpolations:
+    (("face_keypoints_2d", "FN_27"), ("FACE_LANDMARKS", "168")),
+    (("face_keypoints_2d", "FN_28"), ("FACE_LANDMARKS", "197")),
+    (("face_keypoints_2d", "FN_29"), ("FACE_LANDMARKS", "5")),
+    (("face_keypoints_2d", "FN_30"), ("FACE_LANDMARKS", "4")),
+    {("face_keypoints_2d", ("FN_27", "FN_28")), ("FACE_LANDMARKS", "6")},
+    {("face_keypoints_2d", ("FN_28", "FN_29")), ("FACE_LANDMARKS", "195")},
+
+    (("face_keypoints_2d", "FN_31"), ("FACE_LANDMARKS", "219")),
+    (("face_keypoints_2d", "FN_32"), ("FACE_LANDMARKS", "237")),
+    (("face_keypoints_2d", "FN_33"), ("FACE_LANDMARKS", "1")),
+    (("face_keypoints_2d", "FN_34"), ("FACE_LANDMARKS", "457")),
+    (("face_keypoints_2d", "FN_35"), ("FACE_LANDMARKS", "439")),
+    {("face_keypoints_2d", ("FN_31", "FN_32")), ("FACE_LANDMARKS", "218")},
+    {("face_keypoints_2d", ("FN_32", "FN_33")), ("FACE_LANDMARKS", "44")},
+    {("face_keypoints_2d", ("FN_33", "FN_34")), ("FACE_LANDMARKS", "274")},
+    {("face_keypoints_2d", ("FN_34", "FN_35")), ("FACE_LANDMARKS", "438")},
+
+    # Mouth mappings and interpolations:
+    (("face_keypoints_2d", "FLO_48"), ("FACE_LANDMARKS", "61")),
+    (("face_keypoints_2d", "FLO_49"), ("FACE_LANDMARKS", "40")),
+    (("face_keypoints_2d", "FLO_50"), ("FACE_LANDMARKS", "37")),
+    (("face_keypoints_2d", "FLO_51"), ("FACE_LANDMARKS", "0")),
+    (("face_keypoints_2d", "FLO_52"), ("FACE_LANDMARKS", "267")),
+    (("face_keypoints_2d", "FLO_53"), ("FACE_LANDMARKS", "270")),
+    (("face_keypoints_2d", "FLO_54"), ("FACE_LANDMARKS", "291")),
+    (("face_keypoints_2d", "FLO_55"), ("FACE_LANDMARKS", "321")),
+    (("face_keypoints_2d", "FLO_56"), ("FACE_LANDMARKS", "314")),
+    (("face_keypoints_2d", "FLO_57"), ("FACE_LANDMARKS", "17")),
+    (("face_keypoints_2d", "FLO_58"), ("FACE_LANDMARKS", "84")),
+    (("face_keypoints_2d", "FLO_59"), ("FACE_LANDMARKS", "91")),
+    {("face_keypoints_2d", ("FLO_48", "FLO_49")), ("FACE_LANDMARKS", "185")},
+    {("face_keypoints_2d", ("FLO_49", "FLO_50")), ("FACE_LANDMARKS", "39")},
+    {("face_keypoints_2d", ("FLO_52", "FLO_53")), ("FACE_LANDMARKS", "269")},
+    {("face_keypoints_2d", ("FLO_53", "FLO_54")), ("FACE_LANDMARKS", "409")},
+    {("face_keypoints_2d", ("FLO_54", "FLO_55")), ("FACE_LANDMARKS", "375")},
+    {("face_keypoints_2d", ("FLO_55", "FLO_56")), ("FACE_LANDMARKS", "405")},
+    {("face_keypoints_2d", ("FLO_58", "FLO_59")), ("FACE_LANDMARKS", "181")},
+    {("face_keypoints_2d", ("FLO_59", "FLO_48")), ("FACE_LANDMARKS", "146")},
+
+    # Inner mouth mappings and interpolations:
+    (("face_keypoints_2d", "FLI_60"), ("FACE_LANDMARKS", "78")),
+    (("face_keypoints_2d", "FLI_61"), ("FACE_LANDMARKS", "81")),
+    (("face_keypoints_2d", "FLI_62"), ("FACE_LANDMARKS", "13")),
+    (("face_keypoints_2d", "FLI_63"), ("FACE_LANDMARKS", "311")),
+    (("face_keypoints_2d", "FLI_64"), ("FACE_LANDMARKS", "308")),
+    (("face_keypoints_2d", "FLI_65"), ("FACE_LANDMARKS", "402")),
+    (("face_keypoints_2d", "FLI_66"), ("FACE_LANDMARKS", "14")),
+    (("face_keypoints_2d", "FLI_67"), ("FACE_LANDMARKS", "178")),
+    {("face_keypoints_2d", ("FLI_60", "FLI_61")), ("FACE_LANDMARKS", "191")},
+    {("face_keypoints_2d", ("FLI_60", "FLI_61")), ("FACE_LANDMARKS", "80")},
+    {("face_keypoints_2d", ("FLI_61", "FLI_62")), ("FACE_LANDMARKS", "82")},
+    {("face_keypoints_2d", ("FLI_62", "FLI_63")), ("FACE_LANDMARKS", "312")},
+    {("face_keypoints_2d", ("FLI_63", "FLI_64")), ("FACE_LANDMARKS", "310")},
+    {("face_keypoints_2d", ("FLI_63", "FLI_64")), ("FACE_LANDMARKS", "415")},
+    {("face_keypoints_2d", ("FLI_64", "FLI_65")), ("FACE_LANDMARKS", "318")},
+    {("face_keypoints_2d", ("FLI_64", "FLI_65")), ("FACE_LANDMARKS", "324")},
+    {("face_keypoints_2d", ("FLI_65", "FLI_66")), ("FACE_LANDMARKS", "317")},
+    {("face_keypoints_2d", ("FLI_66", "FLI_67")), ("FACE_LANDMARKS", "87")},
+    {("face_keypoints_2d", ("FLI_67", "FLI_60")), ("FACE_LANDMARKS", "88")},
+    {("face_keypoints_2d", ("FLI_67", "FLI_60")), ("FACE_LANDMARKS", "95")},
+
+    # FEB mappings (presumably eyebrows; confirm against the keypoint naming scheme):
+    (("face_keypoints_2d", "FEB_17"), ("FACE_LANDMARKS", "70")),
+    (("face_keypoints_2d", "FEB_18"), ("FACE_LANDMARKS", "63")),
+    (("face_keypoints_2d", "FEB_19"), ("FACE_LANDMARKS", "105")),
+    (("face_keypoints_2d", "FEB_20"), ("FACE_LANDMARKS", "66")),
+    (("face_keypoints_2d", "FEB_21"), ("FACE_LANDMARKS", "107")),
+    (("face_keypoints_2d", "FEB_22"), ("FACE_LANDMARKS", "336")),
+    (("face_keypoints_2d", "FEB_23"), ("FACE_LANDMARKS", "296")),
+    (("face_keypoints_2d", "FEB_24"), ("FACE_LANDMARKS", "334")),
+    (("face_keypoints_2d", "FEB_25"), ("FACE_LANDMARKS", "293")),
+    (("face_keypoints_2d", "FEB_26"), ("FACE_LANDMARKS", "300")),
+]
+
+POSES_MAP = BODY_MAP + LEFT_HAND_MAP + RIGHT_HAND_MAP + FACE_MAP
 
 
 def convert_pose(pose: Pose, pose_components: List[PoseHeaderComponent]) -> Pose:
@@ -93,7 +230,6 @@ def convert_pose(pose: Pose, pose_components: List[PoseHeaderComponent]) -> Pose
 
     return Pose(pose_header, pose_body)
 
-
 def save_image(pose: Pose, name: str):
     """
     Saves visualized pose as an image with a given name

diff --git a/src/python/pyproject.toml b/src/python/pyproject.toml
@@ -13,7 +13,7 @@ dependencies = [
     "scipy",
     "tqdm"
 ]
-requires-python = ">= 3.9"
+requires-python = ">= 3.8"
 
 [project.optional-dependencies]
 dev = [

diff --git a/src/python/tests/visualization_test.py b/src/python/tests/visualization_test.py
@@ -23,8 +23,7 @@ def test_save_gif(self):
         with tempfile.NamedTemporaryFile(suffix='.gif', delete=False) as temp_gif:
             v.save_gif(temp_gif.name, v.draw())
             self.assertTrue(os.path.exists(temp_gif.name))
-            self.assertGreater(os.path.getsize(
-                temp_gif.name), 0)
+            self.assertGreater(os.path.getsize(temp_gif.name), 0)
 
     def test_save_png(self):
         """
@@ -35,11 +34,10 @@ def test_save_png(self):
 
         v = PoseVisualizer(pose)
 
-        with tempfile.TemporaryDirectory() as temp_dir:
-            temp_png = os.path.join(temp_dir, 'example.png')
-            v.save_png(temp_png, v.draw(transparency=True))
-            self.assertTrue(os.path.exists(temp_png))
-            self.assertGreater(os.path.getsize(temp_png), 0)
+        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as temp_png:
+            v.save_png(temp_png.name, v.draw(transparency=True))
+            self.assertTrue(os.path.exists(temp_png.name))
+            self.assertGreater(os.path.getsize(temp_png.name), 0)
 
     def test_save_mp4(self):
         """
@@ -54,3 +52,14 @@ def test_save_mp4(self):
             v.save_video(temp_mp4.name, v.draw())
             self.assertTrue(os.path.exists(temp_mp4.name))
             self.assertGreater(os.path.getsize(temp_mp4.name), 0)
+
+    def test_save_to_memory(self):
+        """
+        Saving a PNG with no filename should hand back non-empty image bytes.
+        """
+        with open("tests/data/mediapipe_long_hand_normalized.pose", "rb") as pose_file:
+            pose = Pose.read(pose_file.read())
+
+        visualizer = PoseVisualizer(pose)
+        png_bytes = visualizer.save_png(None, visualizer.draw())
+        self.assertGreater(len(png_bytes), 0)