From 9ad67eb707c5d74697848219699135a016c3925d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mihael=20Ver=C4=8Dek?=
Date: Sun, 7 Jul 2024 13:42:32 +0200
Subject: [PATCH 1/4] face detection works

---
 examples/cpp/CMakeLists.txt             |  1 +
 examples/cpp/RVC2/Emotions/emotions.cpp | 89 +++++++++++++++++++++++++
 2 files changed, 90 insertions(+)
 create mode 100644 examples/cpp/RVC2/Emotions/emotions.cpp

diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt
index 0b33d04fb..f21049c36 100644
--- a/examples/cpp/CMakeLists.txt
+++ b/examples/cpp/CMakeLists.txt
@@ -469,6 +469,7 @@ dai_add_example(replay_video_meta RVC2/Replay/replay_video_meta.cpp ON OFF)
 dai_add_example(camera_multiple_outputs RVC4/Camera/camera_multiple_outputs.cpp ON OFF)
 
 # Host nodes
+dai_add_example(emotions RVC2/Emotions/emotions.cpp ON OFF)
 dai_add_example(rgb_video RVC2/ColorCamera/rgb_video.cpp ON OFF)
 dai_add_example(host_node HostNodes/host_node.cpp ON OFF)
 dai_add_example(threaded_host_node HostNodes/threaded_host_node.cpp ON OFF)
diff --git a/examples/cpp/RVC2/Emotions/emotions.cpp b/examples/cpp/RVC2/Emotions/emotions.cpp
new file mode 100644
index 000000000..e283bcda4
--- /dev/null
+++ b/examples/cpp/RVC2/Emotions/emotions.cpp
@@ -0,0 +1,89 @@
+// Includes common necessary includes for development using depthai library
+#include "depthai/depthai.hpp"
+
+void displayFrame(cv::Mat& frame, std::vector<dai::ImgDetection>& detections)
+{
+    auto color = cv::Scalar(255, 0, 0);
+    for(auto& detection : detections) {
+        int x1 = detection.xmin * frame.cols;
+        int y1 = detection.ymin * frame.rows;
+        int x2 = detection.xmax * frame.cols;
+        int y2 = detection.ymax * frame.rows;
+
+        std::stringstream confStr;
+        confStr << "label: " << detection.label << " " << detection.confidence;
+        cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
+        cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
+    }
+    cv::imshow("preview", frame);
+}
+
+int main(int argc, char** argv) {
+    std::string nnPath;
+    if(argc > 1) {
+        nnPath = std::string(argv[1]);
+    } else {
+        std::cout << "Provide path to face detection blob as first arg!" << std::endl;
+        return 1;
+    }
+
+
+    // Create pipeline
+    auto device = std::make_shared<dai::Device>(dai::OpenVINO::VERSION_UNIVERSAL, dai::UsbSpeed::HIGH);
+    dai::Pipeline pipeline(device);
+    // Define source and output
+    auto camRgb = pipeline.create<dai::node::ColorCamera>();
+    auto nn = pipeline.create<dai::node::NeuralNetwork>();
+    auto det = pipeline.create<dai::node::DetectionParser>();
+
+    // Properties
+    camRgb->setBoardSocket(dai::CameraBoardSocket::CAM_A);
+    camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P);
+    camRgb->setPreviewSize(300, 300);
+    camRgb->setInterleaved(false);
+
+    nn->setNumInferenceThreads(2);
+    nn->input.setBlocking(false);
+    dai::OpenVINO::Blob blob(nnPath);
+    nn->setBlob(blob);
+
+    det->setBlob(blob);
+    det->setNNFamily(DetectionNetworkType::MOBILENET);
+    det->setConfidenceThreshold(0.5);
+
+    camRgb->preview.link(nn->input);
+    nn->out.link(det->input);
+
+    auto outPassthrough = nn->passthrough.createOutputQueue();
+    auto outDet = det->out.createOutputQueue();
+
+    pipeline.start();
+    while(pipeline.isRunning()) {
+        std::shared_ptr<dai::ImgFrame> inRgb;
+        std::shared_ptr<dai::ImgDetections> inDet;
+
+        inRgb = outPassthrough->get<dai::ImgFrame>();
+        inDet = outDet->get<dai::ImgDetections>();
+
+        cv::Mat frame;
+        std::vector<dai::ImgDetection> detections;
+
+        if(inRgb) {
+            frame = inRgb->getCvFrame();
+        }
+
+        if(inDet) {
+            detections = inDet->detections;
+        }
+
+        if(!frame.empty()) {
+            displayFrame(frame, detections);
+        }
+
+        int key = cv::waitKey(1);
+        if(key == 'q' || key == 'Q') {
+            return 0;
+        }
+    }
+    return 0;
+}
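
Note on PATCH 1: ImgDetection bounding-box coordinates are normalized to [0, 1], which is why displayFrame() multiplies them by the frame dimensions. Also note that the cv::rectangle call passes cv::FONT_HERSHEY_SIMPLEX (numerically 0) where the thickness argument belongs; a small positive thickness was probably intended. A standalone sketch of the coordinate mapping (normToPixels is a hypothetical helper, not part of the patch; clamping is added because detections can fall slightly outside the frame):

    #include <algorithm>
    #include <opencv2/core.hpp>

    // Map a normalized bounding box to pixel coordinates, clamped to the frame.
    cv::Rect normToPixels(float xmin, float ymin, float xmax, float ymax, const cv::Mat& frame) {
        int x1 = std::clamp(static_cast<int>(xmin * frame.cols), 0, frame.cols - 1);
        int y1 = std::clamp(static_cast<int>(ymin * frame.rows), 0, frame.rows - 1);
        int x2 = std::clamp(static_cast<int>(xmax * frame.cols), 0, frame.cols - 1);
        int y2 = std::clamp(static_cast<int>(ymax * frame.rows), 0, frame.rows - 1);
        return cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2));
    }
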
From fee8a9cb490efa39e58137cf4ebee59e3ef2ca66 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mihael=20Ver=C4=8Dek?=
Date: Sun, 7 Jul 2024 16:58:54 +0200
Subject: [PATCH 2/4] Emotion detection pipeline set up, but no emotions found

---
 examples/cpp/RVC2/Emotions/emotions.cpp | 107 ++++++++++++++++++++----
 1 file changed, 93 insertions(+), 14 deletions(-)

diff --git a/examples/cpp/RVC2/Emotions/emotions.cpp b/examples/cpp/RVC2/Emotions/emotions.cpp
index e283bcda4..4e38ca9f1 100644
--- a/examples/cpp/RVC2/Emotions/emotions.cpp
+++ b/examples/cpp/RVC2/Emotions/emotions.cpp
@@ -1,17 +1,54 @@
 // Includes common necessary includes for development using depthai library
 #include "depthai/depthai.hpp"
 
-void displayFrame(cv::Mat& frame, std::vector<dai::ImgDetection>& detections)
+class Face2ImageManipConfig : public dai::NodeCRTP<dai::node::ThreadedHostNode, Face2ImageManipConfig> {
+public:
+    Input inputDetections = dai::Node::Input{*this, {}};
+    Output outputManipulators = dai::Node::Output{*this, {}};
+
+    void run() override {
+        while(isRunning()) {
+            std::shared_ptr<dai::ImgDetections> inDet;
+            inDet = inputDetections.get<dai::ImgDetections>();
+
+            if(!inDet)
+            {
+                continue;
+            }
+
+            for(auto& detection : inDet->detections) {
+                std::shared_ptr<dai::ImageManipConfig> manipulator = std::make_shared<dai::ImageManipConfig>();
+                manipulator->setCropRect(detection.xmin,
+                                         detection.ymin,
+                                         detection.xmax,
+                                         detection.ymax);
+                manipulator->setResize(64, 64);
+                outputManipulators.send(manipulator);
+            }
+
+        }
+    }
+};
+
+void displayFrame(cv::Mat& frame, std::vector<dai::ImgDetection>& detections, std::vector<dai::ImgDetection>& detectionsEmo)
 {
     auto color = cv::Scalar(255, 0, 0);
-    for(auto& detection : detections) {
+    for(int i = 0; i < detections.size(); i++) {
+        auto& detection = detections[i];
         int x1 = detection.xmin * frame.cols;
         int y1 = detection.ymin * frame.rows;
         int x2 = detection.xmax * frame.cols;
         int y2 = detection.ymax * frame.rows;
 
         std::stringstream confStr;
-        confStr << "label: " << detection.label << " " << detection.confidence;
+        if(i < detectionsEmo.size()) {
+            auto& detEmo = detectionsEmo[i];
+            confStr << "label: " << detEmo.label << " " << detEmo.confidence;
+        } else {
+            confStr << "NA";
+        }
         cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
         cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
     }
@@ -20,11 +57,13 @@ void displayFrame(cv::Mat& frame, std::vector<dai::ImgDetection>& detections)
 
 int main(int argc, char** argv) {
     std::string nnPath;
-    if(argc > 1) {
+    std::string nnEmoPath;
+    if(argc > 2) {
         nnPath = std::string(argv[1]);
+        nnEmoPath = std::string(argv[2]);
     } else {
-        std::cout << "Provide path to face detection blob as first arg!" << std::endl;
+        std::cout << "call with arguments: {detection blob} {emotion blob}" << std::endl;
         return 1;
     }
 
@@ -34,39 +73,74 @@ int main(int argc, char** argv) {
     // Define source and output
     auto camRgb = pipeline.create<dai::node::ColorCamera>();
     auto nn = pipeline.create<dai::node::NeuralNetwork>();
+    auto nnEmo = pipeline.create<dai::node::NeuralNetwork>();
     auto det = pipeline.create<dai::node::DetectionParser>();
+    auto detEmo = pipeline.create<dai::node::DetectionParser>();
+    auto manipConf = pipeline.create<Face2ImageManipConfig>();
+    auto manip = pipeline.create<dai::node::ImageManip>();
 
-    // Properties
+    // Camera props
     camRgb->setBoardSocket(dai::CameraBoardSocket::CAM_A);
     camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P);
     camRgb->setPreviewSize(300, 300);
     camRgb->setInterleaved(false);
 
+    // Face detection NN props
     nn->setNumInferenceThreads(2);
     nn->input.setBlocking(false);
-    dai::OpenVINO::Blob blob(nnPath);
-    nn->setBlob(blob);
-
-    det->setBlob(blob);
+    dai::OpenVINO::Blob blob1(nnPath);
+    nn->setBlob(blob1);
+
+    // Face detection NN parser props
+    det->setBlob(blob1);
     det->setNNFamily(DetectionNetworkType::MOBILENET);
     det->setConfidenceThreshold(0.5);
 
+    // Emotion detection NN props
+    nnEmo->setNumInferenceThreads(2);
+    nnEmo->input.setBlocking(false);
+    dai::OpenVINO::Blob blob2(nnEmoPath);
+    nnEmo->setBlob(blob2);
+
+    // Emotion detection NN parser props
+    detEmo->setBlob(blob2);
+    detEmo->setNNFamily(DetectionNetworkType::MOBILENET);
+    detEmo->setConfidenceThreshold(0.5);
+    detEmo->setNumClasses(5);
+
+    // ImageManip props
+    manip->initialConfig.setResize(64, 64);
+
+    // Linking
+    /*
+        rgb -> nn -> det -> manipConf -> manip -> nnEmo -> detEmo
+           ---------------------------->
+    */
     camRgb->preview.link(nn->input);
     nn->out.link(det->input);
+    det->out.link(manipConf->inputDetections);
+    manipConf->outputManipulators.link(manip->inputConfig);
+    camRgb->preview.link(manip->inputImage);
+    manip->out.link(nnEmo->input);
+    nnEmo->out.link(detEmo->input);
 
     auto outPassthrough = nn->passthrough.createOutputQueue();
     auto outDet = det->out.createOutputQueue();
+    auto outDetEmo = detEmo->out.createOutputQueue();
 
     pipeline.start();
     while(pipeline.isRunning()) {
         std::shared_ptr<dai::ImgFrame> inRgb;
         std::shared_ptr<dai::ImgDetections> inDet;
+        std::shared_ptr<dai::ImgDetections> inDetEmo;
 
         inRgb = outPassthrough->get<dai::ImgFrame>();
         inDet = outDet->get<dai::ImgDetections>();
+        inDetEmo = outDetEmo->get<dai::ImgDetections>();
 
         cv::Mat frame;
         std::vector<dai::ImgDetection> detections;
+        std::vector<dai::ImgDetection> detectionsEmo;
 
         if(inRgb) {
             frame = inRgb->getCvFrame();
         }
@@ -76,8 +150,13 @@ int main(int argc, char** argv) {
             detections = inDet->detections;
         }
 
+        if(inDetEmo)
+        {
+            detectionsEmo = inDetEmo->detections;
+        }
+
         if(!frame.empty()) {
-            displayFrame(frame, detections);
+            displayFrame(frame, detections, detectionsEmo);
         }
 
         int key = cv::waitKey(1);
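
Note on PATCH 2: the "no emotions found" symptom follows from parsing the emotion network with a MOBILENET-family DetectionParser. Emotion-recognition classifiers of this shape (e.g. emotions-recognition-retail-0003, which matches the five labels and 64x64 input used here) end in a plain softmax over five classes rather than SSD detection boxes, so the parser has nothing to decode; PATCH 3 drops the second parser and reads the raw tensor instead. The host node above emits one ImageManipConfig per detected face, and the crop rectangle uses the same normalized coordinates as ImgDetection. A minimal sketch of that contract (illustrative values; the real ones come from each detection):

    #include "depthai/depthai.hpp"

    // Build the kind of crop config the host node sends for one detection.
    dai::ImageManipConfig makeFaceCrop() {
        dai::ImageManipConfig cfg;
        cfg.setCropRect(0.25f, 0.25f, 0.75f, 0.75f);  // normalized xmin, ymin, xmax, ymax
        cfg.setResize(64, 64);                        // match the emotion network's 64x64 input
        return cfg;
    }
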
From ceedf1b26e3badc465905dfd827958697d26548b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mihael=20Ver=C4=8Dek?=
Date: Tue, 16 Jul 2024 20:55:22 +0200
Subject: [PATCH 3/4] Use the emotion detection NN's last layer for
 determination

---
 examples/cpp/RVC2/Emotions/emotions.cpp | 48 +++++++++++--------------
 1 file changed, 21 insertions(+), 27 deletions(-)

diff --git a/examples/cpp/RVC2/Emotions/emotions.cpp b/examples/cpp/RVC2/Emotions/emotions.cpp
index 4e38ca9f1..1c0816108 100644
--- a/examples/cpp/RVC2/Emotions/emotions.cpp
+++ b/examples/cpp/RVC2/Emotions/emotions.cpp
@@ -1,5 +1,6 @@
 // Includes common necessary includes for development using depthai library
 #include "depthai/depthai.hpp"
+#include "xtensor/xsort.hpp"
 
 class Face2ImageManipConfig : public dai::NodeCRTP<dai::node::ThreadedHostNode, Face2ImageManipConfig> {
 public:
@@ -30,7 +31,9 @@ class Face2ImageManipConfig : public dai::NodeCRTP<dai::node::ThreadedHostNode,
     }
 };
 
-void displayFrame(cv::Mat& frame, std::vector<dai::ImgDetection>& detections, std::vector<dai::ImgDetection>& detectionsEmo)
+std::array<std::string, 5> emotions = {"neutral", "happy", "sad", "surprise", "anger"};
+
+void displayFrame(cv::Mat& frame, std::vector<dai::ImgDetection>& detections, xt::xarray<float>& resultLayer)
 {
     auto color = cv::Scalar(255, 0, 0);
     for(int i = 0; i < detections.size(); i++) {
@@ -41,14 +44,14 @@ void displayFrame(cv::Mat& frame, std::vector<dai::ImgDetection>& detections, st
         int y2 = detection.ymax * frame.rows;
 
         std::stringstream confStr;
-        if(i < detectionsEmo.size()) {
-            auto& detEmo = detectionsEmo[i];
-            confStr << "label: " << detEmo.label << " " << detEmo.confidence;
-        } else {
-            confStr << "NA";
-        }
+        auto emotionIndex = xt::argmax(resultLayer)(0);
+        if(emotionIndex < emotions.size()) {
+            confStr << emotions[emotionIndex];
+        } else {
+            confStr << "Err index: " << emotionIndex;
+        }
+
         cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
         cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
     }
@@ -76,7 +79,6 @@ int main(int argc, char** argv) {
     auto nn = pipeline.create<dai::node::NeuralNetwork>();
     auto nnEmo = pipeline.create<dai::node::NeuralNetwork>();
     auto det = pipeline.create<dai::node::DetectionParser>();
-    auto detEmo = pipeline.create<dai::node::DetectionParser>();
     auto manipConf = pipeline.create<Face2ImageManipConfig>();
     auto manip = pipeline.create<dai::node::ImageManip>();
 
@@ -102,18 +104,12 @@ int main(int argc, char** argv) {
     dai::OpenVINO::Blob blob2(nnEmoPath);
     nnEmo->setBlob(blob2);
 
-    // Emotion detection NN parser props
-    detEmo->setBlob(blob2);
-    detEmo->setNNFamily(DetectionNetworkType::MOBILENET);
-    detEmo->setConfidenceThreshold(0.5);
-    detEmo->setNumClasses(5);
-
     // ImageManip props
     manip->initialConfig.setResize(64, 64);
 
     // Linking
     /*
-        rgb -> nn -> det -> manipConf -> manip -> nnEmo -> detEmo
+        rgb -> nn -> det -> manipConf -> manip -> nnEmo
           ---------------------------->
     */
     camRgb->preview.link(nn->input);
@@ -122,26 +118,25 @@ int main(int argc, char** argv) {
     manipConf->outputManipulators.link(manip->inputConfig);
     camRgb->preview.link(manip->inputImage);
     manip->out.link(nnEmo->input);
-    nnEmo->out.link(detEmo->input);
 
     auto outPassthrough = nn->passthrough.createOutputQueue();
     auto outDet = det->out.createOutputQueue();
-    auto outDetEmo = detEmo->out.createOutputQueue();
+    auto outNNEmo = nnEmo->out.createOutputQueue();
 
     pipeline.start();
     while(pipeline.isRunning()) {
         std::shared_ptr<dai::ImgFrame> inRgb;
         std::shared_ptr<dai::ImgDetections> inDet;
-        std::shared_ptr<dai::ImgDetections> inDetEmo;
+        std::shared_ptr<dai::NNData> inNNEmo;
 
         inRgb = outPassthrough->get<dai::ImgFrame>();
         inDet = outDet->get<dai::ImgDetections>();
-        inDetEmo = outDetEmo->get<dai::ImgDetections>();
+        inNNEmo = outNNEmo->get<dai::NNData>();
 
         cv::Mat frame;
         std::vector<dai::ImgDetection> detections;
-        std::vector<dai::ImgDetection> detectionsEmo;
+        xt::xarray<float> resultLayer;
 
         if(inRgb) {
             frame = inRgb->getCvFrame();
         }
@@ -150,13 +145,12 @@ int main(int argc, char** argv) {
             detections = inDet->detections;
         }
 
-        if(inDetEmo)
-        {
-            detectionsEmo = inDetEmo->detections;
+        if(inNNEmo && !inNNEmo->tensors.empty()) {
+            resultLayer = inNNEmo->getTensor<float>(inNNEmo->tensors.back().name, false);
         }
 
         if(!frame.empty()) {
-            displayFrame(frame, detections, detectionsEmo);
+            displayFrame(frame, detections, resultLayer);
         }
 
         int key = cv::waitKey(1);
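
Note on PATCH 3: the emotion network's last output layer is a five-element score vector, and the label is simply the argmax over it. A self-contained sketch of that readout (the scores here are hypothetical; in the example they come from NNData::getTensor<float>() as above):

    #include <array>
    #include <iostream>
    #include <string>
    #include "xtensor/xarray.hpp"
    #include "xtensor/xsort.hpp"

    int main() {
        // Scores as the emotion network might emit them for one face.
        xt::xarray<float> scores = {0.02f, 0.91f, 0.03f, 0.02f, 0.02f};
        const std::array<std::string, 5> emotions = {"neutral", "happy", "sad", "surprise", "anger"};

        // Same readout as displayFrame(): the highest score wins.
        auto emotionIndex = xt::argmax(scores)(0);
        std::cout << (emotionIndex < emotions.size() ? emotions[emotionIndex] : "unknown") << std::endl;  // prints "happy"
        return 0;
    }
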
"happy", "sad", "surprise", "anger"}; + +void displayFrame(cv::Mat& frame, std::vector& detections, xt::xarray& resultLayer) { auto color = cv::Scalar(255, 0, 0); for(int i = 0; i < detections.size(); i++) { @@ -41,14 +44,14 @@ void displayFrame(cv::Mat& frame, std::vector& detections, st int y2 = detection.ymax * frame.rows; std::stringstream confStr; - if(i < detectionsEmo.size()) { - auto& detEmo = detectionsEmo[i]; - confStr << "label: " << detEmo.label << " " <(); auto nnEmo = pipeline.create(); auto det = pipeline.create(); - auto detEmo = pipeline.create(); auto manipConf = pipeline.create(); auto manip = pipeline.create(); @@ -102,18 +104,12 @@ int main(int argc, char** argv) { dai::OpenVINO::Blob blob2(nnEmoPath); nnEmo->setBlob(blob2); - // Emotion detection NN parser props - detEmo->setBlob(blob2); - detEmo->setNNFamily(DetectionNetworkType::MOBILENET); - detEmo->setConfidenceThreshold(0.5); - detEmo->setNumClasses(5); - // ImageManip props manip->initialConfig.setResize(64,64); // Linking /* - rgb -> nn -> det -> manipConf -> manip -> nnEmo -> detEmo + rgb -> nn -> det -> manipConf -> manip -> nnEmo ---------------------------> */ camRgb->preview.link(nn->input); @@ -122,26 +118,25 @@ int main(int argc, char** argv) { manipConf->outputManipulators.link(manip->inputConfig); camRgb->preview.link(manip->inputImage); manip->out.link(nnEmo->input); - nnEmo->out.link(detEmo->input); auto outPassthrough = nn->passthrough.createOutputQueue(); auto outDet = det->out.createOutputQueue(); - auto outDetEmo = detEmo->out.createOutputQueue(); + auto outNNEmo = nnEmo->out.createOutputQueue(); pipeline.start(); while(pipeline.isRunning()) { std::shared_ptr inRgb; std::shared_ptr inDet; - std::shared_ptr inDetEmo; + std::shared_ptr inNNEmo; inRgb = outPassthrough->get(); inDet = outDet->get(); - inDetEmo = outDetEmo->get(); + inNNEmo = outNNEmo->get(); cv::Mat frame; std::vector detections; - std::vector detectionsEmo; - + xt::xarray resultLayer; + if(inRgb) { frame = inRgb->getCvFrame(); } @@ -150,13 +145,12 @@ int main(int argc, char** argv) { detections = inDet->detections; } - if(inDetEmo) - { - detectionsEmo = inDetEmo->detections; + if(inNNEmo && !inNNEmo->tensors.empty()) { + resultLayer = inNNEmo->getTensor(inNNEmo->tensors.back().name, false); } - + if(!frame.empty()) { - displayFrame(frame, detections, detectionsEmo); + displayFrame(frame, detections, resultLayer); } int key = cv::waitKey(1); From 085109ed304991347d42876fdf0b086d9e682af6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mihael=20Ver=C4=8Dek?= Date: Tue, 16 Jul 2024 21:17:27 +0200 Subject: [PATCH 4/4] Use all available detections --- examples/cpp/RVC2/Emotions/emotions.cpp | 33 ++++++++++++++++--------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/examples/cpp/RVC2/Emotions/emotions.cpp b/examples/cpp/RVC2/Emotions/emotions.cpp index 1c0816108..e66dd29cd 100644 --- a/examples/cpp/RVC2/Emotions/emotions.cpp +++ b/examples/cpp/RVC2/Emotions/emotions.cpp @@ -33,7 +33,7 @@ class Face2ImageManipConfig : public dai::NodeCRTP emotions = {"neutral", "happy", "sad", "surprise", "anger"}; -void displayFrame(cv::Mat& frame, std::vector& detections, xt::xarray& resultLayer) +void displayFrame(cv::Mat& frame, std::vector& detections, std::vector>& resultLayers) { auto color = cv::Scalar(255, 0, 0); for(int i = 0; i < detections.size(); i++) { @@ -44,14 +44,17 @@ void displayFrame(cv::Mat& frame, std::vector& detections, xt int y2 = detection.ymax * frame.rows; std::stringstream confStr; - auto emotionIndex = 
@@ -127,15 +130,15 @@ int main(int argc, char** argv) {
     while(pipeline.isRunning()) {
         std::shared_ptr<dai::ImgFrame> inRgb;
         std::shared_ptr<dai::ImgDetections> inDet;
-        std::shared_ptr<dai::NNData> inNNEmo;
+        std::vector<std::shared_ptr<dai::NNData>> inNNEmos;
 
         inRgb = outPassthrough->get<dai::ImgFrame>();
         inDet = outDet->get<dai::ImgDetections>();
-        inNNEmo = outNNEmo->get<dai::NNData>();
+        inNNEmos = outNNEmo->getAll<dai::NNData>();
 
         cv::Mat frame;
         std::vector<dai::ImgDetection> detections;
-        xt::xarray<float> resultLayer;
+        std::vector<xt::xarray<float>> resultLayers;
 
         if(inRgb) {
             frame = inRgb->getCvFrame();
@@ -145,12 +148,18 @@ int main(int argc, char** argv) {
             detections = inDet->detections;
         }
 
-        if(inNNEmo && !inNNEmo->tensors.empty()) {
-            resultLayer = inNNEmo->getTensor<float>(inNNEmo->tensors.back().name, false);
+        for(auto& inNN : inNNEmos) {
+            if(inNN && !inNN->tensors.empty()) {
+                resultLayers.push_back(inNN->getTensor<float>(inNN->tensors.back().name, false));
+            } else {
+                resultLayers.push_back({});
+            }
         }
+
+
 
         if(!frame.empty()) {
-            displayFrame(frame, detections, resultLayer);
+            displayFrame(frame, detections, resultLayers);
         }
 
         int key = cv::waitKey(1);
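
Note on the series as a whole: once built (the CMake change in PATCH 1 adds the `emotions` target), the example takes two blob paths, face detector first, emotion classifier second. The blob names below are placeholders; any MobileNet-SSD face detector with a 300x300 input and a five-class, 64x64 emotion classifier compiled for RVC2 should fit the pipeline as configured:

    ./emotions face-detection-retail-0004.blob emotions-recognition-retail-0003.blob

One caveat worth keeping in mind: detections[i] is paired with resultLayers[i] purely by arrival order, so a dropped or delayed ImageManip frame would shift the emotion labels relative to the faces they belong to.
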