diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 2746c1f1e..d7fd6d7b3 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -134,6 +134,14 @@ hunter_private_data(
     LOCATION tiny_yolo_v4_blob
 )
 
+# YoloV8 resource
+hunter_private_data(
+    URL "https://artifacts.luxonis.com/artifactory/luxonis-depthai-data-local/network/yolov8n_coco_640x352.blob"
+    SHA1 "6b88a80ffb4267e253a89cb2b3e4e186453431b0"
+    FILE "yolov8n_coco_640x352.blob"
+    LOCATION yolov8n_blob
+)
+
 # NeuralNetwork node, mobilenet example, 5 shaves
 hunter_private_data(
     URL "https://artifacts.luxonis.com/artifactory/luxonis-depthai-data-local/network/mobilenet-ssd_openvino_2021.4_5shave.blob"
@@ -220,6 +228,7 @@ hunter_private_data(
     LOCATION normalization_model
 )
 
+
 ## message(STATUS "Location of test1.data: ${test1_data}")
 
 # bootloader
@@ -380,8 +389,10 @@ dai_add_example(mjpeg_encoding VideoEncoder/mjpeg_encoding_example.cpp ON OFF)
 # Yolo
 dai_add_example(tiny_yolo_v4_device_side_decoding Yolo/tiny_yolo.cpp ON OFF)
 dai_add_example(tiny_yolo_v3_device_side_decoding Yolo/tiny_yolo.cpp ON OFF)
+dai_add_example(yolo_v8_nano_device_side_decoding Yolo/yolov8_nano.cpp ON OFF)
 target_compile_definitions(tiny_yolo_v4_device_side_decoding PRIVATE BLOB_PATH="${tiny_yolo_v4_blob}")
 target_compile_definitions(tiny_yolo_v3_device_side_decoding PRIVATE BLOB_PATH="${tiny_yolo_v3_blob}")
+target_compile_definitions(yolo_v8_nano_device_side_decoding PRIVATE BLOB_PATH="${yolov8n_blob}")
 
 dai_add_example(apriltag AprilTag/apriltag.cpp ON OFF)
 dai_add_example(apriltag_rgb AprilTag/apriltag_rgb.cpp ON OFF)
diff --git a/examples/Yolo/yolov8_nano.cpp b/examples/Yolo/yolov8_nano.cpp
new file mode 100644
index 000000000..1268e1a56
--- /dev/null
+++ b/examples/Yolo/yolov8_nano.cpp
@@ -0,0 +1,157 @@
+#include <chrono>
+#include <iostream>
+
+// Includes common necessary includes for development using depthai library
+#include "depthai/depthai.hpp"
+
+/*
+The code is the same as for Tiny-yolo-V3 and V4; the only differences are the blob file and the anchors, which are not needed for V8.
+The blob was compiled following this tutorial: https://github.com/TNTWEN/OpenVINO-YOLOV4
+*/
+
+static const std::vector<std::string> labelMap = {
+    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
+    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse",
+    "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag",
+    "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
+    "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon",
+    "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza",
+    "donut", "cake", "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor",
+    "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
+    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"};
+
+static std::atomic<bool> syncNN{true};
+
+int main(int argc, char** argv) {
+    using namespace std;
+    using namespace std::chrono;
+    std::string nnPath(BLOB_PATH);
+
+    // If path to blob specified, use that
+    if(argc > 1) {
+        nnPath = std::string(argv[1]);
+    }
+
+    // Print which blob we are using
+    printf("Using blob at path: %s\n", nnPath.c_str());
+
+    // Create pipeline
+    dai::Pipeline pipeline;
+
+    // Define sources and outputs
+    auto camRgb = pipeline.create<dai::node::ColorCamera>();
+    auto detectionNetwork = pipeline.create<dai::node::YoloDetectionNetwork>();
+    auto xoutRgb = pipeline.create<dai::node::XLinkOut>();
+    auto nnOut = pipeline.create<dai::node::XLinkOut>();
+
+    xoutRgb->setStreamName("rgb");
+    nnOut->setStreamName("detections");
+
+    // Properties
+    camRgb->setPreviewSize(640, 352);
+    camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P);
+    camRgb->setInterleaved(false);
+    camRgb->setColorOrder(dai::ColorCameraProperties::ColorOrder::BGR);
+    camRgb->setFps(40);
+
+    // Network specific settings
+    detectionNetwork->setConfidenceThreshold(0.5f);
+    detectionNetwork->setNumClasses(80);
+    detectionNetwork->setCoordinateSize(4);
+    detectionNetwork->setIouThreshold(0.5f);
+    detectionNetwork->setBlobPath(nnPath);
+    detectionNetwork->setNumInferenceThreads(2);
+    detectionNetwork->input.setBlocking(false);
+
+    // Linking
+    camRgb->preview.link(detectionNetwork->input);
+    if(syncNN) {
+        detectionNetwork->passthrough.link(xoutRgb->input);
+    } else {
+        camRgb->preview.link(xoutRgb->input);
+    }
+
+    detectionNetwork->out.link(nnOut->input);
+
+    // Connect to device and start pipeline
+    dai::Device device(pipeline);
+
+    // Output queues will be used to get the rgb frames and nn data from the outputs defined above
+    auto qRgb = device.getOutputQueue("rgb", 4, false);
+    auto qDet = device.getOutputQueue("detections", 4, false);
+
+    cv::Mat frame;
+    std::vector<dai::ImgDetection> detections;
+    auto startTime = steady_clock::now();
+    int counter = 0;
+    float fps = 0;
+    auto color2 = cv::Scalar(255, 255, 255);
+
+    // Add bounding boxes and text to the frame and show it to the user
+    auto displayFrame = [](std::string name, cv::Mat frame, std::vector<dai::ImgDetection>& detections) {
+        auto color = cv::Scalar(255, 0, 0);
+        // nn data, being the bounding box locations, are in <0..1> range - they need to be normalized with frame width/height
+        for(auto& detection : detections) {
+            int x1 = detection.xmin * frame.cols;
+            int y1 = detection.ymin * frame.rows;
+            int x2 = detection.xmax * frame.cols;
+            int y2 = detection.ymax * frame.rows;
+
+            uint32_t labelIndex = detection.label;
+            std::string labelStr = to_string(labelIndex);
+            if(labelIndex < labelMap.size()) {
+                labelStr = labelMap[labelIndex];
+            }
+            cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
+            std::stringstream confStr;
+            confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
+            cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
+            cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
+        }
+        // Show the frame
+        cv::imshow(name, frame);
+    };
+
+    while(true) {
+        std::shared_ptr<dai::ImgFrame> inRgb;
+        std::shared_ptr<dai::ImgDetections> inDet;
+
+        if(syncNN) {
+            inRgb = qRgb->get<dai::ImgFrame>();
+            inDet = qDet->get<dai::ImgDetections>();
+        } else {
+            inRgb = qRgb->tryGet<dai::ImgFrame>();
+            inDet = qDet->tryGet<dai::ImgDetections>();
+        }
+
+        counter++;
+        auto currentTime = steady_clock::now();
+        auto elapsed = duration_cast<duration<float>>(currentTime - startTime);
+        if(elapsed > seconds(1)) {
+            fps = counter / elapsed.count();
+            counter = 0;
+            startTime = currentTime;
+        }
+
+        if(inRgb) {
+            frame = inRgb->getCvFrame();
+            std::stringstream fpsStr;
+            fpsStr << "NN fps: " << std::fixed << std::setprecision(2) << fps;
+            cv::putText(frame, fpsStr.str(), cv::Point(2, inRgb->getHeight() - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4, color2);
+        }
+
+        if(inDet) {
+            detections = inDet->detections;
+        }
+
+        if(!frame.empty()) {
+            displayFrame("rgb", frame, detections);
+        }
+
+        int key = cv::waitKey(1);
+        if(key == 'q' || key == 'Q') {
+            return 0;
+        }
+    }
+    return 0;
+}
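As the header comment notes, yolov8_nano.cpp mirrors the existing tiny_yolo.cpp example; only the blob changes, and the anchor configuration is dropped because YOLOv8 is anchor-free. For comparison, below is a minimal sketch of how the same YoloDetectionNetwork node is configured when anchors are required. The anchor values and masks are copied from examples/Yolo/tiny_yolo.cpp (Tiny-YOLOv4, 416x416 input); the helper name createTinyYoloV4Network is hypothetical and only used for illustration.

```cpp
#include "depthai/depthai.hpp"

// Sketch: anchor-based configuration as used by examples/Yolo/tiny_yolo.cpp.
// The yolov8_nano example simply omits setAnchors()/setAnchorMasks();
// the rest of the node setup is identical.
static std::shared_ptr<dai::node::YoloDetectionNetwork> createTinyYoloV4Network(dai::Pipeline& pipeline, const std::string& blobPath) {
    auto nn = pipeline.create<dai::node::YoloDetectionNetwork>();
    nn->setConfidenceThreshold(0.5f);
    nn->setNumClasses(80);
    nn->setCoordinateSize(4);
    // Anchors and masks for Tiny-YOLOv4 (not needed for the anchor-free YOLOv8 blob)
    nn->setAnchors({10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319});
    nn->setAnchorMasks({{"side26", {1, 2, 3}}, {"side13", {3, 4, 5}}});
    nn->setIouThreshold(0.5f);
    nn->setBlobPath(blobPath);
    nn->setNumInferenceThreads(2);
    nn->input.setBlocking(false);
    return nn;
}
```

At runtime the new example also accepts a blob path as its first command-line argument and only falls back to the compile-time BLOB_PATH definition added in examples/CMakeLists.txt when no argument is given.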