diff --git a/examples/YOLOv8-OpenVINO-CPP-Inference/CMakeLists.txt b/examples/YOLOv8-OpenVINO-CPP-Inference/CMakeLists.txt
new file mode 100644
index 00000000..d34ea96f
--- /dev/null
+++ b/examples/YOLOv8-OpenVINO-CPP-Inference/CMakeLists.txt
@@ -0,0 +1,25 @@
+cmake_minimum_required(VERSION 3.12)
+project(yolov8_openvino_example)
+
+set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+find_package(OpenCV REQUIRED)
+# Locate OpenVINO through its CMake package config instead of a hard-coded install path.
+# Source <openvino_dir>/setupvars.sh or pass -DOpenVINO_DIR=<openvino_dir>/runtime/cmake.
+find_package(OpenVINO REQUIRED COMPONENTS Runtime)
+
+include_directories(
+  ${OpenCV_INCLUDE_DIRS}
+)
+
+add_executable(detect
+  main.cc
+  inference.cc
+)
+
+# The imported openvino::runtime target carries its own include directories and link flags.
+target_link_libraries(detect
+  ${OpenCV_LIBS}
+  openvino::runtime
+)
diff --git a/examples/YOLOv8-OpenVINO-CPP-Inference/README.md b/examples/YOLOv8-OpenVINO-CPP-Inference/README.md
new file mode 100644
index 00000000..e668a0e7
--- /dev/null
+++ b/examples/YOLOv8-OpenVINO-CPP-Inference/README.md
@@ -0,0 +1,69 @@
+# YOLOv8 OpenVINO Inference in C++ 🦾
+
+Welcome to the YOLOv8 OpenVINO Inference example in C++! This guide will help you get started with leveraging the powerful YOLOv8 models using the OpenVINO and OpenCV APIs in your C++ projects. Whether you're looking to enhance performance or add flexibility to your applications, this example has got you covered.
+
+## 🌟 Features
+
+- 🚀 **Model Format Support**: Compatible with `ONNX` and `OpenVINO IR` formats.
+- ⚡ **Precision Options**: Run models in `FP32`, `FP16`, and `INT8` precisions.
+- 🔄 **Dynamic Shape Loading**: Easily handle models with dynamic input shapes.
+
+## 📋 Dependencies
+
+To ensure smooth execution, please make sure you have the following dependencies installed:
+
+| Dependency | Version  |
+| ---------- | -------- |
+| OpenVINO   | >=2023.3 |
+| OpenCV     | >=4.5.0  |
+| C++        | >=14     |
+| CMake      | >=3.12.0 |
+
+## ⚙️ Build Instructions
+
+Follow these steps to build the project:
+
+1. Clone the repository:
+
+   ```bash
+   git clone https://github.com/ultralytics/ultralytics.git
+   cd ultralytics/examples/YOLOv8-OpenVINO-CPP-Inference
+   ```
+
+2. Create a build directory and compile the project (CMake must be able to find OpenVINO: source OpenVINO's `setupvars.sh` first, or pass `-DOpenVINO_DIR=<openvino_dir>/runtime/cmake` to `cmake`):
+   ```bash
+   mkdir build
+   cd build
+   cmake ..
+   make
+   ```
+
+## 🛠️ Usage
+
+Once built, you can run inference on an image using the following command:
+
+```bash
+./detect <model_path.{onnx, xml}> <image_path.jpg>
+```
+
+## 🔄 Exporting YOLOv8 Models
+
+To use your YOLOv8 model with OpenVINO, you need to export it first. Use the command below to export the model:
+
+```commandline
+yolo export model=yolov8s.pt imgsz=640 format=openvino
+```
+
+## 📸 Screenshots
+
+### Running Using OpenVINO Model
+
+![Running OpenVINO Model](https://github.com/ultralytics/ultralytics/assets/76827698/2d7cf201-3def-4357-824c-12446ccf85a9)
+
+### Running Using ONNX Model
+
+![Running ONNX Model](https://github.com/ultralytics/ultralytics/assets/76827698/9b90031c-cc81-4cfb-8b34-c619e09035a7)
+
+## ❤️ Contributions
+
+We hope this example helps you integrate YOLOv8 with OpenVINO and OpenCV into your C++ projects effortlessly. Happy coding! 🚀
diff --git a/examples/YOLOv8-OpenVINO-CPP-Inference/inference.cc b/examples/YOLOv8-OpenVINO-CPP-Inference/inference.cc
new file mode 100644
index 00000000..dbabd2a9
--- /dev/null
+++ b/examples/YOLOv8-OpenVINO-CPP-Inference/inference.cc
@@ -0,0 +1,175 @@
+#include "inference.h"
+
+#include <cstring>
+#include <memory>
+#include <opencv2/dnn.hpp>
+#include <random>
+
+namespace yolo {
+
+// Constructor to initialize the model with default input shape
+Inference::Inference(const std::string &model_path, const float &model_confidence_threshold, const float &model_NMS_threshold) {
+  model_input_shape_ = cv::Size(640, 640); // Set the default size for models with dynamic shapes to prevent errors.
+  model_confidence_threshold_ = model_confidence_threshold;
+  model_NMS_threshold_ = model_NMS_threshold;
+  InitializeModel(model_path);
+}
+
+// Constructor to initialize the model with specified input shape
+Inference::Inference(const std::string &model_path, const cv::Size model_input_shape, const float &model_confidence_threshold, const float &model_NMS_threshold) {
+  model_input_shape_ = model_input_shape;
+  model_confidence_threshold_ = model_confidence_threshold;
+  model_NMS_threshold_ = model_NMS_threshold;
+  InitializeModel(model_path);
+}
+
+// Reads the model, embeds the preprocessing steps into it, compiles it and caches the input/output dimensions
+void Inference::InitializeModel(const std::string &model_path) {
+  ov::Core core; // OpenVINO core object
+  std::shared_ptr<ov::Model> model = core.read_model(model_path); // Read the model from file
+
+  // If the model has dynamic shapes, reshape it to the specified input shape
+  if (model->is_dynamic()) {
+    model->reshape({1, 3, static_cast<long int>(model_input_shape_.height), static_cast<long int>(model_input_shape_.width)});
+  }
+
+  // Preprocessing setup for the model
+  ov::preprocess::PrePostProcessor ppp = ov::preprocess::PrePostProcessor(model);
+  ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC").set_color_format(ov::preprocess::ColorFormat::BGR);
+  ppp.input().preprocess().convert_element_type(ov::element::f32).convert_color(ov::preprocess::ColorFormat::RGB).scale({255, 255, 255});
+  ppp.input().model().set_layout("NCHW");
+  ppp.output().tensor().set_element_type(ov::element::f32);
+  model = ppp.build(); // Build the preprocessed model
+
+  // Compile the model for inference
+  compiled_model_ = core.compile_model(model, "AUTO");
+  inference_request_ = compiled_model_.create_infer_request(); // Create inference request
+
+  // The input tensor layout is NHWC (set above), so dims 1 and 2 are height and width; no narrowing through short
+  const ov::Shape input_shape = model->inputs()[0].get_shape();
+  model_input_shape_ = cv::Size2f(static_cast<float>(input_shape[2]), static_cast<float>(input_shape[1]));
+
+  // Dims 1 and 2 of the output become the rows and columns of the detection matrix built in PostProcessing
+  const ov::Shape output_shape = model->outputs()[0].get_shape();
+  model_output_shape_ = cv::Size(static_cast<int>(output_shape[2]), static_cast<int>(output_shape[1]));
+}
+
+// Method to run inference on an input frame
+void Inference::RunInference(cv::Mat &frame) {
+  Preprocessing(frame); // Preprocess the input frame
+  inference_request_.infer(); // Run inference
+  PostProcessing(frame); // Postprocess the inference results
+}
+
+// Method to preprocess the input frame: resize it, record the scale factors and fill the input tensor
+void Inference::Preprocessing(const cv::Mat &frame) {
+  cv::Mat resized_frame;
+  cv::resize(frame, resized_frame, model_input_shape_, 0, 0, cv::INTER_AREA); // Resize the frame to match the model input shape
+
+  // Calculate scaling factor
+  scale_factor_.x = static_cast<float>(frame.cols / model_input_shape_.width);
+  scale_factor_.y = static_cast<float>(frame.rows / model_input_shape_.height);
+
+  // Copy the pixels into the tensor owned by the infer request. Wrapping resized_frame.data in a
+  // new ov::Tensor would leave the request reading freed memory, because the local cv::Mat is
+  // destroyed when this function returns, i.e. before infer() runs.
+  ov::Tensor input_tensor = inference_request_.get_input_tensor();
+  const std::size_t frame_bytes = resized_frame.total() * resized_frame.elemSize();
+  CV_Assert(resized_frame.isContinuous() && frame_bytes == input_tensor.get_byte_size()); // e.g. fails for non-3-channel 8-bit frames
+  std::memcpy(input_tensor.data(), resized_frame.data, frame_bytes);
+}
+
+// Method to postprocess the inference results
+void Inference::PostProcessing(cv::Mat &frame) {
+  std::vector<int> class_list;
+  std::vector<float> confidence_list;
+  std::vector<cv::Rect> box_list;
+
+  // Get the output tensor from the inference request
+  const float *detections = inference_request_.get_output_tensor().data<const float>();
+  const cv::Mat detection_outputs(model_output_shape_, CV_32F, const_cast<float *>(detections)); // Read-only view over the output tensor
+
+  // Iterate over detections and collect class IDs, confidence scores, and bounding boxes
+  for (int i = 0; i < detection_outputs.cols; ++i) {
+    const cv::Mat classes_scores = detection_outputs.col(i).rowRange(4, detection_outputs.rows);
+
+    cv::Point class_id;
+    double score;
+    cv::minMaxLoc(classes_scores, nullptr, &score, nullptr, &class_id); // Find the class with the highest score
+
+    // Check if the detection meets the confidence threshold
+    if (score > model_confidence_threshold_) {
+      class_list.push_back(class_id.y);
+      confidence_list.push_back(static_cast<float>(score));
+
+      const float x = detection_outputs.at<float>(0, i);
+      const float y = detection_outputs.at<float>(1, i);
+      const float w = detection_outputs.at<float>(2, i);
+      const float h = detection_outputs.at<float>(3, i);
+
+      cv::Rect box;
+      box.x = static_cast<int>(x);
+      box.y = static_cast<int>(y);
+      box.width = static_cast<int>(w);
+      box.height = static_cast<int>(h);
+      box_list.push_back(box);
+    }
+  }
+
+  // Apply Non-Maximum Suppression (NMS) to filter overlapping bounding boxes
+  std::vector<int> NMS_result;
+  cv::dnn::NMSBoxes(box_list, confidence_list, model_confidence_threshold_, model_NMS_threshold_, NMS_result);
+
+  // Collect final detections after NMS (indices stay int; no narrowing to unsigned short)
+  for (const int id : NMS_result) {
+    Detection result;
+    result.class_id = static_cast<short>(class_list[id]);
+    result.confidence = confidence_list[id];
+    result.box = GetBoundingBox(box_list[id]);
+
+    DrawDetectedObject(frame, result);
+  }
+}
+
+// Method to get the bounding box in the correct scale (center-based box -> top-left based box in frame pixels)
+cv::Rect Inference::GetBoundingBox(const cv::Rect &src) const {
+  cv::Rect box = src;
+  box.x = (box.x - box.width / 2) * scale_factor_.x;
+  box.y = (box.y - box.height / 2) * scale_factor_.y;
+  box.width *= scale_factor_.x;
+  box.height *= scale_factor_.y;
+  return box;
+}
+
+// Draws the bounding box and a "<class> <confidence>" label for one detection onto the frame
+void Inference::DrawDetectedObject(cv::Mat &frame, const Detection &detection) const {
+  const cv::Rect &box = detection.box;
+  const float confidence = detection.confidence;
+  const int class_id = detection.class_id;
+
+  // Generate a random color for the bounding box. The generator is seeded once per thread instead
+  // of constructing a std::random_device and a std::mt19937 for every detection.
+  thread_local std::mt19937 gen(std::random_device{}());
+  std::uniform_int_distribution<int> dis(120, 255);
+  const cv::Scalar color = cv::Scalar(dis(gen), dis(gen), dis(gen));
+
+  // Draw the bounding box around the detected object
+  cv::rectangle(frame, cv::Point(box.x, box.y), cv::Point(box.x + box.width, box.y + box.height), color, 3);
+
+  // Prepare the class label and confidence text. Models with more classes than classes_ (e.g.
+  // custom-trained ones) fall back to the numeric class id instead of indexing out of bounds.
+  const bool known_class = class_id >= 0 && static_cast<std::size_t>(class_id) < classes_.size();
+  const std::string class_name = known_class ? classes_[class_id] : std::to_string(class_id);
+  const std::string classString = class_name + " " + std::to_string(confidence).substr(0, 4);
+
+  // Get the size of the text box
+  cv::Size textSize = cv::getTextSize(classString, cv::FONT_HERSHEY_DUPLEX, 0.75, 2, 0);
+  cv::Rect textBox(box.x, box.y - 40, textSize.width + 10, textSize.height + 20);
+
+  // Draw the text box
+  cv::rectangle(frame, textBox, color, cv::FILLED);
+
+  // Put the class label and confidence text above the bounding box
+  cv::putText(frame, classString, cv::Point(box.x + 5, box.y - 10), cv::FONT_HERSHEY_DUPLEX, 0.75, cv::Scalar(0, 0, 0), 2, 0);
+}
+} // namespace yolo
diff --git a/examples/YOLOv8-OpenVINO-CPP-Inference/inference.h b/examples/YOLOv8-OpenVINO-CPP-Inference/inference.h
new file mode 100644
index 00000000..7bcb20df
--- /dev/null
+++ b/examples/YOLOv8-OpenVINO-CPP-Inference/inference.h
@@ -0,0 +1,59 @@
+#ifndef YOLO_INFERENCE_H_
+#define YOLO_INFERENCE_H_
+
+#include <string>
+#include <vector>
+#include <opencv2/imgproc.hpp>
+#include <openvino/openvino.hpp>
+
+namespace yolo {
+
+struct Detection {
+  short class_id;
+  float confidence;
+  cv::Rect box;
+};
+
+class Inference {
+ public:
+  Inference() {}
+  // Constructor to initialize the model with default input shape
+  Inference(const std::string &model_path, const float &model_confidence_threshold, const float &model_NMS_threshold);
+  // Constructor to initialize the model with specified input shape
+  Inference(const std::string &model_path, const cv::Size model_input_shape, const float &model_confidence_threshold, const float &model_NMS_threshold);
+
+  void RunInference(cv::Mat &frame);
+
+ private:
+  void InitializeModel(const std::string &model_path);
+  void Preprocessing(const cv::Mat &frame);
+  void PostProcessing(cv::Mat &frame);
+  cv::Rect GetBoundingBox(const cv::Rect &src) const;
+  void DrawDetectedObject(cv::Mat &frame, const Detection &detections) const;
+
+  cv::Point2f scale_factor_;     // Scaling factor for the input frame
+  cv::Size2f model_input_shape_; // Input shape of the model
+  cv::Size model_output_shape_;  // Output shape of the model
+
+  ov::InferRequest inference_request_; // OpenVINO inference request
+  ov::CompiledModel compiled_model_;   // OpenVINO compiled model
+
+  float model_confidence_threshold_; // Confidence threshold for detections
+  float model_NMS_threshold_;        // Non-Maximum Suppression threshold
+
+  std::vector<std::string> classes_ {
+    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
+    "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
+    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
+    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
+    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
+    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
+    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard",
+    "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
+    "scissors", "teddy bear", "hair drier", "toothbrush"
+  };
+};
+
+} // namespace yolo
+
+#endif // YOLO_INFERENCE_H_
diff --git a/examples/YOLOv8-OpenVINO-CPP-Inference/main.cc b/examples/YOLOv8-OpenVINO-CPP-Inference/main.cc
new file mode 100644
index 00000000..2031af6d
--- /dev/null
+++ b/examples/YOLOv8-OpenVINO-CPP-Inference/main.cc
@@ -0,0 +1,41 @@
+#include "inference.h"
+
+#include <iostream>
+#include <opencv2/highgui.hpp>
+
+int main(int argc, char **argv) {
+  // Check if the correct number of arguments is provided
+  if (argc != 3) {
+    std::cerr << "usage: " << argv[0] << " <model_path> <image_path>" << std::endl;
+    return 1;
+  }
+
+  // Get the model and image paths from the command-line arguments
+  const std::string model_path = argv[1];
+  const std::string image_path = argv[2];
+
+  // Read the input image
+  cv::Mat image = cv::imread(image_path);
+
+  // Check if the image was successfully loaded
+  if (image.empty()) {
+    std::cerr << "ERROR: image is empty" << std::endl;
+    return 1;
+  }
+
+  // Define the confidence and NMS thresholds
+  const float confidence_threshold = 0.5;
+  const float NMS_threshold = 0.5;
+
+  // Initialize the YOLO inference with the specified model and parameters
+  yolo::Inference inference(model_path, cv::Size(640, 640), confidence_threshold, NMS_threshold);
+
+  // Run inference on the input image
+  inference.RunInference(image);
+
+  // Display the image with the detections
+  cv::imshow("image", image);
+  cv::waitKey(0);
+
+  return 0;
+}