Improve YOLOv8 ONNX Runtime c++ example for all OS with CmakeList.txt support (#4274)
Signed-off-by: Onuralp SEZER <thunderbirdtr@fedoraproject.org>
Signed-off-by: Onuralp SEZER <thunderbirdtr@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
parent c9be1f3cce
commit 22474e9ad5

5 changed files with 179 additions and 53 deletions
@@ -2,7 +2,6 @@
 #include <regex>

 #define benchmark
 #define ELOG

 DCSP_CORE::DCSP_CORE()
 {
@@ -29,7 +28,7 @@ char* BlobFromImage(cv::Mat& iImg, T& iBlob)
         {
             for (int w = 0; w < imgWidth; w++)
             {
-                iBlob[c * imgWidth * imgHeight + h * imgWidth + w] = (std::remove_pointer<T>::type)((iImg.at<cv::Vec3b>(h, w)[c]) / 255.0f);
+                iBlob[c * imgWidth * imgHeight + h * imgWidth + w] = typename std::remove_pointer<T>::type((iImg.at<cv::Vec3b>(h, w)[c]) / 255.0f);
            }
        }
    }
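
The cast change above is the actual fix in this hunk: inside a function template, `std::remove_pointer<T>::type` is a dependent name, so GCC and Clang require the `typename` keyword before it can be used as a type, while MSVC has historically accepted the unqualified form. A minimal standalone sketch of the same pattern (the `Fill` helper and the `float*` instantiation are illustrative, not taken from the example code):

```cpp
#include <cstddef>
#include <type_traits>
#include <vector>

// T is expected to be a pointer type such as float*.
// std::remove_pointer<T>::type is a dependent name inside the template,
// so 'typename' is required for it to be parsed as a type.
template <typename T>
void Fill(T blob, const std::vector<unsigned char>& pixels)
{
    for (std::size_t i = 0; i < pixels.size(); ++i)
    {
        // Functional-style cast to the pointee type (e.g. float), normalizing to [0, 1].
        blob[i] = typename std::remove_pointer<T>::type(pixels[i] / 255.0f);
    }
}

int main()
{
    std::vector<unsigned char> pixels{0, 128, 255};
    std::vector<float> out(pixels.size());
    Fill(out.data(), pixels);  // T deduced as float*, pointee type float
}
```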
@@ -40,8 +39,8 @@ char* BlobFromImage(cv::Mat& iImg, T& iBlob)
 char* PostProcess(cv::Mat& iImg, std::vector<int> iImgSize, cv::Mat& oImg)
 {
     cv::Mat img = iImg.clone();
-    cv::resize(iImg, oImg, cv::Size(iImgSize.at(0), iImgSize.at(1)));
-    if (img.channels() == 1)
+    cv::resize(iImg, oImg, cv::Size(iImgSize.at(0), iImgSize.at(1)));
+    if (img.channels() == 1)
     {
         cv::cvtColor(oImg, oImg, cv::COLOR_GRAY2BGR);
     }
@@ -75,17 +74,21 @@ char* DCSP_CORE::CreateSession(DCSP_INIT_PARAM &iParams)
         OrtCUDAProviderOptions cudaOption;
         cudaOption.device_id = 0;
         sessionOption.AppendExecutionProvider_CUDA(cudaOption);
         //OrtOpenVINOProviderOptions ovOption;
         //sessionOption.AppendExecutionProvider_OpenVINO(ovOption);
     }
     sessionOption.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
     sessionOption.SetIntraOpNumThreads(iParams.IntraOpNumThreads);
     sessionOption.SetLogSeverityLevel(iParams.LogSeverityLevel);

 #ifdef _WIN32
     int ModelPathSize = MultiByteToWideChar(CP_UTF8, 0, iParams.ModelPath.c_str(), static_cast<int>(iParams.ModelPath.length()), nullptr, 0);
     wchar_t* wide_cstr = new wchar_t[ModelPathSize + 1];
     MultiByteToWideChar(CP_UTF8, 0, iParams.ModelPath.c_str(), static_cast<int>(iParams.ModelPath.length()), wide_cstr, ModelPathSize);
     wide_cstr[ModelPathSize] = L'\0';
     const wchar_t* modelPath = wide_cstr;
 #else
     const char* modelPath = iParams.ModelPath.c_str();
 #endif // _WIN32

     session = new Ort::Session(env, modelPath, sessionOption);
     Ort::AllocatorWithDefaultOptions allocator;
     size_t inputNodesNum = session->GetInputCount();
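
The `#ifdef _WIN32` block exists because `Ort::Session` takes the model path as `ORTCHAR_T*`, which is `wchar_t*` on Windows, so the UTF-8 `std::string` path has to be widened first; on Linux and macOS the narrow string is passed through unchanged. A sketch of the same conversion using `std::wstring` so the buffer is released automatically (the `ToOrtPath` helper name is hypothetical, not part of this commit):

```cpp
#include <string>

#ifdef _WIN32
#include <Windows.h>

// Convert a UTF-8 path to the wide string Ort::Session expects on Windows.
std::wstring ToOrtPath(const std::string& utf8Path)
{
    int len = MultiByteToWideChar(CP_UTF8, 0, utf8Path.c_str(),
                                  static_cast<int>(utf8Path.length()), nullptr, 0);
    std::wstring wide(len, L'\0');
    MultiByteToWideChar(CP_UTF8, 0, utf8Path.c_str(),
                        static_cast<int>(utf8Path.length()), &wide[0], len);
    return wide;  // released automatically when it goes out of scope
}
#else
// On other platforms ONNX Runtime takes the narrow path as-is.
std::string ToOrtPath(const std::string& utf8Path) { return utf8Path; }
#endif
```

With such a helper, session creation would read `session = new Ort::Session(env, ToOrtPath(iParams.ModelPath).c_str(), sessionOption);` and no raw `new[]` buffer would be needed.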
@@ -96,7 +99,6 @@ char* DCSP_CORE::CreateSession(DCSP_INIT_PARAM &iParams)
         strcpy(temp_buf, input_node_name.get());
         inputNodeNames.push_back(temp_buf);
     }

     size_t OutputNodesNum = session->GetOutputCount();
     for (size_t i = 0; i < OutputNodesNum; i++)
     {
@@ -151,7 +153,7 @@ char* DCSP_CORE::RunSession(cv::Mat &iImg, std::vector<DCSP_RESULT>& oResult)
 template<typename N>
 char* DCSP_CORE::TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std::vector<int64_t>& inputNodeDims, std::vector<DCSP_RESULT>& oResult)
 {
-    Ort::Value inputTensor = Ort::Value::CreateTensor<std::remove_pointer<N>::type>(Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), inputNodeDims.data(), inputNodeDims.size());
+    Ort::Value inputTensor = Ort::Value::CreateTensor<typename std::remove_pointer<N>::type>(Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), inputNodeDims.data(), inputNodeDims.size());
 #ifdef benchmark
     clock_t starttime_2 = clock();
 #endif // benchmark
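
`CreateTensor` only wraps an existing CPU buffer, so the template element type, the element count, and the shape passed here all have to describe the same blob, and the blob must stay alive until the session has consumed it. A concrete, non-templated sketch of the same call for a float input of shape 1x3xHxW (the 640x640 size is illustrative; the example derives it from `imgSize`):

```cpp
#include <onnxruntime_cxx_api.h>
#include <cstdint>
#include <vector>

int main()
{
    const int64_t height = 640, width = 640;             // illustrative input size
    std::vector<float> blob(3 * height * width, 0.0f);   // CHW blob filled by preprocessing
    std::vector<int64_t> inputDims{1, 3, height, width};

    Ort::MemoryInfo memInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    // The tensor borrows 'blob' rather than copying it, so 'blob' must outlive
    // the session->Run(...) call that consumes the tensor.
    Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
        memInfo, blob.data(), blob.size(), inputDims.data(), inputDims.size());
}
```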
@@ -159,10 +161,11 @@ char* DCSP_CORE::TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std::vector<int64_t>& inputNodeDims, std::vector<DCSP_RESULT>& oResult)
 #ifdef benchmark
     clock_t starttime_3 = clock();
 #endif // benchmark

     Ort::TypeInfo typeInfo = outputTensor.front().GetTypeInfo();
     auto tensor_info = typeInfo.GetTensorTypeAndShapeInfo();
     std::vector<int64_t>outputNodeDims = tensor_info.GetShape();
-    std::remove_pointer<N>::type* output = outputTensor.front().GetTensorMutableData<std::remove_pointer<N>::type>();
+    auto output = outputTensor.front().GetTensorMutableData<typename std::remove_pointer<N>::type>();
     delete blob;
     switch (modelType)
     {
@@ -183,7 +186,7 @@ char* DCSP_CORE::TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std::vector<int64_t>& inputNodeDims, std::vector<DCSP_RESULT>& oResult)
             for (int i = 0; i < strideNum; ++i)
             {
                 float* classesScores = data + 4;
-                cv::Mat scores(1, classesNum, CV_32FC1, classesScores);
+                cv::Mat scores(1, this->classes.size(), CV_32FC1, classesScores);
                 cv::Point class_id;
                 double maxClassScore;
                 cv::minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
@@ -203,13 +206,14 @@ char* DCSP_CORE::TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std::vector<int64_t>& inputNodeDims, std::vector<DCSP_RESULT>& oResult)
                 int width = int(w * x_factor);
                 int height = int(h * y_factor);

-                boxes.push_back(cv::Rect(left, top, width, height));
+                boxes.emplace_back(left, top, width, height);
             }
             data += signalResultNum;
         }

         std::vector<int> nmsResult;
         cv::dnn::NMSBoxes(boxes, confidences, rectConfidenceThreshold, iouThreshold, nmsResult);

         for (int i = 0; i < nmsResult.size(); ++i)
         {
             int idx = nmsResult[i];
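
For each candidate row the layout is 4 box values followed by one score per class (hence `classesScores = data + 4`); the best class is taken with `cv::minMaxLoc` and overlapping boxes are then pruned with `cv::dnn::NMSBoxes`. A self-contained sketch of that filtering step; the thresholds, the 3-class score row, and the two hand-written boxes are made-up values for illustration:

```cpp
#include <opencv2/opencv.hpp>
#include <iostream>
#include <vector>

int main()
{
    // Argmax over one candidate's class scores (mirrors the minMaxLoc call above).
    float classesScores[] = {0.02f, 0.85f, 0.10f};
    cv::Mat scores(1, 3, CV_32FC1, classesScores);
    cv::Point class_id;
    double maxClassScore;
    cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &class_id);
    std::cout << "best class " << class_id.x << " score " << maxClassScore << std::endl;

    // Two overlapping candidates for the same object; NMS keeps the better one.
    std::vector<cv::Rect> boxes{{100, 100, 50, 80}, {102, 98, 52, 82}};
    std::vector<float> confidences{0.90f, 0.75f};
    const float rectConfidenceThreshold = 0.25f;  // score cut-off
    const float iouThreshold = 0.45f;             // overlap cut-off

    std::vector<int> nmsResult;
    cv::dnn::NMSBoxes(boxes, confidences, rectConfidenceThreshold, iouThreshold, nmsResult);
    for (int idx : nmsResult)
        std::cout << "kept box " << idx << " conf " << confidences[idx] << std::endl;
}
```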
@@ -266,6 +270,5 @@ char* DCSP_CORE::WarmUpSession()
             std::cout << "[DCSP_ONNX(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl;
         }
     }

     return Ret;
 }
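
The warm-up run exists because the first inference on a freshly created CUDA session typically pays one-off initialization costs, so the example runs a dummy pass up front and reports its time separately via the `benchmark`/`clock()` pattern. A minimal sketch of that pattern (`RunOnce` is a stand-in for a full preprocess, `session->Run(...)`, postprocess pass, not a function from the example):

```cpp
#include <ctime>
#include <iostream>

// Stand-in for one full inference pass.
void RunOnce() { /* preprocess + session->Run(...) + postprocess */ }

int main()
{
    clock_t start = clock();
    RunOnce();  // warm-up: absorbs one-off initialization cost
    double warmup_ms = double(clock() - start) / CLOCKS_PER_SEC * 1000;
    std::cout << "warm-up cost " << warmup_ms << " ms." << std::endl;

    start = clock();
    RunOnce();  // subsequent runs are the ones worth benchmarking
    std::cout << "inference cost " << double(clock() - start) / CLOCKS_PER_SEC * 1000 << " ms." << std::endl;
}
```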