Improve YOLOv8 ONNX Runtime c++ example for all OS with CmakeList.txt support (#4274)
Signed-off-by: Onuralp SEZER <thunderbirdtr@fedoraproject.org>
Signed-off-by: Onuralp SEZER <thunderbirdtr@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
parent c9be1f3cce
commit 22474e9ad5

5 changed files with 179 additions and 53 deletions
@@ -2,7 +2,6 @@
 #include <regex>

 #define benchmark
 #define ELOG

 DCSP_CORE::DCSP_CORE()
 {
@@ -29,7 +28,7 @@ char* BlobFromImage(cv::Mat& iImg, T& iBlob)
         {
             for (int w = 0; w < imgWidth; w++)
             {
-                iBlob[c * imgWidth * imgHeight + h * imgWidth + w] = (std::remove_pointer<T>::type)((iImg.at<cv::Vec3b>(h, w)[c]) / 255.0f);
+                iBlob[c * imgWidth * imgHeight + h * imgWidth + w] = typename std::remove_pointer<T>::type((iImg.at<cv::Vec3b>(h, w)[c]) / 255.0f);
            }
        }
    }
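
The cast change above is the actual fix in this hunk: inside a function template, `std::remove_pointer<T>::type` is a dependent name, so GCC and Clang require the `typename` keyword before it can be used as a type, while MSVC has historically accepted the unqualified form. A minimal standalone sketch of the same pattern (the `Fill` helper and the `float*` instantiation are illustrative, not taken from the example code):

```cpp
#include <cstddef>
#include <type_traits>
#include <vector>

// T is expected to be a pointer type such as float*.
// std::remove_pointer<T>::type is a dependent name inside the template,
// so 'typename' is required for it to be parsed as a type.
template <typename T>
void Fill(T blob, const std::vector<unsigned char>& pixels)
{
    for (std::size_t i = 0; i < pixels.size(); ++i)
    {
        // Functional-style cast to the pointee type (e.g. float), normalizing to [0, 1].
        blob[i] = typename std::remove_pointer<T>::type(pixels[i] / 255.0f);
    }
}

int main()
{
    std::vector<unsigned char> pixels{0, 128, 255};
    std::vector<float> out(pixels.size());
    Fill(out.data(), pixels);  // T deduced as float*, pointee type float
}
```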
@@ -40,8 +39,8 @@ char* BlobFromImage(cv::Mat& iImg, T& iBlob)
 char* PostProcess(cv::Mat& iImg, std::vector<int> iImgSize, cv::Mat& oImg)
 {
     cv::Mat img = iImg.clone();
-    cv::resize(iImg, oImg, cv::Size(iImgSize.at(0), iImgSize.at(1)));
-    if (img.channels() == 1)
+    cv::resize(iImg, oImg, cv::Size(iImgSize.at(0), iImgSize.at(1)));
+    if (img.channels() == 1)
     {
         cv::cvtColor(oImg, oImg, cv::COLOR_GRAY2BGR);
     }
@@ -75,17 +74,21 @@ char* DCSP_CORE::CreateSession(DCSP_INIT_PARAM &iParams)
         OrtCUDAProviderOptions cudaOption;
         cudaOption.device_id = 0;
         sessionOption.AppendExecutionProvider_CUDA(cudaOption);
         //OrtOpenVINOProviderOptions ovOption;
         //sessionOption.AppendExecutionProvider_OpenVINO(ovOption);
     }
     sessionOption.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
     sessionOption.SetIntraOpNumThreads(iParams.IntraOpNumThreads);
     sessionOption.SetLogSeverityLevel(iParams.LogSeverityLevel);

 #ifdef _WIN32
     int ModelPathSize = MultiByteToWideChar(CP_UTF8, 0, iParams.ModelPath.c_str(), static_cast<int>(iParams.ModelPath.length()), nullptr, 0);
     wchar_t* wide_cstr = new wchar_t[ModelPathSize + 1];
     MultiByteToWideChar(CP_UTF8, 0, iParams.ModelPath.c_str(), static_cast<int>(iParams.ModelPath.length()), wide_cstr, ModelPathSize);
     wide_cstr[ModelPathSize] = L'\0';
     const wchar_t* modelPath = wide_cstr;
 #else
     const char* modelPath = iParams.ModelPath.c_str();
 #endif // _WIN32

     session = new Ort::Session(env, modelPath, sessionOption);
     Ort::AllocatorWithDefaultOptions allocator;
     size_t inputNodesNum = session->GetInputCount();
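
The `#ifdef _WIN32` block exists because `Ort::Session` takes the model path as `ORTCHAR_T*`, which is `wchar_t*` on Windows, so the UTF-8 `std::string` path has to be widened first; on Linux and macOS the narrow string is passed through unchanged. A sketch of the same conversion using `std::wstring` so the buffer is released automatically (the `ToOrtPath` helper name is hypothetical, not part of this commit):

```cpp
#include <string>

#ifdef _WIN32
#include <Windows.h>

// Convert a UTF-8 path to the wide string Ort::Session expects on Windows.
std::wstring ToOrtPath(const std::string& utf8Path)
{
    int len = MultiByteToWideChar(CP_UTF8, 0, utf8Path.c_str(),
                                  static_cast<int>(utf8Path.length()), nullptr, 0);
    std::wstring wide(len, L'\0');
    MultiByteToWideChar(CP_UTF8, 0, utf8Path.c_str(),
                        static_cast<int>(utf8Path.length()), &wide[0], len);
    return wide;  // released automatically when it goes out of scope
}
#else
// On other platforms ONNX Runtime takes the narrow path as-is.
std::string ToOrtPath(const std::string& utf8Path) { return utf8Path; }
#endif
```

With such a helper, session creation would read `session = new Ort::Session(env, ToOrtPath(iParams.ModelPath).c_str(), sessionOption);` and no raw `new[]` buffer would be needed.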
@@ -96,7 +99,6 @@ char* DCSP_CORE::CreateSession(DCSP_INIT_PARAM &iParams)
         strcpy(temp_buf, input_node_name.get());
         inputNodeNames.push_back(temp_buf);
     }

     size_t OutputNodesNum = session->GetOutputCount();
     for (size_t i = 0; i < OutputNodesNum; i++)
     {
@@ -151,7 +153,7 @@ char* DCSP_CORE::RunSession(cv::Mat &iImg, std::vector<DCSP_RESULT>& oResult)
 template<typename N>
 char* DCSP_CORE::TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std::vector<int64_t>& inputNodeDims, std::vector<DCSP_RESULT>& oResult)
 {
-    Ort::Value inputTensor = Ort::Value::CreateTensor<std::remove_pointer<N>::type>(Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), inputNodeDims.data(), inputNodeDims.size());
+    Ort::Value inputTensor = Ort::Value::CreateTensor<typename std::remove_pointer<N>::type>(Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), inputNodeDims.data(), inputNodeDims.size());
 #ifdef benchmark
     clock_t starttime_2 = clock();
 #endif // benchmark
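
`CreateTensor` only wraps an existing CPU buffer, so the template element type, the element count, and the shape passed here all have to describe the same blob, and the blob must stay alive until the session has consumed it. A concrete, non-templated sketch of the same call for a float input of shape 1x3xHxW (the 640x640 size is illustrative; the example derives it from `imgSize`):

```cpp
#include <onnxruntime_cxx_api.h>
#include <cstdint>
#include <vector>

int main()
{
    const int64_t height = 640, width = 640;             // illustrative input size
    std::vector<float> blob(3 * height * width, 0.0f);   // CHW blob filled by preprocessing
    std::vector<int64_t> inputDims{1, 3, height, width};

    Ort::MemoryInfo memInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    // The tensor borrows 'blob' rather than copying it, so 'blob' must outlive
    // the session->Run(...) call that consumes the tensor.
    Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
        memInfo, blob.data(), blob.size(), inputDims.data(), inputDims.size());
}
```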
@@ -159,10 +161,11 @@ char* DCSP_CORE::TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std::vector<int64_t>& inputNodeDims, std::vector<DCSP_RESULT>& oResult)
 #ifdef benchmark
     clock_t starttime_3 = clock();
 #endif // benchmark

     Ort::TypeInfo typeInfo = outputTensor.front().GetTypeInfo();
     auto tensor_info = typeInfo.GetTensorTypeAndShapeInfo();
     std::vector<int64_t>outputNodeDims = tensor_info.GetShape();
-    std::remove_pointer<N>::type* output = outputTensor.front().GetTensorMutableData<std::remove_pointer<N>::type>();
+    auto output = outputTensor.front().GetTensorMutableData<typename std::remove_pointer<N>::type>();
     delete blob;
     switch (modelType)
     {
@@ -183,7 +186,7 @@ char* DCSP_CORE::TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std::vector<int64_t>& inputNodeDims, std::vector<DCSP_RESULT>& oResult)
             for (int i = 0; i < strideNum; ++i)
             {
                 float* classesScores = data + 4;
-                cv::Mat scores(1, classesNum, CV_32FC1, classesScores);
+                cv::Mat scores(1, this->classes.size(), CV_32FC1, classesScores);
                 cv::Point class_id;
                 double maxClassScore;
                 cv::minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
@@ -203,13 +206,14 @@ char* DCSP_CORE::TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std::vector<int64_t>& inputNodeDims, std::vector<DCSP_RESULT>& oResult)
                 int width = int(w * x_factor);
                 int height = int(h * y_factor);

-                boxes.push_back(cv::Rect(left, top, width, height));
+                boxes.emplace_back(left, top, width, height);
             }
             data += signalResultNum;
         }

         std::vector<int> nmsResult;
         cv::dnn::NMSBoxes(boxes, confidences, rectConfidenceThreshold, iouThreshold, nmsResult);

         for (int i = 0; i < nmsResult.size(); ++i)
         {
             int idx = nmsResult[i];
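
For each candidate row the layout is 4 box values followed by one score per class (hence `classesScores = data + 4`); the best class is taken with `cv::minMaxLoc` and overlapping boxes are then pruned with `cv::dnn::NMSBoxes`. A self-contained sketch of that filtering step; the thresholds, the 3-class score row, and the two hand-written boxes are made-up values for illustration:

```cpp
#include <opencv2/opencv.hpp>
#include <iostream>
#include <vector>

int main()
{
    // Argmax over one candidate's class scores (mirrors the minMaxLoc call above).
    float classesScores[] = {0.02f, 0.85f, 0.10f};
    cv::Mat scores(1, 3, CV_32FC1, classesScores);
    cv::Point class_id;
    double maxClassScore;
    cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &class_id);
    std::cout << "best class " << class_id.x << " score " << maxClassScore << std::endl;

    // Two overlapping candidates for the same object; NMS keeps the better one.
    std::vector<cv::Rect> boxes{{100, 100, 50, 80}, {102, 98, 52, 82}};
    std::vector<float> confidences{0.90f, 0.75f};
    const float rectConfidenceThreshold = 0.25f;  // score cut-off
    const float iouThreshold = 0.45f;             // overlap cut-off

    std::vector<int> nmsResult;
    cv::dnn::NMSBoxes(boxes, confidences, rectConfidenceThreshold, iouThreshold, nmsResult);
    for (int idx : nmsResult)
        std::cout << "kept box " << idx << " conf " << confidences[idx] << std::endl;
}
```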
@@ -266,6 +270,5 @@ char* DCSP_CORE::WarmUpSession()
             std::cout << "[DCSP_ONNX(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl;
         }
     }

     return Ret;
 }
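
The warm-up run exists because the first inference on a freshly created CUDA session typically pays one-off initialization costs, so the example runs a dummy pass up front and reports its time separately via the `benchmark`/`clock()` pattern. A minimal sketch of that pattern (`RunOnce` is a stand-in for a full preprocess, `session->Run(...)`, postprocess pass, not a function from the example):

```cpp
#include <ctime>
#include <iostream>

// Stand-in for one full inference pass.
void RunOnce() { /* preprocess + session->Run(...) + postprocess */ }

int main()
{
    clock_t start = clock();
    RunOnce();  // warm-up: absorbs one-off initialization cost
    double warmup_ms = double(clock() - start) / CLOCKS_PER_SEC * 1000;
    std::cout << "warm-up cost " << warmup_ms << " ms." << std::endl;

    start = clock();
    RunOnce();  // subsequent runs are the ones worth benchmarking
    std::cout << "inference cost " << double(clock() - start) / CLOCKS_PER_SEC * 1000 << " ms." << std::endl;
}
```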