当前位置 博文首页 > cumtchw:NVIDIA Jetson Xavier 上用 CUDA 实现图像的 resize 和 padding
具体的 API 接口见下面的仓库:
git clone -b cumtchw-patch-1 https://github.com/cumtchw/cuda_utils
下面是调用上述接口对图片进行resize的一个demo.
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <thread>
#include <vector>
#include <unistd.h>
#include <cuda_runtime_api.h>
#include "opencv2/opencv.hpp"
#include "cuda_utils_sdk.h"
#include "cudaConverter.h"
using namespace cv;
using namespace std;
int main(int argc, char ** argv)
{
int img_w_{};
int img_h_{};
int img_step1_{};
vector<float> preprocess_output_data_;
size_t convert_input_size_{};
size_t convert_output_size_{};
uchar* convert_input_{};
float* convert_output_{};
int m_maxBatchSize = 1;
std::vector<float> mean_data_{ 0, 0, 0 };
int w = 1024;
int h = 1024;
int c =3;
int error{};
cv::Mat src0 = cv::imread("./test0.jpeg"); //720*405
cv::Mat src1 = cv::imread("./test1.jpeg");
vector<cv::Mat> resize_imgs;
resize_imgs.push_back(src0);
//resize_imgs.push_back(src1);
if (preprocess_output_data_.empty()) //变量定义为:vector<float> preprocess_output_data_;
{
preprocess_output_data_.resize(m_maxBatchSize * c * w * h * sizeof(float));
}
for(size_t i = 0; i < resize_imgs.size(); ++i)
{
auto & mat = resize_imgs[i];
img_w_ = mat.cols;
img_h_ = mat.rows;
img_step1_ = mat.step1();
if (convert_input_ == nullptr)
{
convert_input_size_ = m_maxBatchSize * img_step1_ * img_h_ * sizeof(unsigned char);
cout<<"m_maxBatchSize:"<<m_maxBatchSize<<",img_step1_:"<<img_step1_<<",img_h_:"<<img_h_<<endl;
cout<<"convert_input_size_:"<<convert_input_size_<<endl;
error = cuAllocMapped((void**)&convert_input_, convert_input_size_);
if (error != 0)
{
cout<<"cuAllocMapped failed for input buffer!"<<endl;
}
else
{
cout<<"cuAllocMapped success for input buffer!"<<endl;
}
}
else
{
assert(mat.step1() * mat.rows <= convert_input_size_);
}
if (convert_output_ == nullptr)
{
convert_output_size_ = m_maxBatchSize * c * w * h * sizeof(float);
cout<<"convert_output_size_:"<<convert_output_size_<<endl;
error = cuAllocMapped((void**)&convert_output_, convert_output_size_);
if (error != 0)
{
cout<<"cuAllocMapped failed for output buffer!"<<endl;
}
else
{
cout<<"cuAllocMapped success for output buffer!"<<endl;
}
}
//transfer data from opencv mat buffer to gpu
size_t buff_size = img_step1_ * img_h_ * sizeof(unsigned char);
memcpy(convert_input_ + i * buff_size, mat.data, buff_size);
cout<<"convert_input_ size:"<<buff_size<<endl;
cuStreamSynchronize(nullptr);
}
if(3 == c)
{
cu::cudaResizeConvert(convert_input_, img_w_, img_h_, img_step1_, ImageFormat::IMAGE_BGR8, convert_output_, w, h, ImageFormat::IMAGE_RGB32F_PLANAR
,resize_imgs.size() , mean_data_.data(), 1, (cudaStream_t)nullptr);
cuStreamSynchronize(nullptr);
}
uint64_t output_size = c * w * h * sizeof(float) * resize_imgs.size();
cout<<"c:"<<c<<",w:"<<w<<",h:"<<h<<"sizeof(float):"<<sizeof(float)<<",resize_imgs.size():"<<resize_imgs.size()<<endl;
memcpy(preprocess_output_data_.data(), convert_output_, output_size);
cout<<"preprocess_output_data_.size():"<<preprocess_output_data_.size()<<endl;
cv::Mat resMat = Mat(w, h, CV_32FC3);
for(int i=0; i < h; i++)//i表示在第几行.
{
for(int j =0;j<w;j++)//j表示在第几列.
{
resMat.at<Vec3f>(i,j)[2] = preprocess_output_data_.at(0*w*h + i*w + j);
resMat.at<Vec3f>(i,j)[1] = preprocess_output_data_.at(1*w*h + i*w + j);
resMat.at<Vec3f>(i,j)[0] = preprocess_output_data_.at(2*w*h + i*w + j);
}
}
imwrite("./result1.jpg", resMat);
printf("this is in the main\n");
return 0;
}
cs