CUDA中使用cuFFT的模板匹配?
下面是我的代码,使用cufft中提供的FFT实现模板匹配任务。核心逻辑与此处提供的Matlab代码类似: Matlab代码:CUDA中使用Cuft的模板匹配?,cuda,cufft,Cuda,Cufft,下面是我的代码,使用cufft中提供的FFT实现模板匹配任务。核心逻辑与此处提供的Matlab代码类似: Matlab代码: #include <iostream> #include "cuda_runtime.h" #include "device_launch_parameters.h" #include <math.h> #include <chrono> #include <math_functions.h> #include <bi
#include <iostream>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <math.h>
#include <chrono>
#include <math_functions.h>
#include <bitmap_image.hpp>
#include <cufft.h>
using namespace std;
// Evaluates a CUDA runtime call; on failure prints the source line and the
// CUDA error string, then executes `return -1;` in the ENCLOSING function.
// It can therefore only be used inside functions that return an int, and it
// performs no cleanup of resources acquired before the failing call.
// The last line of the macro must not end with a backslash, otherwise the
// following #define would be spliced into the macro body.
#define errorHandler(stmt) \
    do { \
        cudaError_t errorHandler_err_ = (stmt); \
        if (errorHandler_err_ != cudaSuccess) { \
            printf("[ERROR] Failed to run stmt %d, error body: %s\n", __LINE__, cudaGetErrorString(errorHandler_err_)); \
            return -1; \
        } \
    } while (0)

// <math.h> may already provide M_PI; only define it when it is missing.
#ifndef M_PI
#define M_PI 3.14159265
#endif

// Threads per block used for the kernel launch in this file.
#define BLOCK_SIZE 1024
// Complex sample type used by the helper routines below.
typedef float2 Complex;

// Host entry point: runs the FFT-based matching of `template_image` against `main_image`.
int initiate_parallel_template_matching(bitmap_image main_image, bitmap_image template_image);

// Small complex-arithmetic helpers, callable from host and device code.
static __device__ __host__ inline Complex ComplexAdd(Complex a, Complex b);
static __device__ __host__ inline Complex ComplexScale(Complex a, float s);
static __device__ __host__ inline Complex ComplexMul(Complex a, Complex b);

// Kernel: a[i] = (a[i] * b[i]) * scale for every i in [0, size).
static __global__ void ComplexPointwiseMulAndScale(cufftComplex *a, cufftComplex *b, int size, float scale);

// Padding helper: builds zero-padded host copies of the signal and the filter kernel.
int PadData(const cufftComplex *signal, cufftComplex **padded_signal, int signal_size,
            const cufftComplex *filter_kernel, cufftComplex **padded_filter_kernel, int filter_kernel_size);

// Counts and prints how many entries of `arr` equal its maximum.
int get_number_of_occurances(cufftComplex *arr, unsigned int size);
// Program entry point: loads the two bitmaps from fixed relative paths, runs
// the template matching and reports failure through the process exit code.
int main()
{
    bitmap_image main_image("Input Files/collection.bmp");
    bitmap_image template_image("Input Files/collection_coin.bmp");

    // Keep the status instead of discarding it so that a failed run does not
    // exit with code 0.
    const int status = initiate_parallel_template_matching(main_image, template_image);

    // Keeps the console window open (Windows shell built-in).
    system("pause");
    return (status == EXIT_SUCCESS) ? EXIT_SUCCESS : EXIT_FAILURE;
}
int initiate_parallel_template_matching(bitmap_image main_image, bitmap_image template_image)
{
// Get sizes
int main_width = main_image.width();
int main_height = main_image.height();
int main_size = main_width * main_height;
int template_width = template_image.width();
int template_height = template_image.height();
int template_size = template_width * template_height;
unsigned char* h_main_image = new unsigned char[3 * main_size];
for (int col = 0; col < main_width; col++) {
for (int row = 0; row < main_height; row++) {
rgb_t colors;
main_image.get_pixel(col, row, colors);
h_main_image[(row * main_width + col) * 3 + 0] = colors.red;
h_main_image[(row * main_width + col) * 3 + 1] = colors.green;
h_main_image[(row * main_width + col) * 3 + 2] = colors.blue;
}
}
unsigned char* h_template_image = new unsigned char[3 * template_size];
for (int col = 0; col < template_width; col++) {
for (int row = 0; row < template_height; row++) {
rgb_t colors;
template_image.get_pixel(col, row, colors);
h_template_image[(row * template_width + col) * 3 + 0] = colors.red;
h_template_image[(row * template_width + col) * 3 + 1] = colors.green;
h_template_image[(row * template_width + col) * 3 + 2] = colors.blue;
}
}
cufftComplex* h_main_signal = (cufftComplex *)malloc(sizeof(cufftComplex) * main_width * main_height * 3);
cufftComplex* h_template_signal = (cufftComplex *)malloc(sizeof(cufftComplex) * template_width * template_height * 3);
int main_signal_size = main_width * main_height * 3;
int template_signal_size = template_width * template_height * 3;
for (int y = 0; y < main_height; y++) {
for (int x = 0; x < main_width; x++) {
h_main_signal[(y * main_width + x) * 3 + 0].x = (double)h_main_image[(y * main_width + x) * 3 + 0];
h_main_signal[(y * main_width + x) * 3 + 1].x = (double)h_main_image[(y * main_width + x) * 3 + 1];
h_main_signal[(y * main_width + x) * 3 + 2].x = (double)h_main_image[(y * main_width + x) * 3 + 2];
h_main_signal[(y * main_width + x) * 3 + 0].y = 0;
h_main_signal[(y * main_width + x) * 3 + 1].y = 0;
h_main_signal[(y * main_width + x) * 3 + 2].y = 0;
}
}
for (int y = 0; y < template_height; y++) {
for (int x = 0; x < template_width; x++) {
h_template_signal[(y * template_width + x) * 3 + 0].x = (double)h_template_image[(y * template_width + x) * 3 + 0];
h_template_signal[(y * template_width + x) * 3 + 1].x = (double)h_template_image[(y * template_width + x) * 3 + 1];
h_template_signal[(y * template_width + x) * 3 + 2].x = (double)h_template_image[(y * template_width + x) * 3 + 2];
h_template_signal[(y * template_width + x) * 3 + 0].y = 0;
h_template_signal[(y * template_width + x) * 3 + 1].y = 0;
h_template_signal[(y * template_width + x) * 3 + 2].y = 0;
}
}
cufftComplex* d_main_signal;
cufftComplex* d_template_signal;
cufftComplex* d_main_signal_out;
cufftComplex* d_template_signal_out;
cufftComplex* d_inversed;
int main_memsize = sizeof(cufftComplex) * main_signal_size;
int template_memsize = sizeof(cufftComplex) * template_signal_size;
// Pad image signals
cufftComplex *h_padded_main_signal;
cufftComplex *h_padded_template_signal;
int NEW_SIZE = PadData(h_main_signal, &h_padded_main_signal, main_signal_size, h_template_signal, &h_padded_template_signal, template_signal_size);
errorHandler(cudaMalloc((void**)&d_main_signal, sizeof(cufftComplex) * NEW_SIZE));
errorHandler(cudaMalloc((void**)&d_template_signal, sizeof(cufftComplex) * NEW_SIZE));
errorHandler(cudaMalloc((void**)&d_main_signal_out, sizeof(cufftComplex) * NEW_SIZE));
errorHandler(cudaMalloc((void**)&d_template_signal_out, sizeof(cufftComplex) * NEW_SIZE));
errorHandler(cudaMalloc((void**)&d_inversed, sizeof(cufftComplex) * NEW_SIZE));
errorHandler(cudaMemcpy(d_main_signal, h_padded_main_signal, sizeof(cufftComplex) * NEW_SIZE, cudaMemcpyHostToDevice));
errorHandler(cudaMemcpy(d_template_signal, h_padded_template_signal, sizeof(cufftComplex) * NEW_SIZE, cudaMemcpyHostToDevice));
// Plan for 2 CUFFT_FORWARDs
cufftHandle plan_main;
cufftHandle plan_template;
cufftPlan1d(&plan_main, NEW_SIZE, CUFFT_C2C, 1);
cufftPlan1d(&plan_template, NEW_SIZE, CUFFT_C2C, 1);
// Perform forward FFT
cufftExecC2C(plan_main, (cufftComplex *)d_main_signal, (cufftComplex *)d_main_signal_out, CUFFT_FORWARD);
cufftExecC2C(plan_template, (cufftComplex *)d_template_signal, (cufftComplex *)d_template_signal_out, CUFFT_FORWARD);
// Multiply the coefficients together and normalize the result
printf("Launching ComplexPointwiseMulAndScale<<< >>>\n");
dim3 gridDimensions((unsigned int)(ceil(NEW_SIZE / (float)BLOCK_SIZE)), 1, 1);
dim3 blockDimensions(BLOCK_SIZE, 1, 1);
ComplexPointwiseMulAndScale << <gridDimensions, blockDimensions >> >((cufftComplex *)d_main_signal_out, (cufftComplex *)d_template_signal_out, NEW_SIZE, 1.0f / NEW_SIZE);
errorHandler(cudaGetLastError());
// Perform the inverse fft on the main signal
cufftExecC2C(plan_main, (cufftComplex *)d_main_signal_out, (cufftComplex *)d_inversed, CUFFT_INVERSE);
// Copy data back to host
cufftComplex * h_correlation_signal;
h_correlation_signal = h_padded_main_signal;
errorHandler(cudaMemcpy(h_correlation_signal, d_inversed, sizeof(cufftComplex) * NEW_SIZE, cudaMemcpyDeviceToHost));
for (int i = 0; i < NEW_SIZE; i++) {
h_correlation_signal[i].x = abs(h_correlation_signal[i].x);
h_correlation_signal[i].y = abs(h_correlation_signal[i].y);
}
get_number_of_occurances(h_correlation_signal, NEW_SIZE);
// Cancel plans :))))
cufftDestroy(plan_main);
cufftDestroy(plan_template);
// Free allocated memory
errorHandler(cudaFree(d_main_signal));
errorHandler(cudaFree(d_template_signal));
errorHandler(cudaFree(d_main_signal_out));
errorHandler(cudaFree(d_template_signal_out));
free(h_main_image);
free(h_template_image);
free(h_main_signal);
free(h_template_signal);
free(h_padded_main_signal);
free(h_padded_template_signal);
return EXIT_SUCCESS;
}
///////////////////////////////////////////////////////////////////////////////////
// Function for padding original data
//////////////////////////////////////////////////////////////////////////////////
int PadData(const cufftComplex *signal, cufftComplex **padded_signal, int signal_size,
const cufftComplex *filter_kernel, cufftComplex **padded_filter_kernel, int filter_kernel_size)
{
int minRadius = filter_kernel_size / 2;
int maxRadius = filter_kernel_size - minRadius;
int new_size = signal_size + maxRadius;
// Pad signal
cufftComplex *new_data = (cufftComplex *)malloc(sizeof(cufftComplex) * new_size);
memcpy(new_data + 0, signal, signal_size * sizeof(cufftComplex));
memset(new_data + signal_size, 0, (new_size - signal_size) * sizeof(cufftComplex));
*padded_signal = new_data;
// Pad filter
new_data = (cufftComplex *)malloc(sizeof(cufftComplex) * new_size);
memcpy(new_data + 0, filter_kernel + minRadius, maxRadius * sizeof(cufftComplex));
memset(new_data + maxRadius, 0, (new_size - filter_kernel_size) * sizeof(cufftComplex));
memcpy(new_data + new_size - minRadius, filter_kernel, minRadius * sizeof(cufftComplex));
*padded_filter_kernel = new_data;
return new_size;
}
////////////////////////////////////////////////////////////////////////////////
// Complex operations
////////////////////////////////////////////////////////////////////////////////
// Complex addition: returns the value whose x and y fields are the sums of
// the corresponding fields of `a` and `b`.
static __device__ __host__ inline Complex ComplexAdd(Complex a, Complex b)
{
    Complex sum = a;
    sum.x += b.x;
    sum.y += b.y;
    return sum;
}
// Complex scale: multiplies both fields of `a` by the real factor `s`.
static __device__ __host__ inline Complex ComplexScale(Complex a, float s)
{
    Complex scaled;
    scaled.y = s * a.y;
    scaled.x = s * a.x;
    return scaled;
}
// Complex multiplication with x as the real part and y as the imaginary
// part: (a.x + i*a.y) * (b.x + i*b.y).
static __device__ __host__ inline Complex ComplexMul(Complex a, Complex b)
{
    const float real_part = a.x * b.x - a.y * b.y;
    const float imag_part = a.x * b.y + a.y * b.x;

    Complex product;
    product.x = real_part;
    product.y = imag_part;
    return product;
}
// Complex pointwise multiplication: for every i in [0, size) stores
// (a[i] * b[i]) * scale back into a[i]; `b` is only read.
// Uses 1-D indexing (x dimension only). Each thread starts at its global
// index and advances by the total number of launched threads, so any 1-D
// grid/block configuration covers all `size` elements.
static __global__ void ComplexPointwiseMulAndScale(cufftComplex *a, cufftComplex *b, int size, float scale)
{
    const int stride = blockDim.x * gridDim.x;
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    while (idx < size)
    {
        a[idx] = ComplexScale(ComplexMul(a[idx], b[idx]), scale);
        idx += stride;
    }
}
// Scans `arr` for its maximum and counts how many elements equal it, prints
// the count to wcout and returns it.
// An element replaces the current maximum only when BOTH its x and its y are
// strictly greater; an element counts as an occurrence when both fields
// compare exactly equal to the current maximum.
// Returns 0 (and prints 0) when `arr` is NULL or `size` is 0.
// NOTE(review): requiring both components to be larger is not a total order
// on complex values; comparing only one component or the magnitude may be
// what is intended - confirm with the caller.
int get_number_of_occurances(cufftComplex * arr, unsigned int size)
{
    int num_of_occurs = 0;
    if (arr != NULL && size > 0) {
        cufftComplex max = arr[0];
        num_of_occurs = 1; // arr[0] itself is the first occurrence of the current maximum
        for (unsigned int i = 1; i < size; i++) {
            if (arr[i].x > max.x && arr[i].y > max.y) {
                // New maximum: restart the count at exactly one.
                max = arr[i];
                num_of_occurs = 1;
            }
            else if (arr[i].x == max.x && arr[i].y == max.y) {
                // `else if` prevents a freshly found maximum from being
                // counted a second time by the equality test.
                num_of_occurs++;
            }
        }
    }
    wcout << "[Number of Occurances]: " << num_of_occurs << endl;
    return num_of_occurs;
}
clear all; close all;
template = rgb2gray(imread('possum_ear.jpg'));
background = rgb2gray(imread('possum.jpg'));
%% 计算填充
bx = size(background, 2);
by = size(background, 1);
tx = size(template, 2); % 用于放置边界框(bbox)
ty = size(template, 1);
%% 快速傅里叶变换
c = real(ifft2(fft2(background) .* fft2(template, by, bx)));
%% 查找峰值相关性
[max_c, imax] = max(abs(c(:)));
[ypeak, xpeak] = find(c == max(c(:)));
figure; surf(c), shading flat; % 绘制相关性
%% 显示最佳匹配
hFig = figure;
hAx = axes;
position = [xpeak(1)-tx, ypeak(1)-ty, tx, ty];
imshow(background, 'Parent', hAx);
imrect(hAx, position);
我的CUDA代码:
#include <iostream>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <math.h>
#include <chrono>
#include <math_functions.h>
#include <bitmap_image.hpp>
#include <cufft.h>
using namespace std;
// Evaluates a CUDA runtime call; on failure prints the source line and the
// CUDA error string, then executes `return -1;` in the ENCLOSING function.
// It can therefore only be used inside functions that return an int, and it
// performs no cleanup of resources acquired before the failing call.
// The last line of the macro must not end with a backslash, otherwise the
// following #define would be spliced into the macro body.
#define errorHandler(stmt) \
    do { \
        cudaError_t errorHandler_err_ = (stmt); \
        if (errorHandler_err_ != cudaSuccess) { \
            printf("[ERROR] Failed to run stmt %d, error body: %s\n", __LINE__, cudaGetErrorString(errorHandler_err_)); \
            return -1; \
        } \
    } while (0)

// <math.h> may already provide M_PI; only define it when it is missing.
#ifndef M_PI
#define M_PI 3.14159265
#endif

// Threads per block used for the kernel launch in this file.
#define BLOCK_SIZE 1024
// Complex sample type used by the helper routines below.
typedef float2 Complex;

// Host entry point: runs the FFT-based matching of `template_image` against `main_image`.
int initiate_parallel_template_matching(bitmap_image main_image, bitmap_image template_image);

// Small complex-arithmetic helpers, callable from host and device code.
static __device__ __host__ inline Complex ComplexAdd(Complex a, Complex b);
static __device__ __host__ inline Complex ComplexScale(Complex a, float s);
static __device__ __host__ inline Complex ComplexMul(Complex a, Complex b);

// Kernel: a[i] = (a[i] * b[i]) * scale for every i in [0, size).
static __global__ void ComplexPointwiseMulAndScale(cufftComplex *a, cufftComplex *b, int size, float scale);

// Padding helper: builds zero-padded host copies of the signal and the filter kernel.
int PadData(const cufftComplex *signal, cufftComplex **padded_signal, int signal_size,
            const cufftComplex *filter_kernel, cufftComplex **padded_filter_kernel, int filter_kernel_size);

// Counts and prints how many entries of `arr` equal its maximum.
int get_number_of_occurances(cufftComplex *arr, unsigned int size);
// Program entry point: loads the two bitmaps from fixed relative paths, runs
// the template matching and reports failure through the process exit code.
int main()
{
    bitmap_image main_image("Input Files/collection.bmp");
    bitmap_image template_image("Input Files/collection_coin.bmp");

    // Keep the status instead of discarding it so that a failed run does not
    // exit with code 0.
    const int status = initiate_parallel_template_matching(main_image, template_image);

    // Keeps the console window open (Windows shell built-in).
    system("pause");
    return (status == EXIT_SUCCESS) ? EXIT_SUCCESS : EXIT_FAILURE;
}
int initiate_parallel_template_matching(bitmap_image main_image, bitmap_image template_image)
{
// Get sizes
int main_width = main_image.width();
int main_height = main_image.height();
int main_size = main_width * main_height;
int template_width = template_image.width();
int template_height = template_image.height();
int template_size = template_width * template_height;
unsigned char* h_main_image = new unsigned char[3 * main_size];
for (int col = 0; col < main_width; col++) {
for (int row = 0; row < main_height; row++) {
rgb_t colors;
main_image.get_pixel(col, row, colors);
h_main_image[(row * main_width + col) * 3 + 0] = colors.red;
h_main_image[(row * main_width + col) * 3 + 1] = colors.green;
h_main_image[(row * main_width + col) * 3 + 2] = colors.blue;
}
}
unsigned char* h_template_image = new unsigned char[3 * template_size];
for (int col = 0; col < template_width; col++) {
for (int row = 0; row < template_height; row++) {
rgb_t colors;
template_image.get_pixel(col, row, colors);
h_template_image[(row * template_width + col) * 3 + 0] = colors.red;
h_template_image[(row * template_width + col) * 3 + 1] = colors.green;
h_template_image[(row * template_width + col) * 3 + 2] = colors.blue;
}
}
cufftComplex* h_main_signal = (cufftComplex *)malloc(sizeof(cufftComplex) * main_width * main_height * 3);
cufftComplex* h_template_signal = (cufftComplex *)malloc(sizeof(cufftComplex) * template_width * template_height * 3);
int main_signal_size = main_width * main_height * 3;
int template_signal_size = template_width * template_height * 3;
for (int y = 0; y < main_height; y++) {
for (int x = 0; x < main_width; x++) {
h_main_signal[(y * main_width + x) * 3 + 0].x = (double)h_main_image[(y * main_width + x) * 3 + 0];
h_main_signal[(y * main_width + x) * 3 + 1].x = (double)h_main_image[(y * main_width + x) * 3 + 1];
h_main_signal[(y * main_width + x) * 3 + 2].x = (double)h_main_image[(y * main_width + x) * 3 + 2];
h_main_signal[(y * main_width + x) * 3 + 0].y = 0;
h_main_signal[(y * main_width + x) * 3 + 1].y = 0;
h_main_signal[(y * main_width + x) * 3 + 2].y = 0;
}
}
for (int y = 0; y < template_height; y++) {
for (int x = 0; x < template_width; x++) {
h_template_signal[(y * template_width + x) * 3 + 0].x = (double)h_template_image[(y * template_width + x) * 3 + 0];
h_template_signal[(y * template_width + x) * 3 + 1].x = (double)h_template_image[(y * template_width + x) * 3 + 1];
h_template_signal[(y * template_width + x) * 3 + 2].x = (double)h_template_image[(y * template_width + x) * 3 + 2];
h_template_signal[(y * template_width + x) * 3 + 0].y = 0;
h_template_signal[(y * template_width + x) * 3 + 1].y = 0;
h_template_signal[(y * template_width + x) * 3 + 2].y = 0;
}
}
cufftComplex* d_main_signal;
cufftComplex* d_template_signal;
cufftComplex* d_main_signal_out;
cufftComplex* d_template_signal_out;
cufftComplex* d_inversed;
int main_memsize = sizeof(cufftComplex) * main_signal_size;
int template_memsize = sizeof(cufftComplex) * template_signal_size;
// Pad image signals
cufftComplex *h_padded_main_signal;
cufftComplex *h_padded_template_signal;
int NEW_SIZE = PadData(h_main_signal, &h_padded_main_signal, main_signal_size, h_template_signal, &h_padded_template_signal, template_signal_size);
errorHandler(cudaMalloc((void**)&d_main_signal, sizeof(cufftComplex) * NEW_SIZE));
errorHandler(cudaMalloc((void**)&d_template_signal, sizeof(cufftComplex) * NEW_SIZE));
errorHandler(cudaMalloc((void**)&d_main_signal_out, sizeof(cufftComplex) * NEW_SIZE));
errorHandler(cudaMalloc((void**)&d_template_signal_out, sizeof(cufftComplex) * NEW_SIZE));
errorHandler(cudaMalloc((void**)&d_inversed, sizeof(cufftComplex) * NEW_SIZE));
errorHandler(cudaMemcpy(d_main_signal, h_padded_main_signal, sizeof(cufftComplex) * NEW_SIZE, cudaMemcpyHostToDevice));
errorHandler(cudaMemcpy(d_template_signal, h_padded_template_signal, sizeof(cufftComplex) * NEW_SIZE, cudaMemcpyHostToDevice));
// Plan for 2 CUFFT_FORWARDs
cufftHandle plan_main;
cufftHandle plan_template;
cufftPlan1d(&plan_main, NEW_SIZE, CUFFT_C2C, 1);
cufftPlan1d(&plan_template, NEW_SIZE, CUFFT_C2C, 1);
// Perform forward FFT
cufftExecC2C(plan_main, (cufftComplex *)d_main_signal, (cufftComplex *)d_main_signal_out, CUFFT_FORWARD);
cufftExecC2C(plan_template, (cufftComplex *)d_template_signal, (cufftComplex *)d_template_signal_out, CUFFT_FORWARD);
// Multiply the coefficients together and normalize the result
printf("Launching ComplexPointwiseMulAndScale<<< >>>\n");
dim3 gridDimensions((unsigned int)(ceil(NEW_SIZE / (float)BLOCK_SIZE)), 1, 1);
dim3 blockDimensions(BLOCK_SIZE, 1, 1);
ComplexPointwiseMulAndScale << <gridDimensions, blockDimensions >> >((cufftComplex *)d_main_signal_out, (cufftComplex *)d_template_signal_out, NEW_SIZE, 1.0f / NEW_SIZE);
errorHandler(cudaGetLastError());
// Perform the inverse fft on the main signal
cufftExecC2C(plan_main, (cufftComplex *)d_main_signal_out, (cufftComplex *)d_inversed, CUFFT_INVERSE);
// Copy data back to host
cufftComplex * h_correlation_signal;
h_correlation_signal = h_padded_main_signal;
errorHandler(cudaMemcpy(h_correlation_signal, d_inversed, sizeof(cufftComplex) * NEW_SIZE, cudaMemcpyDeviceToHost));
for (int i = 0; i < NEW_SIZE; i++) {
h_correlation_signal[i].x = abs(h_correlation_signal[i].x);
h_correlation_signal[i].y = abs(h_correlation_signal[i].y);
}
get_number_of_occurances(h_correlation_signal, NEW_SIZE);
// Cancel plans :))))
cufftDestroy(plan_main);
cufftDestroy(plan_template);
// Free allocated memory
errorHandler(cudaFree(d_main_signal));
errorHandler(cudaFree(d_template_signal));
errorHandler(cudaFree(d_main_signal_out));
errorHandler(cudaFree(d_template_signal_out));
free(h_main_image);
free(h_template_image);
free(h_main_signal);
free(h_template_signal);
free(h_padded_main_signal);
free(h_padded_template_signal);
return EXIT_SUCCESS;
}
///////////////////////////////////////////////////////////////////////////////////
// Function for padding original data
//////////////////////////////////////////////////////////////////////////////////
int PadData(const cufftComplex *signal, cufftComplex **padded_signal, int signal_size,
const cufftComplex *filter_kernel, cufftComplex **padded_filter_kernel, int filter_kernel_size)
{
int minRadius = filter_kernel_size / 2;
int maxRadius = filter_kernel_size - minRadius;
int new_size = signal_size + maxRadius;
// Pad signal
cufftComplex *new_data = (cufftComplex *)malloc(sizeof(cufftComplex) * new_size);
memcpy(new_data + 0, signal, signal_size * sizeof(cufftComplex));
memset(new_data + signal_size, 0, (new_size - signal_size) * sizeof(cufftComplex));
*padded_signal = new_data;
// Pad filter
new_data = (cufftComplex *)malloc(sizeof(cufftComplex) * new_size);
memcpy(new_data + 0, filter_kernel + minRadius, maxRadius * sizeof(cufftComplex));
memset(new_data + maxRadius, 0, (new_size - filter_kernel_size) * sizeof(cufftComplex));
memcpy(new_data + new_size - minRadius, filter_kernel, minRadius * sizeof(cufftComplex));
*padded_filter_kernel = new_data;
return new_size;
}
////////////////////////////////////////////////////////////////////////////////
// Complex operations
////////////////////////////////////////////////////////////////////////////////
// Complex addition: returns the value whose x and y fields are the sums of
// the corresponding fields of `a` and `b`.
static __device__ __host__ inline Complex ComplexAdd(Complex a, Complex b)
{
    Complex sum = a;
    sum.x += b.x;
    sum.y += b.y;
    return sum;
}
// Complex scale: multiplies both fields of `a` by the real factor `s`.
static __device__ __host__ inline Complex ComplexScale(Complex a, float s)
{
    Complex scaled;
    scaled.y = s * a.y;
    scaled.x = s * a.x;
    return scaled;
}
// Complex multiplication with x as the real part and y as the imaginary
// part: (a.x + i*a.y) * (b.x + i*b.y).
static __device__ __host__ inline Complex ComplexMul(Complex a, Complex b)
{
    const float real_part = a.x * b.x - a.y * b.y;
    const float imag_part = a.x * b.y + a.y * b.x;

    Complex product;
    product.x = real_part;
    product.y = imag_part;
    return product;
}
// Complex pointwise multiplication: for every i in [0, size) stores
// (a[i] * b[i]) * scale back into a[i]; `b` is only read.
// Uses 1-D indexing (x dimension only). Each thread starts at its global
// index and advances by the total number of launched threads, so any 1-D
// grid/block configuration covers all `size` elements.
static __global__ void ComplexPointwiseMulAndScale(cufftComplex *a, cufftComplex *b, int size, float scale)
{
    const int stride = blockDim.x * gridDim.x;
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    while (idx < size)
    {
        a[idx] = ComplexScale(ComplexMul(a[idx], b[idx]), scale);
        idx += stride;
    }
}
// Scans `arr` for its maximum and counts how many elements equal it, prints
// the count to wcout and returns it.
// An element replaces the current maximum only when BOTH its x and its y are
// strictly greater; an element counts as an occurrence when both fields
// compare exactly equal to the current maximum.
// Returns 0 (and prints 0) when `arr` is NULL or `size` is 0.
// NOTE(review): requiring both components to be larger is not a total order
// on complex values; comparing only one component or the magnitude may be
// what is intended - confirm with the caller.
int get_number_of_occurances(cufftComplex * arr, unsigned int size)
{
    int num_of_occurs = 0;
    if (arr != NULL && size > 0) {
        cufftComplex max = arr[0];
        num_of_occurs = 1; // arr[0] itself is the first occurrence of the current maximum
        for (unsigned int i = 1; i < size; i++) {
            if (arr[i].x > max.x && arr[i].y > max.y) {
                // New maximum: restart the count at exactly one.
                max = arr[i];
                num_of_occurs = 1;
            }
            else if (arr[i].x == max.x && arr[i].y == max.y) {
                // `else if` prevents a freshly found maximum from being
                // counted a second time by the equality test.
                num_of_occurs++;
            }
        }
    }
    wcout << "[Number of Occurances]: " << num_of_occurs << endl;
    return num_of_occurs;
}
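#include <iostream>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <math.h>
#include <chrono>
#include <math_functions.h>
#include <bitmap_image.hpp>
#include <cufft.h>
using namespace std;
#define errorHandler(stmt) \
do { \
cudaError_t err = stmt; \
if (err != cudaSuccess) { \
printf("[ERROR] Failed to run stmt %d, error body: %s\n", __LINE__, cudaGetErrorString(err)); \
return -1; } \
} while (0)
#define M_PI 3.14159265
#define BLOCK_SIZE 1024
typedef float2 Complex;
int initiate_parallel_template_matching(bitmap_image, bitmap_image);
static __device__ __host__ inline Complex ComplexAdd(Complex, Complex);
static __device__ __host__ inline Complex ComplexScale(Complex, float);
static __device__ __host__ inline Complex ComplexMul(Complex, Complex);
static __global__ void ComplexPointwiseMulAndScale(cufftComplex *, cufftComplex *, int, float);
// Padding functions
int PadData(const cufftComplex *signal, cufftComplex **padded_signal, int signal_size,
const cufftComplex *filter_kernel, cufftComplex **padded_filter_kernel, int filter_kernel_size);
int get_number_of_occurances(cufftComplex * arr, unsigned int size);
int main()
{
bitmap_image main_image("Input Files/collection.bmp");
bitmap_image template_image("Input Files/collection_coin.bmp");
initiate_parallel_template_matching(main_image, template_image);
system("pause");
return 0;
}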
int启动并行模板匹配(位图图像主图像、位图图像模板图像)
{
//获取尺寸
int main_width=main_image.width();
int main_height=main_image.height();
int main_size=主_宽度*主_高度;
int template_width=template_image.width();
int template_height=template_image.height();
int template_size=模板宽度*模板高度;
无符号字符*h_main_image=新的无符号字符[3*main_size];
对于(int col=0;col