CUDA cuFFT 输出不正确
我对此程序有问题:CUDA cuFFT 输出不正确,cuda,cufft,Cuda,Cufft,我对此程序有问题: #include <stdlib.h> #include <stdio.h> #include <string.h> #include <math.h> #include <cufft.h> #include <cuComplex.h> #define SIGNAL_SIZE 1024 int main(int argc, char **argv) { cudaEvent_t st
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <cufft.h>
#include <cuComplex.h>
#define SIGNAL_SIZE 1024
// Reproducer from the question: fills a 1024-sample sawtooth signal on the
// host, runs a forward and then an inverse FFT in place on the device, and
// prints the original samples next to the round-tripped ones.
//
// NOTE(review): this version is the one that produces wrong output. The
// buffers are declared as cuDoubleComplex, but the plan is created with
// CUFFT_C2C and executed with cufftExecC2C through casts to cufftComplex.
// In addition, the inverse result is printed without dividing by SIGNAL_SIZE.
// None of the CUDA/cuFFT return codes are checked.
int main(int argc, char **argv) {
// Events used to time the section between the two cudaEventRecord calls.
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
// Allocate host memory for the signal
cuDoubleComplex *h_signal = (cuDoubleComplex *) malloc(sizeof(cuDoubleComplex) * SIGNAL_SIZE);
// Initialize the memory for the signal:
// real part is i/SIGNAL_SIZE for the first half and i/SIGNAL_SIZE - 1 for the
// second half; imaginary part is zero.
for (unsigned int i = 0; i < SIGNAL_SIZE; ++i) {
if((double)i/SIGNAL_SIZE>=0 && (double)i/SIGNAL_SIZE<0.5) h_signal[i].x = (double)i/SIGNAL_SIZE;
else if((double)i/SIGNAL_SIZE>=0.5 && (double)i/SIGNAL_SIZE<1) h_signal[i].x = (double)i/SIGNAL_SIZE-1;
h_signal[i].y = 0;
}
// Allocate device memory for signal
cuDoubleComplex *d_signal;
cudaMalloc((void **) &d_signal, SIGNAL_SIZE*sizeof(cuDoubleComplex));
// Copy host memory to device
cudaMemcpy(d_signal, h_signal, SIGNAL_SIZE*sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
// Timed region starts here; it covers plan creation, both transforms and the
// device-to-host copy.
cudaEventRecord(start, 0);
cufftHandle plan;
// NOTE(review): CUFFT_C2C plan, although d_signal holds cuDoubleComplex data.
cufftPlan1d(&plan, SIGNAL_SIZE , CUFFT_C2C, 1);
// FFT computation
// Forward transform, in place (input and output are both d_signal).
// NOTE(review): the cast to cufftComplex* reinterprets the double-precision buffer.
cufftExecC2C(plan, (cufftComplex *) d_signal, (cufftComplex *) d_signal,
CUFFT_FORWARD);
// Inverse transform, again in place on the same buffer.
cufftExecC2C(plan, (cufftComplex *) d_signal, (cufftComplex *) d_signal, CUFFT_INVERSE);
// Host buffer that receives the round-tripped signal.
cuDoubleComplex *h_signal_inv =(cuDoubleComplex *) malloc(sizeof(cuDoubleComplex) * SIGNAL_SIZE);
cudaMemcpy(h_signal_inv, d_signal, sizeof(cuDoubleComplex) * SIGNAL_SIZE, cudaMemcpyDeviceToHost);
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
float elapsedTime;
cudaEventElapsedTime(&elapsedTime, start, stop);
printf("Elapsed Time: %3.1f ms\n", elapsedTime);
// Print original real part next to the round-tripped real part.
// NOTE(review): the round-tripped value is not divided by SIGNAL_SIZE here.
for(int i=0;i<SIGNAL_SIZE;i++) printf("\n%f %f", h_signal[i].x, h_signal_inv[i].x);
// Release the plan, host buffers and device buffer, then reset the device.
cufftDestroy(plan);
free(h_signal);
free(h_signal_inv);
cudaFree(d_signal);
cudaDeviceReset();
return 0;
}
#包括
#包括
#包括
#包括
#包括
#包括
#定义信号大小1024
int main(int argc,字符**argv){
cudaEvent\u t启动、停止;
cudaEventCreate(&start);
cudaEventCreate(&stop);
//为信号分配主机内存
cuDoubleComplex*h_信号=(cuDoubleComplex*)malloc(sizeof(cuDoubleComplex)*信号_大小);
//初始化信号的存储器
for (unsigned int i = 0; i < SIGNAL_SIZE; ++i) { …(机器翻译的代码副本在此处被截断,完整代码见上文)
你混淆了数据类型。cufftDoubleComplex 与 cufftComplex 不同。使用 cufftDoubleComplex 时,应当使用 CUFFT_Z2Z 和 cufftExecZ2Z,而不是 CUFFT_C2C 和 cufftExecC2C。
此外,要在使用 cuFFT 先做正向变换、再做逆变换之后得到与输入一致的数据,需要将结果除以变换长度(摘自文档):
cuFFT 执行未归一化的 FFT;也就是说,对输入数据集执行正向 FFT,再对结果执行逆向 FFT,得到的数据等于输入数据乘以元素个数。是否按数据集大小的倒数对任一变换进行缩放,由用户自行决定。
下面的代码解决了上述问题,应该可以提供更好的结果:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <cufft.h>
#include <cuComplex.h>
#define SIGNAL_SIZE 1024
/* Abort with a readable message if a CUDA runtime call does not succeed. */
#define CUDA_CHECK(call)                                                      \
    do {                                                                      \
        cudaError_t err_ = (call);                                            \
        if (err_ != cudaSuccess) {                                            \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,     \
                    cudaGetErrorString(err_));                                \
            exit(EXIT_FAILURE);                                               \
        }                                                                     \
    } while (0)

/* Abort with a readable message if a cuFFT call does not return CUFFT_SUCCESS. */
#define CUFFT_CHECK(call)                                                     \
    do {                                                                      \
        cufftResult res_ = (call);                                            \
        if (res_ != CUFFT_SUCCESS) {                                          \
            fprintf(stderr, "cuFFT error %s:%d: code %d\n", __FILE__,         \
                    __LINE__, (int)res_);                                     \
            exit(EXIT_FAILURE);                                               \
        }                                                                     \
    } while (0)

/*
 * Round-trips a SIGNAL_SIZE-point double-precision complex signal through a
 * forward and an inverse cuFFT transform (Z2Z, in place) and prints each
 * original real sample next to the recovered one.
 *
 * cuFFT transforms are unnormalized, so the recovered samples are divided by
 * SIGNAL_SIZE before printing. Every CUDA/cuFFT call is checked; the program
 * exits with EXIT_FAILURE on the first error.
 *
 * Returns 0 on success.
 */
int main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    const size_t bytes = sizeof(cuDoubleComplex) * SIGNAL_SIZE;

    cudaEvent_t start, stop;
    CUDA_CHECK(cudaEventCreate(&start));
    CUDA_CHECK(cudaEventCreate(&stop));

    /* Host buffers: the input signal and the round-tripped result. */
    cuDoubleComplex *h_signal = (cuDoubleComplex *)malloc(bytes);
    cuDoubleComplex *h_signal_inv = (cuDoubleComplex *)malloc(bytes);
    if (h_signal == NULL || h_signal_inv == NULL) {
        fprintf(stderr, "Host allocation of %zu bytes failed\n", bytes);
        free(h_signal);
        free(h_signal_inv);
        return EXIT_FAILURE;
    }

    /* Sawtooth: t on [0, 0.5), t - 1 on [0.5, 1); imaginary part is zero. */
    for (unsigned int i = 0; i < SIGNAL_SIZE; ++i) {
        const double t = (double)i / SIGNAL_SIZE;
        h_signal[i].x = (t < 0.5) ? t : t - 1.0;
        h_signal[i].y = 0.0;
    }

    /* Device buffer; both transforms run in place on it. */
    cuDoubleComplex *d_signal = NULL;
    CUDA_CHECK(cudaMalloc((void **)&d_signal, bytes));
    CUDA_CHECK(cudaMemcpy(d_signal, h_signal, bytes, cudaMemcpyHostToDevice));

    /* Timed region: plan creation, both transforms and the copy back. */
    CUDA_CHECK(cudaEventRecord(start, 0));

    cufftHandle plan;
    /* Z2Z is the double-precision complex-to-complex transform type, matching
       the cuDoubleComplex buffers. */
    CUFFT_CHECK(cufftPlan1d(&plan, SIGNAL_SIZE, CUFFT_Z2Z, 1));
    CUFFT_CHECK(cufftExecZ2Z(plan, d_signal, d_signal, CUFFT_FORWARD));
    CUFFT_CHECK(cufftExecZ2Z(plan, d_signal, d_signal, CUFFT_INVERSE));

    /* Blocking copy: the results are complete on the host when it returns. */
    CUDA_CHECK(cudaMemcpy(h_signal_inv, d_signal, bytes, cudaMemcpyDeviceToHost));

    CUDA_CHECK(cudaEventRecord(stop, 0));
    CUDA_CHECK(cudaEventSynchronize(stop));

    float elapsedTime = 0.0f;
    CUDA_CHECK(cudaEventElapsedTime(&elapsedTime, start, stop));
    printf("Elapsed Time: %3.1f ms\n", elapsedTime);

    /* Forward followed by inverse scales the data by SIGNAL_SIZE; undo it. */
    for (int i = 0; i < SIGNAL_SIZE; i++) {
        printf("\n%f %f", h_signal[i].x, h_signal_inv[i].x / SIGNAL_SIZE);
    }

    /* Release everything that was created above, including the events. */
    CUFFT_CHECK(cufftDestroy(plan));
    CUDA_CHECK(cudaFree(d_signal));
    CUDA_CHECK(cudaEventDestroy(start));
    CUDA_CHECK(cudaEventDestroy(stop));
    free(h_signal);
    free(h_signal_inv);
    CUDA_CHECK(cudaDeviceReset());
    return 0;
}
#包括
#包括
#包括
#包括
#包括
#包括
#定义信号大小1024
int main(int argc,字符**argv){
cudaEvent\u t启动、停止;
cudaEventCreate(&start);
cudaEventCreate(&stop);
//为信号分配主机内存
cuDoubleComplex*h_信号=(cuDoubleComplex*)malloc(sizeof(cuDoubleComplex)*信号_大小);
//初始化信号的存储器
for (unsigned int i = 0; i < SIGNAL_SIZE; ++i) { …(机器翻译的代码副本在此处被截断,完整代码见上文)
现在,借助最近引入的回调功能,可以把 IFFT 的归一化直接嵌入到 cuFFT 的执行过程中。
我还有一个问题!我想压缩原始信号,因此需要在频域上设置一个阈值。该怎么做?调用 cufftExecZ2Z 之后,输出是什么?
你的第一个问题是一般性的 FFT 问题,并非 CUDA 特有的问题。我建议把它作为一个新的、带有恰当标签的问题来提问,因为它可能需要较长的回答;或者先搜索一下,也许就能找到答案。关于第二个问题:你的 FFT(正向和逆向)都是原位变换,因为输入和输出使用同一个指针。这意味着正向 FFT 的输出存储在 d_signal 中,覆盖了原始输入数据。