CUDA cuFFT 输出不正确

CUDA cuFFT 输出不正确,cuda,cufft,Cuda,Cufft,我对此程序有问题: #include <stdlib.h> #include <stdio.h> #include <string.h> #include <math.h> #include <cufft.h> #include <cuComplex.h> #define SIGNAL_SIZE 1024 int main(int argc, char **argv) { cudaEvent_t st

我对此程序有问题:

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <cufft.h>
#include <cuComplex.h>

#define SIGNAL_SIZE        1024

// Builds a SIGNAL_SIZE-point complex test signal on the host, copies it to the
// device, runs a forward and then an inverse cuFFT transform in place on the
// device buffer, copies the result back, and prints each original real part
// next to the corresponding round-tripped real part. The elapsed time between
// the two recorded events is printed in milliseconds.
//
// NOTE(review): this is the listing the question is about; it is kept as-is.
// Return codes of the CUDA runtime and cuFFT calls are not checked here.
int main(int argc, char **argv) {
   // Events used to time the plan creation, both transforms and the copy back.
   cudaEvent_t start, stop;
   cudaEventCreate(&start);
   cudaEventCreate(&stop);

   // Allocate host memory for the signal
   cuDoubleComplex *h_signal = (cuDoubleComplex *) malloc(sizeof(cuDoubleComplex) * SIGNAL_SIZE);

   // Initialize the memory for the signal:
   // real part is i/SIGNAL_SIZE for the first half of the samples and
   // i/SIGNAL_SIZE - 1 for the second half; imaginary part is always 0.
   for (unsigned int i = 0; i < SIGNAL_SIZE; ++i) {
      if((double)i/SIGNAL_SIZE>=0 && (double)i/SIGNAL_SIZE<0.5)  h_signal[i].x = (double)i/SIGNAL_SIZE;
      else if((double)i/SIGNAL_SIZE>=0.5 && (double)i/SIGNAL_SIZE<1)  h_signal[i].x = (double)i/SIGNAL_SIZE-1;
      h_signal[i].y = 0;
   }

// Allocate device memory for signal
   cuDoubleComplex *d_signal;

   cudaMalloc((void **) &d_signal, SIGNAL_SIZE*sizeof(cuDoubleComplex));
   // Copy host memory to device
   cudaMemcpy(d_signal, h_signal, SIGNAL_SIZE*sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);


// Start of the timed region.
cudaEventRecord(start, 0);
   cufftHandle plan;
   // NOTE(review): the plan type is CUFFT_C2C while the buffers are declared as
   // cuDoubleComplex -- this looks like a precision mismatch; confirm whether
   // CUFFT_Z2Z was intended.
   cufftPlan1d(&plan, SIGNAL_SIZE , CUFFT_C2C, 1);

   // FFT computation
   // Input and output pointers are the same, so the forward result overwrites
   // the original data in d_signal.
   // NOTE(review): d_signal is a cuDoubleComplex* cast to cufftComplex* here.
   cufftExecC2C(plan, (cufftComplex *) d_signal, (cufftComplex *) d_signal,
         CUFFT_FORWARD);

    // Inverse transform, again in place on d_signal (same cast as above).
    cufftExecC2C(plan, (cufftComplex *) d_signal, (cufftComplex *) d_signal, CUFFT_INVERSE);

   // Host buffer for the round-tripped signal, filled from the device buffer.
   cuDoubleComplex *h_signal_inv =(cuDoubleComplex *) malloc(sizeof(cuDoubleComplex) * SIGNAL_SIZE);
   cudaMemcpy(h_signal_inv, d_signal, sizeof(cuDoubleComplex) * SIGNAL_SIZE, cudaMemcpyDeviceToHost);


   // End of the timed region; wait for the stop event before reading the time.
   cudaEventRecord(stop, 0);
   cudaEventSynchronize(stop);

   float elapsedTime;
   cudaEventElapsedTime(&elapsedTime, start, stop);
   printf("Elapsed Time:  %3.1f ms\n", elapsedTime);


    // Print original vs. round-tripped real parts, one pair per line.
    // NOTE(review): the round-tripped value is printed without any scaling;
    // confirm whether it should be divided by SIGNAL_SIZE.
    for(int i=0;i<SIGNAL_SIZE;i++) printf("\n%f %f", h_signal[i].x, h_signal_inv[i].x);

    cufftDestroy(plan);

   free(h_signal);
   free(h_signal_inv);

   cudaFree(d_signal);

   cudaDeviceReset();
   return 0;
}
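#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <cufft.h>
#include <cuComplex.h>
#define SIGNAL_SIZE 1024
int main(int argc, char **argv) {
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
// 为信号分配主机内存
cuDoubleComplex *h_signal = (cuDoubleComplex *) malloc(sizeof(cuDoubleComplex) * SIGNAL_SIZE);
// 初始化信号的内存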
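for (unsigned int i = 0; i < SIGNAL_SIZE; ++i) { if ((double)i/SIGNAL_SIZE >= 0 && (double)i/SIGNAL_SIZE < 0.5) … else if ((double)i/SIGNAL_SIZE >= 0.5 && (double)i/SIGNAL_SIZE < 1) …(此处代码在提取时被截断,完整代码见上文)

您混淆了数据类型。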

cufftDoubleComplex
与
cufftComplex
不同。请使用
cufftDoubleComplex
(并相应地使用 Z2Z 类型的变换)。

此外,若要在使用 cuFFT 先做正向变换、再做逆变换之后得到与原始输入一致的数据,需要将结果除以信号长度。文档中的说明是:

cuFFT 执行的是未归一化的 FFT;也就是说,对输入数据集执行正向 FFT,然后对结果执行逆向 FFT,得到的数据等于输入数据乘以元素个数。是否按数据集大小的倒数对其中任一变换进行缩放,由用户自行决定。

下面的代码解决了上述问题,应该可以提供更好的结果:

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <cufft.h>
#include <cuComplex.h>

#define SIGNAL_SIZE        1024

// Abort with a diagnostic if a CUDA runtime call did not succeed.
static void checkCuda(cudaError_t status, const char *what) {
   if (status != cudaSuccess) {
      fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
      exit(EXIT_FAILURE);
   }
}

// Abort with a diagnostic if a cuFFT call did not succeed.
static void checkCufft(cufftResult status, const char *what) {
   if (status != CUFFT_SUCCESS) {
      fprintf(stderr, "cuFFT error during %s: code %d\n", what, (int) status);
      exit(EXIT_FAILURE);
   }
}

// Round-trips a SIGNAL_SIZE-point double-precision complex signal through a
// forward and an inverse cuFFT Z2Z transform (both in place on the device
// buffer), then prints each original real part next to the round-tripped real
// part divided by SIGNAL_SIZE. Also prints the elapsed time, in milliseconds,
// of plan creation + both transforms + the device-to-host copy.
//
// Returns 0 on success; terminates with EXIT_FAILURE if a host allocation, a
// CUDA runtime call or a cuFFT call fails.
int main(int argc, char **argv) {
   (void) argc;
   (void) argv;

   const size_t bytes = sizeof(cuDoubleComplex) * SIGNAL_SIZE;

   cudaEvent_t start, stop;
   checkCuda(cudaEventCreate(&start), "cudaEventCreate(start)");
   checkCuda(cudaEventCreate(&stop), "cudaEventCreate(stop)");

   // Allocate both host buffers up front so a failed allocation is detected
   // before any device work is queued.
   cuDoubleComplex *h_signal = (cuDoubleComplex *) malloc(bytes);
   cuDoubleComplex *h_signal_inv = (cuDoubleComplex *) malloc(bytes);
   if (h_signal == NULL || h_signal_inv == NULL) {
      fprintf(stderr, "Host allocation of %zu bytes failed\n", bytes);
      free(h_signal);
      free(h_signal_inv);
      return EXIT_FAILURE;
   }

   // Initialize the signal: the real part is t = i/SIGNAL_SIZE on the first
   // half of the samples and t - 1 on the second half; the imaginary part is 0.
   for (unsigned int i = 0; i < SIGNAL_SIZE; ++i) {
      const double t = (double) i / SIGNAL_SIZE;
      h_signal[i].x = (t < 0.5) ? t : t - 1.0;
      h_signal[i].y = 0.0;
   }

   // Allocate device memory for the signal and copy the host data to it.
   cuDoubleComplex *d_signal = NULL;
   checkCuda(cudaMalloc((void **) &d_signal, bytes), "cudaMalloc(d_signal)");
   checkCuda(cudaMemcpy(d_signal, h_signal, bytes, cudaMemcpyHostToDevice),
             "cudaMemcpy(host to device)");

   // Start of the timed region.
   checkCuda(cudaEventRecord(start, 0), "cudaEventRecord(start)");

   // Z2Z is the double-precision complex-to-complex transform, matching the
   // cuDoubleComplex buffers.
   cufftHandle plan;
   checkCufft(cufftPlan1d(&plan, SIGNAL_SIZE, CUFFT_Z2Z, 1), "cufftPlan1d");

   // Input and output pointers are identical, so each transform overwrites
   // d_signal with its result.
   checkCufft(cufftExecZ2Z(plan, d_signal, d_signal, CUFFT_FORWARD),
              "cufftExecZ2Z(forward)");
   checkCufft(cufftExecZ2Z(plan, d_signal, d_signal, CUFFT_INVERSE),
              "cufftExecZ2Z(inverse)");

   checkCuda(cudaMemcpy(h_signal_inv, d_signal, bytes, cudaMemcpyDeviceToHost),
             "cudaMemcpy(device to host)");

   // End of the timed region; wait for the stop event before reading the time.
   checkCuda(cudaEventRecord(stop, 0), "cudaEventRecord(stop)");
   checkCuda(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)");

   float elapsedTime = 0.0f;
   checkCuda(cudaEventElapsedTime(&elapsedTime, start, stop), "cudaEventElapsedTime");
   printf("Elapsed Time:  %3.1f ms\n", elapsedTime);

   // cuFFT transforms are unnormalized: forward followed by inverse yields the
   // input scaled by the element count, hence the division by SIGNAL_SIZE.
   for (int i = 0; i < SIGNAL_SIZE; i++) {
      printf("\n%f %f", h_signal[i].x, h_signal_inv[i].x / SIGNAL_SIZE);
   }

   // Release everything that was created above, including the timing events.
   checkCufft(cufftDestroy(plan), "cufftDestroy");
   checkCuda(cudaEventDestroy(start), "cudaEventDestroy(start)");
   checkCuda(cudaEventDestroy(stop), "cudaEventDestroy(stop)");

   free(h_signal);
   free(h_signal_inv);

   checkCuda(cudaFree(d_signal), "cudaFree(d_signal)");

   checkCuda(cudaDeviceReset(), "cudaDeviceReset");
   return 0;
}
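#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <cufft.h>
#include <cuComplex.h>
#define SIGNAL_SIZE 1024
int main(int argc, char **argv) {
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
// 为信号分配主机内存
cuDoubleComplex *h_signal = (cuDoubleComplex *) malloc(sizeof(cuDoubleComplex) * SIGNAL_SIZE);
// 初始化信号的内存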
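for (unsigned int i = 0; i < SIGNAL_SIZE; ++i) { if ((double)i/SIGNAL_SIZE >= 0 && (double)i/SIGNAL_SIZE < 0.5) … else if ((double)i/SIGNAL_SIZE >= 0.5 && (double)i/SIGNAL_SIZE < 1) …(此处代码在提取时被截断,完整代码见上文)

评论:现在,借助最近引入的回调(callback)功能,您可以把 IFFT 的归一化直接嵌入到 cuFFT 的执行过程中。

评论:我还有一个问题!我想压缩初始信号,因此必须在频域上设置一个阈值。我该如何做到这一点?当我调用 cufftExecZ2Z 时,输出是什么?

评论:您的第一个问题是一个普通的 FFT 问题,而不是 CUDA 特有的问题。我建议把它作为一个新的、带有恰当标签的问题来提问,因为它可能需要较长的回答;或者自行搜索,也能找到答案。关于第二个问题:由于输入和输出使用的是同一个指针,您的 FFT(正向和逆向)都是原位(in-place)执行的。这意味着正向 FFT 的输出存储在 d_signal 中,并覆盖了原始输入数据。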