CUDA cuFFT:从 double 到 complex 的变换

CUDA cuFFT:从 double 到 complex 的变换,cuda,cufft,Cuda,Cufft,我想用 cuFFT 库做一个从 double 到 std::complex 的 FFT。我的代码如下: #include <complex> #include <iostream> #include <cufft.h> #include <cuda_runtime_api.h> typedef std::complex<double> Complex; using namespace std; int main(){ int n = 100

我想用 cuFFT 库做一个从 double 到 std::complex 的 FFT。我的代码如下:

#include <complex>
#include <iostream>
#include <cufft.h>
#include <cuda_runtime_api.h>

typedef std::complex<double> Complex;
using namespace std;

// Question code as posted: fills a host array of n doubles with 1, attempts a
// 1-D double-precision real-to-complex transform (CUFFT_D2Z) on the device,
// and prints the first n/2 output values.
// NOTE(review): this listing is kept verbatim; the inline NOTE(review)
// comments mark the defects visible in it. No API return value is checked
// except the one printed from cufftExecD2Z.
int main(){
  int n = 100;
  double* in;
  Complex* out;
  in = (double*) malloc(sizeof(double) * n);
  // NOTE(review): operator precedence makes this (sizeof(Complex)*n)/2 + 1
  // bytes, not (n/2+1) Complex elements.
  out = (Complex*) malloc(sizeof(Complex) * n/2+1);
  for(int i=0; i<n; i++){
     in[i] = 1;
  }

  cufftHandle plan;
  // NOTE(review): cufftPlan1d writes the handle through &plan; assigning its
  // return value to plan afterwards overwrites that handle.
  plan = cufftPlan1d(&plan, n, CUFFT_D2Z, 1);
  unsigned int mem_size = sizeof(double)*n;
  cufftDoubleReal *d_in;
  cufftDoubleComplex *d_out;
  cudaMalloc((void **)&d_in, mem_size);
  // NOTE(review): d_out is sized for n doubles rather than for n/2+1
  // cufftDoubleComplex values.
  cudaMalloc((void **)&d_out, mem_size);
  cudaMemcpy(d_in, in, mem_size, cudaMemcpyHostToDevice);
  // NOTE(review): out has not been written at this point, so this copies
  // uninitialized host memory to the device.
  cudaMemcpy(d_out, out, mem_size, cudaMemcpyHostToDevice);
  // The value returned by cufftExecD2Z is stored and printed, not tested.
  int succes = cufftExecD2Z(plan,(cufftDoubleReal *) d_in,(cufftDoubleComplex *) d_out);
  cout << succes << endl;
  cudaMemcpy(out, d_out, mem_size, cudaMemcpyDeviceToHost);

  // Prints indices 0 .. n/2-1 of the result.
  for(int i=0; i<n/2; i++){
     cout << "out: " << i << " "  << out[i].real() << " " <<  out[i].imag() << endl;
  }
  return 0;
}
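#include <complex>
#include <iostream>
#include <cufft.h>
#include <cuda_runtime_api.h>
typedef std::complex<double> Complex;
using namespace std;
int main(){
int n = 100;
double* in;
Complex* out;
in = (double*) malloc(sizeof(double) * n);
out = (Complex*) malloc(sizeof(Complex) * n/2+1);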

for(int i=0; i<n; i++){ …

您的代码有多处错误。您或许应该先参考一下 cuFFT 的示例代码。

  • 您应该对所有 API 调用的返回值做适当的 CUDA 错误检查和 cuFFT 错误检查
  • cufftPlan1d
    函数的返回值不应赋给 plan:

    plan = cufftPlan1d(&plan, n, CUFFT_D2Z, 1);
    
    函数本身会设置 plan(这就是要把
    &plan
    传给函数的原因);之后再把返回值赋给 plan,就会覆盖函数刚刚设置好的句柄

  • 您正确地认识到输出的大小应为
    ((N/2)+1)
    个复数元素,但是没有在主机端为其正确分配空间(由于运算符优先级,sizeof(Complex) * n/2+1 得到的是 n/2 个元素的字节数再加 1 个字节,而不是 n/2+1 个元素):

    out = (Complex*) malloc(sizeof(Complex) * n/2+1);
    
    或在设备端:

    unsigned int mem_size = sizeof(double)*n;
    ...
    cudaMalloc((void **)&d_out, mem_size);
    
  • 下面的代码修复了上面的一些问题,足以得到您想要的结果(100,0,0,…)

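    #include <complex>
    #include <iostream>
    #include <cufft.h>
    #include <cuda_runtime_api.h>
    #define cudaCheckErrors(msg) \
    do { \
    cudaError_t __err = cudaGetLastError(); \
    if (__err != cudaSuccess) { \
    fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
    msg, cudaGetErrorString(__err), \
    __FILE__, __LINE__); \
    fprintf(stderr, "*** FAILED - ABORTING\n"); \
    exit(1); \
    } \
    } while (0)
    typedef std::complex<double> Complex;
    using namespace std;
    int main(){
    int n = 100;
    double* in;
    Complex* out;
    #ifdef IN_PLACE
    in = (double*) malloc(sizeof(Complex) * (n/2+1));
    out = (Complex*)in;
    #else
    in = (double*) malloc(sizeof(double) * n);
    out = (Complex*) malloc(sizeof(Complex) * (n/2+1));
    #endif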
    
    for(int i=0; i<n; i++){ …

谢谢,这非常有用。你说得对,我在错误检查方面太懒了。第 2 点尤其尴尬:我今天早些时候刚犯过同样的错误,明知这样写不对,却还是没有发现它。再次感谢,我以后会做得更好。
    #include <complex>
    #include <cstdio>
    #include <cstdlib>
    #include <iostream>
    #include <cuda_runtime_api.h>
    #include <cufft.h>
    
    // cudaCheckErrors(msg): aborts the program if the CUDA runtime reports an
    // error. It fetches the runtime's last error with cudaGetLastError() and,
    // when that is not cudaSuccess, prints msg, the CUDA error string and the
    // file/line of the macro invocation to stderr, then calls exit(1).
    // Place it directly after the CUDA runtime call(s) it is meant to check.
    // The do { ... } while (0) wrapper makes the macro behave as one statement.
    // The local is named without a leading double underscore because such
    // identifiers are reserved for the implementation.
    #define cudaCheckErrors(msg) \
        do { \
            cudaError_t cuda_check_err_ = cudaGetLastError(); \
            if (cuda_check_err_ != cudaSuccess) { \
                fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
                    msg, cudaGetErrorString(cuda_check_err_), \
                    __FILE__, __LINE__); \
                fprintf(stderr, "*** FAILED - ABORTING\n"); \
                exit(1); \
            } \
        } while (0)
    
    
    typedef std::complex<double> Complex;
    using namespace std;
    
    // Frees the host and device buffers owned by main(). Every argument may be
    // NULL: in the IN_PLACE build the "owned" output pointers are NULL because
    // the output aliases the input buffer, and on early error paths the device
    // buffers may not have been allocated yet.
    static void releaseBuffers(double* h_in, Complex* h_out_owned,
                               cufftDoubleReal* dev_in,
                               cufftDoubleComplex* dev_out_owned){
      cudaFree(dev_out_owned);
      cudaFree(dev_in);
      free(h_out_owned);
      free(h_in);
    }

    // Runs a 1-D double-precision real-to-complex FFT (CUFFT_D2Z) over n = 100
    // samples that are all 1, then prints output bins 0 .. n/2-1.
    //
    // Build with -DIN_PLACE to transform in place: one buffer, sized for the
    // n/2+1 complex outputs, serves as both input and output on host and device.
    //
    // Returns 0 on success and 1 on a host allocation or cuFFT failure; CUDA
    // runtime failures terminate the process through cudaCheckErrors.
    int main(){
      const int n = 100;
      // A real-to-complex transform of n reals produces n/2+1 complex values.
      const size_t out_mem_size = (n/2 + 1)*sizeof(cufftDoubleComplex);

      double* in = NULL;
      Complex* out = NULL;
      Complex* out_owned = NULL;   // separately allocated output, or NULL when in place
    #ifdef IN_PLACE
      in = (double*) malloc(sizeof(Complex) * (n/2+1));
      out = (Complex*)in;
    #else
      in = (double*) malloc(sizeof(double) * n);
      out_owned = (Complex*) malloc(sizeof(Complex) * (n/2+1));
      out = out_owned;
    #endif
      if (in == NULL || out == NULL) {
        cerr << "host malloc fail" << endl;
        releaseBuffers(in, out_owned, NULL, NULL);
        return 1;
      }
      for(int i=0; i<n; i++){
         in[i] = 1;
      }

      cufftHandle plan;
      // cufftPlan1d fills in plan through the pointer; its return value is a status.
      cufftResult res = cufftPlan1d(&plan, n, CUFFT_D2Z, 1);
      if (res != CUFFT_SUCCESS) {
        cout << "cufft plan error: " << res << endl;
        releaseBuffers(in, out_owned, NULL, NULL);
        return 1;
      }

      cufftDoubleReal *d_in = NULL;
      cufftDoubleComplex *d_out = NULL;
      cufftDoubleComplex *d_out_owned = NULL;  // NULL when d_out aliases d_in
    #ifdef IN_PLACE
      const size_t in_mem_size = out_mem_size;
      cudaMalloc((void **)&d_in, in_mem_size);
      d_out = (cufftDoubleComplex *)d_in;
    #else
      const size_t in_mem_size = sizeof(cufftDoubleReal)*n;
      cudaMalloc((void **)&d_in, in_mem_size);
      cudaMalloc((void **)&d_out_owned, out_mem_size);
      d_out = d_out_owned;
    #endif
      cudaCheckErrors("cuda malloc fail");
      cudaMemcpy(d_in, in, in_mem_size, cudaMemcpyHostToDevice);
      cudaCheckErrors("cuda memcpy H2D fail");

      res = cufftExecD2Z(plan,d_in, d_out);
      if (res != CUFFT_SUCCESS) {
        cout << "cufft exec error: " << res << endl;
        cufftDestroy(plan);
        releaseBuffers(in, out_owned, d_in, d_out_owned);
        return 1;
      }
      cudaMemcpy(out, d_out, out_mem_size, cudaMemcpyDeviceToHost);
      cudaCheckErrors("cuda memcpy D2H fail");

      // Bin n/2 is also present in out but is not printed, as in the original.
      for(int i=0; i<n/2; i++){
         cout << "out: " << i << " "  << out[i].real() << " " <<  out[i].imag() << endl;
      }

      // Release the plan and all buffers before exiting.
      res = cufftDestroy(plan);
      if (res != CUFFT_SUCCESS) {cout << "cufft destroy error: " << res << endl;}
      releaseBuffers(in, out_owned, d_in, d_out_owned);
      cudaCheckErrors("cuda free fail");
      return 0;
    }