Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/cplusplus/145.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C++ 关于cufft R2C和C2R_C++_Cuda_Cufft - Fatal编程技术网

C++ 关于cufft R2C和C2R

C++ 关于cufft R2C和C2R,c++,cuda,cufft,C++,Cuda,Cufft,我曾经用过cufft做过我的研究,但是在使用它的过程中出现了一些问题。我的步骤如下: __global__ void MultiplyKernel(cufftComplex *data, float *data1,cufftComplex *data2, unsigned vectorSize) { unsigned idx = blockIdx.x*blockDim.x+threadIdx.x; if (idx < vectorSize){ data[id

我曾经用过cufft做过我的研究,但是在使用它的过程中出现了一些问题。我的步骤如下:

__global__ void MultiplyKernel(cufftComplex *data, float *data1,cufftComplex *data2, unsigned vectorSize) {
    unsigned idx = blockIdx.x*blockDim.x+threadIdx.x;
    if (idx < vectorSize){
        data[idx].x = data2[idx].x*data1[idx];
        data[idx].y = data2[idx].y*data1[idx];
    }
}

__global__ void Scale(cufftReal *data, unsigned vectorSize) {
    unsigned idx = blockIdx.x*blockDim.x+threadIdx.x;
    if (idx < vectorSize){
        data[idx] = data[idx]/vectorSize;
    }
}

void ApplyKernel1(cufftReal *data2, float *ImageBuffer, float *KernelBuffer, unsigned int NX, unsigned int NY,unsigned int NZ)
{
      float *Akernel;
      cufftComplex *data_dev1, *data_dev2;
      cufftReal *data_dev3, *data_dev;
      cudaMalloc((void **)&Akernel, NX * NY * NZ * sizeof(float));
      cudaMalloc((void **)&data_dev3, NX * NY * NZ * sizeof(cufftReal));
      cudaMalloc((void **)&data_dev, NX * NY * NZ * sizeof(cufftComplex));
      cudaMalloc((void **)&data_dev1, NX * NY * NZ * sizeof(cufftComplex));
      cudaMalloc((void **)&data_dev2, NX * NY * NZ * sizeof(cufftComplex));
      cudaMemset(data_dev, 0, NX * NY * NZ * sizeof(cufftReal));
      cudaMemset(data_dev1, 0, NX * NY * NZ * sizeof(cufftComplex));
      cudaMemset(data_dev2, 0, NX * NY * NZ * sizeof(cufftComplex));
      //cufftComplex *resultFFT = (cufftComplex*)malloc(NX * NY * NZ * sizeof(cufftComplex));
      //cufftReal *resultIFFT = (cufftReal*)malloc(NX * NY * NZ * sizeof(cufftReal));

      cudaMemcpy(data_dev, ImageBuffer, NX * NY * NZ * sizeof(cufftReal), cudaMemcpyHostToDevice);

      cufftHandle plan;
      cufftPlan3d(&plan, NZ, NY, NX, CUFFT_R2C);
      cufftExecR2C(plan, data_dev, data_dev1);

      //Multiply kernel
      cudaMemcpy(Akernel, KernelBuffer, NX * NY * NZ * sizeof(float), cudaMemcpyHostToDevice);
      static const int BLOCK_SIZE = 1000;
      const int blockCount = (NX*NY*NZ+BLOCK_SIZE-1)/BLOCK_SIZE;
      MultiplyKernel <<<blockCount, BLOCK_SIZE>>> (data_dev2, Akernel, data_dev1, NX*NY*NZ);


      cufftDestroy(plan);
      //cufftPlan3d(&plan, NZ, NY, NX, CUFFT_C2R);
      cufftPlan3d(&plan, NZ,NY,NX, CUFFT_C2R);
      cufftExecC2R(plan, data_dev2, data_dev3 );
      Scale <<<blockCount, BLOCK_SIZE>>> (data_dev3, NX*NY*NZ);
      cudaMemcpy(data2, data_dev3, NZ * NY * NX * sizeof(cufftReal), cudaMemcpyDeviceToHost);

      cufftDestroy(plan);
      cudaFree(data_dev);
      cudaFree(data_dev1);
      cudaFree(data_dev2);
      cudaFree(data_dev3);
      cudaFree(Akernel);


}
  • 使用R2C对图像进行前向FFT
  • 将核系数与复数结果相乘
  • 用C2R对乘法结果进行逆FFT
  • 但是,当我使用复数结果对核进行乘法时,出现了一个严重的问题,即cuft复数结果与fftw的结果不相等,并且结果中存在大量的零。我知道R2C的结果大小是N1(N2/2+1),但我想得到完整的复杂结果。如何解决这个问题?i、 e.如何恢复R2C结果?如何将乘法结果放入C2R并得到正确答案

    我的机具程序代码如下所示:

    __global__ void MultiplyKernel(cufftComplex *data, float *data1,cufftComplex *data2, unsigned vectorSize) {
        unsigned idx = blockIdx.x*blockDim.x+threadIdx.x;
        if (idx < vectorSize){
            data[idx].x = data2[idx].x*data1[idx];
            data[idx].y = data2[idx].y*data1[idx];
        }
    }
    
    __global__ void Scale(cufftReal *data, unsigned vectorSize) {
        unsigned idx = blockIdx.x*blockDim.x+threadIdx.x;
        if (idx < vectorSize){
            data[idx] = data[idx]/vectorSize;
        }
    }
    
    void ApplyKernel1(cufftReal *data2, float *ImageBuffer, float *KernelBuffer, unsigned int NX, unsigned int NY,unsigned int NZ)
    {
          float *Akernel;
          cufftComplex *data_dev1, *data_dev2;
          cufftReal *data_dev3, *data_dev;
          cudaMalloc((void **)&Akernel, NX * NY * NZ * sizeof(float));
          cudaMalloc((void **)&data_dev3, NX * NY * NZ * sizeof(cufftReal));
          cudaMalloc((void **)&data_dev, NX * NY * NZ * sizeof(cufftComplex));
          cudaMalloc((void **)&data_dev1, NX * NY * NZ * sizeof(cufftComplex));
          cudaMalloc((void **)&data_dev2, NX * NY * NZ * sizeof(cufftComplex));
          cudaMemset(data_dev, 0, NX * NY * NZ * sizeof(cufftReal));
          cudaMemset(data_dev1, 0, NX * NY * NZ * sizeof(cufftComplex));
          cudaMemset(data_dev2, 0, NX * NY * NZ * sizeof(cufftComplex));
          //cufftComplex *resultFFT = (cufftComplex*)malloc(NX * NY * NZ * sizeof(cufftComplex));
          //cufftReal *resultIFFT = (cufftReal*)malloc(NX * NY * NZ * sizeof(cufftReal));
    
          cudaMemcpy(data_dev, ImageBuffer, NX * NY * NZ * sizeof(cufftReal), cudaMemcpyHostToDevice);
    
          cufftHandle plan;
          cufftPlan3d(&plan, NZ, NY, NX, CUFFT_R2C);
          cufftExecR2C(plan, data_dev, data_dev1);
    
          //Multiply kernel
          cudaMemcpy(Akernel, KernelBuffer, NX * NY * NZ * sizeof(float), cudaMemcpyHostToDevice);
          static const int BLOCK_SIZE = 1000;
          const int blockCount = (NX*NY*NZ+BLOCK_SIZE-1)/BLOCK_SIZE;
          MultiplyKernel <<<blockCount, BLOCK_SIZE>>> (data_dev2, Akernel, data_dev1, NX*NY*NZ);
    
    
          cufftDestroy(plan);
          //cufftPlan3d(&plan, NZ, NY, NX, CUFFT_C2R);
          cufftPlan3d(&plan, NZ,NY,NX, CUFFT_C2R);
          cufftExecC2R(plan, data_dev2, data_dev3 );
          Scale <<<blockCount, BLOCK_SIZE>>> (data_dev3, NX*NY*NZ);
          cudaMemcpy(data2, data_dev3, NZ * NY * NX * sizeof(cufftReal), cudaMemcpyDeviceToHost);
    
          cufftDestroy(plan);
          cudaFree(data_dev);
          cudaFree(data_dev1);
          cudaFree(data_dev2);
          cudaFree(data_dev3);
          cudaFree(Akernel);
    
    
    }
    
    \uuuuu全局\uuuuuu无效多线程(cufftComplex*数据、浮点*data1、cufftComplex*data2、无符号向量大小){
    无符号idx=blockIdx.x*blockDim.x+threadIdx.x;
    if(idx
    将R2C fft的结果乘以复数时,结果不再对应于对称阵列

    你不知道R2C和C2R变换是对称的,而CUFFT只计算了一半的解,因为这个?为什么不做C2C运算呢?是的,我用C2C做正向变换,将data.y设置为0,得到了正确的结果。但是,最后在乘以系数后,我想进行反向变换以获得真实结果,即C2R(反向),如何使用cufft库获得正确答案?(注意:我已经将一些系数乘以C2C结果。)感谢您的回答,我问如何从R2C结果中获得与C2C结果相等的完整结果。我想要整个结果,因为我想用它乘以一些系数,你能帮我吗?@KwuJohn复数的数目是实数的一半,因为如果输入数组是实数,频率n-k的响应是频率k响应的复共轭。如果您的过滤器不保留此属性,则过滤后的信号将不是纯真实的,使用C2R返回真实世界似乎有缺陷!