Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/matlab/16.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Matlab 利用cuFFT进行逆FFT的标度_Matlab_Cuda_Fft_Scaling_Cufft - Fatal编程技术网

Matlab 利用cuFFT进行逆FFT的标度

Matlab 利用cuFFT进行逆FFT的标度,matlab,cuda,fft,scaling,cufft,Matlab,Cuda,Fft,Scaling,Cufft,每当我用cuFFT绘制程序获得的值,并将结果与Matlab的结果进行比较时,我得到的是相同形状的图,最大值和最小值在相同的点上。然而,由CUFT产生的值远大于由Matlab产生的值。Matlab代码是 fs = 1000; % sample freq D = [0:1:4]'; % pulse delay times t = 0 : 1/fs : 4000/fs;

每当我绘制用cuFFT程序得到的值,并与Matlab的结果进行比较时,两者的图形形状相同,最大值和最小值也出现在相同的位置。然而,cuFFT产生的值远大于Matlab产生的值。Matlab代码如下:

fs = 1000;                              % sample freq
D = [0:1:4]';                           % pulse delay times
t = 0 : 1/fs : 4000/fs;                 % signal evaluation time
w = 0.5;                                % width of each pulse
yp = pulstran(t,D,'rectpuls',w);
filt = conj(fliplr(yp));
xx = fft(yp,1024).*fft(filt,1024);
xx = (abs(ifft(xx)));    
具有相同输入的CUDA代码如下所示:

cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_FORWARD);
cufftExecC2C(plan, (cufftComplex *)d_filter_signal, (cufftComplex *)d_filter_signal,     CUFFT_FORWARD);
ComplexPointwiseMul<<<blocksPerGrid, threadsPerBlock>>>(d_signal, d_filter_signal, NX);
cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_INVERSE);
cufftExecC2C(平面,(cufftComplex*)d_信号,(cufftComplex*)d_信号,CUFFT_向前);
cufftExecC2C(平面图,(cufftComplex*)d_滤波器信号,(cufftComplex*)d_滤波器信号,CUFFT_正向);
复点式MUL(d_信号,d_滤波器_信号,NX);
cufftExecC2C(平面,(cufftComplex*)d_信号,(cufftComplex*)d_信号,CUFFT_逆);
cuFFT同样执行 1024 点FFT,批量大小(batch)为 2。

使用比例因子 NX=1024 进行缩放后,数值仍然不正确。请告诉我该怎么做。

这是一个迟来的答案,可以将此问题从未回答列表中删除

您没有提供足够的信息来诊断您的问题,因为您没有说明cuFFT计划(plan)是如何设置的。您甚至没有说明Matlab和cuFFT的信号是形状完全相同(因此只差一个缩放因子),还是仅仅形状大致相同。不过,让我提出以下两点意见:

  • yp 向量具有 4000 个元素;与之相反,通过 fft(yp,1024),您是先将信号截断为 1024 个元素再执行FFT
  • cuFFT的逆变换不会按向量元素的数量进行缩放(归一化)
  • 为了方便(可能对其他用户有用),我在下面给出一个简单的FFT-IFFT示例,其中还包括使用CUDA Thrust库执行的缩放

    #include <cufft.h>
    #include <thrust/host_vector.h>
    #include <thrust/device_vector.h>
    
    /*********************/
    /* SCALE BY CONSTANT */
    /*********************/
    /*
     * Unary functor that divides both components of a float2 by a fixed
     * constant. It is passed to thrust::transform to normalise the output
     * of an (unnormalised) cuFFT inverse transform.
     *
     * The divisor is captured by value at construction time on the host and
     * travels with the functor when Thrust copies it to the device.
     */
    class Scale_by_constant
    {
        private:
            float c_;   // divisor applied to both the .x and .y components

        public:
            // c: the divisor. It is not checked against zero.
            Scale_by_constant(float c) : c_(c) {}

            // Returns a copy of `a` with both components divided by c_.
            // The argument is taken by const reference so the functor also
            // accepts const inputs and temporaries; `a` is never modified.
            __host__ __device__ float2 operator()(const float2 &a) const
            {
                float2 output;

                output.x = a.x / c_;
                output.y = a.y / c_;

                return output;
            }

    };
    
    /*
     * Round-trip demo: forward FFT followed by inverse FFT on a small complex
     * vector, then division by N with Thrust so the printed values match the
     * input again.
     *
     * Returns 0 on success and 1 if any cuFFT call reports an error.
     */
    int main(void){

        const int N=4;

        // --- Setting up input device vector (every element is 1 + 2i)
        thrust::device_vector<float2> d_vec(N,make_cuComplex(1.f,2.f));
        float2 *d_data = thrust::raw_pointer_cast(d_vec.data());

        cufftHandle plan;
        cufftResult status = cufftPlan1d(&plan, N, CUFFT_C2C, 1);
        if (status != CUFFT_SUCCESS) {
            fprintf(stderr, "cufftPlan1d failed with cufftResult %d\n", (int)status);
            return 1;
        }

        // --- Perform in-place direct Fourier transform
        status = cufftExecC2C(plan, d_data, d_data, CUFFT_FORWARD);

        // --- Perform in-place inverse Fourier transform (skipped if the forward one failed)
        if (status == CUFFT_SUCCESS)
            status = cufftExecC2C(plan, d_data, d_data, CUFFT_INVERSE);

        if (status != CUFFT_SUCCESS) {
            fprintf(stderr, "cufftExecC2C failed with cufftResult %d\n", (int)status);
            cufftDestroy(plan);
            return 1;
        }

        // --- Divide every element by N to undo the gain of the forward+inverse pair
        thrust::transform(d_vec.begin(), d_vec.end(), d_vec.begin(), Scale_by_constant((float)(N)));

        // --- Setting up output host vector (device -> host copy)
        thrust::host_vector<float2> h_vec(d_vec);

        // --- The plan is released only after the results have been copied back
        cufftDestroy(plan);

        for (int i=0; i<N; i++) printf("Element #%i; Real part = %f; Imaginary part: %f\n",i,h_vec[i].x,h_vec[i].y);

        // Keeps the console window open until a key is pressed
        getchar();

        return 0;
    }
    
    #包括
    #包括
    #包括
    /*********************/
    /*按常数缩放*/
    /*********************/
    类标度乘以常数
    {
    私人:
    浮点数;
    公众:
    按常数(浮点数c){c_u=c;}缩放;
    __主机\uuuuuuuuuu设备\uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu
    {
    浮动2输出;
    输出.x=a.x/c;
    输出y=a.y/c;
    返回输出;
    }
    };
    内部主(空){
    常数int N=4;
    //---设置输入设备向量
    推力:设备向量d向量(N,使复杂(1.f,2.f));
    卡夫坦德尔计划;
    袖口平面图(和平面图,N,袖口C2C,1);
    //---执行就地直接傅里叶变换
    cufftExecC2C(平面,推力::原始指针投射(d_vec.data()),推力::原始指针投射(d_vec.data()),CUFFT_向前);
    //---执行就地傅里叶逆变换
    cufftExecC2C(平面,推力::原始指针转换(d_vec.data()),推力::原始指针转换(d_vec.data()),CUFFT\u反转);
    推力::变换(d_vec.begin(),d_vec.end(),d_vec.begin(),按常量((float)(N))缩放);
    //---设置输出主机向量
    推力:主机向量h向量(d向量);
    
    对于(int i=0;i
    随着cuFFT回调功能的引入,只需将归一化操作定义为 __device__ 函数,cuFFT执行逆FFT后所需的归一化就可以直接嵌入 cufftExecC2C 调用中

    除了《cuFFT用户指南》,有关cuFFT回调功能,请参阅

    下面是通过cuFFT回调实现IFFT规范化的示例

        #include <stdio.h>
        #include <assert.h>
    
        #include "cuda_runtime.h"
        #include "device_launch_parameters.h"
    
        #include <cufft.h>
        #include <cufftXt.h>
    
        /********************/
        /* CUDA ERROR CHECK */
        /********************/
        #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
        /*
         * Reports a failed CUDA runtime call and optionally terminates.
         *
         * code  : status returned by the CUDA runtime call being checked
         * file  : source file of the call site (the gpuErrchk macro passes __FILE__)
         * line  : source line of the call site (the gpuErrchk macro passes __LINE__)
         * abort : when true (default) the process exits with `code` as exit status
         *
         * Does nothing when code == cudaSuccess.
         */
        inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
        {
            // `file` is const char* because __FILE__ is a string literal, which
            // must not be bound to a non-const char* in C++11 and later.
            if (code != cudaSuccess)
            {
                fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
                if (abort) exit(code);
            }
        }
    
        /*********************/
        /* CUFFT ERROR CHECK */
        /*********************/
        // See http://stackoverflow.com/questions/16267149/cufft-error-handling
        #ifdef _CUFFT_H_
        // Maps a cufftResult status code to the spelling of its enumerator.
        // Codes not listed below yield "<unknown>".
        static const char *_cudaGetErrorEnum(cufftResult error)
        {
            const char *label = "<unknown>";

            switch (error)
            {
                case CUFFT_SUCCESS:         label = "CUFFT_SUCCESS";         break;
                case CUFFT_INVALID_PLAN:    label = "CUFFT_INVALID_PLAN";    break;
                case CUFFT_ALLOC_FAILED:    label = "CUFFT_ALLOC_FAILED";    break;
                case CUFFT_INVALID_TYPE:    label = "CUFFT_INVALID_TYPE";    break;
                case CUFFT_INVALID_VALUE:   label = "CUFFT_INVALID_VALUE";   break;
                case CUFFT_INTERNAL_ERROR:  label = "CUFFT_INTERNAL_ERROR";  break;
                case CUFFT_EXEC_FAILED:     label = "CUFFT_EXEC_FAILED";     break;
                case CUFFT_SETUP_FAILED:    label = "CUFFT_SETUP_FAILED";    break;
                case CUFFT_INVALID_SIZE:    label = "CUFFT_INVALID_SIZE";    break;
                case CUFFT_UNALIGNED_DATA:  label = "CUFFT_UNALIGNED_DATA";  break;
                default:                                                     break;
            }

            return label;
        }
        #endif
    
        #define cufftSafeCall(err)      __cufftSafeCall(err, __FILE__, __LINE__)
        /*
         * Checks the status of a cuFFT call and terminates on failure.
         *
         * err  : status returned by the cuFFT call being checked
         * file : source file of the call site (the cufftSafeCall macro passes __FILE__)
         * line : source line of the call site (the cufftSafeCall macro passes __LINE__)
         *
         * On failure it prints the call site, the numeric code and its name to
         * stderr, resets the device and terminates the process. Does nothing
         * when err == CUFFT_SUCCESS.
         */
        inline void __cufftSafeCall(cufftResult err, const char *file, const int line)
        {
            if (CUFFT_SUCCESS != err) {
                // Report the caller's location (file/line parameters), not the
                // location of this helper. The format string has exactly one
                // conversion per argument.
                fprintf(stderr, "CUFFT error in file '%s', line %d\nerror %d: %s\nterminating!\n",
                        file, line, (int)err, _cudaGetErrorEnum(err));
                cudaDeviceReset();
                assert(0);
                // assert() compiles to nothing under NDEBUG; exit explicitly
                // so a failed cuFFT call never continues silently.
                exit((int)err);
            }
        }
    
        // cuFFT store callback: divides the complex value `element` by the float
        // that `callerInfo` points to and writes the result to dataOut[offset].
        // `sharedPtr` is not used.
        __device__ void IFFT_Scaling(void *dataOut, size_t offset, cufftComplex element, void *callerInfo, void *sharedPtr) {

            const float *divisor = static_cast<float *>(callerInfo);
            float2 *destination  = static_cast<float2 *>(dataOut);

            float2 scaled;
            scaled.x = cuCrealf(element) / divisor[0];
            scaled.y = cuCimagf(element) / divisor[0];

            destination[offset] = scaled;
        }
    
        __device__ cufftCallbackStoreC d_storeCallbackPtr = IFFT_Scaling;
    
        /********/
        /* MAIN */
        /********/
        /*
         * Demo of IFFT normalisation through a cuFFT store callback.
         *
         * Runs a forward C2C transform on a constant signal, prints it, attaches
         * the IFFT_Scaling store callback to the plan, runs the inverse transform
         * (whose output is divided by N inside the callback) and prints the result.
         *
         * Returns 0 on success, 1 if a host allocation fails. CUDA / cuFFT
         * failures are handled by gpuErrchk / cufftSafeCall.
         */
        int main() {

            const int N = 16;

            cufftHandle plan;

            // --- Host buffers
            float2 *h_input             = (float2*)malloc(N*sizeof(float2));
            float2 *h_output1           = (float2*)malloc(N*sizeof(float2));
            float2 *h_output2           = (float2*)malloc(N*sizeof(float2));
            if (h_input == NULL || h_output1 == NULL || h_output2 == NULL) {
                fprintf(stderr, "Host memory allocation failed\n");
                free(h_input);
                free(h_output1);
                free(h_output2);
                return 1;
            }

            // --- Device buffers
            float2 *d_input;            gpuErrchk(cudaMalloc((void**)&d_input, N*sizeof(float2)));
            float2 *d_output1;          gpuErrchk(cudaMalloc((void**)&d_output1, N*sizeof(float2)));
            float2 *d_output2;          gpuErrchk(cudaMalloc((void**)&d_output2, N*sizeof(float2)));

            // --- Callback parameter: the divisor equals the transform length, so
            //     it is derived from N instead of being repeated as a literal.
            const float h_scaling_factor = (float)N;
            float *d_scaling_factor;    gpuErrchk(cudaMalloc((void**)&d_scaling_factor, sizeof(float)));
            gpuErrchk(cudaMemcpy(d_scaling_factor, &h_scaling_factor, sizeof(float), cudaMemcpyHostToDevice));

            // --- Constant input signal 1 + 0i
            for (int i=0; i<N; i++) {
                h_input[i].x = 1.0f;
                h_input[i].y = 0.f;
            }

            gpuErrchk(cudaMemcpy(d_input, h_input, N*sizeof(float2), cudaMemcpyHostToDevice));

            cufftSafeCall(cufftPlan1d(&plan, N, CUFFT_C2C, 1));

            // --- Forward transform (no callback attached yet)
            cufftSafeCall(cufftExecC2C(plan, d_input, d_output1, CUFFT_FORWARD));
            gpuErrchk(cudaMemcpy(h_output1, d_output1, N*sizeof(float2), cudaMemcpyDeviceToHost));
            for (int i=0; i<N; i++) printf("Direct transform - %d - (%f, %f)\n", i, h_output1[i].x, h_output1[i].y);

            // --- Fetch the device-side function pointer and register it as the
            //     plan's complex store callback, with the divisor as caller info.
            // NOTE(review): cuFFT callbacks may need specific build/link options
            // depending on the toolkit version -- confirm against the cuFFT guide.
            cufftCallbackStoreC h_storeCallbackPtr;
            gpuErrchk(cudaMemcpyFromSymbol(&h_storeCallbackPtr, d_storeCallbackPtr, sizeof(h_storeCallbackPtr)));

            cufftSafeCall(cufftXtSetCallback(plan, (void **)&h_storeCallbackPtr, CUFFT_CB_ST_COMPLEX, (void **)&d_scaling_factor));

            // --- Inverse transform; normalisation happens inside the store callback
            cufftSafeCall(cufftExecC2C(plan, d_output1, d_output2, CUFFT_INVERSE));
            gpuErrchk(cudaMemcpy(h_output2, d_output2, N*sizeof(float2), cudaMemcpyDeviceToHost));
            for (int i=0; i<N; i++) printf("Inverse transform - %d - (%f, %f)\n", i, h_output2[i].x, h_output2[i].y);

            cufftSafeCall(cufftDestroy(plan));

            // --- Release device memory
            gpuErrchk(cudaFree(d_input));
            gpuErrchk(cudaFree(d_output1));
            gpuErrchk(cudaFree(d_output2));
            gpuErrchk(cudaFree(d_scaling_factor));

            // --- Release host memory
            free(h_input);
            free(h_output1);
            free(h_output2);

            return 0;
        }
    
    #包括
    #包括
    #包括“cuda_runtime.h”
    #包括“设备启动参数.h”
    #包括
    #包括
    /********************/
    /*CUDA错误检查*/
    /********************/
    #定义gpuerchk(ans){gpuAssert((ans),_文件_,_行__)}
    内联void gpuAssert(cudaError\u t代码,char*文件,int行,bool abort=true)
    {
    如果(代码!=cudaSuccess)
    {
    fprintf(标准,“GPUassert:%s%s%d\n”,cudaGetErrorString(代码)、文件、行);
    如果(中止)退出(代码);
    }
    }
    /*********************/
    /*袖口错误检查*/
    /*********************/
    //看http://stackoverflow.com/questions/16267149/cufft-error-handling
    #ifdef_CUFFT_H_
    静态常量字符*_cudaGetErrorEnum(袖套结果错误)
    {
    开关(错误)
    {
    成功案例:
    返回“CUFFT_SUCCESS”;
    案例卡夫图无效:
    返回“袖口计划无效”;
    案例CUFFT_ALLOC_失败:
    返回“CUFFT_ALLOC_失败”;
    箱子袖口类型无效:
    返回“袖口类型无效”;
    case CUFT_无效_值:
    返回“CUFFT\u无效值”;
    案例袖口内部错误:
    返回“CUFFT_内部错误”;
    案例CUFFT_EXEC_失败:
    返回“CUFFT_EXEC_失败”;
    案例袖口安装失败:
    返回“袖口安装失败”;
    箱子袖口尺寸无效:
    返回“袖口尺寸无效”;
    案例袖口未对齐数据:
    返回“袖口未对齐数据”;
    }
    返回“”;
    }
    #恩迪夫
    #定义cufftSafeCall(err)\ cufftSafeCall(err、\文件、\行)
    内联void\uuu cufftSafeCall(cufftResult err、const char*文件、const int行)
    {
    如果(成功!=错误){
    fprintf(stderr,“文件“%s”中的袖口错误,第%d行\n%s\n错误%d:%s\n正在删除!\n“,\uuuuu文件,\uuuu行\uuuu,错误\
    _cudaGetErrorEnum(err))\
    cudaDeviceReset();断言(0)\
    }
    }
    __设备\uuuuu无效IFFT\u缩放(无效*数据输出、大小\uu t偏移、袖口复杂元素、无效*调用者信息、无效*共享DPTR){
    float*比例因子=(float*)调用者信息;
    浮动2输出;
    输出x=cucreaf(元素);
    输出y=cuCimagf(元素);
    output.x=output.x/scaling\u
    
    #include <stdio.h>
    #include <assert.h>
    
    #include "cuda_runtime.h"
    #include "device_launch_parameters.h"
    
    #include <cufft.h>
    #include <cufftXt.h>
    
    #include <thrust/device_vector.h>
    
    #include "Utilities.cuh"
    #include "TimingGPU.cuh"
    
    //#define DISPLAY
    
    /*******************************/
    /* THRUST FUNCTOR IFFT SCALING */
    /*******************************/
    /*
     * Unary functor that divides both components of a float2 by a fixed
     * constant. It is passed to thrust::transform to normalise the output
     * of the inverse FFT in the non-callback code path.
     */
    class Scale_by_constant
    {
        private:
            float c_;   // divisor applied to both the .x and .y components

        public:
            // c: the divisor. It is not checked against zero.
            Scale_by_constant(float c) : c_(c) {}

            // Returns a copy of `a` with both components divided by c_.
            // The argument is taken by const reference so the functor also
            // accepts const inputs and temporaries; `a` is never modified.
            __host__ __device__ float2 operator()(const float2 &a) const
            {
                float2 output;

                output.x = a.x / c_;
                output.y = a.y / c_;

                return output;
            }

    };
    
    /**********************************/
    /* IFFT SCALING CALLBACK FUNCTION */
    /**********************************/
    // cuFFT store callback: writes `element`, with its real and imaginary parts
    // divided by the float stored at `callerInfo`, to position `offset` of
    // `dataOut`. `sharedPtr` is not used.
    __device__ void IFFT_Scaling(void *dataOut, size_t offset, cufftComplex element, void *callerInfo, void *sharedPtr) {

        const float *factor = static_cast<float *>(callerInfo);

        float2 result;
        result.x = cuCrealf(element) / factor[0];
        result.y = cuCimagf(element) / factor[0];

        static_cast<float2 *>(dataOut)[offset] = result;
    }
    
    __device__ cufftCallbackStoreC d_storeCallbackPtr = IFFT_Scaling;
    
    /********/
    /* MAIN */
    /********/
    /*
     * Times two ways of normalising an inverse cuFFT transform of length N:
     *   1. inverse FFT followed by a separate thrust::transform pass, and
     *   2. inverse FFT with the division done inside a cuFFT store callback.
     *
     * Define DISPLAY to copy the results back and print every element.
     *
     * Returns 0 on success, 1 if a host allocation fails. CUDA / cuFFT
     * failures are handled by gpuErrchk / cufftSafeCall.
     */
    int main() {

        const int N = 100000000;

        cufftHandle plan;           cufftSafeCall(cufftPlan1d(&plan, N, CUFFT_C2C, 1));

        TimingGPU timerGPU;

        // --- Host buffers. Each one is N*sizeof(float2) bytes, so the
        //     allocations can realistically fail and must be checked.
        float2 *h_input             = (float2*)malloc(N*sizeof(float2));
        float2 *h_output1           = (float2*)malloc(N*sizeof(float2));
        float2 *h_output2           = (float2*)malloc(N*sizeof(float2));
        if (h_input == NULL || h_output1 == NULL || h_output2 == NULL) {
            fprintf(stderr, "Host memory allocation failed\n");
            free(h_input);
            free(h_output1);
            free(h_output2);
            cufftSafeCall(cufftDestroy(plan));
            return 1;
        }

        // --- Device buffers
        float2 *d_input;            gpuErrchk(cudaMalloc((void**)&d_input, N*sizeof(float2)));
        float2 *d_output1;          gpuErrchk(cudaMalloc((void**)&d_output1, N*sizeof(float2)));
        float2 *d_output2;          gpuErrchk(cudaMalloc((void**)&d_output2, N*sizeof(float2)));

        // --- Callback function parameters. The divisor must be the transform
        //     length N, the same value the Thrust path below divides by; a
        //     hard-coded 16.0f would make the callback path normalise wrongly.
        const float h_scaling_factor = (float)N;
        float *d_scaling_factor;    gpuErrchk(cudaMalloc((void**)&d_scaling_factor, sizeof(float)));
        gpuErrchk(cudaMemcpy(d_scaling_factor, &h_scaling_factor, sizeof(float), cudaMemcpyHostToDevice));

        // --- Initializing the input on the host and moving it to the device
        for (int i = 0; i < N; i++) {
            h_input[i].x = 1.0f;
            h_input[i].y = 0.f;
        }
        gpuErrchk(cudaMemcpy(d_input, h_input, N * sizeof(float2), cudaMemcpyHostToDevice));

        // --- Execute direct FFT on the device and move the results to the host
        cufftSafeCall(cufftExecC2C(plan, d_input, d_output1, CUFFT_FORWARD));
    #ifdef DISPLAY
        gpuErrchk(cudaMemcpy(h_output1, d_output1, N * sizeof(float2), cudaMemcpyDeviceToHost));
        for (int i=0; i<N; i++) printf("Direct transform - %d - (%f, %f)\n", i, h_output1[i].x, h_output1[i].y);
    #endif

        // --- Execute inverse FFT with subsequent scaling on the device and move the results to the host
        timerGPU.StartCounter();
        cufftSafeCall(cufftExecC2C(plan, d_output1, d_output2, CUFFT_INVERSE));
        thrust::transform(thrust::device_pointer_cast(d_output2), thrust::device_pointer_cast(d_output2) + N, thrust::device_pointer_cast(d_output2), Scale_by_constant((float)(N)));
    #ifdef DISPLAY
        gpuErrchk(cudaMemcpy(h_output2, d_output2, N * sizeof(float2), cudaMemcpyDeviceToHost));
        for (int i=0; i<N; i++) printf("Inverse transform - %d - (%f, %f)\n", i, h_output2[i].x, h_output2[i].y);
    #endif
        printf("Timing NO callback %f\n", timerGPU.GetCounter());

        // --- Setup store callback (kept outside the timed region)
    //    timerGPU.StartCounter();
        cufftCallbackStoreC h_storeCallbackPtr;
        gpuErrchk(cudaMemcpyFromSymbol(&h_storeCallbackPtr, d_storeCallbackPtr, sizeof(h_storeCallbackPtr)));
        cufftSafeCall(cufftXtSetCallback(plan, (void **)&h_storeCallbackPtr, CUFFT_CB_ST_COMPLEX, (void **)&d_scaling_factor));

        // --- Execute inverse callback FFT on the device and move the results to the host
        timerGPU.StartCounter();
        cufftSafeCall(cufftExecC2C(plan, d_output1, d_output2, CUFFT_INVERSE));
    #ifdef DISPLAY
        gpuErrchk(cudaMemcpy(h_output2, d_output2, N * sizeof(float2), cudaMemcpyDeviceToHost));
        for (int i=0; i<N; i++) printf("Inverse transform - %d - (%f, %f)\n", i, h_output2[i].x, h_output2[i].y);
    #endif
        printf("Timing callback %f\n", timerGPU.GetCounter());

        cufftSafeCall(cufftDestroy(plan));

        // --- Release device memory
        gpuErrchk(cudaFree(d_input));
        gpuErrchk(cudaFree(d_output1));
        gpuErrchk(cudaFree(d_output2));
        gpuErrchk(cudaFree(d_scaling_factor));

        // --- Release host memory
        free(h_input);
        free(h_output1);
        free(h_output2);

        return 0;
    }
    
    Non-callback 69.029762 ms
    Callback     65.868607 ms