Recursion OpenMP FFT乘法加速

Recursion OpenMP FFT乘法加速,recursion,fft,openmp,Recursion,Fft,Openmp,我有乘法多项式的递归FFT算法,我需要用openmp并行化它。经过一番研究和尝试,我终于明白了这一点 Complex * multiply(Complex *p1, Complex *p2) { #pragma omp parallel { //evaluate p1 #pragma omp single nowait pFFT(n,p1,1); #pragma omp single nowait pFFT(n,p2,1); } //...multiply part etc } void

我有一个用于多项式乘法的递归FFT算法,需要用OpenMP将其并行化。经过一番研究和尝试,我写出了下面的代码:

// Multiplies two polynomials given as coefficient arrays p1 and p2.
// Only the evaluation step (one pFFT call per input) is visible in this
// excerpt; the point-wise multiplication and the rest are elided below.
// NOTE(review): `n` is not declared in this function - presumably the
// transform length defined elsewhere; confirm.
// NOTE(review): no return statement is visible although the function returns
// Complex * - presumably part of the elided section.
Complex * multiply(Complex *p1, Complex *p2)
{
// Open a thread team; the tasks created inside pFFT are executed by it.
#pragma omp parallel
{
//evaluate p1
// `single` lets exactly one thread start this transform; `nowait` drops the
// barrier at the end of the construct so another thread can pick up the
// second `single` below while this one is still running.
#pragma omp single nowait
pFFT(n,p1,1);

// evaluate p2, started by one thread of the team
#pragma omp single nowait
pFFT(n,p2,1);
}
// The implicit barrier closing the parallel region is the point where both
// transforms are finished.

//...multiply part etc

}

void pFFT(int deg, Complex *pol,int sign)
{
if( deg == 1)
    return;

//divide polynom into two parts with even and odd coeficients
Complex *even = new Complex [deg/2];
Complex *odd = new Complex [deg/2];


for(int i = 0;i<deg/2;i++)
{
    even[i] = pol[2*i];
    odd[i]  = pol[2*i+1];
}


#pragma omp task
pFFT(deg/2,even,sign);
#pragma omp task
pFFT(deg/2,odd,sign);
#pragma omp taskwait
//wn = n-th root of unity
int x = lg2(deg);
Complex wn;
wn.re = pcos[x];
wn.im = sign*psin[x];
Complex w;
w.re = 1;
w.im = 0;
Complex *ret = pol;

Complex product;
if(deg==2)
{
        product = mul(odd,&w);
        ret[0].re = even[0].re+product.re;
        ret[0].im = even[0].im+product.im;
        ret[1].re = even[0].re-product.re;
        ret[1].im = even[0].im-product.im;
}
else
    for(int i = 0;i<deg/2-1;i+=2)
    {
        product = mul(odd+i,&w);
        ret[i].re = even[i].re+product.re;
        ret[i].im = even[i].im+product.im;
        ret[i+deg/2].re = even[i].re-product.re;
        ret[i+deg/2].im = even[i].im-product.im;
        w = mul(&w,&wn);
        product = mul(odd+i+1,&w);
        ret[i+1].re = even[i+1].re+product.re;
        ret[i+1].im = even[i+1].im+product.im;
        ret[i+1+deg/2].re = even[i+1].re-product.re;
        ret[i+1+deg/2].im = even[i+1].im-product.im;
        w = mul(&w,&wn);
    }
delete[] even;
delete[] odd;
}
复数*乘法(复数*p1,复数*p2)
{
#pragma-omp并行
{
//评估p1
#pragma-omp-single-nowait
pFFT(n,p1,1);
#pragma-omp-single-nowait
pFFT(n,p2,1);
}
//…乘法部分等
}
无效pFFT(整数度、复数*pol、整数符号)
{
如果(度==1)
返回;
//用奇偶系数将多项式分成两部分
络合物*偶数=新络合物[deg/2];
复数*奇数=新复数[deg/2];

对于(int i=0;i…(此处代码被截断)

您应该更早地停止并行化:尝试在子问题规模为16、64或256时就不再创建任务。一直递归到规模2才停止会产生过多的小任务,带来相对巨大的开销。