Parallel processing 使用openmp并行化streamcluster_Parallel Processing_Openmp_Pragma

Parallel processing 使用openmp并行化streamcluster

parallel-processing

Parallel processing 使用openmp并行化streamcluster,parallel-processing,openmp,pragma,Parallel Processing,Openmp,Pragma,我正在尝试并行化一个名为streamcluster的程序。更具体地说，根据我使用的Scalasca工具，名为pgain的函数花费了程序的大部分时间，因此这是我应该并行化的函数。在这里，您可以看到函数和我在并行化过程中所做的努力。问题是，我所实现的唯一一件事，就是让程序花费更多的时间来执行 streamcluster中的原始pgain功能： double pgain ( long x, Points *points, double z, long int *numcenters ) { int i

我正在尝试并行化一个名为streamcluster的程序。更具体地说，根据我使用的Scalasca工具，名为pgain的函数花费了程序的大部分时间，因此这是我应该并行化的函数。在这里，您可以看到函数和我在并行化过程中所做的努力。问题是，我所实现的唯一一件事，就是让程序花费更多的时间来执行

streamcluster中的原始pgain功能：

double pgain ( long x, Points *points, double z, long int *numcenters )
{
int i;
int number_of_centers_to_close = 0;

static double *work_mem;
static double gl_cost_of_opening_x;
static int gl_number_of_centers_to_close;

int stride = *numcenters + 2;
//make stride a multiple of CACHE_LINE
int cl = CACHE_LINE/sizeof ( double );
if ( stride % cl != 0 ) {
    stride = cl * ( stride / cl + 1 );
}
int K = stride - 2 ; // K==*numcenters

//my own cost of opening x
double cost_of_opening_x = 0;

work_mem = ( double* ) malloc ( 2 * stride * sizeof ( double ) );
gl_cost_of_opening_x = 0;
gl_number_of_centers_to_close = 0;

/*
 * For each center, we have a *lower* field that indicates
 * how much we will save by closing the center.
 */
int count = 0;
for ( int i = 0; i < points->num; i++ ) {
    if ( is_center[i] ) {
        center_table[i] = count++;
    }
}
work_mem[0] = 0;

//now we finish building the table. clear the working memory.
memset ( switch_membership, 0, points->num * sizeof ( bool ) );
memset ( work_mem, 0, stride*sizeof ( double ) );
memset ( work_mem+stride,0,stride*sizeof ( double ) );

//my *lower* fields
double* lower = &work_mem[0];
//global *lower* fields
double* gl_lower = &work_mem[stride];

for ( i = 0; i < points->num; i++ ) {
    float x_cost = dist ( points->p[i], points->p[x], points->dim ) * points->p[i].weight;
    float current_cost = points->p[i].cost;

    if ( x_cost < current_cost ) {

        // point i would save cost just by switching to x
        // (note that i cannot be a median,
        // or else dist(p[i], p[x]) would be 0)

        switch_membership[i] = 1;
        cost_of_opening_x += x_cost - current_cost;

    } else {

        // cost of assigning i to x is at least current assignment cost of i

        // consider the savings that i's **current** median would realize
        // if we reassigned that median and all its members to x;
        // note we've already accounted for the fact that the median
        // would save z by closing; now we have to subtract from the savings
        // the extra cost of reassigning that median and its members
        int assign = points->p[i].assign;
        lower[center_table[assign]] += current_cost - x_cost;
    }
}

// at this time, we can calculate the cost of opening a center
// at x; if it is negative, we'll go through with opening it

for ( int i = 0; i < points->num; i++ ) {
    if ( is_center[i] ) {
        double low = z + work_mem[center_table[i]];
        gl_lower[center_table[i]] = low;
        if ( low > 0 ) {
            // i is a median, and
            // if we were to open x (which we still may not) we'd close i

            // note, we'll ignore the following quantity unless we do open x
            ++number_of_centers_to_close;
            cost_of_opening_x -= low;
        }
    }
}
//use the rest of working memory to store the following
work_mem[K] = number_of_centers_to_close;
work_mem[K+1] = cost_of_opening_x;

gl_number_of_centers_to_close = ( int ) work_mem[K];
gl_cost_of_opening_x = z + work_mem[K+1];

// Now, check whether opening x would save cost; if so, do it, and
// otherwise do nothing

if ( gl_cost_of_opening_x < 0 ) {
    //  we'd save money by opening x; we'll do it
    for ( int i = 0; i < points->num; i++ ) {
        bool close_center = gl_lower[center_table[points->p[i].assign]] > 0 ;
        if ( switch_membership[i] || close_center ) {
            // Either i's median (which may be i itself) is closing,
            // or i is closer to x than to its current median
            points->p[i].cost = points->p[i].weight * dist ( points->p[i], points->p[x], points->dim );
            points->p[i].assign = x;
        }
    }
    for ( int i = 0; i < points->num; i++ ) {
        if ( is_center[i] && gl_lower[center_table[i]] > 0 ) {
            is_center[i] = false;
        }
    }
    if ( x >= 0 && x < points->num ) {
        is_center[x] = true;
    }

    *numcenters = *numcenters + 1 - gl_number_of_centers_to_close;
} else {
    gl_cost_of_opening_x = 0;  // the value we'll return
}

free ( work_mem );

return -gl_cost_of_opening_x;
}

double pgain（长x，点*Points，双z，长int*numcenters）
{
int i；
int number of_centers_to_close=0；
静态双*工作记忆；
期初的静态双总账成本；
要关闭的中心的静态int gl数量；
整数步长=*numcenters+2；
//使步幅为缓存线的倍数
int cl=缓存线/sizeof（双精度）；
如果（步长%cl！=0）{
步幅=cl*（步幅/cl+1）；
}
int K=stride-2；//K==*numcenters
//我自己开x的费用
期初双倍成本x=0；
work_mem=（双*）malloc（2*步幅*大小（双））；
期初总账成本x=0；
gl_中心数_至_关闭=0；
/*
*对于每个中心，我们有一个*下*字段，表示
*关闭中心可以节省多少钱。
*/
整数计数=0；
对于（int i=0；inum；i++）{
if（is_center[i]）{
中心_表[i]=计数++；
}
}
work_mem[0]=0；
//现在我们完成构建表。清除工作内存。
memset（开关_成员，0，点->num*sizeof（bool））；
memset（work_mem，0，步长*sizeof（double））；
memset（工作记忆+步幅，0，步幅*sizeof（双））；
//我的*下*字段
double*lower=&work_mem[0]；
//全局*下*字段
双*gl_lower=&work_mem[stride]；
对于（i=0；inum；i++）{
浮动x_成本=距离（点->p[i]，点->p[x]，点->尺寸）*点->p[i]。重量；
浮动当前成本=点数->p[i]。成本；
如果（x_成本<当前_成本）{
//只要切换到x，我就可以节省成本
//（请注意，我不能是中位数，
//否则dist（p[i]，p[x]）将为0）
开关_成员[i]=1；
期初成本=期初成本-当期成本；
}否则{
//将i分配给x的成本至少是i的当前分配成本
/考虑我目前的**会实现的储蓄
//如果我们将该中位数及其所有成员重新分配给x；
//注意，我们已经考虑了中位数
//会通过关闭来节省z；现在我们必须从节省中减去
//重新分配中位数及其成员的额外成本
int assign=点->p[i]。分配；
下[中心_表[分配]+=当前_成本-x_成本；
}
}
//此时，我们可以计算开设中心的成本
//在x；如果是负数，我们将继续打开它
对于（int i=0；inum；i++）{
if（is_center[i]）{
双低=z+工作记忆[中心表格[i]；
gl_下[中心_表[i]]=低；
如果（低>0）{
//我是中位数，而且
//如果我们打开x（现在可能还没有），我们会关闭i
//注意，我们将忽略以下数量，除非我们打开x
++要关闭的中心数量；
开业成本=低；
}
}
}
//使用剩余的工作内存存储以下内容
work_mem[K]=要关闭的_中心的数量；
工时成本[K+1]=期初成本；
gl_中心数_至_关闭=（int）工时_内存[K]；
总账期初成本=z+工时成本[K+1]；
//现在，检查打开x是否可以节省成本；如果可以，请这样做，然后
//否则什么也不做
if（总帐成本，期初成本，x<0）{
//开x可以省钱，我们会做的
对于（int i=0；inum；i++）{
bool close_center=gl_lower[中心表格[点->p[i].分配]]>0；
如果（切换_成员身份[i]| |关闭_中心）{
//要么我的中位数（可能是我自己）正在接近，
//或者i更接近x，而不是它当前的中值
点->p[i]。成本=点->p[i]。重量*距离（点->p[i]，点->p[x]，点->尺寸）；
点->p[i]。分配=x；
}
}
对于（int i=0；inum；i++）{
如果（是否为中心[i]&&gl较低[i]]>0）{
is_center[i]=假；
}
}
如果（x>=0&&xnum）{
是_center[x]=真；
}
*numcenters=*numcenters+1-总帐\u中心数\u到\u关闭；
}否则{
gl\u期初成本\u\u x=0；//我们将返回的值
}
免费（工作日）；
退货-期初总账成本；
}

这就是我所做的并行化：

double pgain ( long x, Points *points, double z, long int *numcenters )
{
int i;
int number_of_centers_to_close = 0;

static double *work_mem;
static double gl_cost_of_opening_x;
static int gl_number_of_centers_to_close;

int stride = *numcenters + 2;
//make stride a multiple of CACHE_LINE
int cl = CACHE_LINE/sizeof ( double );
if ( stride % cl != 0 ) {
    stride = cl * ( stride / cl + 1 );
}
int K = stride - 2 ; // K==*numcenters

//my own cost of opening x
double cost_of_opening_x = 0;

work_mem = ( double* ) malloc ( 2 * stride * sizeof ( double ) );
gl_cost_of_opening_x = 0;
gl_number_of_centers_to_close = 0;

/*
 * For each center, we have a *lower* field that indicates
 * how much we will save by closing the center.
 */
int count = 0;
for ( int i = 0; i < points->num; i++ ) {
    if ( is_center[i] ) {
        center_table[i] = count++;
    }
}
work_mem[0] = 0;

//now we finish building the table. clear the working memory.
memset ( switch_membership, 0, points->num * sizeof ( bool ) );
memset ( work_mem, 0, stride*sizeof ( double ) );
memset ( work_mem+stride,0,stride*sizeof ( double ) );

//my *lower* fields
double* lower = &work_mem[0];
//global *lower* fields
double* gl_lower = &work_mem[stride];
float x_cost=0.0;
float current_cost=0.0;

#pragma omp parallel for private(current_cost,x_cost) 
shared(cost_of_opening_x)  


for ( i = 0; i < points->num; i++ ) {

    x_cost = dist ( points->p[i], points->p[x], points->dim ) * points->p[i].weight;
    current_cost = points->p[i].cost;

    if ( x_cost < current_cost ) {

        // point i would save cost just by switching to             // x
        // (note that i cannot be a median,
        // or else dist(p[i], p[x]) would be 0)

        switch_membership[i] = 1;

        cost_of_opening_x += x_cost - current_cost;
        {
        #pragma omp flush(cost_of_opening_x)
        }
    } else {

        // cost of assigning i to x is at least current assignment cost of i

        // consider the savings that i's **current** median would realize
        // if we reassigned that median and all its members to x;
        // note we've already accounted for the fact that the median
        // would save z by closing; now we have to subtract from the savings
        // the extra cost of reassigning that median and its members
        int assign = points->p[i].assign;

        lower[center_table[assign]] += current_cost - x_cost;
        {
        #pragma omp flush(lower) 
        }
    }
#pragma omp barrier
{
#pragma omp flush(lower,cost_of_opening_x)
}   
}

// at this time, we can calculate the cost of opening a center
// at x; if it is negative, we'll go through with opening it
for ( int i = 0; i < points->num; i++ ) {
    if ( is_center[i] ) {
        double low = z + work_mem[center_table[i]];
        gl_lower[center_table[i]] = low;
        if ( low > 0 ) {
            // i is a median, and
            // if we were to open x (which we still may not) we'd close i

            // note, we'll ignore the following quantity unless we do open x
            ++number_of_centers_to_close;
            cost_of_opening_x -= low;
        }
    }
}
//use the rest of working memory to store the following
work_mem[K] = number_of_centers_to_close;
work_mem[K+1] = cost_of_opening_x;

gl_number_of_centers_to_close = ( int ) work_mem[K];
gl_cost_of_opening_x = z + work_mem[K+1];

// Now, check whether opening x would save cost; if so, do it, and
// otherwise do nothing

if ( gl_cost_of_opening_x < 0 ) {
    //  we'd save money by opening x; we'll do it
    #pragma omp parallel for
    for ( int i = 0; i < points->num; i++ ) {
        bool close_center = gl_lower[center_table[points->p[i].assign]] > 0 
;
        if ( switch_membership[i] || close_center ) {
            // Either i's median (which may be i itself) is closing,
            // or i is closer to x than to its current median
            points->p[i].cost = points->p[i].weight * dist ( points->p[i], 
points->p[x], points->dim );
            points->p[i].assign = x;
        }
    }
    for ( int i = 0; i < points->num; i++ ) {
        if ( is_center[i] && gl_lower[center_table[i]] > 0 ) {
            is_center[i] = false;
        }
    }
    if ( x >= 0 && x < points->num ) {
        is_center[x] = true;
    }

    *numcenters = *numcenters + 1 - gl_number_of_centers_to_close;
} else {
    gl_cost_of_opening_x = 0;  // the value we'll return
}

free ( work_mem );

return -gl_cost_of_opening_x;
}

double pgain（长x，点*Points，双z，长int*numcenters）
{
int i；
int number of_centers_to_close=0；
静态双*工作记忆；
期初的静态双总账成本；
要关闭的中心的静态int gl数量；
整数步长=*numcenters+2；
//使步幅为缓存线的倍数
int cl=缓存线/sizeof（双精度）；
如果（步长%cl！=0）{
步幅=cl*（步幅/cl+1）；
}
int K=stride-2；//K==*numcenters
//我自己开x的费用
期初双倍成本x=0；
work_mem=（双*）malloc（2*步幅*大小（双））；
期初总账成本x=0；
gl_中心数_至_关闭=0；
/*
*对于每个中心，我们有一个*下*字段，表示
*关闭中心可以节省多少钱。
*/
整数计数=0；
对于（int i=0；inum；i++）{
if（is_center[i]）{
中心_表[i]=计数++；
}
}
work_mem[0]=0；
//现在我们完成构建表。清除工作内存。
memset（开关_成员，0，点->num*sizeof（bool））；
memset（work_mem，0，步长*sizeof（double））；
memset（工作记忆+步幅，0，步幅*sizeof（双））；
//我的*下*字段
double*lower=&work_mem[0]；
//全局*下*字段
双*gl_lower=&work_mem[stride]；
浮动x_成本=0.0；
浮动电流成本=0.0；
#pragma omp并行专用（当前成本、x成本）
分摊（费用）_
double pgain ( long x, Points *points, double z, long int *numcenters )
{
    int i;
    int number_of_centers_to_close = 0;

    static double *work_mem;
    static double gl_cost_of_opening_x;
    static int gl_number_of_centers_to_close;

    int stride = *numcenters + 2;
    //make stride a multiple of CACHE_LINE
    int cl = CACHE_LINE/sizeof ( double );
    if ( stride % cl != 0 ) {
        stride = cl * ( stride / cl + 1 );
    }
    int K = stride - 2 ; // K==*numcenters

    //my own cost of opening x
    double cost_of_opening_x = 0;
    work_mem = ( double* ) malloc ( 2 * stride * sizeof ( double ) );
    gl_cost_of_opening_x = 0;
    gl_number_of_centers_to_close = 0;

    int count = 0;
    //my *lower* fields
    double* lower;
    //global *lower* fields
    double* gl_lower;

    #pragma omp parallel
    {
        /*
         * For each center, we have a *lower* field that indicates
         * how much we will save by closing the center.
         */

        int i;
        #pragma omp for private(i)
        for ( i = 0; i < points->num; i++ ) {
            if ( is_center[i] ) {
                #pragma omp critical
                center_table[i] = count++;
            }
        }

        #pragma omp single
        work_mem[0] = 0;

        #pragma omp sections
        {
            //now we finish building the table. clear the working memory.
            #pragma omp section
            memset ( switch_membership, 0, points->num * sizeof ( bool ) );

            #pragma omp section
            memset ( work_mem, 0, stride*sizeof ( double ) );

            #pragma omp section
            memset ( work_mem+stride,0,stride*sizeof ( double ) );
        }

        #pragma omp single
        {
            lower = &work_mem[0];
            gl_lower = &work_mem[stride];
        }

        float x_cost, current_cost;

        #pragma omp for private(i, x_cost, current_cost)
        for ( i = 0; i < points->num; i++ ) {

            x_cost = dist ( points->p[i], points->p[x], points->dim ) * points->p[i].weight;
            current_cost = points->p[i].cost;

            if ( x_cost < current_cost ) {

                // point i would save cost just by switching to x
                // (note that i cannot be a median,
                // or else dist(p[i], p[x]) would be 0)

                switch_membership[i] = 1;

                #pragma omp critical
                cost_of_opening_x += x_cost - current_cost;

            } else {

                // cost of assigning i to x is at least current assignment cost of i

                // consider the savings that i's **current** median would realize
                // if we reassigned that median and all its members to x;
                // note we've already accounted for the fact that the median
                // would save z by closing; now we have to subtract from the savings
                // the extra cost of reassigning that median and its members
                int assign = points->p[i].assign;

                #pragma omp critical
                lower[center_table[assign]] += current_cost - x_cost;
            }
        }

        // at this time, we can calculate the cost of opening a center
        // at x; if it is negative, we'll go through with opening it

        double low;

        #pragma omp for private(i, low)
        for ( int i = 0; i < points->num; i++ ) {
            if ( is_center[i] ) {
                low = z + work_mem[center_table[i]];
                #pragma omp critical
                gl_lower[center_table[i]] = low;
                if ( low > 0 ) {
                    // i is a median, and
                    // if we were to open x (which we still may not) we'd close i

                    // note, we'll ignore the following quantity unless we do open x
                    #pragma omp atomic
                    ++number_of_centers_to_close;
                    #pragma omp critical
                    cost_of_opening_x -= low;
                }
            }
        }

        #pragma omp sections
        {
            //use the rest of working memory to store the following
            #pragma omp section
            work_mem[K] = number_of_centers_to_close;

            #pragma omp section
            work_mem[K+1] = cost_of_opening_x;

            #pragma omp section
            gl_number_of_centers_to_close = ( int ) work_mem[K];

            #pragma omp section
            gl_cost_of_opening_x = z + work_mem[K+1];
        }

        // Now, check whether opening x would save cost; if so, do it, and
        // otherwise do nothing
        bool close_center;

        if ( gl_cost_of_opening_x < 0 ) {
            //  we'd save money by opening x; we'll do it
            #pragma omp for private(i)
            for ( i = 0; i < points->num; i++ ) {
                close_center = gl_lower[center_table[points->p[i].assign]] > 0 ;
                if ( switch_membership[i] || close_center ) {
                    // Either i's median (which may be i itself) is closing,
                    // or i is closer to x than to its current median
                    points->p[i].cost = points->p[i].weight * dist ( points->p[i], points->p[x], points->dim );
                    points->p[i].assign = x;
                }
            }
            #pragma omp for private(i)
            for ( i = 0; i < points->num; i++ ) {
                if ( is_center[i] && gl_lower[center_table[i]] > 0 ) {
                    is_center[i] = false;
                }
            }
            if ( x >= 0 && x < points->num ) {
                is_center[x] = true;
            }

            #pragma omp single
            *numcenters = *numcenters + 1 - gl_number_of_centers_to_close;
        } else {
            #pragma omp single
            gl_cost_of_opening_x = 0;  // the value we'll return
        }

        #pragma omp single
        free ( work_mem );
    }

    return -gl_cost_of_opening_x;
}