Parallel processing 使用openmp并行化streamcluster

Parallel processing 使用openmp并行化streamcluster,parallel-processing,openmp,pragma,Parallel Processing,Openmp,Pragma,我正在尝试并行化一个名为streamcluster的程序。更具体地说,根据我使用的Scalasca工具,名为pgain的函数花费了程序的大部分时间,因此这是我应该并行化的函数。在这里,您可以看到函数和我在并行化过程中所做的努力。问题是,我所实现的唯一一件事,就是让程序花费更多的时间来执行 streamcluster中的原始pgain功能: double pgain ( long x, Points *points, double z, long int *numcenters ) { int i

我正在尝试并行化一个名为streamcluster的程序。更具体地说,根据我使用的Scalasca工具,名为pgain的函数花费了程序的大部分时间,因此这是我应该并行化的函数。在这里,您可以看到函数和我在并行化过程中所做的努力。问题是,我所实现的唯一一件事,就是让程序花费更多的时间来执行

streamcluster中的原始pgain功能:

double pgain ( long x, Points *points, double z, long int *numcenters )
{
int i;
int number_of_centers_to_close = 0;

static double *work_mem;
static double gl_cost_of_opening_x;
static int gl_number_of_centers_to_close;

int stride = *numcenters + 2;
//make stride a multiple of CACHE_LINE
int cl = CACHE_LINE/sizeof ( double );
if ( stride % cl != 0 ) {
    stride = cl * ( stride / cl + 1 );
}
int K = stride - 2 ; // K==*numcenters

//my own cost of opening x
double cost_of_opening_x = 0;

work_mem = ( double* ) malloc ( 2 * stride * sizeof ( double ) );
gl_cost_of_opening_x = 0;
gl_number_of_centers_to_close = 0;

/*
 * For each center, we have a *lower* field that indicates
 * how much we will save by closing the center.
 */
int count = 0;
for ( int i = 0; i < points->num; i++ ) {
    if ( is_center[i] ) {
        center_table[i] = count++;
    }
}
work_mem[0] = 0;

//now we finish building the table. clear the working memory.
memset ( switch_membership, 0, points->num * sizeof ( bool ) );
memset ( work_mem, 0, stride*sizeof ( double ) );
memset ( work_mem+stride,0,stride*sizeof ( double ) );

//my *lower* fields
double* lower = &work_mem[0];
//global *lower* fields
double* gl_lower = &work_mem[stride];

for ( i = 0; i < points->num; i++ ) {
    float x_cost = dist ( points->p[i], points->p[x], points->dim ) * points->p[i].weight;
    float current_cost = points->p[i].cost;

    if ( x_cost < current_cost ) {

        // point i would save cost just by switching to x
        // (note that i cannot be a median,
        // or else dist(p[i], p[x]) would be 0)

        switch_membership[i] = 1;
        cost_of_opening_x += x_cost - current_cost;

    } else {

        // cost of assigning i to x is at least current assignment cost of i

        // consider the savings that i's **current** median would realize
        // if we reassigned that median and all its members to x;
        // note we've already accounted for the fact that the median
        // would save z by closing; now we have to subtract from the savings
        // the extra cost of reassigning that median and its members
        int assign = points->p[i].assign;
        lower[center_table[assign]] += current_cost - x_cost;
    }
}

// at this time, we can calculate the cost of opening a center
// at x; if it is negative, we'll go through with opening it

for ( int i = 0; i < points->num; i++ ) {
    if ( is_center[i] ) {
        double low = z + work_mem[center_table[i]];
        gl_lower[center_table[i]] = low;
        if ( low > 0 ) {
            // i is a median, and
            // if we were to open x (which we still may not) we'd close i

            // note, we'll ignore the following quantity unless we do open x
            ++number_of_centers_to_close;
            cost_of_opening_x -= low;
        }
    }
}
//use the rest of working memory to store the following
work_mem[K] = number_of_centers_to_close;
work_mem[K+1] = cost_of_opening_x;

gl_number_of_centers_to_close = ( int ) work_mem[K];
gl_cost_of_opening_x = z + work_mem[K+1];

// Now, check whether opening x would save cost; if so, do it, and
// otherwise do nothing

if ( gl_cost_of_opening_x < 0 ) {
    //  we'd save money by opening x; we'll do it
    for ( int i = 0; i < points->num; i++ ) {
        bool close_center = gl_lower[center_table[points->p[i].assign]] > 0 ;
        if ( switch_membership[i] || close_center ) {
            // Either i's median (which may be i itself) is closing,
            // or i is closer to x than to its current median
            points->p[i].cost = points->p[i].weight * dist ( points->p[i], points->p[x], points->dim );
            points->p[i].assign = x;
        }
    }
    for ( int i = 0; i < points->num; i++ ) {
        if ( is_center[i] && gl_lower[center_table[i]] > 0 ) {
            is_center[i] = false;
        }
    }
    if ( x >= 0 && x < points->num ) {
        is_center[x] = true;
    }

    *numcenters = *numcenters + 1 - gl_number_of_centers_to_close;
} else {
    gl_cost_of_opening_x = 0;  // the value we'll return
}

free ( work_mem );

return -gl_cost_of_opening_x;
}
double pgain(长x,点*Points,双z,长int*numcenters)
{
int i;
int number of_centers_to_close=0;
静态双*工作记忆;
期初的静态双总账成本;
要关闭的中心的静态int gl数量;
整数步长=*numcenters+2;
//使步幅为缓存线的倍数
int cl=缓存线/sizeof(双精度);
如果(步长%cl!=0){
步幅=cl*(步幅/cl+1);
}
int K=stride-2;//K==*numcenters
//我自己开x的费用
期初双倍成本x=0;
work_mem=(双*)malloc(2*步幅*大小(双));
期初总账成本x=0;
gl_中心数_至_关闭=0;
/*
*对于每个中心,我们有一个*下*字段,表示
*关闭中心可以节省多少钱。
*/
整数计数=0;
对于(int i=0;inum;i++){
if(is_center[i]){
中心_表[i]=计数++;
}
}
work_mem[0]=0;
//现在我们完成构建表。清除工作内存。
memset(开关_成员,0,点->num*sizeof(bool));
memset(work_mem,0,步长*sizeof(double));
memset(工作记忆+步幅,0,步幅*sizeof(双));
//我的*下*字段
double*lower=&work_mem[0];
//全局*下*字段
双*gl_lower=&work_mem[stride];
对于(i=0;inum;i++){
浮动x_成本=距离(点->p[i],点->p[x],点->尺寸)*点->p[i]。重量;
浮动当前成本=点数->p[i]。成本;
如果(x_成本<当前_成本){
//只要切换到x,我就可以节省成本
//(请注意,我不能是中位数,
//否则dist(p[i],p[x])将为0)
开关_成员[i]=1;
期初成本=期初成本-当期成本;
}否则{
//将i分配给x的成本至少是i的当前分配成本
/考虑我目前的**会实现的储蓄
//如果我们将该中位数及其所有成员重新分配给x;
//注意,我们已经考虑了中位数
//会通过关闭来节省z;现在我们必须从节省中减去
//重新分配中位数及其成员的额外成本
int assign=点->p[i]。分配;
下[中心_表[分配]+=当前_成本-x_成本;
}
}
//此时,我们可以计算开设中心的成本
//在x;如果是负数,我们将继续打开它
对于(int i=0;inum;i++){
if(is_center[i]){
双低=z+工作记忆[中心表格[i];
gl_下[中心_表[i]]=低;
如果(低>0){
//我是中位数,而且
//如果我们打开x(现在可能还没有),我们会关闭i
//注意,我们将忽略以下数量,除非我们打开x
++要关闭的中心数量;
开业成本=低;
}
}
}
//使用剩余的工作内存存储以下内容
work_mem[K]=要关闭的_中心的数量;
工时成本[K+1]=期初成本;
gl_中心数_至_关闭=(int)工时_内存[K];
总账期初成本=z+工时成本[K+1];
//现在,检查打开x是否可以节省成本;如果可以,请这样做,然后
//否则什么也不做
if(总帐成本,期初成本,x<0){
//开x可以省钱,我们会做的
对于(int i=0;inum;i++){
bool close_center=gl_lower[中心表格[点->p[i].分配]]>0;
如果(切换_成员身份[i]| |关闭_中心){
//要么我的中位数(可能是我自己)正在接近,
//或者i更接近x,而不是它当前的中值
点->p[i]。成本=点->p[i]。重量*距离(点->p[i],点->p[x],点->尺寸);
点->p[i]。分配=x;
}
}
对于(int i=0;inum;i++){
如果(是否为中心[i]&&gl较低[i]]>0){
is_center[i]=假;
}
}
如果(x>=0&&xnum){
是_center[x]=真;
}
*numcenters=*numcenters+1-总帐\u中心数\u到\u关闭;
}否则{
gl\u期初成本\u\u x=0;//我们将返回的值
}
免费(工作日);
退货-期初总账成本;
}
这就是我所做的并行化:

double pgain ( long x, Points *points, double z, long int *numcenters )
{
int i;
int number_of_centers_to_close = 0;

static double *work_mem;
static double gl_cost_of_opening_x;
static int gl_number_of_centers_to_close;

int stride = *numcenters + 2;
//make stride a multiple of CACHE_LINE
int cl = CACHE_LINE/sizeof ( double );
if ( stride % cl != 0 ) {
    stride = cl * ( stride / cl + 1 );
}
int K = stride - 2 ; // K==*numcenters

//my own cost of opening x
double cost_of_opening_x = 0;

work_mem = ( double* ) malloc ( 2 * stride * sizeof ( double ) );
gl_cost_of_opening_x = 0;
gl_number_of_centers_to_close = 0;

/*
 * For each center, we have a *lower* field that indicates
 * how much we will save by closing the center.
 */
int count = 0;
for ( int i = 0; i < points->num; i++ ) {
    if ( is_center[i] ) {
        center_table[i] = count++;
    }
}
work_mem[0] = 0;

//now we finish building the table. clear the working memory.
memset ( switch_membership, 0, points->num * sizeof ( bool ) );
memset ( work_mem, 0, stride*sizeof ( double ) );
memset ( work_mem+stride,0,stride*sizeof ( double ) );

//my *lower* fields
double* lower = &work_mem[0];
//global *lower* fields
double* gl_lower = &work_mem[stride];
float x_cost=0.0;
float current_cost=0.0;

#pragma omp parallel for private(current_cost,x_cost) 
shared(cost_of_opening_x)  


for ( i = 0; i < points->num; i++ ) {

    x_cost = dist ( points->p[i], points->p[x], points->dim ) * points->p[i].weight;
    current_cost = points->p[i].cost;

    if ( x_cost < current_cost ) {

        // point i would save cost just by switching to             // x
        // (note that i cannot be a median,
        // or else dist(p[i], p[x]) would be 0)

        switch_membership[i] = 1;

        cost_of_opening_x += x_cost - current_cost;
        {
        #pragma omp flush(cost_of_opening_x)
        }
    } else {

        // cost of assigning i to x is at least current assignment cost of i

        // consider the savings that i's **current** median would realize
        // if we reassigned that median and all its members to x;
        // note we've already accounted for the fact that the median
        // would save z by closing; now we have to subtract from the savings
        // the extra cost of reassigning that median and its members
        int assign = points->p[i].assign;

        lower[center_table[assign]] += current_cost - x_cost;
        {
        #pragma omp flush(lower) 
        }
    }
#pragma omp barrier
{
#pragma omp flush(lower,cost_of_opening_x)
}   
}

// at this time, we can calculate the cost of opening a center
// at x; if it is negative, we'll go through with opening it
for ( int i = 0; i < points->num; i++ ) {
    if ( is_center[i] ) {
        double low = z + work_mem[center_table[i]];
        gl_lower[center_table[i]] = low;
        if ( low > 0 ) {
            // i is a median, and
            // if we were to open x (which we still may not) we'd close i

            // note, we'll ignore the following quantity unless we do open x
            ++number_of_centers_to_close;
            cost_of_opening_x -= low;
        }
    }
}
//use the rest of working memory to store the following
work_mem[K] = number_of_centers_to_close;
work_mem[K+1] = cost_of_opening_x;

gl_number_of_centers_to_close = ( int ) work_mem[K];
gl_cost_of_opening_x = z + work_mem[K+1];

// Now, check whether opening x would save cost; if so, do it, and
// otherwise do nothing

if ( gl_cost_of_opening_x < 0 ) {
    //  we'd save money by opening x; we'll do it
    #pragma omp parallel for
    for ( int i = 0; i < points->num; i++ ) {
        bool close_center = gl_lower[center_table[points->p[i].assign]] > 0 
;
        if ( switch_membership[i] || close_center ) {
            // Either i's median (which may be i itself) is closing,
            // or i is closer to x than to its current median
            points->p[i].cost = points->p[i].weight * dist ( points->p[i], 
points->p[x], points->dim );
            points->p[i].assign = x;
        }
    }
    for ( int i = 0; i < points->num; i++ ) {
        if ( is_center[i] && gl_lower[center_table[i]] > 0 ) {
            is_center[i] = false;
        }
    }
    if ( x >= 0 && x < points->num ) {
        is_center[x] = true;
    }

    *numcenters = *numcenters + 1 - gl_number_of_centers_to_close;
} else {
    gl_cost_of_opening_x = 0;  // the value we'll return
}

free ( work_mem );

return -gl_cost_of_opening_x;
}
double pgain(长x,点*Points,双z,长int*numcenters)
{
int i;
int number of_centers_to_close=0;
静态双*工作记忆;
期初的静态双总账成本;
要关闭的中心的静态int gl数量;
整数步长=*numcenters+2;
//使步幅为缓存线的倍数
int cl=缓存线/sizeof(双精度);
如果(步长%cl!=0){
步幅=cl*(步幅/cl+1);
}
int K=stride-2;//K==*numcenters
//我自己开x的费用
期初双倍成本x=0;
work_mem=(双*)malloc(2*步幅*大小(双));
期初总账成本x=0;
gl_中心数_至_关闭=0;
/*
*对于每个中心,我们有一个*下*字段,表示
*关闭中心可以节省多少钱。
*/
整数计数=0;
对于(int i=0;inum;i++){
if(is_center[i]){
中心_表[i]=计数++;
}
}
work_mem[0]=0;
//现在我们完成构建表。清除工作内存。
memset(开关_成员,0,点->num*sizeof(bool));
memset(work_mem,0,步长*sizeof(double));
memset(工作记忆+步幅,0,步幅*sizeof(双));
//我的*下*字段
double*lower=&work_mem[0];
//全局*下*字段
双*gl_lower=&work_mem[stride];
浮动x_成本=0.0;
浮动电流成本=0.0;
#pragma omp并行专用(当前成本、x成本)
分摊(费用)_
double pgain ( long x, Points *points, double z, long int *numcenters )
{
    int i;
    int number_of_centers_to_close = 0;

    static double *work_mem;
    static double gl_cost_of_opening_x;
    static int gl_number_of_centers_to_close;

    int stride = *numcenters + 2;
    //make stride a multiple of CACHE_LINE
    int cl = CACHE_LINE/sizeof ( double );
    if ( stride % cl != 0 ) {
        stride = cl * ( stride / cl + 1 );
    }
    int K = stride - 2 ; // K==*numcenters

    //my own cost of opening x
    double cost_of_opening_x = 0;
    work_mem = ( double* ) malloc ( 2 * stride * sizeof ( double ) );
    gl_cost_of_opening_x = 0;
    gl_number_of_centers_to_close = 0;

    int count = 0;
    //my *lower* fields
    double* lower;
    //global *lower* fields
    double* gl_lower;

    #pragma omp parallel
    {
        /*
         * For each center, we have a *lower* field that indicates
         * how much we will save by closing the center.
         */

        int i;
        #pragma omp for private(i)
        for ( i = 0; i < points->num; i++ ) {
            if ( is_center[i] ) {
                #pragma omp critical
                center_table[i] = count++;
            }
        }

        #pragma omp single
        work_mem[0] = 0;

        #pragma omp sections
        {
            //now we finish building the table. clear the working memory.
            #pragma omp section
            memset ( switch_membership, 0, points->num * sizeof ( bool ) );

            #pragma omp section
            memset ( work_mem, 0, stride*sizeof ( double ) );

            #pragma omp section
            memset ( work_mem+stride,0,stride*sizeof ( double ) );
        }

        #pragma omp single
        {
            lower = &work_mem[0];
            gl_lower = &work_mem[stride];
        }

        float x_cost, current_cost;

        #pragma omp for private(i, x_cost, current_cost)
        for ( i = 0; i < points->num; i++ ) {

            x_cost = dist ( points->p[i], points->p[x], points->dim ) * points->p[i].weight;
            current_cost = points->p[i].cost;

            if ( x_cost < current_cost ) {

                // point i would save cost just by switching to x
                // (note that i cannot be a median,
                // or else dist(p[i], p[x]) would be 0)

                switch_membership[i] = 1;

                #pragma omp critical
                cost_of_opening_x += x_cost - current_cost;

            } else {

                // cost of assigning i to x is at least current assignment cost of i

                // consider the savings that i's **current** median would realize
                // if we reassigned that median and all its members to x;
                // note we've already accounted for the fact that the median
                // would save z by closing; now we have to subtract from the savings
                // the extra cost of reassigning that median and its members
                int assign = points->p[i].assign;

                #pragma omp critical
                lower[center_table[assign]] += current_cost - x_cost;
            }
        }

        // at this time, we can calculate the cost of opening a center
        // at x; if it is negative, we'll go through with opening it

        double low;

        #pragma omp for private(i, low)
        for ( int i = 0; i < points->num; i++ ) {
            if ( is_center[i] ) {
                low = z + work_mem[center_table[i]];
                #pragma omp critical
                gl_lower[center_table[i]] = low;
                if ( low > 0 ) {
                    // i is a median, and
                    // if we were to open x (which we still may not) we'd close i

                    // note, we'll ignore the following quantity unless we do open x
                    #pragma omp atomic
                    ++number_of_centers_to_close;
                    #pragma omp critical
                    cost_of_opening_x -= low;
                }
            }
        }

        #pragma omp sections
        {
            //use the rest of working memory to store the following
            #pragma omp section
            work_mem[K] = number_of_centers_to_close;

            #pragma omp section
            work_mem[K+1] = cost_of_opening_x;

            #pragma omp section
            gl_number_of_centers_to_close = ( int ) work_mem[K];

            #pragma omp section
            gl_cost_of_opening_x = z + work_mem[K+1];
        }

        // Now, check whether opening x would save cost; if so, do it, and
        // otherwise do nothing
        bool close_center;

        if ( gl_cost_of_opening_x < 0 ) {
            //  we'd save money by opening x; we'll do it
            #pragma omp for private(i)
            for ( i = 0; i < points->num; i++ ) {
                close_center = gl_lower[center_table[points->p[i].assign]] > 0 ;
                if ( switch_membership[i] || close_center ) {
                    // Either i's median (which may be i itself) is closing,
                    // or i is closer to x than to its current median
                    points->p[i].cost = points->p[i].weight * dist ( points->p[i], points->p[x], points->dim );
                    points->p[i].assign = x;
                }
            }
            #pragma omp for private(i)
            for ( i = 0; i < points->num; i++ ) {
                if ( is_center[i] && gl_lower[center_table[i]] > 0 ) {
                    is_center[i] = false;
                }
            }
            if ( x >= 0 && x < points->num ) {
                is_center[x] = true;
            }

            #pragma omp single
            *numcenters = *numcenters + 1 - gl_number_of_centers_to_close;
        } else {
            #pragma omp single
            gl_cost_of_opening_x = 0;  // the value we'll return
        }

        #pragma omp single
        free ( work_mem );
    }

    return -gl_cost_of_opening_x;
}