Parallel processing 使用openmp并行化streamcluster
我正在尝试并行化一个名为streamcluster的程序。更具体地说,根据我使用的Scalasca工具,名为pgain的函数花费了程序的大部分时间,因此这是我应该并行化的函数。在这里,您可以看到函数和我在并行化过程中所做的努力。问题是,我所实现的唯一一件事,就是让程序花费更多的时间来执行 streamcluster中的原始pgain功能:Parallel processing 使用openmp并行化streamcluster,parallel-processing,openmp,pragma,Parallel Processing,Openmp,Pragma,我正在尝试并行化一个名为streamcluster的程序。更具体地说,根据我使用的Scalasca工具,名为pgain的函数花费了程序的大部分时间,因此这是我应该并行化的函数。在这里,您可以看到函数和我在并行化过程中所做的努力。问题是,我所实现的唯一一件事,就是让程序花费更多的时间来执行 streamcluster中的原始pgain功能: double pgain ( long x, Points *points, double z, long int *numcenters ) { int i
double pgain ( long x, Points *points, double z, long int *numcenters )
{
int i;
int number_of_centers_to_close = 0;
static double *work_mem;
static double gl_cost_of_opening_x;
static int gl_number_of_centers_to_close;
int stride = *numcenters + 2;
//make stride a multiple of CACHE_LINE
int cl = CACHE_LINE/sizeof ( double );
if ( stride % cl != 0 ) {
stride = cl * ( stride / cl + 1 );
}
int K = stride - 2 ; // K==*numcenters
//my own cost of opening x
double cost_of_opening_x = 0;
work_mem = ( double* ) malloc ( 2 * stride * sizeof ( double ) );
gl_cost_of_opening_x = 0;
gl_number_of_centers_to_close = 0;
/*
* For each center, we have a *lower* field that indicates
* how much we will save by closing the center.
*/
int count = 0;
for ( int i = 0; i < points->num; i++ ) {
if ( is_center[i] ) {
center_table[i] = count++;
}
}
work_mem[0] = 0;
//now we finish building the table. clear the working memory.
memset ( switch_membership, 0, points->num * sizeof ( bool ) );
memset ( work_mem, 0, stride*sizeof ( double ) );
memset ( work_mem+stride,0,stride*sizeof ( double ) );
//my *lower* fields
double* lower = &work_mem[0];
//global *lower* fields
double* gl_lower = &work_mem[stride];
for ( i = 0; i < points->num; i++ ) {
float x_cost = dist ( points->p[i], points->p[x], points->dim ) * points->p[i].weight;
float current_cost = points->p[i].cost;
if ( x_cost < current_cost ) {
// point i would save cost just by switching to x
// (note that i cannot be a median,
// or else dist(p[i], p[x]) would be 0)
switch_membership[i] = 1;
cost_of_opening_x += x_cost - current_cost;
} else {
// cost of assigning i to x is at least current assignment cost of i
// consider the savings that i's **current** median would realize
// if we reassigned that median and all its members to x;
// note we've already accounted for the fact that the median
// would save z by closing; now we have to subtract from the savings
// the extra cost of reassigning that median and its members
int assign = points->p[i].assign;
lower[center_table[assign]] += current_cost - x_cost;
}
}
// at this time, we can calculate the cost of opening a center
// at x; if it is negative, we'll go through with opening it
for ( int i = 0; i < points->num; i++ ) {
if ( is_center[i] ) {
double low = z + work_mem[center_table[i]];
gl_lower[center_table[i]] = low;
if ( low > 0 ) {
// i is a median, and
// if we were to open x (which we still may not) we'd close i
// note, we'll ignore the following quantity unless we do open x
++number_of_centers_to_close;
cost_of_opening_x -= low;
}
}
}
//use the rest of working memory to store the following
work_mem[K] = number_of_centers_to_close;
work_mem[K+1] = cost_of_opening_x;
gl_number_of_centers_to_close = ( int ) work_mem[K];
gl_cost_of_opening_x = z + work_mem[K+1];
// Now, check whether opening x would save cost; if so, do it, and
// otherwise do nothing
if ( gl_cost_of_opening_x < 0 ) {
// we'd save money by opening x; we'll do it
for ( int i = 0; i < points->num; i++ ) {
bool close_center = gl_lower[center_table[points->p[i].assign]] > 0 ;
if ( switch_membership[i] || close_center ) {
// Either i's median (which may be i itself) is closing,
// or i is closer to x than to its current median
points->p[i].cost = points->p[i].weight * dist ( points->p[i], points->p[x], points->dim );
points->p[i].assign = x;
}
}
for ( int i = 0; i < points->num; i++ ) {
if ( is_center[i] && gl_lower[center_table[i]] > 0 ) {
is_center[i] = false;
}
}
if ( x >= 0 && x < points->num ) {
is_center[x] = true;
}
*numcenters = *numcenters + 1 - gl_number_of_centers_to_close;
} else {
gl_cost_of_opening_x = 0; // the value we'll return
}
free ( work_mem );
return -gl_cost_of_opening_x;
}
double pgain(长x,点*Points,双z,长int*numcenters)
{
int i;
int number of_centers_to_close=0;
静态双*工作记忆;
期初的静态双总账成本;
要关闭的中心的静态int gl数量;
整数步长=*numcenters+2;
//使步幅为缓存线的倍数
int cl=缓存线/sizeof(双精度);
如果(步长%cl!=0){
步幅=cl*(步幅/cl+1);
}
int K=stride-2;//K==*numcenters
//我自己开x的费用
期初双倍成本x=0;
work_mem=(双*)malloc(2*步幅*大小(双));
期初总账成本x=0;
gl_中心数_至_关闭=0;
/*
*对于每个中心,我们有一个*下*字段,表示
*关闭中心可以节省多少钱。
*/
整数计数=0;
对于(int i=0;inum;i++){
if(is_center[i]){
中心_表[i]=计数++;
}
}
work_mem[0]=0;
//现在我们完成构建表。清除工作内存。
memset(开关_成员,0,点->num*sizeof(bool));
memset(work_mem,0,步长*sizeof(double));
memset(工作记忆+步幅,0,步幅*sizeof(双));
//我的*下*字段
double*lower=&work_mem[0];
//全局*下*字段
双*gl_lower=&work_mem[stride];
对于(i=0;inum;i++){
浮动x_成本=距离(点->p[i],点->p[x],点->尺寸)*点->p[i]。重量;
浮动当前成本=点数->p[i]。成本;
如果(x_成本<当前_成本){
//只要切换到x,我就可以节省成本
//(请注意,我不能是中位数,
//否则dist(p[i],p[x])将为0)
开关_成员[i]=1;
期初成本=期初成本-当期成本;
}否则{
//将i分配给x的成本至少是i的当前分配成本
/考虑我目前的**会实现的储蓄
//如果我们将该中位数及其所有成员重新分配给x;
//注意,我们已经考虑了中位数
//会通过关闭来节省z;现在我们必须从节省中减去
//重新分配中位数及其成员的额外成本
int assign=点->p[i]。分配;
下[中心_表[分配]+=当前_成本-x_成本;
}
}
//此时,我们可以计算开设中心的成本
//在x;如果是负数,我们将继续打开它
对于(int i=0;inum;i++){
if(is_center[i]){
双低=z+工作记忆[中心表格[i];
gl_下[中心_表[i]]=低;
如果(低>0){
//我是中位数,而且
//如果我们打开x(现在可能还没有),我们会关闭i
//注意,我们将忽略以下数量,除非我们打开x
++要关闭的中心数量;
开业成本=低;
}
}
}
//使用剩余的工作内存存储以下内容
work_mem[K]=要关闭的_中心的数量;
工时成本[K+1]=期初成本;
gl_中心数_至_关闭=(int)工时_内存[K];
总账期初成本=z+工时成本[K+1];
//现在,检查打开x是否可以节省成本;如果可以,请这样做,然后
//否则什么也不做
if(总帐成本,期初成本,x<0){
//开x可以省钱,我们会做的
对于(int i=0;inum;i++){
bool close_center=gl_lower[中心表格[点->p[i].分配]]>0;
如果(切换_成员身份[i]| |关闭_中心){
//要么我的中位数(可能是我自己)正在接近,
//或者i更接近x,而不是它当前的中值
点->p[i]。成本=点->p[i]。重量*距离(点->p[i],点->p[x],点->尺寸);
点->p[i]。分配=x;
}
}
对于(int i=0;inum;i++){
如果(是否为中心[i]&&gl较低[i]]>0){
is_center[i]=假;
}
}
如果(x>=0&&xnum){
是_center[x]=真;
}
*numcenters=*numcenters+1-总帐\u中心数\u到\u关闭;
}否则{
gl\u期初成本\u\u x=0;//我们将返回的值
}
免费(工作日);
退货-期初总账成本;
}
这就是我所做的并行化:
double pgain ( long x, Points *points, double z, long int *numcenters )
{
int i;
int number_of_centers_to_close = 0;
static double *work_mem;
static double gl_cost_of_opening_x;
static int gl_number_of_centers_to_close;
int stride = *numcenters + 2;
//make stride a multiple of CACHE_LINE
int cl = CACHE_LINE/sizeof ( double );
if ( stride % cl != 0 ) {
stride = cl * ( stride / cl + 1 );
}
int K = stride - 2 ; // K==*numcenters
//my own cost of opening x
double cost_of_opening_x = 0;
work_mem = ( double* ) malloc ( 2 * stride * sizeof ( double ) );
gl_cost_of_opening_x = 0;
gl_number_of_centers_to_close = 0;
/*
* For each center, we have a *lower* field that indicates
* how much we will save by closing the center.
*/
int count = 0;
for ( int i = 0; i < points->num; i++ ) {
if ( is_center[i] ) {
center_table[i] = count++;
}
}
work_mem[0] = 0;
//now we finish building the table. clear the working memory.
memset ( switch_membership, 0, points->num * sizeof ( bool ) );
memset ( work_mem, 0, stride*sizeof ( double ) );
memset ( work_mem+stride,0,stride*sizeof ( double ) );
//my *lower* fields
double* lower = &work_mem[0];
//global *lower* fields
double* gl_lower = &work_mem[stride];
float x_cost=0.0;
float current_cost=0.0;
#pragma omp parallel for private(current_cost,x_cost)
shared(cost_of_opening_x)
for ( i = 0; i < points->num; i++ ) {
x_cost = dist ( points->p[i], points->p[x], points->dim ) * points->p[i].weight;
current_cost = points->p[i].cost;
if ( x_cost < current_cost ) {
// point i would save cost just by switching to // x
// (note that i cannot be a median,
// or else dist(p[i], p[x]) would be 0)
switch_membership[i] = 1;
cost_of_opening_x += x_cost - current_cost;
{
#pragma omp flush(cost_of_opening_x)
}
} else {
// cost of assigning i to x is at least current assignment cost of i
// consider the savings that i's **current** median would realize
// if we reassigned that median and all its members to x;
// note we've already accounted for the fact that the median
// would save z by closing; now we have to subtract from the savings
// the extra cost of reassigning that median and its members
int assign = points->p[i].assign;
lower[center_table[assign]] += current_cost - x_cost;
{
#pragma omp flush(lower)
}
}
#pragma omp barrier
{
#pragma omp flush(lower,cost_of_opening_x)
}
}
// at this time, we can calculate the cost of opening a center
// at x; if it is negative, we'll go through with opening it
for ( int i = 0; i < points->num; i++ ) {
if ( is_center[i] ) {
double low = z + work_mem[center_table[i]];
gl_lower[center_table[i]] = low;
if ( low > 0 ) {
// i is a median, and
// if we were to open x (which we still may not) we'd close i
// note, we'll ignore the following quantity unless we do open x
++number_of_centers_to_close;
cost_of_opening_x -= low;
}
}
}
//use the rest of working memory to store the following
work_mem[K] = number_of_centers_to_close;
work_mem[K+1] = cost_of_opening_x;
gl_number_of_centers_to_close = ( int ) work_mem[K];
gl_cost_of_opening_x = z + work_mem[K+1];
// Now, check whether opening x would save cost; if so, do it, and
// otherwise do nothing
if ( gl_cost_of_opening_x < 0 ) {
// we'd save money by opening x; we'll do it
#pragma omp parallel for
for ( int i = 0; i < points->num; i++ ) {
bool close_center = gl_lower[center_table[points->p[i].assign]] > 0
;
if ( switch_membership[i] || close_center ) {
// Either i's median (which may be i itself) is closing,
// or i is closer to x than to its current median
points->p[i].cost = points->p[i].weight * dist ( points->p[i],
points->p[x], points->dim );
points->p[i].assign = x;
}
}
for ( int i = 0; i < points->num; i++ ) {
if ( is_center[i] && gl_lower[center_table[i]] > 0 ) {
is_center[i] = false;
}
}
if ( x >= 0 && x < points->num ) {
is_center[x] = true;
}
*numcenters = *numcenters + 1 - gl_number_of_centers_to_close;
} else {
gl_cost_of_opening_x = 0; // the value we'll return
}
free ( work_mem );
return -gl_cost_of_opening_x;
}
double pgain(长x,点*Points,双z,长int*numcenters)
{
int i;
int number of_centers_to_close=0;
静态双*工作记忆;
期初的静态双总账成本;
要关闭的中心的静态int gl数量;
整数步长=*numcenters+2;
//使步幅为缓存线的倍数
int cl=缓存线/sizeof(双精度);
如果(步长%cl!=0){
步幅=cl*(步幅/cl+1);
}
int K=stride-2;//K==*numcenters
//我自己开x的费用
期初双倍成本x=0;
work_mem=(双*)malloc(2*步幅*大小(双));
期初总账成本x=0;
gl_中心数_至_关闭=0;
/*
*对于每个中心,我们有一个*下*字段,表示
*关闭中心可以节省多少钱。
*/
整数计数=0;
对于(int i=0;inum;i++){
if(is_center[i]){
中心_表[i]=计数++;
}
}
work_mem[0]=0;
//现在我们完成构建表。清除工作内存。
memset(开关_成员,0,点->num*sizeof(bool));
memset(work_mem,0,步长*sizeof(double));
memset(工作记忆+步幅,0,步幅*sizeof(双));
//我的*下*字段
double*lower=&work_mem[0];
//全局*下*字段
双*gl_lower=&work_mem[stride];
浮动x_成本=0.0;
浮动电流成本=0.0;
#pragma omp并行专用(当前成本、x成本)
分摊(费用)_
double pgain ( long x, Points *points, double z, long int *numcenters )
{
int i;
int number_of_centers_to_close = 0;
static double *work_mem;
static double gl_cost_of_opening_x;
static int gl_number_of_centers_to_close;
int stride = *numcenters + 2;
//make stride a multiple of CACHE_LINE
int cl = CACHE_LINE/sizeof ( double );
if ( stride % cl != 0 ) {
stride = cl * ( stride / cl + 1 );
}
int K = stride - 2 ; // K==*numcenters
//my own cost of opening x
double cost_of_opening_x = 0;
work_mem = ( double* ) malloc ( 2 * stride * sizeof ( double ) );
gl_cost_of_opening_x = 0;
gl_number_of_centers_to_close = 0;
int count = 0;
//my *lower* fields
double* lower;
//global *lower* fields
double* gl_lower;
#pragma omp parallel
{
/*
* For each center, we have a *lower* field that indicates
* how much we will save by closing the center.
*/
int i;
#pragma omp for private(i)
for ( i = 0; i < points->num; i++ ) {
if ( is_center[i] ) {
#pragma omp critical
center_table[i] = count++;
}
}
#pragma omp single
work_mem[0] = 0;
#pragma omp sections
{
//now we finish building the table. clear the working memory.
#pragma omp section
memset ( switch_membership, 0, points->num * sizeof ( bool ) );
#pragma omp section
memset ( work_mem, 0, stride*sizeof ( double ) );
#pragma omp section
memset ( work_mem+stride,0,stride*sizeof ( double ) );
}
#pragma omp single
{
lower = &work_mem[0];
gl_lower = &work_mem[stride];
}
float x_cost, current_cost;
#pragma omp for private(i, x_cost, current_cost)
for ( i = 0; i < points->num; i++ ) {
x_cost = dist ( points->p[i], points->p[x], points->dim ) * points->p[i].weight;
current_cost = points->p[i].cost;
if ( x_cost < current_cost ) {
// point i would save cost just by switching to x
// (note that i cannot be a median,
// or else dist(p[i], p[x]) would be 0)
switch_membership[i] = 1;
#pragma omp critical
cost_of_opening_x += x_cost - current_cost;
} else {
// cost of assigning i to x is at least current assignment cost of i
// consider the savings that i's **current** median would realize
// if we reassigned that median and all its members to x;
// note we've already accounted for the fact that the median
// would save z by closing; now we have to subtract from the savings
// the extra cost of reassigning that median and its members
int assign = points->p[i].assign;
#pragma omp critical
lower[center_table[assign]] += current_cost - x_cost;
}
}
// at this time, we can calculate the cost of opening a center
// at x; if it is negative, we'll go through with opening it
double low;
#pragma omp for private(i, low)
for ( int i = 0; i < points->num; i++ ) {
if ( is_center[i] ) {
low = z + work_mem[center_table[i]];
#pragma omp critical
gl_lower[center_table[i]] = low;
if ( low > 0 ) {
// i is a median, and
// if we were to open x (which we still may not) we'd close i
// note, we'll ignore the following quantity unless we do open x
#pragma omp atomic
++number_of_centers_to_close;
#pragma omp critical
cost_of_opening_x -= low;
}
}
}
#pragma omp sections
{
//use the rest of working memory to store the following
#pragma omp section
work_mem[K] = number_of_centers_to_close;
#pragma omp section
work_mem[K+1] = cost_of_opening_x;
#pragma omp section
gl_number_of_centers_to_close = ( int ) work_mem[K];
#pragma omp section
gl_cost_of_opening_x = z + work_mem[K+1];
}
// Now, check whether opening x would save cost; if so, do it, and
// otherwise do nothing
bool close_center;
if ( gl_cost_of_opening_x < 0 ) {
// we'd save money by opening x; we'll do it
#pragma omp for private(i)
for ( i = 0; i < points->num; i++ ) {
close_center = gl_lower[center_table[points->p[i].assign]] > 0 ;
if ( switch_membership[i] || close_center ) {
// Either i's median (which may be i itself) is closing,
// or i is closer to x than to its current median
points->p[i].cost = points->p[i].weight * dist ( points->p[i], points->p[x], points->dim );
points->p[i].assign = x;
}
}
#pragma omp for private(i)
for ( i = 0; i < points->num; i++ ) {
if ( is_center[i] && gl_lower[center_table[i]] > 0 ) {
is_center[i] = false;
}
}
if ( x >= 0 && x < points->num ) {
is_center[x] = true;
}
#pragma omp single
*numcenters = *numcenters + 1 - gl_number_of_centers_to_close;
} else {
#pragma omp single
gl_cost_of_opening_x = 0; // the value we'll return
}
#pragma omp single
free ( work_mem );
}
return -gl_cost_of_opening_x;
}