Parallel processing:用OpenMP优化N皇后(N-Queens)求解
标签:parallel-processing, openmp
我正在学习OpenMP,并编写了以下代码来解决N皇后问题:
//Full Code: https://github.com/Shafaet/Codes/blob/master/OPENMP/Parallel%20N- Queen%20problem.cpp
// Board dimension: the solver places one queen in each of n columns.
int n;

/**
 * Counts the ways to finish a partial placement, filling columns left to right.
 *
 * col     - index of the column to fill next; col == n means the board is full.
 * rowmask - bit r is set when row r already holds a queen.
 * dia1    - bit (row + col) is set for every occupied diagonal of one direction.
 * dia2    - bit ((row + n - 1) - col) is set for every occupied diagonal of the
 *           other direction.
 *
 * Returns the number of complete placements reachable from this state.
 */
int call(int col,int rowmask,int dia1,int dia2)
{
    // Every column holds a queen: this branch is exactly one solution.
    if (col == n)
        return 1;

    int total = 0;
    for (int r = 0; r < n; ++r)
    {
        const int rowBit  = 1 << r;
        const int diagBit = 1 << (r + col);
        const int antiBit = 1 << ((r + n - 1) - col);

        // Skip the square when its row or either diagonal is already taken.
        if ((rowmask & rowBit) || (dia1 & diagBit) || (dia2 & antiBit))
            continue;

        total += call(col + 1, rowmask | rowBit, dia1 | diagBit, dia2 | antiBit);
    }
    return total;
}
/**
 * Solves the n-queens instance with an OpenMP parallel loop over the row of
 * the queen in column 0, prints the solution count and the elapsed time.
 *
 * Returns the elapsed wall-clock time in seconds (difference of two
 * omp_get_wtime() readings, taken around the loop and the first printf).
 */
double parallel()
{
    const double st = omp_get_wtime();
    int ans = 0;

    // The loop variable is private by rule and must not appear in a shared
    // clause; the start masks are computed per iteration instead of being
    // written into variables that all threads share, so there is no data race.
    // Only the running total is combined across threads, via the reduction.
    #pragma omp parallel for reduction(+:ans)
    for (int row = 0; row < n; row++)
    {
        // Queen fixed at (row, column 0): its diagonal bits are
        // row + 0 and (row + n - 1) - 0.
        const int rowmask = 1 << row;
        const int dia1 = 1 << row;
        const int dia2 = 1 << (row + n - 1);
        ans += call(1, rowmask, dia1, dia2);
    }

    printf("Found %d configuration for n=%d\n", ans, n);
    const double en = omp_get_wtime();
    printf("Time taken using openmp %lf\n", en - st);
    return en - st;
}
/**
 * Solves the n-queens instance on a single thread, trying each row for the
 * queen in column 0 in turn, and prints the solution count and elapsed time.
 *
 * Returns the elapsed wall-clock time in seconds as measured by
 * omp_get_wtime().
 */
double serial()
{
    const double started = omp_get_wtime();

    int total = 0;
    for (int first = 0; first < n; ++first)
    {
        // With the first queen in column 0 the diagonal indices reduce to
        // first and first + n - 1.
        const int rowBits  = 1 << first;
        const int diagBits = 1 << first;
        const int antiBits = 1 << (first + n - 1);
        total += call(1, rowBits, diagBits, antiBits);
    }

    printf("Found %d configuration for n=%d\n", total, n);
    const double finished = omp_get_wtime();
    printf("Time taken without openmp %lf\n", finished - started);
    return finished - started;
}
/**
 * Benchmark driver: for board sizes 2 through 13 runs the serial and the
 * OpenMP solver, prints the serial/parallel time ratio for each size and
 * finally the mean of those ratios.
 */
int main()
{
    double ratioSum = 0;
    int runs = 0;

    for (int boardSize = 2; boardSize <= 13; ++boardSize)
    {
        n = boardSize;  // both solvers read the board size from the global n

        const double stime = serial();
        const double ptime = parallel();
        const double speedup = stime / ptime;

        printf("OpenMP is %lf times faster for n=%d\n", speedup, n);
        ratioSum += speedup;
        ++runs;
        puts("===============");
    }

    printf("On average OpenMP is %lf times faster\n", ratioSum / runs);
    return 0;
}
//完整代码:https://github.com/Shafaet/Codes/blob/master/OPENMP/Parallel%20N-皇后%20problem.cpp
int n;
int调用(int列、int行掩码、int dia1、int dia2)
{
if(col==n)
{
返回1;
}
int行,ans=0;
对于(row=0;row…(代码片段在此处被截断)
您的代码使用的似乎是经典的回溯式N皇后递归算法。它并不是求解N皇后问题最快的方法,但由于足够简单,非常适合用来做并行化的基础练习。
也就是说:算法本身非常简单,因此除了基本的“parallel for”和归约(reduction)之外,不能指望它自然地展示出多少高级的OpenMP用法。
不过,既然您的目的是学习并行编程,并希望学习曲线更清晰、更平缓,这里还有另一种实现(众多可能的实现之一):它使用相同的算法,但从教学角度看更易读、更直观:
// Tries to place a queen at (row, col) given the queens already recorded in
// queens[0..row-1] (index = row, value = column). If the square is attacked
// the call returns without changing anything. Otherwise the queen is stored
// and the search continues with every column of the next row; reaching the
// last row bumps the global solution counter.
void setQueen(int queens[], int row, int col) {
    // Reject the square if any queen in an earlier row attacks it.
    for (int prev = 0; prev < row; ++prev) {
        const int placed = queens[prev];
        const bool sameColumn = (placed == col);
        const bool sameDiagonal = (abs(placed - col) == (row - prev));
        if (sameColumn || sameDiagonal) {
            return;
        }
    }

    // The square is safe: record the queen.
    queens[row] = col;

    if (row != size - 1) {
        // Not the last row yet: try each column of the following row.
        for (int next = 0; next < size; ++next) {
            setQueen(queens, row + 1, next);
        }
        return;
    }

    // Final queen placed, so this is a complete solution. The counter is
    // updated from several threads, hence the atomic update.
    #pragma omp atomic
    nrOfSolutions++;
}
// Explores all placements for the N-Queens problem on a size x size board.
// Each column of the first row is handled by one iteration of an OpenMP
// parallel loop; solutions are tallied by setQueen.
void solve() {
    #pragma omp parallel for
    for (int firstCol = 0; firstCol < size; ++firstCol) {
        // Every iteration gets its own board so iterations never share state.
        // Index is the row position, value is the column of that row's queen.
        int* const board = new int[size];
        setQueen(board, 0, firstCol);
        delete[] board;
    }
}
void setQueen(int queen[],int行,int列){
//检查所有先前放置的行是否存在攻击
对于(int i=0;i…(代码片段在此处被截断)
我知道我来得有点晚,但您可以使用任务队列(OpenMP task)做进一步优化(结果快了7–10%,具体原因我不清楚)。下面是我使用的代码:
#include <cstdio> // printf
#include <cstdlib> // atoi, abs
#include <iomanip> // modify std::out
#include <iostream> // std::cout, cin, cerr ...
#include <vector> // std::vector
#include <omp.h>
using namespace std;
// Running count of complete placements; incremented by the solver each time
// the last row is filled and printed at the end of main.
int nrOfSolutions=0;
// Board dimension (rows == columns); main sets it from the command line.
// NOTE(review): together with the using-directive above, an unqualified use
// of `size` can clash with std::size when compiled as C++17 or later;
// refer to it as ::size to be safe.
int size=0;
void print(int queens[]) {
cerr << "Solution " << nrOfSolutions << endl;
for(int row=0; row<size; row++) {
for(int col=0; col<size; col++) {
if(queens[row]==col) {
cout << "Q";
}
else {
cout << "-";
}
}
cout << endl;
}
}
void setQueen(int queens[], int row, int col, int id) {
for(int i=0; i<row; i++) {
// vertical attacks
if (queens[i]==col) {
return;
}
// diagonal attacks
if (abs(queens[i]-col) == (row-i) ) {
return;
}
}
// column is ok, set the queen
queens[row]=col;
if(row==size-1) {
// only one thread should print allowed to print at a time
{
// increasing the solution counter is not atomic
#pragma omp critical
nrOfSolutions++;
#ifdef _DEBUG
#pragma omp critical
print(queens);
#endif
}
}
else {
// try to fill next row
for(int i=0; i<size; i++) {
setQueen(queens, row+1, i, id);
}
}
}
void solve() {
int myid=0 ;
#pragma omp parallel
#pragma omp single
{
for(int i=0; i<size; i++) {
/*
#ifdef _OMP //(???)
myid = omp_get_thread_num();
#endif
#ifdef _DEBUG
cout << "ThreadNum: " << myid << endl ;
#endif
*/
// try all positions in first row
// create separate array for each recursion
// started here
#pragma omp task
setQueen(new int[size], 0, i, myid);
}
}
}
int main(int argc, char*argv[]) {
if(argc !=2) {
cerr << "Usage: nq-openmp-taskq boardSize.\n";
return 0;
}
size = atoi(argv[1]);
cout << "Starting OpenMP Task Queue solver for size " << size << "...\n";
double st=omp_get_wtime();
solve();
double en=omp_get_wtime();
printf("Time taken using openmp %lf\n",en-st);
cout << "Number of solutions: " << nrOfSolutions << endl;
return 0;
}
#包括//std::cout、cin、cerr。。。
#include//modify std::out
#包括
使用名称空间std;
int nrof解=0;
int size=0;
无效打印(整数皇后[]){
cerr…(代码片段在此处被截断)
你可以看看RosettaCode。我最初是用F77编写的,后来把它改写为使用OpenMP。它只使用了“parallel do”,和你的做法一样。但老实说,如果不改变算法,除了在多个核心上并行运行(你的parallel for应该已经做到了这一点)之外,OpenMP还能带来什么?
如果你只是在学习OpenMP,那么你显然需要弄清楚private和shared的区别。i、rowmask、dia1、dia2都应该是private。因为i是循环迭代变量,它本来就是私有的。另外,你是在存在竞争条件的情况下把rowmask、dia1和dia2置零,然后把它们按值传给一个函数(函数参数是各线程私有的副本),所以基本上只是碰巧才得到了正确的结果。