CUDA中两个类之间共享指针
我想用CUDA创建一个顶点和边结构。我有两个类（代码如下）：
Connection {
public:
float value;
Connection()
{
this->value = 0;
}
}
Node
{
public:
Connection *incoming;
Connection *outgoing;
int lenIncoming;
int lenOutgoing;
node(Connection *incoming, Connection *outgoing, int lenIncoming, int lenOutgoing)
{
this->incoming = incoming;
this->outgoing = outgoing;
this->lenIncoming = lenIncoming;
this->lenOutgoing = lenOutgoing;
}
}
当我“连接”节点时,我会执行以下操作:
Connection XA = Connection(10);
Connection AB = Connection(2);
Connection XB = Connection(10);
Connection BX = Connection(2);
Connection* incomingA;
Connection* outgoingA;
Connection* ingoingB;
Connection* outgoingB;
cudaMallocManaged(&incomingA, 1 * sizeof(Connection*));
cudaMallocManaged(&outgoingA, 1 * sizeof(Connection*));
cudaMallocManaged(&ingoingB, 2 * sizeof(Connection*));
cudaMallocManaged(&outgoingB, 1 * sizeof(Connection*));
incomingA[0] = XA;
outgoingA[0] = AB;
incomingB[0] = XB;
incomingB[1] = AB;
outgoingB[0]= BX;
Node nodeA = Node(incomingA, outgoingA);
Node nodeB = Node(incomingB, outgoingB);
我希望发生的事情是，当我更改nodeA->outgoing[0]的值时（该值的修改来自Node中的方法），它应该会影响nodeB.incoming[1].value，但情况并非如此
当我从nodeA中更改该值时,它仍然是nodeB中的起始值。我想,既然我将指针的副本传递给了对象,我的意思是它更新了原始对象,但似乎我弄错了,或者我在过程中犯了一些错误
如果您有任何关于如何做到这一点的建议,我们将不胜感激
(顺便说一句,我之所以使用类连接而不是浮点数,是因为将来它将包含更多)
这些类是在主机上创建的
节点有一个名为run的方法,该方法正在设备上运行
__device__ __host__
run()
{
for(int i=0; i<this->lenIncoming; i++)
{
this->incoming[i].value += 1;
}
for(int i=0; i< this->lenOutgoing; i++)
{
this->outgoing[i].value += 2;
}
}
内核是通过运行
kernel_run<<<1, 1>>>(nodes);
内核运行(节点);
当使用Nsight进行调试时,我可以看到nodeA中的值在本地发生了变化 正如您已经提到的,问题在于对象
AB
、XB
、BX
等是通过值而不是通过引用分配的,因此每次使用对象时(即每次将其分配给传入或传出连接时)都会对其进行复制,并且从一个操作更新到AB
不会影响AB
的任何其他实例
一种可能的解决方案是将所有对象设置为“单例”，并通过指针引用它们。为了在主机和设备上都能实现这一点，我们将使用cudaMallocManaged
为这些对象进行分配。下面是一个例子:
$ cat t1494.cu
#include <iostream>
// One edge value shared between nodes. Allocated exactly once (in managed
// memory by the caller) so every Node pointing at it sees the same instance.
class Connection {
public:
    float value;  // edge payload (the question says more fields will follow)

    // Default-construct with a zero value.
    Connection() : value(0) {}

    // Construct with an explicit starting value.
    Connection(float val) : value(val) {}
};
// A graph vertex. It stores arrays of Connection POINTERS (not copies), so
// several nodes can reference — and mutate — the same Connection instance.
// The pointer arrays and the Connection objects are owned by the caller and
// are expected to live in managed memory when run() executes on the device.
class Node
{
public:
    Connection **incoming;  // caller-owned array, lenIncoming entries
    Connection **outgoing;  // caller-owned array, lenOutgoing entries
    int lenIncoming;
    int lenOutgoing;

    // Record the externally allocated pointer arrays and their lengths.
    Node(Connection **incoming, Connection **outgoing, int lenIncoming, int lenOutgoing)
        : incoming(incoming),
          outgoing(outgoing),
          lenIncoming(lenIncoming),
          lenOutgoing(lenOutgoing)
    {
    }

    // Add 1 to each incoming connection's value and 2 to each outgoing one.
    // Compiled for both host and device; because connections are shared by
    // pointer, the updates are visible through every node referencing them.
    __device__ __host__
    void run()
    {
        for (int i = 0; i < lenIncoming; ++i)
            incoming[i]->value += 1;
        for (int i = 0; i < lenOutgoing; ++i)
            outgoing[i]->value += 2;
    }
};
// Device entry point. Expects `nodes` to point at two Node objects in
// managed memory; a single thread runs each node's update step in order.
// Launch as: kernel_run<<<1, 1>>>(nodes);
__global__
void kernel_run(Node *nodes)
{
    for (int n = 0; n < 2; ++n) {
        nodes[n].run();
    }
}
int main(){
    // Allocate every Connection exactly once in managed memory, so both the
    // host and the device — and every Node that stores the pointer — share
    // the same instance. Updates made through one node are then visible
    // through the other, which is exactly what the question asks for.
    Connection *XA;
    cudaMallocManaged(&XA, sizeof(Connection));
    *XA = Connection(10);
    Connection *AB;
    cudaMallocManaged(&AB, sizeof(Connection));
    *AB = Connection(2);
    Connection *XB;
    cudaMallocManaged(&XB, sizeof(Connection));
    *XB = Connection(10);
    Connection *BX;
    cudaMallocManaged(&BX, sizeof(Connection));
    *BX = Connection(2);
    // Per-node arrays of Connection pointers; also managed so the device can
    // follow them inside Node::run().
    Connection ** incomingA;
    Connection ** outgoingA;
    Connection ** incomingB;
    Connection ** outgoingB;
    cudaMallocManaged(&incomingA, 1 * sizeof(Connection*));
    cudaMallocManaged(&outgoingA, 1 * sizeof(Connection*));
    cudaMallocManaged(&incomingB, 2 * sizeof(Connection*));
    cudaMallocManaged(&outgoingB, 1 * sizeof(Connection*));
    incomingA[0] = XA;
    outgoingA[0] = AB;   // AB is shared: outgoing of node A, incoming of node B
    incomingB[0] = XB;
    incomingB[1] = AB;
    outgoingB[0] = BX;
    Node *nodes;
    cudaMallocManaged(&nodes, 2 * sizeof(Node));
    nodes[0] = Node(incomingA, outgoingA, 1, 1);
    nodes[1] = Node(incomingB, outgoingB, 2, 1);
    // Values before the kernel runs.
    std::cout << nodes[0].incoming[0]->value << std::endl;
    std::cout << nodes[0].outgoing[0]->value << std::endl;
    std::cout << nodes[1].incoming[0]->value << std::endl;
    std::cout << nodes[1].incoming[1]->value << std::endl;
    std::cout << nodes[1].outgoing[0]->value << std::endl;
    // NOTE: the launch token must be ">>>" — the scraped ">> >" form does
    // not compile.
    kernel_run<<<1, 1>>>(nodes);
    cudaError_t err = cudaGetLastError();   // catch launch-configuration errors
    if (err != cudaSuccess) {
        std::cerr << "kernel launch failed: " << cudaGetErrorString(err) << std::endl;
        return 1;
    }
    err = cudaDeviceSynchronize();          // wait for the kernel; catch execution errors
    if (err != cudaSuccess) {
        std::cerr << "kernel execution failed: " << cudaGetErrorString(err) << std::endl;
        return 1;
    }
    // Values after: AB was updated by both nodes (2 + 2 from A's outgoing,
    // + 1 from B's incoming = 5), proving the instance is shared.
    std::cout << nodes[0].incoming[0]->value << std::endl;
    std::cout << nodes[0].outgoing[0]->value << std::endl;
    std::cout << nodes[1].incoming[0]->value << std::endl;
    std::cout << nodes[1].incoming[1]->value << std::endl;
    std::cout << nodes[1].outgoing[0]->value << std::endl;
    // Release the managed allocations (the original example leaked them).
    cudaFree(nodes);
    cudaFree(incomingA); cudaFree(outgoingA);
    cudaFree(incomingB); cudaFree(outgoingB);
    cudaFree(XA); cudaFree(AB); cudaFree(XB); cudaFree(BX);
    return 0;
}
$ nvcc -o t1494 t1494.cu
$ cuda-memcheck ./t1494
========= CUDA-MEMCHECK
10
2
10
2
2
11
5
11
5
4
========= ERROR SUMMARY: 0 errors
$
$cat t1494.cu
#包括
类连接{
公众:
浮动值;
连接()
{
该->值=0;
}
连接(浮动值)
{
该->值=val;
}
};
类节点
{
公众:
连接**输入;
连接**输出;
他来了;
int-lenouting;
节点(连接**传入,连接**传出,int-lencoming,int-lenouting)
{
此->传入=传入;
这->外出=外出;
这->列宁来了=列宁来了;
此->lenouting=lenouting;
}
__设备主机__
无效运行()
{
for(int i=0;i;i++)
{
此->传入[i]->值+=1;
}
对于(inti=0;ilenouting;i++)
{
此->输出[i]->值+=2;
}
}
};
__全球的__
无效内核运行(节点*节点)
{
节点[0]。运行();
节点[1]。运行();
};
int main(){
连接*XA;
cudaMallocManaged(&XA,sizeof(Connection));
*XA=连接(10);
连接*AB;
CudamAllocManager(&AB,sizeof(连接));
*AB=连接(2);
连接*XB;
cudaMallocManaged(&XB,sizeof(Connection));
*XB=连接(10);
连接*BX;
CudamAllocManager(&BX,sizeof(连接));
*BX=连接(2);
连接**输入A;
连接**支出a;
连接**收入b;
连接**支出b;
CudamAllocManager(和incomingA,1*sizeof(连接*);
CudaMallocManager(和outgoingA,1*sizeof(连接*);
CudaMallocManager(和incomingB,2*sizeof(连接*);
cudaMallocManaged(和outgoingB,1*sizeof(Connection*));
incomingA[0]=XA;
支出a[0]=AB;
incomingB[0]=XB;
收入b[1]=AB;
支出B[0]=BX;
节点*节点;
cudaMallocManaged(&nodes,2*sizeof(Node));
节点[0]=节点(收入、支出、1、1);
节点[1]=节点(收入B,支出B,2,1);
std::cout value什么是主机代码,什么是设备代码?这都是主机代码吗?您是否正在执行cudaMalloc,例如,incomingA
?请提供更多详细信息。如果有一个完整的代码,可以显示对输出值的修改没有以您想要的方式反映出来,那将非常好。是的,对不起。为了尽量简短,我现在已经更新了问题,有了更多信息,如果我应该包含更多信息,请给我一个提示。NodeB应该定义为传入长度为2。这里是我的问题所在,因为AB在节点a内传出,它增加了2,因此在节点B内执行时应该是4,因此,最终结果应该是5。换句话说,我希望不同的节点在本例(AB)@talonmes中编辑相同的连接实例“当我从nodeA中更改值时,它仍然是nodeB中的起始值。我认为,自从我将指针的副本传递给该对象后,这意味着它更新了原始对象,但我似乎弄错了,或者我在这一过程中犯了一些错误。“引用我想说的问题。无论如何,谢谢你的帮助。
$ cat t1494.cu
#include <iostream>
// One edge value shared between nodes; allocated once in managed memory by
// the caller so every Node holding its pointer sees the same instance.
class Connection {
public:
float value;
// Default: value starts at zero.
Connection()
{
this->value = 0;
}
// Construct with an explicit starting value.
Connection(float val)
{
this->value = val;
}
};
// A graph vertex. It stores arrays of Connection POINTERS (not copies), so
// several nodes can reference — and mutate — the same Connection instance.
class Node
{
public:
Connection **incoming;
Connection **outgoing;
int lenIncoming;
int lenOutgoing;
// Record the externally allocated pointer arrays and their element counts;
// the arrays and Connection objects remain owned by the caller.
Node(Connection **incoming, Connection **outgoing, int lenIncoming, int lenOutgoing)
{
this->incoming = incoming;
this->outgoing = outgoing;
this->lenIncoming = lenIncoming;
this->lenOutgoing = lenOutgoing;
}
// Add 1 to each incoming connection's value and 2 to each outgoing one.
// Compiled for host and device; when connections live in managed memory,
// updates are visible on both sides after synchronization.
__device__ __host__
void run()
{
for(int i=0; i<this->lenIncoming; i++)
{
this->incoming[i]->value += 1;
}
for(int i=0; i< this->lenOutgoing; i++)
{
this->outgoing[i]->value += 2;
}
}
};
// Device entry point: expects `nodes` to point at two Node objects in
// managed memory; a single thread runs each node's update step in order.
__global__
void kernel_run(Node *nodes)
{
nodes[0].run();
nodes[1].run();
};
int main(){
    // Allocate every Connection exactly once in managed memory, so both the
    // host and the device — and every Node that stores the pointer — share
    // the same instance; updates through one node are visible via the other.
    Connection *XA;
    cudaMallocManaged(&XA, sizeof(Connection));
    *XA = Connection(10);
    Connection *AB;
    cudaMallocManaged(&AB, sizeof(Connection));
    *AB = Connection(2);
    Connection *XB;
    cudaMallocManaged(&XB, sizeof(Connection));
    *XB = Connection(10);
    Connection *BX;
    cudaMallocManaged(&BX, sizeof(Connection));
    *BX = Connection(2);
    // Per-node arrays of Connection pointers; also managed so the device can
    // follow them inside Node::run().
    Connection ** incomingA;
    Connection ** outgoingA;
    Connection ** incomingB;
    Connection ** outgoingB;
    cudaMallocManaged(&incomingA, 1 * sizeof(Connection*));
    cudaMallocManaged(&outgoingA, 1 * sizeof(Connection*));
    cudaMallocManaged(&incomingB, 2 * sizeof(Connection*));
    cudaMallocManaged(&outgoingB, 1 * sizeof(Connection*));
    incomingA[0] = XA;
    outgoingA[0] = AB;   // AB is shared: outgoing of node A, incoming of node B
    incomingB[0] = XB;
    incomingB[1] = AB;
    outgoingB[0] = BX;
    Node *nodes;
    cudaMallocManaged(&nodes, 2 * sizeof(Node));
    nodes[0] = Node(incomingA, outgoingA, 1, 1);
    nodes[1] = Node(incomingB, outgoingB, 2, 1);
    // Values before the kernel runs.
    std::cout << nodes[0].incoming[0]->value << std::endl;
    std::cout << nodes[0].outgoing[0]->value << std::endl;
    std::cout << nodes[1].incoming[0]->value << std::endl;
    std::cout << nodes[1].incoming[1]->value << std::endl;
    std::cout << nodes[1].outgoing[0]->value << std::endl;
    // NOTE: the launch token must be ">>>" — the scraped ">> >" form does
    // not compile.
    kernel_run<<<1, 1>>>(nodes);
    cudaError_t err = cudaGetLastError();   // catch launch-configuration errors
    if (err != cudaSuccess) {
        std::cerr << "kernel launch failed: " << cudaGetErrorString(err) << std::endl;
        return 1;
    }
    err = cudaDeviceSynchronize();          // wait for the kernel; catch execution errors
    if (err != cudaSuccess) {
        std::cerr << "kernel execution failed: " << cudaGetErrorString(err) << std::endl;
        return 1;
    }
    // Values after: AB was updated by both nodes (2 + 2 from A's outgoing,
    // + 1 from B's incoming = 5), proving the instance is shared.
    std::cout << nodes[0].incoming[0]->value << std::endl;
    std::cout << nodes[0].outgoing[0]->value << std::endl;
    std::cout << nodes[1].incoming[0]->value << std::endl;
    std::cout << nodes[1].incoming[1]->value << std::endl;
    std::cout << nodes[1].outgoing[0]->value << std::endl;
    // Release the managed allocations (the original example leaked them).
    cudaFree(nodes);
    cudaFree(incomingA); cudaFree(outgoingA);
    cudaFree(incomingB); cudaFree(outgoingB);
    cudaFree(XA); cudaFree(AB); cudaFree(XB); cudaFree(BX);
    return 0;
}
$ nvcc -o t1494 t1494.cu
$ cuda-memcheck ./t1494
========= CUDA-MEMCHECK
10
2
10
2
2
11
5
11
5
4
========= ERROR SUMMARY: 0 errors
$