Cuda 内核中类方法的外部调用_Cuda

Cuda 内核中类方法的外部调用

cuda

Cuda 内核中类方法的外部调用,cuda,Cuda,我有一个类FPlan，它有许多方法，比如排列和打包 __host__ __device__ void Perturb_action(FPlan *dfp){ dfp->perturb(); dfp->packing(); } __global__ void Vector_Perturb(FPlan **dfp, int n){ int i=threadIx.x; if(i<n) Perturb_action(dfp[i]); } in main: FPlan *

我有一个类

FPlan

，它有许多方法，比如 perturb（扰动）和 packing（打包）

// Runs one perturb-then-pack step on a single FPlan.
//   dfp - the FPlan to modify; it is dereferenced unconditionally.
// Compiled for both host and device. perturb() is invoked first,
// packing() second.
__host__ __device__ void Perturb_action(FPlan *dfp){
  FPlan &plan = *dfp;
  plan.perturb();
  plan.packing();
}

// Kernel: applies Perturb_action to the entries of an array of FPlan pointers,
// one entry per thread.
//   dfp - array of n FPlan pointers. Both the array and every FPlan it points
//         to are dereferenced in device code, so they must be device-accessible.
//   n   - number of entries in dfp.
// Launch with a 1D grid of 1D blocks holding at least n threads in total;
// threads whose global index is >= n do nothing.
__global__ void Vector_Perturb(FPlan **dfp, int n){
  // threadIdx / blockIdx / blockDim are the CUDA built-in index variables.
  // Folding in the block offset keeps the index unique across blocks; for a
  // single-block launch it reduces to threadIdx.x.
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) Perturb_action(dfp[i]);
}

in main:

// Host-side array of VEC_SIZE pointers to FPlan.
FPlan **fp_vec;
fp_vec=(FPlan**)malloc(VEC_SIZE*sizeof(FPlan*));
//initialize the vec
// Every slot receives the address of the same host object `fp`.
for(int i=0; i<VEC_SIZE;i++)
 fp_vec[i]=&fp;
//fp of type FPlan that is initialized

// NOTE(review): fp_vec is a pointer, so sizeof(fp_vec) is the size of one
// pointer (4 or 8 bytes), not VEC_SIZE*sizeof(FPlan*).
int v_sz=sizeof(fp_vec);
double test=fp_vec[0]->getCost();
// NOTE(review): the printf calls in this fragment have no comma between the
// format string and `test`.
printf("the cost before perturb %f\n"test);
FPlan **value;
// NOTE(review): none of the CUDA API return codes below are checked.
cudaMalloc(&value,v_sz);
// NOTE(review): the copy source is &fp_vec (the address of the host pointer
// variable), not fp_vec (the array it points to). The array elements are
// host addresses (&fp), and no FPlan object is copied to the device.
cudaMemcpy(value,&fp_vec,v_sz,cudaMemcpyHostToDevice);

//call kernel
// One block of VEC_SIZE threads.
dim3 threadsPerBlock(VEC_SIZE);
dim3 numBlocks(1);
Vector_Perturb<<<numBlocks,threadsPerBlock>>> (value,VEC_SIZE);
// Overwrites the first v_sz bytes of the fp_vec array with the pointer values
// read back from the device buffer; it does not copy any FPlan object.
cudaMemcpy(fp_vec,value,v_sz,cudaMemcpyDeviceToHost);
test=fp_vec[0]->getCost();
printf("the cost after perturb %f\n"test);
test=fp_vec[1]->getCost();
printf("the cost after perturb %f\n"test);

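__host__ __device__ void Perturb_action(FPlan *dfp){
  dfp->perturb();
  dfp->packing();
}

__global__ void Vector_Perturb(FPlan **dfp, int n){

int i=threadIx.x;
if(i<n) Perturb_action(dfp[i]);
}

in main:

FPlan **fp_vec;
fp_vec=(FPlan**)malloc(VEC_SIZE*sizeof(FPlan*));
//initialize the vec
for(int i=0; i<VEC_SIZE;i++)
 fp_vec[i]=&fp;
//fp of type FPlan that is initialized

int v_sz=sizeof(fp_vec);
double test=fp_vec[0]->getCost();
printf("the cost before perturb %f\n"test);
FPlan **value;
cudaMalloc(&value,v_sz);
cudaMemcpy(value,&fp_vec,v_sz,cudaMemcpyHostToDevice);

//call kernel
dim3 threadsPerBlock(VEC_SIZE);
dim3 numBlocks(1);
Vector_Perturb<<<numBlocks,threadsPerBlock>>> (value,VEC_SIZE);
cudaMemcpy(fp_vec,value,v_sz,cudaMemcpyDeviceToHost);
test=fp_vec[0]->getCost();
printf("the cost after perturb %f\n"test);
test=fp_vec[1]->getCost();
printf("the cost after perturb %f\n"test);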

在扰动之前的 printf 中，

fp_vec[0]

的成本为 0.8。扰动之后，

fp_vec[0]

的值为 inf，而

fp_vec[1]

的值为 0.8。

扰动后的预期输出应该类似于

fp_vec[0]=0.7

和

fp_vec[1]=0.9

。我想将这些扰动应用于

FPlan

类型的数组。

我遗漏了什么？CUDA 是否支持在内核中调用外部函数？

这似乎是当今常见的问题：

考虑以下代码：

#include <stdio.h>
#include <stdlib.h>
// Demonstrates that sizeof applied to a pointer yields the size of the
// pointer itself, not the size of the buffer it points to.
int main() {
    int* arr = (int*) malloc(100);  // 100-byte buffer
    // sizeof yields a size_t, which is printed with %zu. The value printed is
    // the pointer size, not 100.
    printf("sizeof(arr) = %zu", sizeof(arr));
    free(arr);  // free(NULL) is a no-op, so this is safe even if malloc failed
    return 0;
}

sizeof(fp_vec) 返回的是指针本身的大小，而不是它所指向的数组的大小。因此您在设备上只分配了 4（或 8）个字节，也只复制了 4（或 8）个字节。结果是未定义的（很可能每次都是垃圾值）。

除此之外，您还应该对 CUDA API 调用进行正确的错误检查。

看一看：

我认为代码还有其他问题。您不能通过对指针数组调用一次

cudaMemcpy

就把对象数组复制到设备。此外，对于类似这样的问题（“为什么代码不工作？”），您需要提供一个最小的、完整的、可复现的示例。

// NOTE(review): sizeof(fp_vec) is the size of a pointer variable (4 or 8
// bytes), so v_sz does not reflect the number of elements in the array.
int v_sz=sizeof(fp_vec);
double test=fp_vec[0]->getCost();
printf("the cost before perturb %f\n"test);
FPlan **value;
// Allocates v_sz bytes on the device.
cudaMalloc(&value,v_sz);
// NOTE(review): copies v_sz bytes starting at &fp_vec (the address of the
// host pointer variable itself), not from the array fp_vec points to.
cudaMemcpy(value,&fp_vec,v_sz,cudaMemcpyHostToDevice);