CUDA错误结果
我有一些非常简单的CUDA代码和自定义类:
// CUDA_CALLABLE_MEMBER expands to `__host__ __device__` when __CUDACC__ is
// defined and to nothing otherwise, so this header can be included from both
// CUDA and plain C++ translation units.
#ifdef __CUDACC__
#define CUDA_CALLABLE_MEMBER __host__ __device__
#else
#define CUDA_CALLABLE_MEMBER
#endif

// Minimal holder of a single int whose members are callable wherever
// CUDA_CALLABLE_MEMBER allows.
class Foo {
public:
    // Stored value. The default constructor does NOT initialize it, so it
    // must be assigned before it is read.
    int val;

    // Intentionally empty: leaves `val` unset.
    CUDA_CALLABLE_MEMBER Foo() {
    }

    // Nothing to release.
    CUDA_CALLABLE_MEMBER ~Foo() {}

    // Returns the stored value. Declared const so the accessor can also be
    // called through a const Foo, const Foo& or const Foo*.
    CUDA_CALLABLE_MEMBER int getVal() const {
        return val;
    }
};
以下是自定义类:
// CUDA_CALLABLE_MEMBER expands to `__host__ __device__` when __CUDACC__ is
// defined and to nothing otherwise, so this header can be included from both
// CUDA and plain C++ translation units.
#ifdef __CUDACC__
#define CUDA_CALLABLE_MEMBER __host__ __device__
#else
#define CUDA_CALLABLE_MEMBER
#endif

// Minimal holder of a single int whose members are callable wherever
// CUDA_CALLABLE_MEMBER allows.
class Foo {
public:
    // Stored value. The default constructor does NOT initialize it, so it
    // must be assigned before it is read.
    int val;

    // Intentionally empty: leaves `val` unset.
    CUDA_CALLABLE_MEMBER Foo() {
    }

    // Nothing to release.
    CUDA_CALLABLE_MEMBER ~Foo() {}

    // Returns the stored value. Declared const so the accessor can also be
    // called through a const Foo, const Foo& or const Foo*.
    CUDA_CALLABLE_MEMBER int getVal() const {
        return val;
    }
};
以下是内核:
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include "Custom.h"
#include <string>
#include <iostream>
using namespace std;
// Debug kernel: every thread whose global index (computed from the x
// components of blockIdx/blockDim/threadIdx) is below n prints that index,
// its block index and the `val` field of the matching fooArray element.
//   maxNumberOfThreads - accepted but not used by the kernel body.
//   fooArray           - dereferenced on the device, indexed by global index.
//   n                  - number of elements to report.
__global__ void someKernel(int maxNumberOfThreads, Foo* fooArray, int n)
{
    const int globalId = threadIdx.x + blockDim.x * blockIdx.x;

    // Threads past the end of the array have nothing to print.
    if (globalId >= n) {
        return;
    }

    printf("Hello from thread # %i(block #: %i) with value %d\n",
           globalId, blockIdx.x, fooArray[globalId].val);
}
// Launches someKernel over the first n elements of fooArray and blocks until
// the device has finished.
//   fooArray - forwarded unchanged to the kernel, which dereferences it on
//              the device, so it must point to device-accessible memory.
//   n        - element count; no kernel is launched when n <= 0.
// The function returns void, so launch and execution failures are reported
// on stderr instead of being returned.
extern void cuda_doStuff(Foo* fooArray, int n)
{
    // Kept at 100 so the block numbers printed by the kernel are unchanged.
    const int threadsPerBlock = 100;
    const int maxNumberOfThreads = 10;  // forwarded to the kernel as before

    if (n > 0) {
        // Enough blocks to cover every element (the previous fixed 10 blocks
        // silently skipped everything at index >= 1000). Written as
        // (n - 1) / t + 1 so the ceil-division cannot overflow for large n.
        const int numberOfBlocks = (n - 1) / threadsPerBlock + 1;

        someKernel<<<numberOfBlocks, threadsPerBlock>>>(maxNumberOfThreads, fooArray, n);

        // A kernel launch returns nothing; configuration errors are only
        // visible through cudaGetLastError().
        const cudaError_t launchStatus = cudaGetLastError();
        if (launchStatus != cudaSuccess) {
            fprintf(stderr, "cuda_doStuff: kernel launch failed: %s\n",
                    cudaGetErrorString(launchStatus));
        }
    }

    // Wait for completion; errors raised while the kernel was running
    // surface here.
    const cudaError_t syncStatus = cudaDeviceSynchronize();
    if (syncStatus != cudaSuccess) {
        fprintf(stderr, "cuda_doStuff: device synchronization failed: %s\n",
                cudaGetErrorString(syncStatus));
    }
}
我也试过 double 类型,同样得到了一些随机结果。有人知道原因吗?

问题出在这一行:
cudaError_t error = cudaMemcpy(d_fooArray, &fooArray, bytes, cudaMemcpyHostToDevice);
您正在传递指针的地址作为第二个参数。
相反,它必须是指针本身:
cudaError_t error = cudaMemcpy(d_fooArray, fooArray, bytes, cudaMemcpyHostToDevice);
谢谢,m.s.修好了!
cudaError_t error = cudaMemcpy(d_fooArray, fooArray, bytes, cudaMemcpyHostToDevice);