CUDA错误结果

CUDA错误结果,cuda,Cuda,我有一些非常简单的CUDA代码和自定义类: #ifdef __CUDACC__ #define CUDA_CALLABLE_MEMBER __host__ __device__ #else #define CUDA_CALLABLE_MEMBER #endif class Foo { public: int val; CUDA_CALLABLE_MEMBER Foo() { } CUDA_CALLABLE_MEMBER ~Foo() {} CUDA_C

我有一些非常简单的CUDA代码和自定义类:

// Expands to `__host__ __device__` when this header is compiled by nvcc, so
// the members below are callable from both host and device code. For any
// other compiler it expands to nothing, which keeps the header usable from
// ordinary C++ translation units.
#ifdef __CUDACC__
#define CUDA_CALLABLE_MEMBER __host__ __device__
#else
#define CUDA_CALLABLE_MEMBER
#endif

// Minimal value holder whose members are usable on host and device.
class Foo {
public:
    int val;  // public payload, also exposed through getVal()

    // Zero-initialises val so a default-constructed Foo never exposes an
    // indeterminate value.
    CUDA_CALLABLE_MEMBER Foo() : val(0) {
    }
    CUDA_CALLABLE_MEMBER ~Foo() {}

    // Returns the stored value. Declared const so it can also be called on
    // const objects and through const references/pointers.
    CUDA_CALLABLE_MEMBER int getVal() const {
        return val;
    }
};
以下是自定义类:

// Expands to `__host__ __device__` when this header is compiled by nvcc, so
// the members below are callable from both host and device code. For any
// other compiler it expands to nothing, which keeps the header usable from
// ordinary C++ translation units.
#ifdef __CUDACC__
#define CUDA_CALLABLE_MEMBER __host__ __device__
#else
#define CUDA_CALLABLE_MEMBER
#endif

// Minimal value holder whose members are usable on host and device.
class Foo {
public:
    int val;  // public payload, also exposed through getVal()

    // Zero-initialises val so a default-constructed Foo never exposes an
    // indeterminate value.
    CUDA_CALLABLE_MEMBER Foo() : val(0) {
    }
    CUDA_CALLABLE_MEMBER ~Foo() {}

    // Returns the stored value. Declared const so it can also be called on
    // const objects and through const references/pointers.
    CUDA_CALLABLE_MEMBER int getVal() const {
        return val;
    }
};
以下是内核:

#include <cuda.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include "Custom.h"
#include <string>
#include <iostream>
using namespace std;

// Debug kernel: each thread whose flat 1D index is below n prints its index,
// its block index and the `val` member of the matching fooArray element.
//
// maxNumberOfThreads is accepted but not used by the body.
// fooArray is indexed with the flat thread index; only indices < n are read.
// Expects a 1D launch (only the .x components are used).
__global__ void someKernel(int maxNumberOfThreads, Foo* fooArray, int n)
{
    const int globalIdx = threadIdx.x + blockDim.x * blockIdx.x;

    // Threads past the end of the array have nothing to report.
    if (globalIdx >= n) {
        return;
    }

    printf("Hello from thread # %i(block #: %i) with value %d\n", globalIdx, blockIdx.x, fooArray[globalIdx].val);
}

// Host-side launcher: runs someKernel over the first n elements of fooArray
// and blocks until the kernel has finished.
//
// fooArray is dereferenced inside the kernel, so it must point to memory the
// device can access. n is the element count; a null fooArray or n <= 0 is
// treated as a no-op.
//
// The function returns void, so failures are reported on stderr instead of
// being silently dropped.
extern void cuda_doStuff(Foo* fooArray, int n)
{
    if (fooArray == NULL || n <= 0) {
        return;  // nothing to process, and a zero-block launch would be invalid
    }

    // Block size is kept at its historical value so the printed block numbers
    // do not change for inputs that already worked.
    const int threadsPerBlock = 100;
    // Ceil-div (overflow-safe for n > 0) so every element gets a thread,
    // instead of a fixed grid that silently skipped indices >= 1000.
    const int numberOfBlocks = (n - 1) / threadsPerBlock + 1;
    const int maxNumberOfThreads = 10;  // forwarded as-is; the kernel does not read it

    someKernel<<<numberOfBlocks, threadsPerBlock>>>(maxNumberOfThreads, fooArray, n);

    // Launch-configuration errors are only visible through cudaGetLastError().
    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess) {
        fprintf(stderr, "someKernel launch failed: %s\n", cudaGetErrorString(status));
        return;
    }

    // Faults raised while the kernel runs surface at the next synchronizing call.
    status = cudaDeviceSynchronize();
    if (status != cudaSuccess) {
        fprintf(stderr, "someKernel execution failed: %s\n", cudaGetErrorString(status));
    }
}

我也试过 double 类型,同样得到了一些随机结果。有人知道原因吗?

这一行导致问题:

// BUG (kept as reported): `&fooArray` is the address of the host pointer
// variable itself, not of the data it points to, so the copy reads the wrong bytes.
cudaError_t error = cudaMemcpy(d_fooArray, &fooArray, bytes, cudaMemcpyHostToDevice);
您把指针的地址作为第二个参数传了进去;正确的做法是传入指针本身:

// Corrected call: the host pointer itself is passed as the copy source.
// NOTE(review): `error` should be compared against cudaSuccess by the caller.
cudaError_t error = cudaMemcpy(d_fooArray, fooArray, bytes, cudaMemcpyHostToDevice);

谢谢 m.s.,问题解决了!
// Corrected call: the host pointer itself is passed as the copy source.
cudaError_t error = cudaMemcpy(d_fooArray, fooArray, bytes, cudaMemcpyHostToDevice);