C++ 按转换中的值推送异常块内核_C++_C++11_Cuda

C++ 按转换中的值推送异常块内核

c++ c++11 cuda

C++ 按转换中的值推送异常块内核,c++,c++11,cuda,C++,C++11,Cuda,我正在研究一个优化问题，其中包含各种类似形式的数学函数，因此我将它们扭曲成FunctionObj template <typename T> struct FunctionObj { T a; FunctionObj(): a(1) { } }; 我真正想做的是sum{func（x）}，所以我定义了一个FuncEvalF函子来利用struct:：tranform\u reduce template <typename T> struct

我正在研究一个优化问题，其中包含各种类似形式的数学函数，因此我将它们扭曲成

FunctionObj

template <typename T>
struct FunctionObj
{
    T a;
     FunctionObj(): a(1)
    {
    }
};

我真正想做的是

sum{func（x）}

，所以我定义了一个

FuncEvalF

函子来利用

struct:：tranform\u reduce

template <typename T>
struct FuncEvalF
{
    const FunctionObj<T>& f_obj;
    __host__ __device__ inline FuncEvalF(const FunctionObj<T>& in_f_obj) :f_obj(in_f_obj)
{

}
    __host__ __device__ inline T operator()(T x)
    {
        return FuncEval(f_obj, x);
    }
};

template <typename T>
__host__ __device__ inline T BatchFuncEval(const FunctionObj<T>  &f_obj, int size, const T *x_in);
template<>
inline float BatchFuncEval< float>(const FunctionObj<float>  &f_obj, int size, const float *x_in)
{
    return thrust::transform_reduce(thrust::device, thrust::device_pointer_cast(x_in), thrust::device_pointer_cast(x_in + size), FuncEvalF<float>(f_obj), static_cast<float>(0), thrust::plus<float>());
}

auto func = FuncEvalF<float>(FunctionObj<float>());
    float result = 0;
    try
    {
        result = thrust::transform_reduce(thrust::device, thrust::device_pointer_cast(dev_a), thrust::device_pointer_cast(dev_a + 10000), func, static_cast<float>(0), thrust::plus<float>());

    }
    catch (std::exception e)
    {
        printf("%s in thurst \n ", e.what());
    }

例外情况出现了：

bulk\u kernel\u by\u value

，即使我将10000更改为10。只有将

FuncEval

的定义改为

return x;

程序将输出正确但无意义的答案。我不禁要问我的代码出了什么问题？谢谢你的关注。下面是完整的代码，cuda 7.0 sm_20

#include <cuda_runtime.h>
#include <device_launch_parameters.h>

#include <thrust/device_vector.h>
#include <thrust/functional.h>
#include <thrust/inner_product.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/reduce.h>
#include <thrust/execution_policy.h>
#include <thrust/transform_reduce.h>
#include <thrust/transform.h>

#include <stdio.h>

template <typename T>
struct FunctionObj
{
    T a;
     FunctionObj(): a(1)
    {
    }

};

template <typename T>
__host__ __device__  inline T FuncEval(const FunctionObj<T> &f_obj, T x)
{
    return f_obj.a+x;
}


template <typename T>
struct FuncEvalF
{
    const FunctionObj<T>& f_obj;
    __host__ __device__ inline FuncEvalF(const FunctionObj<T>& in_f_obj) :f_obj(in_f_obj)
    {

    }
    __host__ __device__ inline T operator()(T x)
    {
        return FuncEval(f_obj, x);
    }
};
template <typename T>
__host__ __device__ inline T BatchFuncEval(const FunctionObj<T>  &f_obj, int size, const T *x_in);
template<>
inline float BatchFuncEval< float>(const FunctionObj<float>  &f_obj, int size, const float *x_in)
{
    return thrust::transform_reduce(thrust::device, thrust::device_pointer_cast(x_in), thrust::device_pointer_cast(x_in + size), FuncEvalF<float>(f_obj), static_cast<float>(0), thrust::plus<float>());
}
int main()
{
    cudaError_t cudaE;
    float a[10000] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    float* dev_a;
    cudaE = cudaMalloc((void**)(&dev_a), sizeof(float) * 10000);
    cudaE = cudaMemcpy(dev_a, a, sizeof(float) * 10000, cudaMemcpyHostToDevice);
    auto func = FuncEvalF<float>(FunctionObj<float>());
    float result = 0;
    try
    {
        result = thrust::transform_reduce(thrust::device, thrust::device_pointer_cast(dev_a), thrust::device_pointer_cast(dev_a + 10000), func, static_cast<float>(0), thrust::plus<float>());

    }
    catch (std::exception e)
    {
        printf("%s in thurst \n ", e.what());
    }
    printf("the gpu float result is %f\n", result);
    cudaFree(dev_a);
}

#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
模板
结构函数对象
{
Tα；
FunctionObj（）：a（1）
{
}
};
模板
__主机设备内联T函数（常量函数obj和f函数obj，T x）
{
返回f_obj.a+x；
}
模板
结构函数
{
const FunctionObj&f_obj；
__主机设备内联函数alf（常量函数obj和in函数obj）：f函数obj（in函数obj）
{
}
__主机\设备\内联T运算符（）（T x）
{
返回函数（f_obj，x）；
}
};
模板
__主机uuuu设备uuu内联T BatchFuncEval（常量FunctionObj&f_obj，int size，常量T*x_in）；
模板
内联float BatchFuncEval（常量FunctionObj&f_obj，int size，常量float*x_in）
{
返回推力：：变换减少（推力：：设备，推力：：设备指针强制转换（x_英寸），推力：：设备指针强制转换（x_英寸+大小），函数自动转换（f_obj），静态强制转换（0），推力：：加号（）；
}
int main（）
{
!；
浮点数a[10000]={1,2,3,4,5,6,7,8,9,10}；
浮动*dev_a；
cudaE=cudamaloc（（void**）（&dev_a），sizeof（float）*10000；
cudaE=cudaMemcpy（dev_a，a，sizeof（float）*10000，cudaMemcpyHostToDevice）；
auto func=FuncEvalF（FunctionObj（））；
浮动结果=0；
尝试
{
结果=推力：：转换\减少（推力：：设备，推力：：设备\指针\转换（开发a），推力：：设备\指针\转换（开发a+10000），函数，静态\转换（0），推力：：加（）；
}
捕获（标准：：异常e）
{
printf（“%s in thurst\n”，e.what（））；
}
printf（“gpu浮点结果是%f\n”，result）；
cudaFree（dev_a）；
}

问题在于

struct functevalf

中的

f_obj

是一个

常量FunctionObj&

它在主机

FunctionObj（）

上被实例化为临时对象，但对它的引用以后不再有效

解决此问题的一种方法是创建其副本，而不是保留对其的引用：

template <typename T>
struct FuncEvalF
{
    FunctionObj<T> f_obj;
    ....
}

模板
结构函数
{
功能对象f_obj；
....
}

请将您发布的代码转换为一个简短、完整的工作示例，以说明您的问题。如果没有这些，就不可能说问题出在哪里，我只是把所有的代码都粘贴在这里。你能发布一个没有坏语言的版本吗？如果你想像成年人一样被对待，也要像成年人一样对待这里的其他人。对不起，更新了。整天和它战斗让我很生气。这解决了问题。非常感谢你。在刷新此页面之前，我正在尝试使用

asch:：device\u vector[1]

将所有

Func

相关对象移动到设备内存中。终于松了一口气，是的。

template <typename T>
struct FuncEvalF
{
    FunctionObj<T> f_obj;
    ....
}