C++ 如何在设备上运行 thrust::count？（CUDA）_C++_Cuda_Geometry_Thrust_Ransac

C++ 如何在设备上运行 thrust::count？（CUDA）

c++ cuda geometry

C++ 如何在设备上运行推力：：计数？（Cuda）,c++,cuda,geometry,thrust,ransac,C++,Cuda,Geometry,Thrust,Ransac,我想实施RANSAC。我生成了60k个点和500个平面，我想计算每个平面附近有多少个点。然后选择具有最大值的一个生成向量（d_-vec）和平面（d_-pl）并将它们传输到GPU后，我使用推力：：变换，并在其中使用推力：如果计算闭合点的数量不幸的是，我得到了这个错误： 1>D:\Projects\cuda\CudaTest\CudaTest>"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\bin\nvcc.exe"

我想实施RANSAC。我生成了60k个点和500个平面，我想计算每个平面附近有多少个点。然后选择具有最大值的一个

在生成点向量（d_vec）和平面（d_pl）并将它们传输到 GPU 之后，我使用 thrust::transform，并在其中使用 thrust::count_if 来计算邻近点的数量。

不幸的是，我得到了这个错误：

1>D:\Projects\cuda\CudaTest\CudaTest>"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\bin\nvcc.exe" -gencode=arch=compute_30,code=\"sm_30,compute_30\" --use-local-env --cl-version 2015 -ccbin "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\x86_amd64" -x cu  -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\include"     --keep-dir x64\Release -maxrregcount=0  --machine 64 --compile -cudart static     -DWIN32 -DWIN64 -DNDEBUG -D_CONSOLE -D_MBCS -Xcompiler "/EHsc /W3 /nologo /O2 /FS /Zi  /MD " -o x64\Release\kernel.cu.obj "D:\Projects\cuda\CudaTest\CudaTest\kernel.cu"
1>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\include\thrust/detail/type_traits/pointer_traits.h(201): error : calling a __host__ function("thrust::detail::vector_base< ::Vec3,  ::thrust::device_malloc_allocator< ::Vec3> > ::begin") from a __device__ function("thrust::cuda_cub::__transform::unary_transform_f< ::thrust::detail::normal_iterator< ::thrust::device_ptr< ::Plane> > ,  ::thrust::detail::normal_iterator< ::thrust::device_ptr<int> > ,  ::thrust::cuda_cub::__transform::no_stencil_tag,  ::plane_functor,  ::thrust::cuda_cub::__transform::always_true_predicate> ::operator ()<long long> ") is not allowed
1>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\include\thrust/detail/type_traits/pointer_traits.h(201): error : identifier "thrust::detail::vector_base< ::Vec3,  ::thrust::device_malloc_allocator< ::Vec3> > ::begin" is undefined in device code
1>D:/Projects/cuda/CudaTest/CudaTest/kernel.cu(84): error : calling a __host__ function("thrust::detail::vector_base< ::Vec3,  ::thrust::device_malloc_allocator< ::Vec3> > ::end") from a __device__ function("thrust::cuda_cub::__transform::unary_transform_f< ::thrust::detail::normal_iterator< ::thrust::device_ptr< ::Plane> > ,  ::thrust::detail::normal_iterator< ::thrust::device_ptr<int> > ,  ::thrust::cuda_cub::__transform::no_stencil_tag,  ::plane_functor,  ::thrust::cuda_cub::__transform::always_true_predicate> ::operator ()<long long> ") is not allowed
1>D:/Projects/cuda/CudaTest/CudaTest/kernel.cu(84): error : identifier "thrust::detail::vector_base< ::Vec3,  ::thrust::device_malloc_allocator< ::Vec3> > ::end" is undefined in device code

1>D:\Projects\cuda\CudaTest\CudaTest>“C:\Program Files\NVIDIA GPU Computing Toolkit\cuda\v9.0\bin\nvcc.exe”-gencode=arch=compute\u 30，code=\“sm\u 30，compute\u 30\”--使用本地环境--cl版本2015-ccbin“C:\Program Files（x86）\Microsoft Visual Studio 14.0\VC\bin\x86\x86\u amd64”-x cu-I“C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\include”-I“C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\include”-keep dir x64\Release-maxrregcount=0--machine 64--compile cudart static-DWIN32-DWIN64-DNDEBUG-D_控制台-D_MBCS-Xcompiler”/EHsc/W3/nologo/O2/FS/Zi-o x64\Release\kernel.cu.obj”D:\Projects\cuda\CudaTest\CudaTest\kernel.cu”
1> C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\include\推力/细节/类型/指针特性/指针特性.h（201）：错误：从一个uu设备函数（）调用一个u主机函数（“推力：：细节：：向量基<：：Vec3，：：推力：：设备malloc_分配器<：：Vec3>：：开始”）推力：：cuda_cub:：：：推力：：变换：：一元变换<：：推力：：细节：：正常迭代器<：：推力：：设备_ptr<：：平面>>，：：推力：：细节：：正常迭代器<：：推力：：设备_ptr>，：：推力：：cuda_cub:：：无模具标记，：：平面(函子，：：推力：：设备)cuda(变换：：始终)真(谓词：：）运算符（））不允许
1> C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\include\推力/细节/类型\特性/指针\特性.h（201）：错误：标识符“推力：：细节：：向量\基<：：Vec3，：：推力：：设备\ malloc \分配程序<：：Vec3>>：：开始”在设备代码中未定义
1> D:/Projects/cuda/CudaTest/CudaTest/kernel.cu（84）：错误：从一个uu设备u函数调用一个u主机u函数（“推力：：细节：：向量u基<：：Vec3，：：推力：：设备malloc_分配器<：：Vec3>：：end”）推力：：cuda_cub:：：：推力：：变换：：一元变换<：：推力：：细节：：正常迭代器<：：推力：：设备_ptr<：：平面>>，：：推力：：细节：：正常迭代器<：：推力：：设备_ptr>，：：推力：：cuda_cub:：：无模具标记，：：平面(函子，：：推力：：设备)cuda(变换：：始终)真(谓词：：）运算符（））不允许
1> D:/Projects/cuda/CudaTest/CudaTest/kernel.cu（84）：错误：设备代码中未定义标识符“推力：：细节：：向量\基<：：Vec3，：：推力：：设备\分配程序<：：Vec3>>：：end”

怎样才能从设备代码中调用 thrust::count_if？我哪里做错了？这是完整的代码：

#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/generate.h>
#include <thrust/sort.h>
#include <thrust/copy.h>
#include <thrust/execution_policy.h>
#include <algorithm>
#include <iostream>
#include <cstdlib>
#include <time.h>
#include <thrust/count.h>
#include <thrust/extrema.h>

/// A point in 3-D space stored as three single-precision components.
struct Vec3 {
    /// Stream insertion for Vec3; only declared here.
    friend std::ostream& operator<<(std::ostream& os, const Vec3& dt);

    float x;  ///< X component.
    float y;  ///< Y component.
    float z;  ///< Z component.
};

/// Writes the vector to `os` as "x, y, z" and returns the stream so that
/// insertions can be chained.
std::ostream& operator<<(std::ostream& os, const Vec3& dt)
{
    const char* const separator = ", ";
    return os << dt.x << separator << dt.y << separator << dt.z;
}

/// Plane in implicit form: a*x + b*y + c*z + d = 0.
struct Plane {
    float a;  ///< X component of the (unnormalized) normal.
    float b;  ///< Y component of the (unnormalized) normal.
    float c;  ///< Z component of the (unnormalized) normal.
    float d;  ///< Constant term.

    /// Builds the plane passing through the three points A, B and C.
    ///
    /// (a, b, c) is the cross product of the edges AB and AC, and d is chosen
    /// so that A satisfies the plane equation.
    /// Formula reference: https://keisan.casio.com/exec/system/1223596129
    static Plane FromPoints(Vec3 A, Vec3 B, Vec3 C) {
        // Edge A -> B.
        const float abx = B.x - A.x;
        const float aby = B.y - A.y;
        const float abz = B.z - A.z;

        // Edge A -> C.
        const float acx = C.x - A.x;
        const float acy = C.y - A.y;
        const float acz = C.z - A.z;

        Plane plane;
        plane.a = aby * acz - acy * abz;
        plane.b = abz * acx - acz * abx;
        plane.c = abx * acy - acx * aby;
        plane.d = -(plane.a * A.x + plane.b * A.y + plane.c * A.z);
        return plane;
    }
};

/// Produces a pseudo-random point whose coordinates each lie in [0, 1000].
/// Draws three values from rand(), in the order x, y, z.
Vec3 generator() {
    const float scale = 1000.f;

    Vec3 point;
    point.x = static_cast<float>(rand()) / static_cast<float>(RAND_MAX) * scale;
    point.y = static_cast<float>(rand()) / static_cast<float>(RAND_MAX) * scale;
    point.z = static_cast<float>(rand()) / static_cast<float>(RAND_MAX) * scale;
    return point;
}

/// Returns a pseudo-random index in [0, 65536), suitable for indexing the
/// 65536-element point vector built in main().
///
/// The modulus used to be 69632, which is larger than the point vector. On
/// platforms where RAND_MAX >= 65536 that produced indices past the end of
/// the vector (out-of-bounds reads while building the planes).
///
/// Note: where RAND_MAX is only 32767, rand() can never reach the upper half
/// of the range, so only the first 32768 points are ever sampled.
int index_generator() {
    constexpr int kPointCount = 65536;  // must not exceed the size of h_vec in main()
    return rand() % kPointCount;
}

struct plane_distance {

    const Plane pl;

    __device__ plane_distance(const Plane pl) : pl(pl) {}

    __device__ bool operator()(const Vec3& vv) const {
        return fabsf(pl.a*vv.x + pl.b*vv.y + pl.c*vv.z + pl.d) / sqrtf(pl.a*pl.a + pl.b*pl.b + pl.c*pl.c) > 0.128f;
    }

};

struct plane_functor
{
    thrust::device_vector<Vec3>& d_vec;

    plane_functor(thrust::device_vector<Vec3>& d_vec) : d_vec(d_vec) {}

    __device__ int operator()(const Plane& pl) const {

        return thrust::count_if(thrust::device, d_vec.begin(), d_vec.end(), plane_distance(pl));

    }
};

/// Test driver: generates a random point cloud and random candidate planes on
/// the host, uploads both to the GPU, computes one count per plane with
/// plane_functor, and reports the plane with the highest count.
int main(void)
{
    const std::size_t kPointCount = 65536;
    const std::size_t kPlaneCount = 512;

    // Generate random points for testing

    std::cout << "Generating..." << std::endl;

    // generate random vectors serially
    thrust::host_vector<Vec3> h_vec(kPointCount);
    std::generate(h_vec.begin(), h_vec.end(), generator);

    // Generate random planes, each through three randomly chosen points
    thrust::host_vector<Plane> h_pl(kPlaneCount);
    std::generate(h_pl.begin(), h_pl.end(), [&h_vec]() {

        // index_generator() knows nothing about the actual vector size, so
        // reduce its result into range here to rule out out-of-bounds reads.
        const std::size_t n = h_vec.size();

        return Plane::FromPoints(
            h_vec[index_generator() % n],
            h_vec[index_generator() % n],
            h_vec[index_generator() % n]
        );

    });

    std::cout << "Transfer" << std::endl;

    // transfer data to the device
    thrust::device_vector<Vec3> d_vec = h_vec;
    thrust::device_vector<Plane> d_pl = h_pl;
    thrust::device_vector<int> counts(d_pl.size());

    std::cout << "Searching" << std::endl;

    // device_vector iterators already dispatch to the device backend, so no
    // explicit execution policy is passed alongside them.
    thrust::transform(d_pl.begin(), d_pl.end(), counts.begin(), plane_functor(d_vec));

    const auto best = thrust::max_element(counts.begin(), counts.end());

    // Dereferencing a device_vector iterator on the host copies the element back.
    const int best_count = *best;
    std::cout << "Best plane: #" << (best - counts.begin())
              << " (" << best_count << " points)" << std::endl;

    std::cout << "Press any key to exit" << std::endl;
    std::cin.get();

    return 0;
}

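#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/generate.h>
#include <thrust/sort.h>
#include <thrust/copy.h>
#include <thrust/execution_policy.h>
#include <algorithm>
#include <iostream>
#include <cstdlib>
#include <time.h>
#include <thrust/count.h>
#include <thrust/extrema.h>
struct Vec3 {
    float x;
    float y;
    float z;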
friend std::ostream& operator<< ……（代码副本在此处被截断，完整代码见上文）

正如评论中所建议的，在设备代码中访问 device_vector 是非法的。尽管名字里带有“device”，但在撰写本文时所有可用的 Thrust 版本中，它都是主机端的抽象。之所以会出现这些错误，是因为您的函子在设备代码中调用了 device_vector 的复制构造，而这需要构造新的容器并进行内存分配，因此无法通过编译。
您应该能够改用原始设备指针来实现这一点，例如：
// Maps a plane to the number of points in the raw device range
// [d_vec0, d_vec1) for which plane_distance(plane) is true.
struct plane_functor
{
    Vec3* d_vec0;  // first point of the range
    Vec3* d_vec1;  // one past the last point of the range

    __host__ __device__
    plane_functor(Vec3* d_vec0, Vec3* d_vec1)
        : d_vec0(d_vec0)
        , d_vec1(d_vec1)
    {
    }

    __device__
    int operator()(const Plane& pl) const
    {
        const plane_distance predicate(pl);
        return thrust::count_if(thrust::device, d_vec0, d_vec1, predicate);
    }
};

// ....

Vec3* d_vec0 = thrust::raw_pointer_cast(d_vec.data());
Vec3* d_vec1 = d_vec0 + (d_vec.end() - d_vec.begin());
thrust::transform(d_pl.begin(), d_pl.end(), counts.begin(), plane_functor( d_vec0, d_vec1 ) );

请注意，虽然这段代码在我这里可以编译，但我无法运行您的代码，因为主机端用于初始化的 lambda 在我尝试运行时会崩溃。另外，请特别注意不要混用基于标记（tag）和基于执行策略（policy）的调用方式。按原样编写的 thrust::transform 调用即使配上有效的函子也会失败，原因在于 device_vector 迭代器与 thrust::device 的组合。
正如评论中所建议的，在设备代码中访问 device_vector 是非法的。尽管名字里带有“device”，但在撰写本文时所有可用的 Thrust 版本中，它都是主机端的抽象。之所以会出现这些错误，是因为您的函子在设备代码中调用了 device_vector 的复制构造，而这需要构造新的容器并进行内存分配，因此无法通过编译。
您应该能够改用原始设备指针来实现这一点，例如：
// Maps a plane to the number of points in the raw device range
// [d_vec0, d_vec1) for which plane_distance(plane) is true.
struct plane_functor
{
    Vec3* d_vec0;  // first point of the range
    Vec3* d_vec1;  // one past the last point of the range

    __host__ __device__
    plane_functor(Vec3* d_vec0, Vec3* d_vec1)
        : d_vec0(d_vec0)
        , d_vec1(d_vec1)
    {
    }

    __device__
    int operator()(const Plane& pl) const
    {
        const plane_distance predicate(pl);
        return thrust::count_if(thrust::device, d_vec0, d_vec1, predicate);
    }
};

// ....

Vec3* d_vec0 = thrust::raw_pointer_cast(d_vec.data());
Vec3* d_vec1 = d_vec0 + (d_vec.end() - d_vec.begin());
thrust::transform(d_pl.begin(), d_pl.end(), counts.begin(), plane_functor( d_vec0, d_vec1 ) );

请注意，虽然这段代码在我这里可以编译，但我无法运行您的代码，因为主机端用于初始化的 lambda 在我尝试运行时会崩溃。另外，请特别注意不要混用基于标记（tag）和基于执行策略（policy）的调用方式。按原样编写的 thrust::transform 调用即使配上有效的函子也会失败，原因在于 device_vector 迭代器与 thrust::device 的组合。
问题是由于试图在设备代码中使用 device_vector 容器造成的。这是非法的。我需要在设备函数中访问该 device_vector 容器。我应该如何重写此代码？转换为原始指针。对 transform 的调用也因同样的原因而有问题。请使用执行策略或基于标记的 API，不要混用它们。我用指针替换了引用，但仍然出现此错误。我尝试使用循环来避免 count_if
，但是获取d\u向量的大小并访问它也会导致编译错误，这可能是有趣的问题，因为尝试使用hin设备代码。这是非法的。我需要访问设备函数中的设备向量容器。我应该如何重新编写此代码？转换为原始指针。对转换的调用也因同样的原因被中断。使用执行策略或基于标记的API。不要混合使用。我用指针替换了引用，但仍然会出现此错误或者，我