在CUDA设备代码中使用std::vector_Cuda

在CUDA设备代码中使用std::vector

cuda

在CUDA设备代码中使用std::vector,cuda,Cuda,问题是：有没有办法在CUDA内核中使用“vector”类？当我尝试时，会出现以下错误： error : calling a host function("std::vector<int, std::allocator<int> > ::push_back") from a __device__/__global__ function not allowed 您不能在CUDA设备代码中使用STL，但您可以使用Thrust库来做您想做的事情。否则，只需将向量的内容复制到设备上并正常操作即可。您不……

问题是：有没有办法在Cuda内核中使用“vector”类？当我尝试时，会出现以下错误：

error : calling a host function("std::vector<int, std::allocator<int> > ::push_back") from a __device__/__global__ function not allowed

您不能在CUDA设备代码中使用STL，但您可以使用Thrust库来做您想做的事情。否则，只需将向量的内容复制到设备上并正常操作即可。

您不能在设备代码中使用std::vector，您应该使用数组。
在CUDA的Thrust库中，您可以使用thrust::device_vector在设备上定义向量，主机端的STL vector与device_vector之间的数据传输非常简单。您可以参考相关链接（原文中的链接已丢失）找到一些有用的例子。
我认为你可以自己实现一个设备端的vector，因为CUDA支持在设备代码中进行动态内存分配，也支持operator new/delete。下面是CUDA中一个极其简单的device vector原型，它确实可以工作，但还没有得到充分的测试。
// Minimal growable array for use inside CUDA device code, where std::vector
// is not available.
//
// Storage comes from new T[] / delete[], so T must be default-constructible,
// copy-constructible and destructible. Every slot of the buffer always holds a
// live T object: the slots in [m_begin, m_end) are the stored elements, the
// remaining ones are spare objects that are replaced when an element is added.
//
// The class owns its buffer and is non-copyable: an implicit member-wise copy
// would make two instances delete[] the same buffer.
//
// No member function performs any synchronization, so one instance must not be
// modified by several threads at the same time.
template<typename T>
class LocalVector
{
private:
    T* m_begin;       // start of the owned buffer; nullptr if allocation failed
    T* m_end;         // one past the last stored element

    size_t capacity;  // number of T objects in the buffer (0 if m_begin is nullptr)

    // Replaces the live object at *slot with a copy of value.
    // The old object is destroyed first so that resources it holds are released.
    __device__ static void replaceSlot(T* slot, const T& value) {
        slot->~T();
        new (slot) T(value);
    }

    // Grows the buffer to twice its capacity (or to 16 slots if it is empty).
    // Returns false and leaves the vector untouched when the allocation fails;
    // in device code a failed new yields nullptr instead of throwing.
    __device__ bool expand() {
        const size_t count = getSize();
        const size_t newCapacity = (capacity == 0) ? 16 : capacity * 2;

        T* newBegin = new T[newCapacity];
        if (newBegin == nullptr) {
            return false;
        }

        // Element-wise copy construction instead of memcpy, so that types
        // which are not trivially copyable are moved over correctly.
        for (size_t i = 0; i < count; ++i) {
            replaceSlot(newBegin + i, m_begin[i]);
        }

        delete[] m_begin;
        m_begin = newBegin;
        m_end = newBegin + count;
        capacity = newCapacity;
        return true;
    }
public:
    // Creates an empty vector with room for 16 elements. If the allocation
    // fails the vector starts with capacity 0 and add() retries the allocation.
    __device__  explicit LocalVector() : m_begin(nullptr), m_end(nullptr), capacity(16) {
        m_begin = new T[capacity];
        if (m_begin == nullptr) {
            capacity = 0;
        }
        m_end = m_begin;
    }

    // Copying would lead to a double delete[] of the shared buffer.
    LocalVector(const LocalVector&) = delete;
    LocalVector& operator=(const LocalVector&) = delete;

    // Returns a reference to the element at index. No bounds check is done;
    // index must be smaller than getSize().
    __device__ T& operator[] (unsigned int index) {
        return *(m_begin + index);
    }
    // Pointer to the first stored element.
    __device__ T* begin() {
        return m_begin;
    }
    // Pointer one past the last stored element.
    __device__ T* end() {
        return m_end;
    }
    // Destroys all objects in the buffer and releases it.
    __device__ ~LocalVector()
    {
        delete[] m_begin;
        m_begin = nullptr;
        m_end = nullptr;
    }

    // Appends a copy of t, growing the buffer when it is full.
    // If the buffer cannot be grown the element is NOT stored and the vector
    // is left unchanged; callers that care can compare getSize() before/after.
    __device__ void add(T t) {
        if (getSize() >= capacity) {
            if (!expand()) {
                return;
            }
        }

        replaceSlot(m_end, t);
        m_end++;
    }
    // Removes the last element and returns a copy of it.
    // Returns a default-constructed T when the vector is empty.
    __device__ T pop() {
        if (m_end == m_begin) {
            return T();
        }
        // Step back first: m_end points one past the last element.
        m_end--;
        // The slot stays a live object; it is replaced by the next add() or
        // destroyed by delete[] in expand() / the destructor.
        T endElement = (*m_end);
        return endElement;
    }

    // Number of stored elements.
    __device__ size_t getSize() const {
        return static_cast<size_t>(m_end - m_begin);
    }
};

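template<typename T>
class LocalVector
{
private:
    T* m_begin;
    T* m_end;
    size_t capacity;
    size_t length;
    __device__ void expand() {
        capacity *= 2;
        size_t tempLength = (m_end - m_begin);
        T* tempBegin = new T[capacity];
        memcpy(tempBegin, m_begin, tempLength * sizeof(T));
        delete[] m_begin;
        m_begin = tempBegin;
        m_end = m_begin + tempLength;
        length = static_cast<size_t>(m_end - m_begin);
    }
public:
    __device__  explicit LocalVector() : length(0), capacity(16) {
        m_begin = new T[capacity];
        m_end = m_begin;
    }
    __device__ T& operator[] (unsigned int index) {
        return *(m_begin + index);//*(begin+index)
    }
    __device__ T* begin() {
        return m_begin;
    }
    __device__ T* end() {
        return m_end;
    }
    __device__ ~LocalVector()
    {
        delete[] m_begin;
        m_begin = nullptr;
    }
    __device__ void add(T t) {
        if ((m_end - m_begin) >= capacity) {
            expand();
        }
        new (m_end) T(t);
        m_end++;
        length++;
    }
    __device__ T pop() {
        T endElement = (*m_end);
        delete m_end;
        m_end--;
        return endElement;
    }
    __device__ size_t getSize() {
        return length;
    }
};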
+1，完全合理的问题（不确定为什么它被否决）。不幸的是，目前的答案是否定的。我看不出这有什么帮助，因为thrust::device_vector也不能在内核中使用。即使它是正确的，也可能会因为expand()函数中的内存分配而变慢。不过，这份努力值得肯定。
// Minimal growable array for use inside CUDA device code, where std::vector
// is not available.
//
// Storage comes from new T[] / delete[], so T must be default-constructible,
// copy-constructible and destructible. Every slot of the buffer always holds a
// live T object: the slots in [m_begin, m_end) are the stored elements, the
// remaining ones are spare objects that are replaced when an element is added.
//
// The class owns its buffer and is non-copyable: an implicit member-wise copy
// would make two instances delete[] the same buffer.
//
// No member function performs any synchronization, so one instance must not be
// modified by several threads at the same time.
template<typename T>
class LocalVector
{
private:
    T* m_begin;       // start of the owned buffer; nullptr if allocation failed
    T* m_end;         // one past the last stored element

    size_t capacity;  // number of T objects in the buffer (0 if m_begin is nullptr)

    // Replaces the live object at *slot with a copy of value.
    // The old object is destroyed first so that resources it holds are released.
    __device__ static void replaceSlot(T* slot, const T& value) {
        slot->~T();
        new (slot) T(value);
    }

    // Grows the buffer to twice its capacity (or to 16 slots if it is empty).
    // Returns false and leaves the vector untouched when the allocation fails;
    // in device code a failed new yields nullptr instead of throwing.
    __device__ bool expand() {
        const size_t count = getSize();
        const size_t newCapacity = (capacity == 0) ? 16 : capacity * 2;

        T* newBegin = new T[newCapacity];
        if (newBegin == nullptr) {
            return false;
        }

        // Element-wise copy construction instead of memcpy, so that types
        // which are not trivially copyable are moved over correctly.
        for (size_t i = 0; i < count; ++i) {
            replaceSlot(newBegin + i, m_begin[i]);
        }

        delete[] m_begin;
        m_begin = newBegin;
        m_end = newBegin + count;
        capacity = newCapacity;
        return true;
    }
public:
    // Creates an empty vector with room for 16 elements. If the allocation
    // fails the vector starts with capacity 0 and add() retries the allocation.
    __device__  explicit LocalVector() : m_begin(nullptr), m_end(nullptr), capacity(16) {
        m_begin = new T[capacity];
        if (m_begin == nullptr) {
            capacity = 0;
        }
        m_end = m_begin;
    }

    // Copying would lead to a double delete[] of the shared buffer.
    LocalVector(const LocalVector&) = delete;
    LocalVector& operator=(const LocalVector&) = delete;

    // Returns a reference to the element at index. No bounds check is done;
    // index must be smaller than getSize().
    __device__ T& operator[] (unsigned int index) {
        return *(m_begin + index);
    }
    // Pointer to the first stored element.
    __device__ T* begin() {
        return m_begin;
    }
    // Pointer one past the last stored element.
    __device__ T* end() {
        return m_end;
    }
    // Destroys all objects in the buffer and releases it.
    __device__ ~LocalVector()
    {
        delete[] m_begin;
        m_begin = nullptr;
        m_end = nullptr;
    }

    // Appends a copy of t, growing the buffer when it is full.
    // If the buffer cannot be grown the element is NOT stored and the vector
    // is left unchanged; callers that care can compare getSize() before/after.
    __device__ void add(T t) {
        if (getSize() >= capacity) {
            if (!expand()) {
                return;
            }
        }

        replaceSlot(m_end, t);
        m_end++;
    }
    // Removes the last element and returns a copy of it.
    // Returns a default-constructed T when the vector is empty.
    __device__ T pop() {
        if (m_end == m_begin) {
            return T();
        }
        // Step back first: m_end points one past the last element.
        m_end--;
        // The slot stays a live object; it is replaced by the next add() or
        // destroyed by delete[] in expand() / the destructor.
        T endElement = (*m_end);
        return endElement;
    }

    // Number of stored elements.
    __device__ size_t getSize() const {
        return static_cast<size_t>(m_end - m_begin);
    }
};