C++11 在CUDA函数中使用CUSP矩阵？_C++11_Parallel Processing_Cuda_Gpu_Cusp Library

C++11 在CUDA函数中使用CUSP矩阵？

c++11 parallel-processing cuda

C++11 在CUDA函数中使用CUSP矩阵？,c++11,parallel-processing,cuda,gpu,cusp-library,C++11,Parallel Processing,Cuda,Gpu,Cusp Library,我想写一个核函数，以两个CUSP矩阵A和B作为输入，然后并行地把数据填充到B中 #include <cusp/coo_matrix.h> #include <cusp/print.h> #include <iostream> __global__ void kernel_example(cusp::coo_matrix<int,float,cusp::host_memory>* A, cusp::coo_matrix<int,float,cus

我想写一个核函数，以两个CUSP矩阵A和B作为输入，
然后并行地把数据填充到B中。

#include <cusp/coo_matrix.h>
#include <cusp/print.h>
#include <iostream>

// Kernel taking pointers to two cusp::coo_matrix<int,float,cusp::host_memory>
// objects, A and B. As written it only prints a greeting; the real work is
// still a placeholder.
// NOTE(review): both parameter types are tagged cusp::host_memory; presumably
// their storage is not reachable from device code, so the actual operations
// would need device-side data -- confirm against the CUSP documentation.
__global__ void kernel_example(cusp::coo_matrix<int,float,cusp::host_memory>* A,
cusp::coo_matrix<int,float,cusp::host_memory>* B){
    printf("hello from kernel...");
    //actual operations go here.
}

// Builds two host-side COO matrices, fills A with six entries, and attempts
// to launch kernel_example on them.
int main(void)
{
    // allocate storage
    // NOTE(review): the constructor arguments (4,3,6) are presumably rows,
    // columns and number of stored entries -- confirm against the CUSP docs.
    cusp::coo_matrix<int,float,cusp::host_memory> A(4,3,6);
    cusp::coo_matrix<int,float,cusp::host_memory> B(4,3,6);

    // initialize matrix entries on host
    // Each line sets the row index, column index and value of one entry of A;
    // B's entries are not assigned here.
    A.row_indices[0] = 0; A.column_indices[0] = 0; A.values[0] = 10;
    A.row_indices[1] = 0; A.column_indices[1] = 2; A.values[1] = 20;
    A.row_indices[2] = 2; A.column_indices[2] = 2; A.values[2] = 30;
    A.row_indices[3] = 3; A.column_indices[3] = 0; A.values[3] = 40;
    A.row_indices[4] = 3; A.column_indices[4] = 1; A.values[4] = 50;
    A.row_indices[5] = 3; A.column_indices[5] = 2; A.values[5] = 60;

    // Launch with a single block of a single thread. A and B are passed as
    // objects while kernel_example declares pointer parameters; this is the
    // call the compiler rejects with "no suitable conversion function".
    kernel_example<<<1,1>>>(A,B);
    // Wait for the device to finish; the returned status is not checked.
    cudaDeviceSynchronize();    

    return 0;
}

#包括
#包括
#包括
__全局无效内核示例（cusp:：coo_矩阵*A，
尖点：：coo_矩阵*B）{
printf（“来自内核的你好…”）；
//实际操作在这里进行。
}
内部主（空）
{
//分配存储
尖点：coo_矩阵A（4,3,6）；
尖点：coo_矩阵B（4,3,6）；
//在主机上初始化矩阵项
A.行索引[0]=0；A.列索引[0]=0；A.值[0]=10；
A.行索引[1]=0；A.列索引[1]=2；A.值[1]=20；
A.行索引[2]=2；A.列索引[2]=2；A.值[2]=30；
A.行索引[3]=3；A.列索引[3]=0；A.值[3]=40；
A.行索引[4]=3；A.列索引[4]=1；A.值[4]=50；
A.行索引[5]=3；A.列索引[5]=2；A.值[5]=60；
内核u示例（A，B）；
cudaDeviceSynchronize（）；
返回0；
}

随后出现以下错误：

error: no suitable conversion function from "cusp::coo_matrix<int, float, cusp::host_memory>" to "cusp::coo_matrix<int, float, cusp::host_memory> *" exists

错误：不存在从 "cusp::coo_matrix<int, float, cusp::host_memory>" 到 "cusp::coo_matrix<int, float, cusp::host_memory> *" 的合适转换函数

我该怎么做？

出现该错误是因为函数签名要求的是指针，而您传入的是对象。您可以改为按引用传递，这样就能通过编译。

函数签名应该是：

// Variant of kernel_example that takes the two matrices by reference instead
// of by pointer, so a call that passes the objects directly matches the
// signature. As written it only prints a greeting.
// NOTE(review): the parameters are still cusp::host_memory matrices;
// presumably a reference to a host object cannot actually be used inside the
// kernel -- confirm before relying on this signature.
__global__ void kernel_example(cusp::coo_matrix<int, float, cusp::host_memory>& A,
    cusp::coo_matrix<int, float, cusp::host_memory>& B) {
    printf("hello from kernel...");
    //actual operations go here.
}

\uuuuu全局\uuuuuu无效内核\u示例（cusp:：coo\u矩阵&A，
cusp:：coo_矩阵和B）{
printf（“来自内核的你好…”）；
//实际操作在这里进行。
}

这几乎肯定不是正确的做法。没错，改用按引用传递的参数可以消除编译错误，但您实际上无法在CUDA内核中使用这样的参数。是的，这正是我在使用这段代码后发现的问题。你提前回答了，谢谢你，罗伯特。当然，并不清楚提问者到底在问什么。他们是不是只是盲目地贴出了CUSP网站上的示例代码？真正的问题是不是设备函数和主机函数之间的区别？我已投票关闭该问题。不管问的是什么，我都绝不会建议任何人在CUDA内核上使用引用参数，这几乎从来都不正确。这里列出了一种可能的方法。你的问题可以说是那个问题的重复。您应该阅读相关文档，并理解cusp中存储的

cusp::host_memory

和

cusp::device_memory

之间的区别。要实现这些功能，您需要使用

cusp::device_memory