Cuda 空主机阵列的推力操作_Cuda_Thrust

CUDA：Thrust 操作后主机数组为空（全为零）

cuda

Cuda 空主机阵列的推力操作,cuda,thrust,Cuda,Thrust,我想做一些推力操作，但我不确定具体如何现在，我正在接收一个满是零的am数组（h_a数组）我有： #include <cstdio> #include <cstdlib> #include <cmath> #include <iostream> #include <cuda.h> #include <cuda_runtime_api.h> #include <thrust/device_ptr.h> #in

我想做一些 Thrust 操作，但我不确定具体该怎么做。

现在，我得到的是一个全为零的数组（h_a 数组）。

我有：

#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <iostream>

#include <cuda.h>
#include <cuda_runtime_api.h>

#include <thrust/device_ptr.h>
#include <thrust/fill.h>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/copy.h>
#include <thrust/generate.h>


// Unary function object that multiplies its argument by itself.
// Usable from both host and device code, e.g. as the operator passed to
// thrust::transform.
template <typename T>
struct square
{
    // Returns value * value, computed in type T.
    __host__ __device__
    T operator()( const T& value ) const
    {
        const T squared = value * value;
        return squared;
    }

};


// Demo program: for NbOfPoints random points computes, on the device,
//     Tmp[i] = (Kx[i] - X[i])^2 + (Ky[i] - Y[i])^2
// with Thrust algorithms, copies the result to the host and prints the first
// five values.
//
// Returns 0 on success and 1 if the device buffers are too small or a CUDA
// allocation fails. The command-line arguments are not used.
int
main(
             int argc,
    const char * argv[] )
{
    (void) argc;
    (void) argv;

    const size_t NbOfPoints  = 256;

    int BlocksPerGridX    = 16;
    int BlocksPerGridY    = 16;

    int ThreadsPerBlockX  = 16;
    int ThreadsPerBlockY  = 16;

    // number of elements in the raw X / Y device buffers
    const int NbX = ThreadsPerBlockX * BlocksPerGridX;
    const int NbY = ThreadsPerBlockY * BlocksPerGridY;

    // The binary transforms below read NbOfPoints elements starting at dev_X
    // and dev_Y, so both buffers must be at least that long.
    if ( static_cast<size_t>( NbX ) < NbOfPoints || static_cast<size_t>( NbY ) < NbOfPoints )
    {
        fprintf( stderr, "X/Y buffers are smaller than NbOfPoints\n" );
        return 1;
    }

    // generate random data on the host
    thrust::host_vector<float> h_Kx ( NbOfPoints );
    thrust::generate( h_Kx.begin(), h_Kx.end(), rand );

    thrust::host_vector<float> h_Ky ( NbOfPoints );
    thrust::generate( h_Ky.begin(), h_Ky.end(), rand );

    // transfer to device
    thrust::device_vector<float> dev_Kx = h_Kx;
    thrust::device_vector<float> dev_Ky = h_Ky;

    // allocate the raw device buffers, checking the result of every call
    int * X = NULL , * Y = NULL;
    cudaError_t err = cudaMalloc((void **) &X, NbX * sizeof(*X) );
    if ( err == cudaSuccess )
        err = cudaMalloc((void **) &Y, NbY * sizeof(*Y) );
    if ( err != cudaSuccess )
    {
        fprintf( stderr, "cudaMalloc failed: %s\n", cudaGetErrorString( err ) );
        cudaFree( X );   // no-op when X is still NULL
        return 1;
    }

    // wrap raw pointer with a device_ptr
    thrust::device_ptr<int> dev_X ( X );
    thrust::device_ptr<int> dev_Y ( Y );

    // use device_ptr in Thrust algorithms
    thrust::fill( dev_X, dev_X + NbX , (int) 0 );
    thrust::fill( dev_Y, dev_Y + NbY , (int) 0 );

    // setup arguments: the distances are stored as int, so square them as int
    square<int> square_op;

    // create various vectors
    thrust::device_vector<int> distX ( NbOfPoints );
    thrust::device_vector<int> distY ( NbOfPoints );
    thrust::device_vector<unsigned int> Tmp ( NbOfPoints );
    thrust::host_vector<unsigned int> h_a ( NbOfPoints );
    thrust::device_vector<unsigned int> distXSquared ( NbOfPoints );
    thrust::device_vector<unsigned int> distYSquared ( NbOfPoints );


    // compute distX = dev_Kx - dev_X and distY = dev_Ky - dev_Y
    // The second argument must be end(): passing begin() twice describes an
    // empty range, so nothing would be computed and the outputs would stay 0.
    thrust::transform( dev_Kx.begin(), dev_Kx.end(), dev_X , distX.begin() , thrust::minus<int>() );
    thrust::transform( dev_Ky.begin(), dev_Ky.end(), dev_Y , distY.begin() , thrust::minus<int>() );

    // square distances
    // NOTE(review): the inputs come straight from rand(), so these squares can
    // exceed the range of int; scale the input data if exact values matter.
    thrust::transform( distX.begin(), distX.end(), distXSquared.begin(), square_op );
    thrust::transform( distY.begin(), distY.end(), distYSquared.begin(), square_op );

    // compute Tmp = distXSquared + distYSquared
    thrust::transform( distXSquared.begin() , distXSquared.end() , distYSquared.begin() , Tmp.begin() , thrust::plus<unsigned int>() );
    thrust::copy( Tmp.begin(), Tmp.end(), h_a.begin() );


    for ( int i = 0; i < 5; i ++ )
        printf("\n temp = %u",h_a[ i ] );

    // release the raw buffers obtained from cudaMalloc
    cudaFree( X );
    cudaFree( Y );

return 0;
}

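#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <iostream>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <thrust/device_ptr.h>
#include <thrust/fill.h>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/copy.h>
#include <thrust/generate.h>
template <typename T>
struct square
{
    __host__ __device__
    T operator()( const T& x ) const
    {
        return x * x;
    }
};
int
main(
             int argc,
    const char * argv[] )
{
    const size_t NbOfPoints  = 256;
    int BlocksPerGridX    = 16;
    int BlocksPerGridY    = 16;
    int ThreadsPerBlockX  = 16;
    int ThreadsPerBlockY  = 16;
    // generate random data on the host
    thrust::host_vector<float> h_Kx ( NbOfPoints );
    thrust::generate( h_Kx.begin(), h_Kx.end(), rand );
    thrust::host_vector<float> h_Ky ( NbOfPoints );
    thrust::generate( h_Ky.begin(), h_Ky.end(), rand );
    // transfer to device
    thrust::device_vector<float> dev_Kx = h_Kx;
    thrust::device_vector<float> dev_Ky = h_Ky;
    // create arrays for holding the number of threads per block in each dimension
    int * X , * Y;
    cudaMalloc((void **) &X, ThreadsPerBlockX * BlocksPerGridX * sizeof(*X) );
    cudaMalloc((void **) &Y, ThreadsPerBlockY * BlocksPerGridY * sizeof(*Y) );
    // wrap raw pointer with a device_ptr
    thrust::device_ptr<int> dev_X ( X );
    thrust::device_ptr<int> dev_Y ( Y );
    // use device_ptr in Thrust algorithms
    thrust::fill( dev_X, dev_X + ( ThreadsPerBlockX * BlocksPerGridX ) , (int) 0 );
    thrust::fill( dev_Y, dev_Y + ( ThreadsPerBlockY * BlocksPerGridY ) , (int) 0 );
    // setup arguments
    square<float> square_op;
    // create various vectors
    thrust::device_vector<int> distX ( NbOfPoints );
    thrust::device_vector<int> distY ( NbOfPoints );
    thrust::device_vector<unsigned int> Tmp ( NbOfPoints );
    thrust::host_vector<unsigned int> h_a ( NbOfPoints );
    thrust::device_vector<unsigned int> distXSquared ( NbOfPoints );
    thrust::device_vector<unsigned int> distYSquared ( NbOfPoints );
    // compute distX = dev_Kx - dev_X and distY = dev_Ky - dev_Y
    thrust::transform( dev_Kx.begin(), dev_Kx.begin(), dev_X , distX.begin() , thrust::minus<float>() );
    thrust::transform( dev_Ky.begin(), dev_Ky.begin(), dev_Y , distY.begin() , thrust::minus<float>() );
    //square distances
    thrust::transform( distX.begin(), distX.end(), distXSquared.begin(), square_op );
    thrust::transform( distY.begin(), distY.end(), distYSquared.begin(), square_op );
    // compute Tmp =  distX + distY
    thrust::transform( distXSquared.begin() ,distXSquared.begin() , distYSquared.begin() , Tmp.begin() , thrust::plus<unsigned int>() );
    thrust::copy( Tmp.begin(), Tmp.end(), h_a.begin() );
    for ( int i = 0; i < 5; i ++ )
        printf("\n temp = %u",h_a[ i ] );
return 0;
}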

更新：

除了 Robert Crovella 指出的修改之外，还必须把函数对象的类型改为整数：

// Corrected calls: the functors are instantiated for int, and each transform
// passes end() as its second iterator so the input range is
// [begin(), end()) rather than an empty one.
square<int> square_op;
thrust::transform( dev_Kx.begin(), dev_Kx.end(), dev_X , distX.begin() , thrust::minus<int>() );
thrust::transform( dev_Ky.begin(), dev_Ky.end(), dev_Y , distY.begin() , thrust::minus<int>() );

square<int> square_op;
thrust::transform( dev_Kx.begin(), dev_Kx.end(), dev_X , distX.begin() , thrust::minus<int>() );
thrust::transform( dev_Ky.begin(), dev_Ky.end(), dev_Y , distY.begin() , thrust::minus<int>() );

您有几个执行零长度转换的实例：

// Quoted as the defect: the first two arguments are both begin(), so the
// input range [first, last) is empty and these calls process no elements.
thrust::transform( dev_Kx.begin(), dev_Kx.begin(), dev_X , distX.begin() , thrust::minus<float>() );
thrust::transform( dev_Ky.begin(), dev_Ky.begin(), dev_Y , distY.begin() , thrust::minus<float>() );

thrust::transform( dev_Kx.begin(), dev_Kx.begin(), dev_X , distX.begin() , thrust::minus<float>() ); thrust::transform( dev_Ky.begin(), dev_Ky.begin(), dev_Y , distY.begin() , thrust::minus<float>() ); 以及：

thrust::transform( distXSquared.begin() , distXSquared.begin() , distYSquared.begin() , Tmp.begin() , thrust::plus<unsigned int>() );

由于上述每个变换的前两个参数相同，因此所做的功为零。大概您希望相应的

.end()

迭代器位于第二个位置，而不是

.begin()

当我进行这些更改时，我会打印出非零值。它们非常大，但你似乎在对大值进行平方运算，所以我不确定你的意图是什么。

这些“各种错误”到底是什么？ @Park Young-Bae：我已更新。 发布一个别人可以自己编译和运行的示例有多困难？我感到失望的是，即使在问了 200 个问题之后，您似乎仍然没有理解这个网站是如何运作的。对于“为什么这段代码不工作？”这类问题，您需要提供一个最小可复现示例：其他人无需添加或更改任何内容，复制、粘贴、编译并运行后就能看到问题。 @talonmies：好的，我更新了。 您好，我只是想问一下：如果上面的“dev_X”是一个向量，我会使用“dev_X.begin()”。那么，当我们使用“begin()”时，是否意味着整个向量，从开始到结束？谢谢！ 您可能需要了解一下

std::vector

.begin()

是属于

向量

类的成员函数，它返回一个“指向”向量开头（即第一个元素）的迭代器。这并不意味着将使用整个向量。如果仍然不清楚，我建议您发布一个新问题。注意：好的，但是在上面那一行（transform）中，我想做减法“dev_Kx - dev_X”。我如何确保它也遍历 dev_X 的所有元素？因为 transform 函数的参数有 5 个。我建议您发布一个新问题。注意：好，

// Quoted as the defect: both range iterators are distXSquared.begin(), so the
// input range is empty and nothing is written to Tmp.
thrust::transform( distXSquared.begin() ,distXSquared.begin() , distYSquared.begin() , Tmp.begin() , thrust::plus<unsigned int>() );