使用 Thrust 查找向量中给定范围内的元素，然后求平均值_Vector_Cuda_Thrust

使用 Thrust 查找向量中给定范围内的元素，然后求平均值

vector cuda

使用 Thrust 查找向量中给定范围内的元素，然后求平均值,vector,cuda,thrust,Vector,Cuda,Thrust,我有四个像这样的向量 d_xx[0]= 0.75 d_yy[0]= 0.75 d_vx[0]= 1.05488 d_vy[0]= 0.0427136 d_xx[1]= 0.25 d_yy[1]= 0.75 d_vx[1]= 2.03481 d_vy[1]= -0.757107 d_xx[2]= 0.75 d_yy[2]= 0.25 d_vx[2]= 0.234851 d_vy[2]= 1.63537 d_xx[3]= 0.25 d_yy[3]= 0.2

我有四个像这样的向量

d_xx[0]= 0.75   d_yy[0]= 0.75   d_vx[0]= 1.05488    d_vy[0]= 0.0427136
d_xx[1]= 0.25   d_yy[1]= 0.75   d_vx[1]= 2.03481    d_vy[1]= -0.757107
d_xx[2]= 0.75   d_yy[2]= 0.25   d_vx[2]= 0.234851   d_vy[2]= 1.63537
d_xx[3]= 0.25   d_yy[3]= 0.25   d_vx[3]= -0.442835  d_vy[3]= -0.00224912
d_xx[4]= 1.75   d_yy[4]= 0.75   d_vx[4]= 1.86096    d_vy[4]= -0.822878
d_xx[5]= 1.25   d_yy[5]= 0.75   d_vx[5]= -1.52816   d_vy[5]= -1.94596
...

如何找到给定范围内

xx_low=0

，

xx_high=1

和

yy_low=0

，

yy_high=1

之间的元素，然后我想用 Thrust 求出列

d_vx[N]

和

d_vy[N]

的平均值

最好的方法是什么？我必须先排序吗？

我至少可以想到3种可能的实现：

sort
对元素排序，用 find_if 找到满足范围条件的最后一个元素，然后 reduce

copy_if
将所有满足范围条件的元素复制到一个新的向量中，然后 reduce
transform_reduce
使用自定义函子将不在所需范围内的元素置零，并在同一次调用中完成求和
以下代码实现了第三个想法：
#include <thrust/device_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/transform_reduce.h>
#include <thrust/tuple.h>
#include <iostream>

/// Unary functor that maps a (xx, yy, vx, vy) tuple to a (vx, vy, count)
/// contribution for a subsequent sum reduction.
///
/// If (xx, yy) lies inside the closed rectangle
/// [xx_low, xx_high] x [yy_low, yy_high] the result is (vx, vy, 1);
/// otherwise it is (0, 0, 0), so out-of-range elements add nothing to
/// either the sums or the element count.
///
/// @tparam T  Arithmetic type of the coordinates and values.
template <typename T>
struct nullify
{
  T xx_low;   ///< Inclusive lower bound on the first tuple component.
  T xx_high;  ///< Inclusive upper bound on the first tuple component.
  T yy_low;   ///< Inclusive lower bound on the second tuple component.
  T yy_high;  ///< Inclusive upper bound on the second tuple component.

  /// Stores the four inclusive bounds. Annotated for host and device so the
  /// functor can be constructed on either side, like its call operator.
  __host__ __device__
  nullify(T xx_low, T xx_high, T yy_low, T yy_high) : xx_low(xx_low), xx_high(xx_high), yy_low(yy_low), yy_high(yy_high){}

  /// (sum contribution of component 2, sum contribution of component 3, count).
  using result_type = thrust::tuple<T,T,std::size_t>;

  /// @param t  Tuple with at least four components; components 0 and 1 are
  ///           tested against the bounds, components 2 and 3 are forwarded.
  /// @return   (get<2>(t), get<3>(t), 1) when in range, (0, 0, 0) otherwise.
  ///
  /// Declared const because the call does not modify the stored bounds.
  template <typename Tuple>
  __host__ __device__
  result_type operator()(const Tuple& t) const
  {
    const T& xx = thrust::get<0>(t);
    const T& yy = thrust::get<1>(t);

    const bool in_range = xx >= xx_low && xx <= xx_high &&
                          yy >= yy_low && yy <= yy_high;

    if (!in_range)
    {
      return result_type(T(0), T(0), std::size_t(0));
    }

    // Build result_type directly so the count is a std::size_t from the
    // start instead of an int that is converted afterwards.
    return result_type(thrust::get<2>(t), thrust::get<3>(t), std::size_t(1));
  }
};

/// Binary functor that adds two 3-component tuples component by component.
/// Intended as the reduction operator for (sum, sum, count) tuples.
struct tuple_plus
{
    /// @param lhs  Left operand; components 0..2 are read.
    /// @param rhs  Right operand; components 0..2 are read.
    /// @return     Tuple whose i-th component is get<i>(lhs) + get<i>(rhs).
    template <typename Tuple>
    __host__ __device__
    Tuple operator()(const Tuple& lhs, const Tuple& rhs)
    {
        const auto first  = thrust::get<0>(lhs) + thrust::get<0>(rhs);
        const auto second = thrust::get<1>(lhs) + thrust::get<1>(rhs);
        const auto third  = thrust::get<2>(lhs) + thrust::get<2>(rhs);

        return thrust::make_tuple(first, second, third);
    }
};


int main()
{
    using T = float;
    thrust::device_vector<T> d_xx(6);
    thrust::device_vector<T> d_yy(6);
    thrust::device_vector<T> d_vx(6);
    thrust::device_vector<T> d_vy(6);
    d_xx[0]= 0.75; d_yy[0]= 0.75; d_vx[0]= 1.05488;   d_vy[0]= 0.0427136;
    d_xx[1]= 0.25; d_yy[1]= 0.75; d_vx[1]= 2.03481;   d_vy[1]= -0.757107;
    d_xx[2]= 0.75; d_yy[2]= 0.25; d_vx[2]= 0.234851;  d_vy[2]= 1.63537;
    d_xx[3]= 0.25; d_yy[3]= 0.25; d_vx[3]= -0.442835; d_vy[3]= -0.00224912;
    d_xx[4]= 1.75; d_yy[4]= 0.75; d_vx[4]= 1.86096;   d_vy[4]= -0.822878;
    d_xx[5]= 1.25; d_yy[5]= 0.75; d_vx[5]= -1.52816;  d_vy[5]= -1.94596;

    T xx_low  = 0;
    T xx_high = 1;
    T yy_low  = 0;
    T yy_high = 1; 

    auto zip_begin = thrust::make_zip_iterator(thrust::make_tuple(d_xx.begin(), d_yy.begin(), d_vx.begin(), d_vy.begin()));
    auto zip_end = thrust::make_zip_iterator(thrust::make_tuple(d_xx.end(), d_yy.end(), d_vx.end(), d_vy.end()));

    using Functor = nullify<T>;
    using ResultTuple = typename Functor::result_type;
    ResultTuple result = thrust::transform_reduce(zip_begin, zip_end, nullify<T>(xx_low, xx_high, yy_low, yy_high), thrust::make_tuple(T(0), T(0), 0), tuple_plus());

    T avg_d_vx = thrust::get<0>(result) / thrust::get<2>(result);
    T avg_d_vy = thrust::get<1>(result) / thrust::get<2>(result);
    std::cout << "avg_d_vx=" << avg_d_vx << " avg_d_vy=" << avg_d_vy << std::endl;
}

评论：
这个代码使用 Boost C++ 库吗？我在编译时出现了一个错误：error: "template" is not allowed（此处不允许使用 template）。
@AnnasNasrudin 此代码不使用 Boost，但它使用了 C++11（需要 CUDA 7）。
我使用了命令 nvcc --std=c++11 ave.cu -o ave，但仍然出现上面的错误。我使用的是 Cuda compilation tools 7.5 版（V7.5.17）和 g++ (Ubuntu 5.4.0-6ubuntu1~16.04.1) 5.4.0 20160609 编译器。
@AnnasNasrudin 我认为是 template 和 __host__ __device__ 的顺序不正确，我已在回答中纠正了这一点。
很抱歉发表离题的评论，但我很高兴你还在回答 cuda 标签下的问题。
程序输出：
avg_d_vx=0.720426 avg_d_vy=0.229682