Vector 使用 Thrust 查找一组元素,然后求平均值

Vector 使用 Thrust 查找一组元素,然后求平均值,vector,cuda,thrust,Vector,Cuda,Thrust,我有四个像这样的向量 d_xx[0]= 0.75 d_yy[0]= 0.75 d_vx[0]= 1.05488 d_vy[0]= 0.0427136 d_xx[1]= 0.25 d_yy[1]= 0.75 d_vx[1]= 2.03481 d_vy[1]= -0.757107 d_xx[2]= 0.75 d_yy[2]= 0.25 d_vx[2]= 0.234851 d_vy[2]= 1.63537 d_xx[3]= 0.25 d_yy[3]= 0.2

我有四个像这样的向量

d_xx[0]= 0.75   d_yy[0]= 0.75   d_vx[0]= 1.05488    d_vy[0]= 0.0427136
d_xx[1]= 0.25   d_yy[1]= 0.75   d_vx[1]= 2.03481    d_vy[1]= -0.757107
d_xx[2]= 0.75   d_yy[2]= 0.25   d_vx[2]= 0.234851   d_vy[2]= 1.63537
d_xx[3]= 0.25   d_yy[3]= 0.25   d_vx[3]= -0.442835  d_vy[3]= -0.00224912
d_xx[4]= 1.75   d_yy[4]= 0.75   d_vx[4]= 1.86096    d_vy[4]= -0.822878
d_xx[5]= 1.25   d_yy[5]= 0.75   d_vx[5]= -1.52816   d_vy[5]= -1.94596
...
如何找到位于给定范围
xx_low=0
xx_high=1
yy_low=0
yy_high=1
之内的元素?然后我想用 Thrust 求出这些元素对应的
d_vx[N]
d_vy[N]
两列的平均值


最好的方法是什么?我必须先排序吗?

我至少可以想到3种可能的实现:

  • sort
    对元素排序,
    再找到满足范围条件的最后一个元素,然后
    reduce
  • copy_if
    将所有满足范围条件的元素复制到一个新的向量中,然后
    reduce
  • transform_reduce
    使用自定义函子将不在所需范围内的元素置零
  以下代码实现了第三个想法:

    #include <thrust/device_vector.h>
    #include <thrust/iterator/zip_iterator.h>
    #include <thrust/transform_reduce.h>
    #include <thrust/tuple.h>
    #include <iostream>
    
    /**
     * Unary functor for thrust::transform_reduce that turns one zipped
     * (xx, yy, vx, vy) element into a (vx, vy, count) contribution.
     *
     * An element whose (xx, yy) lies inside the closed rectangle
     * [xx_low, xx_high] x [yy_low, yy_high] contributes (vx, vy, 1);
     * every other element contributes (0, 0, 0). Summing the contributions
     * therefore yields the two velocity sums plus the number of selected
     * elements, from which the averages can be computed.
     *
     * @tparam T arithmetic type of the coordinates and velocities.
     */
    template <typename T>
    struct nullify
    {
      T xx_low;   // inclusive lower bound on xx
      T xx_high;  // inclusive upper bound on xx
      T yy_low;   // inclusive lower bound on yy
      T yy_high;  // inclusive upper bound on yy
    
      nullify(T xx_low, T xx_high, T yy_low, T yy_high) : xx_low(xx_low), xx_high(xx_high), yy_low(yy_low), yy_high(yy_high){}
    
      // (sum of vx, sum of vy, number of selected elements)
      using result_type = thrust::tuple<T,T,std::size_t>;
    
      /**
       * @param t tuple-like element; components 0..3 are read as xx, yy, vx, vy.
       * @return (vx, vy, 1) if (xx, yy) is inside the rectangle, else (0, 0, 0).
       *
       * The call operator is const because it never modifies the bounds, so the
       * functor can also be invoked through a const object.
       */
      template <typename Tuple>
      __host__ __device__
      result_type operator()(const Tuple& t) const
      {
        const T xx = thrust::get<0>(t);
        const T yy = thrust::get<1>(t);
    
        const bool in_range = xx >= xx_low && xx <= xx_high && yy >= yy_low && yy <= yy_high;
        if (!in_range)
        {
          return result_type(T(0), T(0), std::size_t(0));
        }
    
        // Build result_type directly (instead of make_tuple + implicit
        // conversion) so both outcomes have exactly the same type even when
        // the zipped element types differ from T.
        return result_type(static_cast<T>(thrust::get<2>(t)),
                           static_cast<T>(thrust::get<3>(t)),
                           std::size_t(1));
      }
    };
    
    /**
     * Binary functor that adds two 3-component tuples component-wise.
     *
     * Used as the reduction operator for thrust::transform_reduce, where each
     * tuple carries (sum of vx, sum of vy, element count).
     */
    struct tuple_plus
    {
        /**
         * @param lhs left operand; components 0..2 are read.
         * @param rhs right operand; components 0..2 are read.
         * @return tuple whose i-th component is get<i>(lhs) + get<i>(rhs).
         *
         * Declared const: the functor is stateless, so it must be callable
         * through a const object as well.
         */
        template <typename Tuple>
        __host__ __device__
        Tuple operator()(const Tuple& lhs, const Tuple& rhs) const
        {
            return thrust::make_tuple(thrust::get<0>(lhs) + thrust::get<0>(rhs),
                                      thrust::get<1>(lhs) + thrust::get<1>(rhs),
                                      thrust::get<2>(lhs) + thrust::get<2>(rhs));
        }
    };
    
    
    int main()
    {
        using T = float;
        thrust::device_vector<T> d_xx(6);
        thrust::device_vector<T> d_yy(6);
        thrust::device_vector<T> d_vx(6);
        thrust::device_vector<T> d_vy(6);
        d_xx[0]= 0.75; d_yy[0]= 0.75; d_vx[0]= 1.05488;   d_vy[0]= 0.0427136;
        d_xx[1]= 0.25; d_yy[1]= 0.75; d_vx[1]= 2.03481;   d_vy[1]= -0.757107;
        d_xx[2]= 0.75; d_yy[2]= 0.25; d_vx[2]= 0.234851;  d_vy[2]= 1.63537;
        d_xx[3]= 0.25; d_yy[3]= 0.25; d_vx[3]= -0.442835; d_vy[3]= -0.00224912;
        d_xx[4]= 1.75; d_yy[4]= 0.75; d_vx[4]= 1.86096;   d_vy[4]= -0.822878;
        d_xx[5]= 1.25; d_yy[5]= 0.75; d_vx[5]= -1.52816;  d_vy[5]= -1.94596;
    
        T xx_low  = 0;
        T xx_high = 1;
        T yy_low  = 0;
        T yy_high = 1; 
    
        auto zip_begin = thrust::make_zip_iterator(thrust::make_tuple(d_xx.begin(), d_yy.begin(), d_vx.begin(), d_vy.begin()));
        auto zip_end = thrust::make_zip_iterator(thrust::make_tuple(d_xx.end(), d_yy.end(), d_vx.end(), d_vy.end()));
    
        using Functor = nullify<T>;
        using ResultTuple = typename Functor::result_type;
        ResultTuple result = thrust::transform_reduce(zip_begin, zip_end, nullify<T>(xx_low, xx_high, yy_low, yy_high), thrust::make_tuple(T(0), T(0), 0), tuple_plus());
    
        T avg_d_vx = thrust::get<0>(result) / thrust::get<2>(result);
        T avg_d_vy = thrust::get<1>(result) / thrust::get<2>(result);
        std::cout << "avg_d_vx=" << avg_d_vx << " avg_d_vy=" << avg_d_vy << std::endl;
    }
    

    这个代码使用 Boost C++ 库吗?我在编译它时出现了错误“error: 此处不允许使用 "template"”。@AnnasNasrudin 此代码不使用 Boost,但它使用了 C++11(需要 CUDA 7)。我使用了命令
    nvcc --std=c++11 ave.cu -o ave
    ,但仍然出现上述错误。我正在使用 CUDA 编译工具 7.5 版(V7.5.17)和 g++ (Ubuntu 5.4.0-6ubuntu1~16.04.1) 5.4.0 20160609 编译器。@AnnasNasrudin 我认为
    template
    和 __host__ __device__
    的顺序不正确,我已在回答中纠正了这一点。很抱歉发表离题的评论,但我很高兴你还在回答 cuda 标签下的问题。
    avg_d_vx=0.720426 avg_d_vy=0.229682