Vector 使用 Thrust 查找一组元素,然后求平均值
我有四个像这样的向量Vector 求组元素推力,然后求平均值,vector,cuda,thrust,Vector,Cuda,Thrust,我有四个像这样的向量 d_xx[0]= 0.75 d_yy[0]= 0.75 d_vx[0]= 1.05488 d_vy[0]= 0.0427136 d_xx[1]= 0.25 d_yy[1]= 0.75 d_vx[1]= 2.03481 d_vy[1]= -0.757107 d_xx[2]= 0.75 d_yy[2]= 0.25 d_vx[2]= 0.234851 d_vy[2]= 1.63537 d_xx[3]= 0.25 d_yy[3]= 0.2
d_xx[0]= 0.75 d_yy[0]= 0.75 d_vx[0]= 1.05488 d_vy[0]= 0.0427136
d_xx[1]= 0.25 d_yy[1]= 0.75 d_vx[1]= 2.03481 d_vy[1]= -0.757107
d_xx[2]= 0.75 d_yy[2]= 0.25 d_vx[2]= 0.234851 d_vy[2]= 1.63537
d_xx[3]= 0.25 d_yy[3]= 0.25 d_vx[3]= -0.442835 d_vy[3]= -0.00224912
d_xx[4]= 1.75 d_yy[4]= 0.75 d_vx[4]= 1.86096 d_vy[4]= -0.822878
d_xx[5]= 1.25 d_yy[5]= 0.75 d_vx[5]= -1.52816 d_vy[5]= -1.94596
...
如何找到落在给定范围 xx_low=0、xx_high=1 和 yy_low=0、yy_high=1 之内的元素?然后我想用 Thrust 求出这些元素对应的 d_vx[N] 和 d_vy[N] 两列的平均值。
最好的方法是什么?我必须先排序吗?我至少可以想到3种可能的实现:
sort
先对元素排序,再查找满足范围条件的最后一个元素,然后 reduce
copy_if
把所有满足范围条件的元素复制到一个新的向量中,然后 reduce
transform_reduce
使用自定义函子将不在所需范围内的元素置零
#include <thrust/device_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/transform_reduce.h>
#include <thrust/tuple.h>
#include <iostream>
// Unary functor for thrust::transform_reduce over a zipped (xx, yy, vx, vy)
// sequence. For each element it yields a (vx, vy, count) tuple:
//   - (vx, vy, 1) when xx lies in [xx_low, xx_high] and yy lies in
//     [yy_low, yy_high] (both bounds inclusive),
//   - (0, 0, 0) otherwise,
// so that summing the tuples gives the sums of vx and vy over the selected
// elements together with the number of selected elements.
template <typename T>
struct nullify
{
    // Inclusive bounds of the accepted rectangle.
    T xx_low;
    T xx_high;
    T yy_low;
    T yy_high;

    nullify(T xx_low, T xx_high, T yy_low, T yy_high)
        : xx_low(xx_low), xx_high(xx_high), yy_low(yy_low), yy_high(yy_high) {}

    // (sum contribution of vx, sum contribution of vy, element count)
    using result_type = thrust::tuple<T, T, std::size_t>;

    // t is expected to expose four elements: get<0> = xx, get<1> = yy,
    // get<2> = vx, get<3> = vy.
    // The call operator is const so the functor can be invoked through a
    // const object or const reference.
    template <typename Tuple>
    __host__ __device__
    result_type operator()(const Tuple& t) const
    {
        // Copy into plain T values so the comparison does not depend on
        // whatever reference/proxy type the tuple elements have.
        const T xx = thrust::get<0>(t);
        const T yy = thrust::get<1>(t);

        const bool inside = xx >= xx_low && xx <= xx_high &&
                            yy >= yy_low && yy <= yy_high;
        if (!inside)
        {
            return result_type(T(0), T(0), std::size_t(0));
        }

        // Build result_type directly instead of relying on both branches of a
        // ternary producing the same make_tuple type.
        return result_type(static_cast<T>(thrust::get<2>(t)),
                           static_cast<T>(thrust::get<3>(t)),
                           std::size_t(1));
    }
};
// Binary reduction operator: element-wise sum of two 3-element tuples.
// Both operands and the result share the same tuple type.
struct tuple_plus
{
    // Const-qualified so the operator can be invoked through a const functor.
    template <typename Tuple>
    __host__ __device__
    Tuple operator()(const Tuple& lhs, const Tuple& rhs) const
    {
        // Construct the declared return type directly rather than converting
        // from whatever tuple type make_tuple would deduce for the sums.
        return Tuple(thrust::get<0>(lhs) + thrust::get<0>(rhs),
                     thrust::get<1>(lhs) + thrust::get<1>(rhs),
                     thrust::get<2>(lhs) + thrust::get<2>(rhs));
    }
};
int main()
{
using T = float;
thrust::device_vector<T> d_xx(6);
thrust::device_vector<T> d_yy(6);
thrust::device_vector<T> d_vx(6);
thrust::device_vector<T> d_vy(6);
d_xx[0]= 0.75; d_yy[0]= 0.75; d_vx[0]= 1.05488; d_vy[0]= 0.0427136;
d_xx[1]= 0.25; d_yy[1]= 0.75; d_vx[1]= 2.03481; d_vy[1]= -0.757107;
d_xx[2]= 0.75; d_yy[2]= 0.25; d_vx[2]= 0.234851; d_vy[2]= 1.63537;
d_xx[3]= 0.25; d_yy[3]= 0.25; d_vx[3]= -0.442835; d_vy[3]= -0.00224912;
d_xx[4]= 1.75; d_yy[4]= 0.75; d_vx[4]= 1.86096; d_vy[4]= -0.822878;
d_xx[5]= 1.25; d_yy[5]= 0.75; d_vx[5]= -1.52816; d_vy[5]= -1.94596;
T xx_low = 0;
T xx_high = 1;
T yy_low = 0;
T yy_high = 1;
auto zip_begin = thrust::make_zip_iterator(thrust::make_tuple(d_xx.begin(), d_yy.begin(), d_vx.begin(), d_vy.begin()));
auto zip_end = thrust::make_zip_iterator(thrust::make_tuple(d_xx.end(), d_yy.end(), d_vx.end(), d_vy.end()));
using Functor = nullify<T>;
using ResultTuple = typename Functor::result_type;
ResultTuple result = thrust::transform_reduce(zip_begin, zip_end, nullify<T>(xx_low, xx_high, yy_low, yy_high), thrust::make_tuple(T(0), T(0), 0), tuple_plus());
T avg_d_vx = thrust::get<0>(result) / thrust::get<2>(result);
T avg_d_vy = thrust::get<1>(result) / thrust::get<2>(result);
std::cout << "avg_d_vx=" << avg_d_vx << " avg_d_vy=" << avg_d_vy << std::endl;
}
这个代码使用 Boost C++ 库吗?我在编译时遇到了一个错误:“error: 不允许使用 "template"”。
@AnnasNasrudin 此代码不使用 Boost,但它使用 C++11(需要 CUDA 7)。
我使用了命令
nvcc --std=c++11 ave.cu -o ave
,但仍然出现上面的错误。我正在使用 CUDA 编译工具 7.5 版(V7.5.17)和 g++ (Ubuntu 5.4.0-6ubuntu1~16.04.1) 5.4.0 20160609 编译器。
@AnnasNasrudin 我认为 template 和 __host__ __device__ 的顺序不正确,我已在回答中纠正了这一点。
很抱歉发表离题的评论,但我很高兴你还在回答 cuda 标签下的问题。
avg_d_vx=0.720426 avg_d_vy=0.229682