CUDA Thrust:仅在变换结果满足谓词时才复制该结果

CUDA Thrust:仅在变换结果满足谓词时才复制该结果,cuda,thrust,Cuda,Thrust,我想对输入的 thrust::device_vector 执行变换,并且仅当结果满足谓词时才将结果复制到输出向量。因此,结果的数量可能小于输入 device_vector 的大小(类似于 thrust::copy_if 的输出向量)。我还没有找到用 thrust::transform_if 实现这一点的方法。目前,我可以使用 thrust::transform 和 thrust::remove_if 来实现,如下例所示: #include <thrust/random.h> #include <thrust/iterator/counting_iterator.h> #include <thrus

我想对输入的
thrust::device_vector 执行变换,并且仅当结果满足谓词时才将结果复制到输出向量。因此,结果的数量可能小于输入 device_vector 的大小(类似于
thrust::copy_if 的输出向量
)。我还没有找到用 thrust::transform_if 实现这一点的方法。目前,我可以使用
thrust::transform 和
thrust::remove_if 来实现
,如下例所示:

#include <thrust/random.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/remove.h>
#include <iostream>

// Integer scrambling function: pushes `a` through six fixed rounds of
// add / xor / shift mixing and returns the result. All arithmetic is on
// unsigned int, so overflow wraps instead of being undefined. Callable from
// both host and device code; in this listing it is used to derive a
// per-element seed for the random engine.
__host__ __device__ unsigned int hash(unsigned int a) {
  a += 0x7ed55d16 + (a << 12);
  a ^= 0xc761c23c ^ (a >> 19);
  a += 0x165667b1 + (a << 5);
  // This round combines '+' with '^', so the shifted term is captured first
  // instead of folding the round into a single compound assignment.
  const unsigned int shifted = a << 9;
  a = (a + 0xd3a2646c) ^ shifted;
  a += 0xfd7046c5 + (a << 3);
  a ^= 0xb55a4f09 ^ (a >> 16);
  return a;
}

// Binary functor for thrust::transform: given an element index `n` and the
// element's current value `x`, returns `x` plus a pseudo-random integer.
// The engine is seeded with hash(n), so the offset depends only on the index.
// operator() is __device__-only, so it can only be invoked from device code.
struct add_random {
  __host__ __device__ add_random() {}
  __device__ int operator()(const int n, const int x) const {
    thrust::default_random_engine engine(hash(n));
    // Distribution constructed with bounds 0 and 11.
    thrust::uniform_int_distribution<int> dist(0, 11);
    const int offset = dist(engine);
    return offset + x;
  }
};

// Unary predicate: true exactly when the value is strictly greater than 6.
// Passed to thrust::remove_if in this listing, so matching values are dropped.
struct is_greater {
  __host__ __device__ bool operator()(const int x) {
    return 6 < x;
  }
};

// Two-pass version: (1) transform the device vector in place, (2) compact it
// with remove_if. Both intermediate and final contents are printed.
int main(void) {
  // Host-side input values, copied into a device vector.
  const int seed_vals[5] = {10, 2, 5, 3, 0};
  thrust::device_vector<int> d_vals(seed_vals, seed_vals + 5);

  // Pass 1: element i is replaced by add_random()(i, d_vals[i]); the counting
  // iterators supply the indices 0..4.
  const thrust::counting_iterator<int> idx_first(0);
  const thrust::counting_iterator<int> idx_last(5);
  thrust::transform(idx_first, idx_last, d_vals.begin(), d_vals.begin(),
                    add_random());

  // One output iterator is reused for both printouts.
  std::ostream_iterator<int> out(std::cout, " ");

  std::cout << "after adding random number:" << std::endl;
  thrust::copy(d_vals.begin(), d_vals.end(), out);
  std::cout << std::endl;

  // Pass 2: remove_if compacts the vector in place and returns the end of the
  // range that survived the predicate.
  typedef thrust::device_vector<int>::iterator DeviceIter;
  const DeviceIter kept_end =
      thrust::remove_if(d_vals.begin(), d_vals.end(), is_greater());

  std::cout << "after removing values greater than 6:" << std::endl;
  thrust::copy(d_vals.begin(), kept_end, out);
  std::cout << std::endl;

  return 0;
}

我希望避免将结果两次写入内存:在上面的示例中,先由
thrust::transform
写一次,然后再由
thrust::remove_if
写一次。是否可以用单个变换函数得到上述输出?我该怎么做?我最关心的是计算开销,因此任何经过优化的解决方案,即使不使用 Thrust 库,也非常欢迎。

欢迎来到 Thrust 迭代器的世界。通过查看 Thrust 快速入门指南,可以快速了解其中几种迭代器类型。特别是,Thrust 的变换迭代器(transform_iterator)经常可以用来替代施加在另一个 Thrust 算法输入上的 thrust::transform 操作,从而把两个算法“融合”为单个操作。

下面是一个适用于您的案例的工作示例:

$ cat t1254.cu
#include <thrust/random.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/remove.h>
#include <iostream>

// Integer scrambling function: pushes `a` through six fixed rounds of
// add / xor / shift mixing and returns the result. All arithmetic is on
// unsigned int, so overflow wraps instead of being undefined. Callable from
// both host and device code; in this listing it is used to derive a
// per-element seed for the random engine.
__host__ __device__ unsigned int hash(unsigned int a) {
  a += 0x7ed55d16 + (a << 12);
  a ^= 0xc761c23c ^ (a >> 19);
  a += 0x165667b1 + (a << 5);
  // This round combines '+' with '^', so the shifted term is captured first
  // instead of folding the round into a single compound assignment.
  const unsigned int shifted = a << 9;
  a = (a + 0xd3a2646c) ^ shifted;
  a += 0xfd7046c5 + (a << 3);
  a ^= 0xb55a4f09 ^ (a >> 16);
  return a;
}

// Unary functor over an (index, value) tuple, shaped for use with
// thrust::make_transform_iterator on top of a zip iterator. Returns the
// tuple's value plus a pseudo-random integer whose engine is seeded with
// hash(index), so the offset depends only on the index.
struct add_random : public thrust::unary_function<thrust::tuple<int, int>, int> {
  __host__ __device__ int operator()(thrust::tuple<int, int> t) const {
    const int index = thrust::get<0>(t);
    const int value = thrust::get<1>(t);
    thrust::default_random_engine engine(hash(index));
    // Distribution constructed with bounds 0 and 11.
    thrust::uniform_int_distribution<int> dist(0, 11);
    const int offset = dist(engine);
    return offset + value;
  }
};

// Predicate handed to thrust::copy_if in this listing: copy_if KEEPS the
// elements for which the predicate returns true. To reproduce "remove every
// value greater than 6" the predicate must therefore accept x <= 6; the
// earlier `x < 6` also discarded a value of exactly 6.
// The struct name is kept as-is so existing call sites continue to compile,
// even though the predicate now selects the values that are NOT greater than 6.
struct is_greater {
  // const-qualified so the functor can also be invoked through a const object.
  __host__ __device__ bool operator()(const int x) const {
    return x <= 6;
  }
};

// Fused version: the transform is applied lazily through a transform iterator
// while copy_if selects which results are written out, so the transformed
// values are stored only once.
int main(void) {
  const int host_vals[5] = {10, 2, 5, 3, 0};
  thrust::device_vector<int> d_x(host_vals, host_vals + 5);
  // Separate output buffer, sized for the case where every result is kept.
  thrust::device_vector<int> d_r(5);

  // Input range: zip (index, value) pairs, then apply add_random on the fly.
  // copy_if writes only the results accepted by is_greater into d_r and
  // returns the end of the written range.
  thrust::device_vector<int>::iterator result_end = thrust::copy_if(
      thrust::make_transform_iterator(
          thrust::make_zip_iterator(thrust::make_tuple(
              thrust::counting_iterator<int>(0), d_x.begin())),
          add_random()),
      thrust::make_transform_iterator(
          thrust::make_zip_iterator(thrust::make_tuple(
              thrust::counting_iterator<int>(5), d_x.end())),
          add_random()),
      d_r.begin(),
      is_greater());

  // Number of elements that were actually written.
  const int rsize = result_end - d_r.begin();

  std::cout << "after removing values greater than 6:" << std::endl;
  thrust::copy_n(d_r.begin(), rsize, std::ostream_iterator<int>(std::cout, " "));
  std::cout << std::endl;

  return 0;
}
$ nvcc -o t1254 t1254.cu
$ ./t1254
after removing values greater than 6:
4
$
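$ cat t1254.cu
#include <thrust/random.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/remove.h>
#include <iostream>

__host__ __device__ unsigned int hash(unsigned int a) {
  a = (a+0x7ed55d16) + (a<<12);
  a = (a^0xc761c23c) ^ (a>>19);
  a = (a+0x165667b1) + (a<<5);
  ...(此处的代码列表在提取时被截断;完整代码见紧随其后的 t1254.cu 列表)

评论:
如果你使用的是 C++11,可以把那一行很长的 Thrust 调用写得更简洁(原评论此处的示例链接已丢失)。
谢谢你,Robert Crovella。即使我在 thrust::copy_if 中用
d_x
替换
d_r
,它也能工作,这正是我想要的。
抱歉,但是在
copy_if
中用 d_x 替换 d_r 是无效的,请参阅
copy_if
的文档:输入范围和输出范围不得重叠。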
$ cat t1254.cu
#include <thrust/random.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/remove.h>
#include <iostream>

// Integer scrambling function: pushes `a` through six fixed rounds of
// add / xor / shift mixing and returns the result. All arithmetic is on
// unsigned int, so overflow wraps instead of being undefined. Callable from
// both host and device code; in this listing it is used to derive a
// per-element seed for the random engine.
__host__ __device__ unsigned int hash(unsigned int a) {
  a += 0x7ed55d16 + (a << 12);
  a ^= 0xc761c23c ^ (a >> 19);
  a += 0x165667b1 + (a << 5);
  // This round combines '+' with '^', so the shifted term is captured first
  // instead of folding the round into a single compound assignment.
  const unsigned int shifted = a << 9;
  a = (a + 0xd3a2646c) ^ shifted;
  a += 0xfd7046c5 + (a << 3);
  a ^= 0xb55a4f09 ^ (a >> 16);
  return a;
}

// Unary functor over an (index, value) tuple, shaped for use with
// thrust::make_transform_iterator on top of a zip iterator. Returns the
// tuple's value plus a pseudo-random integer whose engine is seeded with
// hash(index), so the offset depends only on the index.
struct add_random : public thrust::unary_function<thrust::tuple<int, int>, int> {
  __host__ __device__ int operator()(thrust::tuple<int, int> t) const {
    const int index = thrust::get<0>(t);
    const int value = thrust::get<1>(t);
    thrust::default_random_engine engine(hash(index));
    // Distribution constructed with bounds 0 and 11.
    thrust::uniform_int_distribution<int> dist(0, 11);
    const int offset = dist(engine);
    return offset + value;
  }
};

// Predicate handed to thrust::copy_if in this listing: copy_if KEEPS the
// elements for which the predicate returns true. To reproduce "remove every
// value greater than 6" the predicate must therefore accept x <= 6; the
// earlier `x < 6` also discarded a value of exactly 6.
// The struct name is kept as-is so existing call sites continue to compile,
// even though the predicate now selects the values that are NOT greater than 6.
struct is_greater {
  // const-qualified so the functor can also be invoked through a const object.
  __host__ __device__ bool operator()(const int x) const {
    return x <= 6;
  }
};

// Fused version: the transform is applied lazily through a transform iterator
// while copy_if selects which results are written out, so the transformed
// values are stored only once.
int main(void) {
  const int host_vals[5] = {10, 2, 5, 3, 0};
  thrust::device_vector<int> d_x(host_vals, host_vals + 5);
  // Separate output buffer, sized for the case where every result is kept.
  thrust::device_vector<int> d_r(5);

  // Input range: zip (index, value) pairs, then apply add_random on the fly.
  // copy_if writes only the results accepted by is_greater into d_r and
  // returns the end of the written range.
  thrust::device_vector<int>::iterator result_end = thrust::copy_if(
      thrust::make_transform_iterator(
          thrust::make_zip_iterator(thrust::make_tuple(
              thrust::counting_iterator<int>(0), d_x.begin())),
          add_random()),
      thrust::make_transform_iterator(
          thrust::make_zip_iterator(thrust::make_tuple(
              thrust::counting_iterator<int>(5), d_x.end())),
          add_random()),
      d_r.begin(),
      is_greater());

  // Number of elements that were actually written.
  const int rsize = result_end - d_r.begin();

  std::cout << "after removing values greater than 6:" << std::endl;
  thrust::copy_n(d_r.begin(), rsize, std::ostream_iterator<int>(std::cout, " "));
  std::cout << std::endl;

  return 0;
}
$ nvcc -o t1254 t1254.cu
$ ./t1254
after removing values greater than 6:
4
$