CUDA：当键是字符串或字符数组时，如何使用 Thrust 实现 reduce_by_key（CUDA / Thrust）

CUDA：当键是字符串或字符数组时，如何使用 Thrust 实现 reduce_by_key

cuda

Cuda 当键是字符串或字符数组时，如何使用推力实现按键reduce,cuda,thrust,Cuda,Thrust,输入： BC BD BC BC BD CD 输出： BC 3 BD 2 CD 1 若我使用char类型作为键，那个么它是可用的。但似乎推力不支持字符串作为键 #include <thrust/device_vector.h> #include <thrust/iterator/constant_iterator.h> #include <thrust/reduce.h> #include <string> int main(void) { s

输入：

BC
BD
BC
BC
BD
CD

输出：

BC 3
BD 2
CD 1

如果我使用 char 类型作为键，它是可以正常工作的。但 Thrust 似乎不支持把字符串作为键。

#include <thrust/device_vector.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/reduce.h>
#include <string>

int main(void)
{
  // Sample input: each run of identical consecutive characters is one key group.
  std::string text = "aaabbbbbcddeeeeeeeeeff";
  size_t count = text.size();

  // Copy the characters to the device; they serve as the keys.
  thrust::device_vector<char> keys_in(text.begin(), text.end());

  // Both outputs are sized to the input length, the upper bound on the
  // number of runs (every character different from its neighbour).
  thrust::device_vector<char> run_keys(count);
  thrust::device_vector<int>  run_lengths(count);

  // Reducing a constant 1 per element sums to the length of each run.
  thrust::constant_iterator<int> ones(1);
  thrust::device_vector<char>::iterator run_keys_end =
    thrust::reduce_by_key(keys_in.begin(), keys_in.end(),
                          ones,
                          run_keys.begin(),
                          run_lengths.begin()).first;

  // Number of distinct runs actually written to the outputs.
  size_t num_runs = run_keys_end - run_keys.begin();
  return 0;
}

#包括
#包括
#包括
#包括
内部主（空）
{
std:：string data=“aaabbbbcddeeeff”；
size_t N=data.size（）；
推力：设备向量输入（data.begin（），data.end（））；
推力：设备_矢量输出（N）；
推力：设备_矢量长度（N）；
大小\u t数量\u运行=
推力：：按键减少推力（input.begin（），input.end（），
推力：常数迭代器（1），
output.begin（），
长度。开始（）
).first-output.begin（）；
返回0；
}

如何使用 Thrust 实现它？

向@AngryLettuce道歉，这里有两种可能的方法：

方法1：

1. 创建一个用于保存键的结构体。该结构体为键中的每个字符各包含一个 char 成员。

2. 对键进行排序（sort），把相同的键聚集在一起。看起来你真正想要的只是每种键的出现次数，而不管它出现在序列中的什么位置。要使用 reduce_by_key，必须先把相同的键相邻地排在一起；否则，reduce_by_key 会把被其他键隔开的相同键当作不同的键序列分别归约。从你期望的输入和输出可以明显看出，这不是你想要的结果。

3. 现在对已排序的键调用 reduce_by_key，统计相同键的个数。

（对于此方法）步骤 2 需要一个函子来定义键的排序顺序，步骤 3 需要一个函子来定义什么是“相等”的键，这是 reduce_by_key 所需要的。
方法2：
1. 创建两个独立的 char 类型 device_vector，一个保存每个键的第一个字母，另一个保存每个键的第二个字母。然后在其余代码中使用 zip_iterator，把这两个向量当作一个统一的“键”向量来处理。
2. 对压缩（zip）后的键向量进行排序（sort）。在这种情况下，Thrust 知道如何对由基本类型组成的压缩向量排序，不需要单独的排序函子。
3. 对压缩（且已排序）的键向量执行 reduce_by_key。这同样不需要单独的相等函子，Thrust 知道如何判断由基本类型组成的压缩向量是否相等。
第二种方法除了不需要定义任何函子之外，可能也会更快，因为与第一种方法中的 AoS（结构体数组）相比，zip_iterator 往往能改善数据访问效率。
下面是一个工作示例，演示了这两种方法：
$ cat t1004.cu
#include <thrust/device_vector.h>
#include <thrust/sort.h>
#include <thrust/reduce.h>
#include <thrust/iterator/constant_iterator.h>
#include <iostream>

#include <thrust/iterator/zip_iterator.h>

// Two-character key stored as a plain struct (one char member per character).
struct key {
  char k1;  // first character of the key
  char k2;  // second character of the key
};

// Ordering functor for key: compares k1 first and uses k2 as the tie-breaker.
// Returns true when lhs orders strictly before rhs.
// The arguments are const references and the call operator is const so the
// functor can be invoked on const functor objects and with const/temporary keys.
struct sort_functor{
  __host__ __device__ bool operator()(const key &lhs, const key &rhs) const {
    if (lhs.k1 != rhs.k1) return lhs.k1 < rhs.k1;
    return lhs.k2 < rhs.k2;}
};

// Equality functor for key: two keys are equal when both characters match.
// Const-qualified for the same reason as sort_functor.
struct equal_key{
  __host__ __device__ bool operator()(const key &lhs, const key &rhs) const {
    return (lhs.k1 == rhs.k1) && (lhs.k2 == rhs.k2);}
};

int main(){

  // Sample input: six two-character keys in unsorted order.
  key data[] = {{'B','C'},{'B','D'},{'B','C'},{'B','C'},{'B','D'},{'C','D'}};
  // Element count; const so it can also size the host staging arrays below.
  const size_t dsize = sizeof(data)/sizeof(data[0]);


//method 1: array-of-structs keys with user-defined ordering/equality functors
  thrust::device_vector<key> keys(data, data+dsize);
  thrust::device_vector<key> keys_out(dsize);   // sized for the case where every key is unique
  thrust::device_vector<int> lengths(dsize);
  // reduce_by_key merges only consecutive equal keys, so group like keys by sorting first
  thrust::sort(keys.begin(), keys.end(), sort_functor());
  // reducing a constant 1 per element yields the number of occurrences of each key
  int rsize = thrust::reduce_by_key(keys.begin(), keys.end(),
                                    thrust::constant_iterator<int>(1),
                                    keys_out.begin(), lengths.begin(),
                                    equal_key()).first - keys_out.begin();
  std::cout << "Method1:" << std::endl;
  for (int i = 0; i < rsize; i++){
    key temp = keys_out[i];
    int len = lengths[i];
    std::cout << " " << temp.k1 << temp.k2 << " " <<  len << std::endl;}

//method 2: one device vector per key character, combined through zip iterators

  // Split the keys into two host arrays and copy each to the device with a
  // single range construction; push_back on a device_vector would issue a
  // separate host-to-device transfer for every element.
  char h_k1[dsize];
  char h_k2[dsize];
  for (size_t i = 0; i < dsize; i++){
    h_k1[i] = data[i].k1;
    h_k2[i] = data[i].k2;}
  thrust::device_vector<char> k1(h_k1, h_k1+dsize);
  thrust::device_vector<char> k2(h_k2, h_k2+dsize);

  // No comparison functor is passed: the zipped (char, char) elements are sorted
  // with the default ordering.
  thrust::sort(thrust::make_zip_iterator(thrust::make_tuple(k1.begin(), k2.begin())),
               thrust::make_zip_iterator(thrust::make_tuple(k1.end(), k2.end())));

  thrust::device_vector<char> k1r(dsize);
  thrust::device_vector<char> k2r(dsize);
  // No equality functor is passed either; lengths is reused for the counts.
  rsize = thrust::reduce_by_key(
              thrust::make_zip_iterator(thrust::make_tuple(k1.begin(), k2.begin())),
              thrust::make_zip_iterator(thrust::make_tuple(k1.end(), k2.end())),
              thrust::constant_iterator<int>(1),
              thrust::make_zip_iterator(thrust::make_tuple(k1r.begin(), k2r.begin())),
              lengths.begin()).first
          - thrust::make_zip_iterator(thrust::make_tuple(k1r.begin(),k2r.begin()));
  std::cout << "Method2:" << std::endl;
  for (int i = 0; i < rsize; i++){
    char c1 = k1r[i];
    char c2 = k2r[i];
    int len = lengths[i];
    std::cout << " " << c1 << c2 << " " <<  len << std::endl;}

  return 0;
}
$ nvcc -o t1004 t1004.cu
$ ./t1004
Method1:
 BC 3
 BD 2
 CD 1
Method2:
 BC 3
 BD 2
 CD 1
$

$cat t1004.cu
#包括
#包括
#包括
#包括
#包括
#包括
结构键{
字符k1；
焦k2；
};
结构排序函数{
__主机设备布尔运算符（）（键和k1、键和k2）{
如果（k1.k1k2.k1）返回false；
如果（k1.k2std：：cout向@AngryLettuce道歉，这里有两种可能的方法：
方法1：
1. 创建一个用于保存键的结构体。该结构体为键中的每个字符各包含一个 char 成员。
2. 对键进行排序（sort），把相同的键聚集在一起。看起来你真正想要的只是每种键的出现次数，而不管它出现在序列中的什么位置。要使用 reduce_by_key，必须先把相同的键相邻地排在一起；否则，reduce_by_key 会把被其他键隔开的相同键当作不同的键序列分别归约。从你期望的输入和输出可以明显看出，这不是你想要的结果。
3. 现在对已排序的键调用 reduce_by_key，统计相同键的个数。
（对于此方法）步骤 2 需要一个函子来定义键的排序顺序，步骤 3 需要一个函子来定义什么是“相等”的键，这是 reduce_by_key 所需要的。
方法2：
1. 创建两个独立的 char 类型 device_vector，一个保存每个键的第一个字母，另一个保存每个键的第二个字母。然后在其余代码中使用 zip_iterator，把这两个向量当作一个统一的“键”向量来处理。
2. 对压缩（zip）后的键向量进行排序（sort）。在这种情况下，Thrust 知道如何对由基本类型组成的压缩向量排序，不需要单独的排序函子。
3. 对压缩（且已排序）的键向量执行 reduce_by_key。这同样不需要单独的相等函子，Thrust 知道如何判断由基本类型组成的压缩向量是否相等。
第二种方法除了不需要定义任何函子之外，可能也会更快，因为与第一种方法中的 AoS（结构体数组）相比，zip_iterator 往往能改善数据访问效率。
下面是一个工作示例，演示了这两种方法：
$ cat t1004.cu
#include <thrust/device_vector.h>
#include <thrust/sort.h>
#include <thrust/reduce.h>
#include <thrust/iterator/constant_iterator.h>
#include <iostream>

#include <thrust/iterator/zip_iterator.h>

// Two-character key stored as a plain struct (one char member per character).
struct key {
  char k1;  // first character of the key
  char k2;  // second character of the key
};

// Ordering functor for key: compares k1 first and uses k2 as the tie-breaker.
// Returns true when lhs orders strictly before rhs.
// The arguments are const references and the call operator is const so the
// functor can be invoked on const functor objects and with const/temporary keys.
struct sort_functor{
  __host__ __device__ bool operator()(const key &lhs, const key &rhs) const {
    if (lhs.k1 != rhs.k1) return lhs.k1 < rhs.k1;
    return lhs.k2 < rhs.k2;}
};

// Equality functor for key: two keys are equal when both characters match.
// Const-qualified for the same reason as sort_functor.
struct equal_key{
  __host__ __device__ bool operator()(const key &lhs, const key &rhs) const {
    return (lhs.k1 == rhs.k1) && (lhs.k2 == rhs.k2);}
};

int main(){

  // Sample input: six two-character keys in unsorted order.
  key data[] = {{'B','C'},{'B','D'},{'B','C'},{'B','C'},{'B','D'},{'C','D'}};
  // Element count; const so it can also size the host staging arrays below.
  const size_t dsize = sizeof(data)/sizeof(data[0]);


//method 1: array-of-structs keys with user-defined ordering/equality functors
  thrust::device_vector<key> keys(data, data+dsize);
  thrust::device_vector<key> keys_out(dsize);   // sized for the case where every key is unique
  thrust::device_vector<int> lengths(dsize);
  // reduce_by_key merges only consecutive equal keys, so group like keys by sorting first
  thrust::sort(keys.begin(), keys.end(), sort_functor());
  // reducing a constant 1 per element yields the number of occurrences of each key
  int rsize = thrust::reduce_by_key(keys.begin(), keys.end(),
                                    thrust::constant_iterator<int>(1),
                                    keys_out.begin(), lengths.begin(),
                                    equal_key()).first - keys_out.begin();
  std::cout << "Method1:" << std::endl;
  for (int i = 0; i < rsize; i++){
    key temp = keys_out[i];
    int len = lengths[i];
    std::cout << " " << temp.k1 << temp.k2 << " " <<  len << std::endl;}

//method 2: one device vector per key character, combined through zip iterators

  // Split the keys into two host arrays and copy each to the device with a
  // single range construction; push_back on a device_vector would issue a
  // separate host-to-device transfer for every element.
  char h_k1[dsize];
  char h_k2[dsize];
  for (size_t i = 0; i < dsize; i++){
    h_k1[i] = data[i].k1;
    h_k2[i] = data[i].k2;}
  thrust::device_vector<char> k1(h_k1, h_k1+dsize);
  thrust::device_vector<char> k2(h_k2, h_k2+dsize);

  // No comparison functor is passed: the zipped (char, char) elements are sorted
  // with the default ordering.
  thrust::sort(thrust::make_zip_iterator(thrust::make_tuple(k1.begin(), k2.begin())),
               thrust::make_zip_iterator(thrust::make_tuple(k1.end(), k2.end())));

  thrust::device_vector<char> k1r(dsize);
  thrust::device_vector<char> k2r(dsize);
  // No equality functor is passed either; lengths is reused for the counts.
  rsize = thrust::reduce_by_key(
              thrust::make_zip_iterator(thrust::make_tuple(k1.begin(), k2.begin())),
              thrust::make_zip_iterator(thrust::make_tuple(k1.end(), k2.end())),
              thrust::constant_iterator<int>(1),
              thrust::make_zip_iterator(thrust::make_tuple(k1r.begin(), k2r.begin())),
              lengths.begin()).first
          - thrust::make_zip_iterator(thrust::make_tuple(k1r.begin(),k2r.begin()));
  std::cout << "Method2:" << std::endl;
  for (int i = 0; i < rsize; i++){
    char c1 = k1r[i];
    char c2 = k2r[i];
    int len = lengths[i];
    std::cout << " " << c1 << c2 << " " <<  len << std::endl;}

  return 0;
}
$ nvcc -o t1004 t1004.cu
$ ./t1004
Method1:
 BC 3
 BD 2
 CD 1
Method2:
 BC 3
 BD 2
 CD 1
$

$cat t1004.cu
#包括
#包括
#包括
#包括