Sorting 如何使用cuda计算矩阵每行中元素的顺序？_Sorting_Cuda_Thrust

Sorting 如何使用cuda计算矩阵每行中元素的顺序？

sorting cuda

Sorting 如何使用cuda计算矩阵每行中元素的顺序？,sorting,cuda,thrust,Sorting,Cuda,Thrust,我正在寻找如何使用cuda/沿矩阵的行或列进行argsort 这意味着给定一个矩阵，如： A = [[ 3.4257, -1.2345, 0.6232, -0.1354], [-1.6639, 0.1557, -0.1763, 1.0257], [0.6863, 0.0992, 1.4487, 0.0157]]. 我需要计算每行中元素的顺序，因此输出为： index = [[1, 3, 2, 0], [0, 2, 1, 3],

我正在寻找如何使用 cuda/thrust 沿矩阵的行或列进行 argsort。

这意味着给定一个矩阵，如：

A = [[ 3.4257, -1.2345,  0.6232, -0.1354], 
     [-1.6639,  0.1557, -0.1763,  1.0257], 
     [0.6863,  0.0992,  1.4487,  0.0157]].

我需要计算每行中元素的顺序，因此输出为：

index = [[1, 3, 2, 0],
         [0, 2, 1, 3],
         [3, 1, 0, 2]]

我怎样才能做到这一点呢？

这可以通过 thrust::sort 来实现。我们需要一组行索引和一组列索引。行索引用于保证排序结果按行分段，不同行的元素不会混在一起。列索引在排序之后就是我们想要的结果。

将值、行索引、列索引压缩（zip）在一起。创建一个排序函子，先按行排序，再按值排序。输出即为重新排列后的列索引。
$ cat t114.cu
#include <thrust/sort.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <iostream>

using namespace thrust::placeholders;

// Comparator for tuples whose element 1 is the primary sort key and whose
// element 0 is the secondary key. Any further tuple elements are ignored.
// Returns true when t1 must be ordered before t2.
struct my_sort_functor
{
  template <typename T1, typename T2>
  __host__ __device__
  bool operator()(const T1 &t1, const T2 &t2){
    // A strictly smaller primary key always wins.
    if (thrust::get<1>(t1) < thrust::get<1>(t2)) return true;
    // Otherwise t1 can only come first when the primary key is not larger
    // and the secondary key is strictly smaller.
    return !(thrust::get<1>(t1) > thrust::get<1>(t2)) &&
           (thrust::get<0>(t1) < thrust::get<0>(t2));
  }
};

typedef float mt;
typedef int it;

int main(){

  // Demo input: a rows x cols matrix laid out row by row in a flat array.
  const int rows = 3;
  const int cols = 4;
  const int total = rows*cols;
  mt A[] = { 3.4257, -1.2345,  0.6232, -0.1354,
            -1.6639,  0.1557, -0.1763,  1.0257,
             0.6863,  0.0992,  1.4487,  0.0157};
  thrust::device_vector<mt> values(A, A+total);

  // Row number of every flat position: 0,1,2,... divided by cols.
  thrust::device_vector<it> row_of(values.size());
  thrust::sequence(row_of.begin(), row_of.end());
  thrust::transform(row_of.begin(), row_of.end(), row_of.begin(), _1/cols);

  // Column number of every flat position: 0,1,2,... modulo cols.
  thrust::device_vector<it> col_of(values.size());
  thrust::sequence(col_of.begin(), col_of.end());
  thrust::transform(col_of.begin(), col_of.end(), col_of.begin(), _1%cols);

  // Sort (value, row, column) triples in place; the comparator orders by row
  // first and by value second, so col_of ends up holding the per-row argsort.
  auto triples = thrust::make_zip_iterator(thrust::make_tuple(values.begin(), row_of.begin(), col_of.begin()));
  thrust::sort(triples, triples+total, my_sort_functor());

  // Bring the permuted column indices back to the host and print them.
  thrust::host_vector<it> h_col_of = col_of;
  for (int i = 0; i < total; i++) std::cout << h_col_of[i] << ",";
  std::cout << std::endl;
}
$ nvcc -o t114 t114.cu
$ ./t114
1,3,2,0,0,2,1,3,3,1,0,2,
$

$cat t114.cu
#包括
#包括
#包括
#包括
#包括
使用命名空间推力：：占位符；
结构我的排序函子
{
模板
__主机设备__
布尔运算符（）（常数T1和T1，常数T2和T2）{
if（推力：：get（t1）<推力：：get（t2））返回true；
if（推力：：获取（t1）>推力：：获取（t2））返回false；
if（推力：：get（t1）<推力：：get（t2））返回true；
返回false；
}
};
类型定义浮动mt；
键入def int；
int main（）{
mt A[]={3.4257，-1.2345,0.6232，-0.1354，
-1.6639,  0.1557, -0.1763,  1.0257,
0.6863,  0.0992,  1.4487,  0.0157};
const int rows=3；
常数int cols=4；
推力：装置矢量d（A，A+行*cols）；
推力：设备向量行idx（d_A.size（））；
推力：设备向量列idx（d_A.size（））；
顺序（row_idx.begin（），row_idx.end（））；
序列（col_idx.begin（），col_idx.end（））；
转换（row_idx.begin（），row_idx.end（），row_idx.begin（），_1/cols）；
转换（col_idx.begin（），col_idx.end（），col_idx.begin（），_1%cols）；
自动my_zip_迭代器=推力：：make_zip_迭代器（推力：：make_元组（d_A.begin（），row_idx.begin（），col_idx.begin（））；
推力：：排序（my_-zip_迭代器，my_-zip_迭代器+行*cols，my_-sort_函子（））；
推力：主机向量h\u col\u idx=col\u idx；
推力：：复制（h_col_idx.begin（），rows*cols，std：：ostream_迭代器（std：：cout，“，”）；
std::cout << std::endl;
}

评论：

@RobertCrovella：这是对该行的值按升序排列的正确 argsort：[-1.2345, -0.1354, 0.6232, 3.4257]。

而且，鉴于 thrust 没有提供按键分段排序（segmented sort-by-key），没有直接的方法可以做到这一点。是的，你也许可以借助稳定排序拼凑出一个办法，但并没有任何现成的直接实现。

是的，是我弄混了。
$ cat t114.cu
#include <thrust/sort.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/copy.h>
#include <iostream>

using namespace thrust::placeholders;
typedef float mt;
typedef int it;

// Comparator over flat element positions of a matrix stored in `d` with
// `cols` elements per row. Positions are ordered by row number first
// (position / cols) and, within a row, by the value stored at that position.
struct my_sort_functor
{
  mt *d;    // matrix values, indexed by flat position
  it cols;  // number of elements per row
  my_sort_functor(mt *_d, it _cols) : d(_d), cols(_cols) {}
  __host__ __device__
  bool operator()(const it &t1, const it &t2){
    const it row_a = t1/cols;
    const it row_b = t2/cols;
    // Different rows: the lower row number sorts first.
    if (row_a != row_b) return row_a < row_b;
    // Same row: order by the stored value.
    return d[t1] < d[t2];
  }
};

int main(){

  // Demo input: a rows x cols matrix laid out row by row in a flat array.
  const int rows = 3;
  const int cols = 4;
  mt A[] = { 3.4257, -1.2345,  0.6232, -0.1354,
            -1.6639,  0.1557, -0.1763,  1.0257,
             0.6863,  0.0992,  1.4487,  0.0157};
  thrust::device_vector<mt> values(A, A+rows*cols);

  // Sort the flat positions 0..rows*cols-1 rather than the values; the
  // comparator looks the values up through a raw pointer to the device data.
  thrust::device_vector<it> order(values.size());
  thrust::sequence(order.begin(), order.end());
  my_sort_functor by_row_then_value(thrust::raw_pointer_cast(values.data()), cols);
  thrust::sort(order.begin(), order.end(), by_row_then_value);

  // Reduce each sorted flat position to its column number.
  thrust::transform(order.begin(), order.end(), order.begin(), _1%cols);

  // Copy to the host and print as a comma-terminated list.
  thrust::host_vector<it> h_order = order;
  thrust::copy(h_order.begin(), h_order.end(), std::ostream_iterator<it>(std::cout, ","));
  std::cout << std::endl;
}
$ nvcc -o t114 t114.cu
$ ./t114
1,3,2,0,0,2,1,3,3,1,0,2,
$

$ cat t114.cu
#include <thrust/sort.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/sequence.h>
#include <thrust/transform.h>
#include <iostream>

using namespace thrust::placeholders;
typedef float mt;
typedef unsigned it;

// Comparator over packed indices: the bits above bit 15 hold the row number
// and the low 16 bits hold the column number. Indices are ordered by row
// first and, within a row, by the matrix value found at d[row*cols + col].
struct my_sort_functor
{
  mt *d;    // matrix values, cols elements per row
  it cols;  // number of elements per row
  my_sort_functor(mt *_d, it _cols) : d(_d), cols(_cols) {}
  __host__ __device__
  bool operator()(const it &t1, const it &t2){
    const it row_a = t1>>16;
    const it row_b = t2>>16;
    // Different rows: the lower row number sorts first, no memory access needed.
    if (row_a != row_b) return row_a < row_b;
    // Same row: unpack the columns and compare the stored values.
    const it pos_a = row_a*cols + (t1&65535);
    const it pos_b = row_b*cols + (t2&65535);
    return d[pos_a] < d[pos_b];
  }
};

// Converts a flat element position into a packed index: the row number
// (position / cols) shifted left by 16 bits, plus the column number
// (position % cols).
struct my_transform_functor
{
  it cols;  // number of elements per row
  my_transform_functor(it _cols) : cols(_cols) {}
  __host__ __device__
  it operator()(const it &t1){
    return ((t1/cols) << 16) + (t1%cols);
  }
};

// Computes and prints the per-row argsort of a small demo matrix.
// Each element is identified by a packed index (row << 16) + col; the packed
// indices are sorted on the device by row and then by value, and the column
// part of every sorted index is printed as a comma-terminated list.
int main(){

  mt A[] = { 3.4257, -1.2345,  0.6232, -0.1354,
            -1.6639,  0.1557, -0.1763,  1.0257,
             0.6863,  0.0992,  1.4487,  0.0157};
  const int rows = 3;
  const int cols = 4;
  // The packed representation gives 16 bits to the row number and 16 bits to
  // the column number, so the largest row/column index must not exceed 65535.
  // cols must also be positive because it is used as a divisor. Enforce this
  // at compile time instead of relying on a comment.
  static_assert(rows >= 0 && rows <= 65536 && cols > 0 && cols <= 65536,
                "rows and cols must fit the 16-bit packed index layout");
  thrust::device_vector<mt> d_A(A, A+rows*cols);
  thrust::device_vector<it> idx(d_A.size());
  // Flat positions 0..rows*cols-1 ...
  thrust::sequence(idx.begin(), idx.end());
  // ... converted to packed (row, col) indices ...
  thrust::transform(idx.begin(), idx.end(), idx.begin(), my_transform_functor(cols));
  // ... and sorted by row, then by the value stored in d_A.
  thrust::sort(idx.begin(), idx.end(), my_sort_functor(thrust::raw_pointer_cast(d_A.data()), cols));
  thrust::host_vector<it> h_idx = idx;
  // The low 16 bits of each packed index are the column number.
  for (int i = 0; i < rows*cols; i++) std::cout << (h_idx[i]&65535) << ",";
  std::cout << std::endl;
}
$ nvcc -o t114 t114.cu
$ ./t114
1,3,2,0,0,2,1,3,3,1,0,2,
$