Sorting 如何使用cuda计算矩阵每行中元素的顺序?
标签:sorting、cuda、thrust
我正在寻找如何使用 CUDA/Thrust 沿矩阵的行或列进行 argsort。
这意味着给定一个矩阵,如:
A = [[ 3.4257, -1.2345, 0.6232, -0.1354],
[-1.6639, 0.1557, -0.1763, 1.0257],
[0.6863, 0.0992, 1.4487, 0.0157]].
我需要计算每行中元素的顺序,因此输出为:
index = [[1, 3, 2, 0],
[0, 2, 1, 3],
[3, 1, 0, 2]]
我怎样才能做到这一点呢?
这可以通过 thrust::sort 来实现。我们需要一组行索引和一组列索引。行索引用于保证排序结果仍然按行分段;列索引在排序之后就是我们想要的结果。
用 zip_iterator 将值、行索引、列索引组合(zip)在一起。创建一个排序函子,先按行索引比较,再按值比较。排序后重新排列的列索引就是输出。
$ cat t114.cu
#include <thrust/sort.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <iostream>
using namespace thrust::placeholders;
// Comparator for zipped (value, row index, column index) tuples.
// Orders primarily by row index (tuple element 1) and, within the same row,
// by value (tuple element 0). Tuple element 2 (the column index) is never
// compared; it is only carried along by the sort.
struct my_sort_functor
{
  // t1, t2: tuple-like objects readable through thrust::get<0> / thrust::get<1>.
  // Returns true when t1 must be ordered before t2.
  // The operator is const-qualified because it does not modify the functor,
  // which also lets it be invoked through a const comparator object.
  template <typename T1, typename T2>
  __host__ __device__
  bool operator()(const T1 &t1, const T2 &t2) const {
    // Different rows: the row index alone decides.
    if (thrust::get<1>(t1) < thrust::get<1>(t2)) return true;
    if (thrust::get<1>(t1) > thrust::get<1>(t2)) return false;
    // Same row: ascending by value.
    return thrust::get<0>(t1) < thrust::get<0>(t2);
  }
};
// Element type of the matrix values.
using mt = float;
// Integer type used for the row/column indices.
using it = int;
// Demo: per-row argsort of a 3x4 matrix stored row by row.
// Prints the column indices of each row's elements in ascending value order,
// every index followed by a comma, then a newline.
int main(){
  const int rows = 3;
  const int cols = 4;
  const int count = rows*cols;
  mt A[] = { 3.4257, -1.2345, 0.6232, -0.1354,
            -1.6639, 0.1557, -0.1763, 1.0257,
             0.6863, 0.0992, 1.4487, 0.0157};

  thrust::device_vector<mt> d_A(A, A+count);
  thrust::device_vector<it> row_idx(d_A.size());
  thrust::device_vector<it> col_idx(d_A.size());

  // Fill each vector with 0..N-1, then reduce the flat index to its row / column.
  thrust::sequence(row_idx.begin(), row_idx.end());
  thrust::transform(row_idx.begin(), row_idx.end(), row_idx.begin(), _1/cols);
  thrust::sequence(col_idx.begin(), col_idx.end());
  thrust::transform(col_idx.begin(), col_idx.end(), col_idx.begin(), _1%cols);

  // Sort (value, row, column) triples together; the comparator keeps rows
  // grouped and orders by value inside each row. d_A and row_idx are
  // rearranged along with col_idx.
  auto zipped_begin = thrust::make_zip_iterator(thrust::make_tuple(d_A.begin(), row_idx.begin(), col_idx.begin()));
  auto zipped_end = zipped_begin + count;
  thrust::sort(zipped_begin, zipped_end, my_sort_functor());

  // Bring the permuted column indices back to the host and print them.
  thrust::host_vector<it> h_col_idx = col_idx;
  for (int i = 0; i < count; ++i) std::cout << h_col_idx[i] << ",";
  std::cout << std::endl;
}
$ nvcc -o t114 t114.cu
$ ./t114
1,3,2,0,0,2,1,3,3,1,0,2,
$
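$ cat t114.cu
#include <thrust/sort.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <iostream>
using namespace thrust::placeholders;
struct my_sort_functor
{
template <typename T1, typename T2>
__host__ __device__
bool operator()(const T1 &t1, const T2 &t2){
if (thrust::get<1>(t1) < thrust::get<1>(t2)) return true;
if (thrust::get<1>(t1) > thrust::get<1>(t2)) return false;
if (thrust::get<0>(t1) < thrust::get<0>(t2)) return true;
return false;
}
};
typedef float mt;
typedef int it;
int main(){
mt A[] = { 3.4257, -1.2345, 0.6232, -0.1354,
-1.6639, 0.1557, -0.1763, 1.0257,
0.6863, 0.0992, 1.4487, 0.0157};
const int rows = 3;
const int cols = 4;
thrust::device_vector<mt> d_A(A, A+rows*cols);
thrust::device_vector<it> row_idx(d_A.size());
thrust::device_vector<it> col_idx(d_A.size());
thrust::sequence(row_idx.begin(), row_idx.end());
thrust::sequence(col_idx.begin(), col_idx.end());
thrust::transform(row_idx.begin(), row_idx.end(), row_idx.begin(), _1/cols);
thrust::transform(col_idx.begin(), col_idx.end(), col_idx.begin(), _1%cols);
auto my_zip_iterator = thrust::make_zip_iterator(thrust::make_tuple(d_A.begin(), row_idx.begin(), col_idx.begin()));
thrust::sort(my_zip_iterator, my_zip_iterator+rows*cols, my_sort_functor());
thrust::host_vector<it> h_col_idx = col_idx;
thrust::copy_n(h_col_idx.begin(), rows*cols, std::ostream_iterator<it>(std::cout, ","));
std::cout << std::endl;
}
@RobertCrovella:对于该行按值升序排列的结果 [-1.2345, -0.1354, 0.6232, 3.4257],这是正确的 argsort。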
而且鉴于 Thrust 没有提供按键分段排序(segmented sort by key),没有直接的方法可以做到这一点。是的,你也许可以借助稳定排序拼凑出一个办法,但没有现成的直接实现。是的,我之前弄混了。
$ cat t114.cu
#include <thrust/sort.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/copy.h>
#include <iostream>
using namespace thrust::placeholders;
// Element type of the matrix values.
using mt = float;
// Integer type used for flat element indices and the column count.
using it = int;
// Comparator over flat element indices into a matrix with `cols` columns.
// An index i belongs to row i/cols. Indices are ordered by row first and,
// within the same row, by the value d[i] they refer to, so sorting the
// sequence 0..N-1 with this comparator groups each row's indices in
// ascending value order.
struct my_sort_functor
{
  mt *d;    // matrix data, read as d[index]; must be dereferenceable wherever operator() runs
  it cols;  // number of columns; divides a flat index to obtain its row

  // _d: pointer to the matrix data; _cols: number of columns per row.
  my_sort_functor(mt *_d, it _cols) : d(_d), cols(_cols) {}

  // Returns true when flat index t1 must be ordered before flat index t2.
  // const-qualified: the call does not modify the functor, and this also
  // allows invocation through a const comparator object.
  __host__ __device__
  bool operator()(const it &t1, const it &t2) const {
    const it row1 = t1/cols;
    const it row2 = t2/cols;
    // Different rows: the row number alone decides.
    if (row1 < row2) return true;
    if (row1 > row2) return false;
    // Same row: ascending by the referenced value.
    return d[t1] < d[t2];
  }
};
int main(){
mt A[] = { 3.4257, -1.2345, 0.6232, -0.1354,
-1.6639, 0.1557, -0.1763, 1.0257,
0.6863, 0.0992, 1.4487, 0.0157};
const int rows = 3;
const int cols = 4;
thrust::device_vector<mt> d_A(A, A+rows*cols);
thrust::device_vector<it> idx(d_A.size());
thrust::sequence(idx.begin(), idx.end());
thrust::sort(idx.begin(), idx.end(), my_sort_functor(thrust::raw_pointer_cast(d_A.data()), cols));
thrust::transform(idx.begin(), idx.end(), idx.begin(), _1%cols);
thrust::host_vector<it> h_idx = idx;
thrust::copy_n(h_idx.begin(), rows*cols, std::ostream_iterator<it>(std::cout, ","));
std::cout << std::endl;
}
$ nvcc -o t114 t114.cu
$ ./t114
1,3,2,0,0,2,1,3,3,1,0,2,
$
$ cat t114.cu
#include <thrust/sort.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/sequence.h>
#include <thrust/transform.h>
#include <iostream>
using namespace thrust::placeholders;
// Element type of the matrix values.
using mt = float;
// Unsigned integer type used for indices and packed sort keys.
using it = unsigned;
// Comparator over packed keys of the form (row << 16) + col.
// Keys are ordered by row first (upper bits) and, within the same row, by
// the matrix value found at d[row*cols + col].
struct my_sort_functor
{
  mt *d;    // matrix data, read as d[row*cols + col]; must be dereferenceable wherever operator() runs
  it cols;  // number of columns per row

  // _d: pointer to the matrix data; _cols: number of columns per row.
  my_sort_functor(mt *_d, it _cols) : d(_d), cols(_cols) {}

  // Returns true when packed key t1 must be ordered before packed key t2.
  // const-qualified: the call does not modify the functor, and this also
  // allows invocation through a const comparator object.
  __host__ __device__
  bool operator()(const it &t1, const it &t2) const {
    // The row lives above bit 15, so no division is needed to recover it.
    const it row1 = t1>>16;
    const it row2 = t2>>16;
    if (row1 < row2) return true;
    if (row1 > row2) return false;
    // Same row: the low 16 bits (mask 65535) hold the column.
    const it col1 = t1&65535;
    const it col2 = t2&65535;
    const it i1 = row1*cols+col1;
    const it i2 = row2*cols+col2;
    return d[i1] < d[i2];
  }
};
// Converts a flat element index into a packed key (row << 16) + col, where
// row = index / cols and col = index - row*cols.
struct my_transform_functor
{
  it cols;  // number of columns per row

  // _cols: number of columns per row.
  my_transform_functor(it _cols) : cols(_cols) {}

  // t1: flat element index. Returns the packed key with the row shifted
  // left by 16 bits and the column added in the low bits.
  // const-qualified: the call does not modify the functor.
  __host__ __device__
  it operator()(const it &t1) const {
    const it row = t1/cols;
    // Remainder obtained from the quotient instead of a second division.
    const it col = t1 - row*cols;
    return (row << 16) + col;
  }
};
// Demo: per-row argsort of a 3x4 matrix using packed (row << 16) + col keys.
// Prints, row after row, the column indices in ascending value order, each
// followed by a comma, then a newline.
int main(){
  mt A[] = { 3.4257, -1.2345, 0.6232, -0.1354,
            -1.6639, 0.1557, -0.1763, 1.0257,
             0.6863, 0.0992, 1.4487, 0.0157};
  const int rows = 3;
  const int cols = 4;
  // The keys keep the row above bit 15 and the column in the low 16 bits,
  // so each dimension must fit in 16 bits; cols must also be non-zero because
  // it is used as a divisor. Enforced at compile time instead of by comment.
  static_assert(rows >= 0 && rows < 65536 && cols > 0 && cols < 65536,
                "rows and cols must each be less than 65536, and cols must be positive");

  thrust::device_vector<mt> d_A(A, A+rows*cols);
  thrust::device_vector<it> idx(d_A.size());
  // Flat indices 0..N-1, then repacked into (row << 16) + col keys.
  thrust::sequence(idx.begin(), idx.end());
  thrust::transform(idx.begin(), idx.end(), idx.begin(), my_transform_functor(cols));
  // Sort the keys by row, then by the matrix value they refer to; d_A is only read.
  thrust::sort(idx.begin(), idx.end(), my_sort_functor(thrust::raw_pointer_cast(d_A.data()), cols));
  thrust::host_vector<it> h_idx = idx;
  // The low 16 bits of each sorted key are the column index to report.
  for (int i = 0; i < rows*cols; i++) std::cout << (h_idx[i]&65535) << ",";
  std::cout << std::endl;
}
$ nvcc -o t114 t114.cu
$ ./t114
1,3,2,0,0,2,1,3,3,1,0,2,
$