CUDA:在 float3 元组上使用 thrust::unique

CUDA:在 float3 元组上使用 thrust::unique,cuda,unique,thrust,Cuda,Unique,Thrust,我试图在float3元组上使用thrust::unique。然而,它似乎没有返回正确的结果。以下是一个完整的示例: #include <iostream> #include <thrust/tuple.h> #include <thrust/device_vector.h> #include <thrust/unique.h> // --- Equality between two float3's __host__ __device__ __f

我试图在
float3
元组上使用
thrust::unique
。然而,它似乎没有返回正确的结果。以下是一个完整的示例:

#include <iostream>
#include <thrust/tuple.h>
#include <thrust/device_vector.h>
#include <thrust/unique.h>

// --- Exact, component-wise equality of two float3 values (no tolerance is applied).
__host__ __device__ __forceinline__ bool operator==(const float3 &a, const float3 &b) {
    // Bail out on the first differing component; all three must match.
    if (a.x != b.x) return false;
    if (a.y != b.y) return false;
    return a.z == b.z;
}

// --- Binary predicate for a tuple pair.
// tuple_t bundles two float3 values; tupleEqual reports whether two such
// bundles are identical, comparing element 0 with element 0 and element 1
// with element 1 through the float3 operator==.
using tuple_t = thrust::tuple<float3, float3>;
struct tupleEqual
{
    // Const-qualified and taking const references so the predicate can be
    // called on a const functor object and does not copy both tuples per call.
    // Elements are read with thrust::get<N>(), the same accessor main() uses.
    __host__ __device__
        bool operator()(const tuple_t &x, const tuple_t &y) const
    {
        return (thrust::get<0>(x) == thrust::get<0>(y)) &&
               (thrust::get<1>(x) == thrust::get<1>(y));
    }
};

/********/
/* MAIN */
/********/
// Builds six (v, d) float3 pairs on the device, runs thrust::unique over them
// with tupleEqual, and prints the pairs that remain in [begin, new_end).
int main(void)
{
    const int N = 6;

    thrust::device_vector<float3> v(N), d(N);
    thrust::device_vector<tuple_t> vd(N);

    v[0] = make_float3(2.f, 5.f, 9.f);      d[0] = make_float3(2.f, 3.f, 10.f);
    v[1] = make_float3(3.f, 2.f, 1.f);      d[1] = make_float3(2.f, 5.f, 9.f);
    v[2] = make_float3(2.f, 5.f, 9.f);      d[2] = make_float3(2.f, 3.f, 10.f);
    v[3] = make_float3(2.f, 3.f, 10.f);     d[3] = make_float3(2.f, 5.f, 9.f);
    v[4] = make_float3(2.f, 3.f, 10.f);     d[4] = make_float3(1.f, 1.f, 1.f);
    v[5] = make_float3(2.f, 5.f, 9.f);      d[5] = make_float3(2.f, 3.f, 10.f);

    // Pack each (v[i], d[i]) pair into a single tuple element.
    for (int i = 0; i < N; ++i) {
        vd[i] = thrust::make_tuple(v[i], d[i]);
    }

    // NOTE: this input holds repeated pairs (indices 0, 2 and 5), but no two
    // neighbouring elements are equal.
    auto new_end = thrust::unique(vd.begin(), vd.end(), tupleEqual());

    const size_t Nnew = new_end - vd.begin();

    // Nnew and k are size_t, so %zu is the matching conversion specifier.
    printf("Nnew = %zu\n", Nnew);
    for (size_t k = 0; k < Nnew; ++k) {
        tuple_t temp = vd[k];
        float3 vtemp = thrust::get<0>(temp);
        float3 dtemp = thrust::get<1>(temp);
        printf("%zu %f %f %f %f %f %f\n", k, vtemp.x, vtemp.y, vtemp.z, dtemp.x, dtemp.y, dtemp.z);
    }

    return 0;
}
输出与输入完全相同,没有删除任何重复项。

我使用Windows10、VisualStudio2015进行编译,使用CUDA8.0或CUDA9.1(结果相同)


我的问题是:我做错了什么?

您在输出中没有看到任何变化的原因是您的输入不包含任何重复序列。如果我修改代码中的输入:

#include <iostream>
#include <thrust/tuple.h>
#include <thrust/device_vector.h>
#include <thrust/unique.h>

// Exact, component-wise equality of two float3 values (no tolerance is applied).
__host__ __device__ __forceinline__ bool operator==(const float3 &a, const float3 &b) {
    // Bail out on the first differing component; all three must match.
    if (a.x != b.x) return false;
    if (a.y != b.y) return false;
    return a.z == b.z;
}

// tuple_t bundles two float3 values; tupleEqual reports whether two such
// bundles are identical, comparing element 0 with element 0 and element 1
// with element 1 through the float3 operator==.
using tuple_t = thrust::tuple<float3, float3>;
struct tupleEqual
{
    // Const-qualified and taking const references so the predicate can be
    // called on a const functor object and does not copy both tuples per call.
    // Elements are read with thrust::get<N>(), the same accessor main() uses.
    __host__ __device__
        bool operator()(const tuple_t &x, const tuple_t &y) const
    {
        return (thrust::get<0>(x) == thrust::get<0>(y)) &&
               (thrust::get<1>(x) == thrust::get<1>(y));
    }
};

// Builds six (v, d) float3 pairs on the device in which equal pairs sit next
// to each other (indices 0-1 and 2-5), runs thrust::unique over them with
// tupleEqual, and prints the pairs that remain in [begin, new_end).
int main(void)
{
    const int N = 6;

    thrust::device_vector<float3> v(N), d(N);
    thrust::device_vector<tuple_t> vd(N);

    v[0] = make_float3(2.f, 5.f, 9.f);      d[0] = make_float3(2.f, 3.f, 10.f);
    v[1] = make_float3(2.f, 5.f, 9.f);      d[1] = make_float3(2.f, 3.f, 10.f);
    v[2] = make_float3(2.f, 3.f, 10.f);     d[2] = make_float3(2.f, 5.f, 9.f);
    v[3] = make_float3(2.f, 3.f, 10.f);     d[3] = make_float3(2.f, 5.f, 9.f);
    v[4] = make_float3(2.f, 3.f, 10.f);     d[4] = make_float3(2.f, 5.f, 9.f);
    v[5] = make_float3(2.f, 3.f, 10.f);     d[5] = make_float3(2.f, 5.f, 9.f);

    // Pack each (v[i], d[i]) pair into a single tuple element.
    for (int i = 0; i < N; ++i) {
        vd[i] = thrust::make_tuple(v[i], d[i]);
    }

    auto new_end = thrust::unique(vd.begin(), vd.end(), tupleEqual());
    const size_t Nnew = new_end - vd.begin();

    printf("Nnew = %zu\n", Nnew);
    // The index is size_t so it compares against Nnew without a
    // signed/unsigned mismatch; %zu is its matching conversion specifier.
    for (size_t k = 0; k < Nnew; ++k) {
        tuple_t temp = vd[k];
        float3 vtemp = thrust::get<0>(temp);
        float3 dtemp = thrust::get<1>(temp);
        printf("%zu %f %f %f %f %f %f\n", k, vtemp.x, vtemp.y, vtemp.z, dtemp.x, dtemp.y, dtemp.z);
    }

    return 0;
}
thrust::unique 仅删除输入范围中连续重复的元素,它不会对数据排序。引用文档:

对于范围 [first, last) 内每一组值相同的连续元素,unique 会删除该组中除第一个元素之外的所有元素。


(强调部分为我所加。)您在这里唯一的错误在于对该函数功能的理解。您编写的代码是正确的,并且按预期工作。

您没有看到输出中任何更改的原因是您的输入不包含任何重复序列。如果我修改代码中的输入:

#include <iostream>
#include <thrust/tuple.h>
#include <thrust/device_vector.h>
#include <thrust/unique.h>

// Exact, component-wise equality of two float3 values (no tolerance is applied).
__host__ __device__ __forceinline__ bool operator==(const float3 &a, const float3 &b) {
    // Bail out on the first differing component; all three must match.
    if (a.x != b.x) return false;
    if (a.y != b.y) return false;
    return a.z == b.z;
}

// tuple_t bundles two float3 values; tupleEqual reports whether two such
// bundles are identical, comparing element 0 with element 0 and element 1
// with element 1 through the float3 operator==.
using tuple_t = thrust::tuple<float3, float3>;
struct tupleEqual
{
    // Const-qualified and taking const references so the predicate can be
    // called on a const functor object and does not copy both tuples per call.
    // Elements are read with thrust::get<N>(), the same accessor main() uses.
    __host__ __device__
        bool operator()(const tuple_t &x, const tuple_t &y) const
    {
        return (thrust::get<0>(x) == thrust::get<0>(y)) &&
               (thrust::get<1>(x) == thrust::get<1>(y));
    }
};

// Builds six (v, d) float3 pairs on the device in which equal pairs sit next
// to each other (indices 0-1 and 2-5), runs thrust::unique over them with
// tupleEqual, and prints the pairs that remain in [begin, new_end).
int main(void)
{
    const int N = 6;

    thrust::device_vector<float3> v(N), d(N);
    thrust::device_vector<tuple_t> vd(N);

    v[0] = make_float3(2.f, 5.f, 9.f);      d[0] = make_float3(2.f, 3.f, 10.f);
    v[1] = make_float3(2.f, 5.f, 9.f);      d[1] = make_float3(2.f, 3.f, 10.f);
    v[2] = make_float3(2.f, 3.f, 10.f);     d[2] = make_float3(2.f, 5.f, 9.f);
    v[3] = make_float3(2.f, 3.f, 10.f);     d[3] = make_float3(2.f, 5.f, 9.f);
    v[4] = make_float3(2.f, 3.f, 10.f);     d[4] = make_float3(2.f, 5.f, 9.f);
    v[5] = make_float3(2.f, 3.f, 10.f);     d[5] = make_float3(2.f, 5.f, 9.f);

    // Pack each (v[i], d[i]) pair into a single tuple element.
    for (int i = 0; i < N; ++i) {
        vd[i] = thrust::make_tuple(v[i], d[i]);
    }

    auto new_end = thrust::unique(vd.begin(), vd.end(), tupleEqual());
    const size_t Nnew = new_end - vd.begin();

    printf("Nnew = %zu\n", Nnew);
    // The index is size_t so it compares against Nnew without a
    // signed/unsigned mismatch; %zu is its matching conversion specifier.
    for (size_t k = 0; k < Nnew; ++k) {
        tuple_t temp = vd[k];
        float3 vtemp = thrust::get<0>(temp);
        float3 dtemp = thrust::get<1>(temp);
        printf("%zu %f %f %f %f %f %f\n", k, vtemp.x, vtemp.y, vtemp.z, dtemp.x, dtemp.y, dtemp.z);
    }

    return 0;
}
thrust::unique 仅删除输入范围中连续重复的元素,它不会对数据排序。引用文档:

对于范围 [first, last) 内每一组值相同的连续元素,unique 会删除该组中除第一个元素之外的所有元素。


(强调部分为我所加。)您在这里唯一的错误在于对该函数功能的理解。您编写的代码是正确的,并且按预期工作。

talonmies 已经回答了我的问题,指出重复元素必须是连续的,这一点我昨天忽略了。

下面是我修改后的代码:使用 Morton 编码,先后依据 v 和 d 引入了双重排序。

#include <iostream>
#include <thrust/tuple.h>
#include <thrust/device_vector.h>
#include <thrust/unique.h>
#include <thrust/sort.h>

/*********************************/
/* EQUALITY BETWEEN TWO FLOAT3'S */
/*********************************/
// Exact, component-wise equality of two float3 values (no tolerance is applied).
__host__ __device__ __forceinline__ bool operator==(const float3 &a, const float3 &b) {
    // Bail out on the first differing component; all three must match.
    if (a.x != b.x) return false;
    if (a.y != b.y) return false;
    return a.z == b.z;
}

/*************************************/
/* BINARY PREDICATE FOR A TUPLE PAIR */
/*************************************/
// tuple_t bundles two float3 values; tupleEqual reports whether two such
// bundles are identical, comparing element 0 with element 0 and element 1
// with element 1 through the float3 operator==.
using tuple_t = thrust::tuple<float3, float3>;
struct tupleEqual
{
    // Const-qualified and taking const references (like customizedComparator)
    // so the predicate can be called on a const functor object and does not
    // copy both tuples per call.
    __host__ __device__
        bool operator()(const tuple_t &x, const tuple_t &y) const
    {
        return (thrust::get<0>(x) == thrust::get<0>(y)) &&
               (thrust::get<1>(x) == thrust::get<1>(y));
    }
};

/**********************************/
/* MORTON ENCODER KERNEL FUNCTION */
/**********************************/
// --- Spreads the bits of a 10-bit integer over 30 bits: input bit i ends up
// --- at output bit 3*i, leaving two zero bits between consecutive input bits.
__host__ __device__ __forceinline__ unsigned int expandBits(unsigned int v)
{
    // Each step duplicates the value at a shifted position (v + (v << s) is
    // the same as multiplying by 2^s + 1 in unsigned arithmetic) and then
    // masks away the bits that are not wanted at that stage.
    unsigned int spread = v;
    spread = (spread + (spread << 16)) & 0xFF0000FFu;
    spread = (spread + (spread << 8))  & 0x0F00F00Fu;
    spread = (spread + (spread << 4))  & 0xC30C30C3u;
    spread = (spread + (spread << 2))  & 0x49249249u;
    return spread;
}

// --- Builds a 30-bit Morton code for a 3D point expected to lie in the unit cube [0,1].
// --- Each coordinate is scaled by 1024, clamped to [0, 1023], truncated to an
// --- integer and bit-expanded; the three results are then interleaved with x
// --- occupying the highest bit of every triple, y the middle one and z the lowest.
__host__ __device__ __forceinline__ unsigned int morton3D(float x, float y, float z)
{
    // Quantize every axis to a 10-bit grid index (kept as float until the cast).
    const float qx = min(max(x * 1024.0f, 0.0f), 1023.0f);
    const float qy = min(max(y * 1024.0f, 0.0f), 1023.0f);
    const float qz = min(max(z * 1024.0f, 0.0f), 1023.0f);

    const unsigned int ex = expandBits(static_cast<unsigned int>(qx));
    const unsigned int ey = expandBits(static_cast<unsigned int>(qy));
    const unsigned int ez = expandBits(static_cast<unsigned int>(qz));

    // Shift x by two and y by one so the expanded bit patterns interleave.
    return (ex << 2) + (ey << 1) + ez;
}

/*************************/
/* CUSTOMIZED COMPARATOR */
/*************************/
// Strict "less than" ordering for (v, d) tuples, meant for sorting before
// duplicate removal. Tuples are ordered by the Morton code of v, then by the
// Morton code of d, and finally by the raw float components.
//
// The final tie-break matters: Morton codes are computed from quantized,
// clamped coordinates, so two different tuples can share both codes. Without
// it such tuples would compare as equivalent and a sort could leave exact
// duplicates separated by a different tuple, where a consecutive-duplicate
// pass would not remove them.
struct customizedComparator {
    __host__ __device__
        bool operator()(const tuple_t &t1, const tuple_t &t2) const {

        const float3 v1 = thrust::get<0>(t1);
        const float3 v2 = thrust::get<0>(t2);

        // Primary key: Morton code of the first element.
        const unsigned int mv1 = morton3D(v1.x, v1.y, v1.z);
        const unsigned int mv2 = morton3D(v2.x, v2.y, v2.z);
        if (mv1 != mv2) return (mv1 < mv2);

        const float3 d1 = thrust::get<1>(t1);
        const float3 d2 = thrust::get<1>(t2);

        // Secondary key: Morton code of the second element (only computed
        // when the primary keys tie).
        const unsigned int md1 = morton3D(d1.x, d1.y, d1.z);
        const unsigned int md2 = morton3D(d2.x, d2.y, d2.z);
        if (md1 != md2) return (md1 < md2);

        // Both codes tie: fall back to an exact lexicographic comparison so
        // that only identical tuples are treated as equivalent.
        if (v1.x != v2.x) return (v1.x < v2.x);
        if (v1.y != v2.y) return (v1.y < v2.y);
        if (v1.z != v2.z) return (v1.z < v2.z);
        if (d1.x != d2.x) return (d1.x < d2.x);
        if (d1.y != d2.y) return (d1.y < d2.y);
        return (d1.z < d2.z);
    }
};

/********/
/* MAIN */
/********/
// Builds six (v, d) float3 pairs on the device, sorts them as zipped pairs
// with customizedComparator, runs thrust::unique with tupleEqual over the
// sorted range, and prints the pairs that remain in front of new_end.
int main(void)
{
    const int N = 6;

    thrust::device_vector<float3> v(N), d(N);

    v[0] = make_float3(.2f, .5f, .09f);     d[0] = make_float3(0.2f, 0.3f, 0.1f);
    v[1] = make_float3(.3f, .2f, .1f);      d[1] = make_float3(.2f, .5f, .09f);
    v[2] = make_float3(.2f, .5f, .09f);     d[2] = make_float3(0.2f, 0.3f, 0.1f);
    v[3] = make_float3(0.2f, 0.3f, 0.1f);   d[3] = make_float3(.2f, .5f, .09f);
    v[4] = make_float3(0.2f, 0.3f, 0.1f);   d[4] = make_float3(.1f, .1f, .1f);
    v[5] = make_float3(.2f, .5f, .09f);     d[5] = make_float3(0.2f, 0.3f, 0.1f);

    // View v and d as one sequence of (v[i], d[i]) pairs; build the zip
    // iterators once and reuse them for every algorithm call.
    auto first = thrust::make_zip_iterator(thrust::make_tuple(v.begin(), d.begin()));
    auto last  = first + N;

    // Sort first, with the intent of placing duplicate pairs next to each
    // other, because thrust::unique only removes consecutive duplicates.
    thrust::sort(first, last, customizedComparator());

    auto new_end = thrust::unique(first, last, tupleEqual());

    const size_t Nnew = new_end - first;

    // Nnew and k are size_t, so %zu is the matching conversion specifier.
    printf("Nnew = %zu\n", Nnew);
    for (size_t k = 0; k < Nnew; ++k) {
        float3 vtemp = v[k];
        float3 dtemp = d[k];
        printf("%zu %f %f %f %f %f %f\n", k, vtemp.x, vtemp.y, vtemp.z, dtemp.x, dtemp.y, dtemp.z);
    }

    return 0;
}
#包括
#包括
#包括
#包括
#包括
/*********************************/
/*两个浮点数3之间的相等*/
/*********************************/
__主机设备强制内联布尔运算符==(常量浮点3&a、常量浮点3&b){
返回((a.x==b.x)&&&(a.y==b.y)&&&(a.z==b.z));}
/*************************************/
/*元组对的二进制谓词*/
/*************************************/
typedef推力::元组;
结构tupleEqual
{
__主机设备__
布尔运算符()(元组x,元组y)
{
返回((x.get()==y.get())&&(x.get()==y.get());
}
};
/**********************************/
/*莫顿编码器核函数*/
/**********************************/
//---通过在每个位后插入2个零,将10位整数扩展为30位。
__主机\uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu
{
v=(v*0x00010001u)和0xFF0000FFu;
v=(v*0x00000101u)和0x0F00F00Fu;
v=(v*0x00000011u)和0xC30C30C30U;
v=(v*0x00000005u)和0x49249249u;
返回v;
}
//---计算单位立方体[0,1]内给定3D点的30位Morton代码。
__主机\uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu
{
x=最小值(最大值(x*1024.0f,0.0f),1023.0f);
y=最小值(最大值(y*1024.0f,0.0f),1023.0f);
z=最小值(最大值(z*1024.0f,0.0f),1023.0f);
无符号整数xx=扩展位((无符号整数)x);
无符号整数yy=扩展位((无符号整数)y);
无符号整数zz=扩展位((无符号整数)z);
返回xx*4+yy*2+zz;
}
/*************************/
/*定制比较器*/
/*************************/
结构自定义比较器{
__主机设备__
布尔运算符()(常数元组t&t1,常数元组t&t2){
float3 v1=t1.get();
float3 d1=t1.get();
float3 v2=t2.get();
float3 d2=t2.get();
无符号整数m1=morton3D(v1.x,v1.y,v1.z);
无符号int n1=morton3D(v2.x,v2.y,v2.z);
无符号整数p1=morton3D(d1.x,d1.y,d1.z);
无符号整数q1=morton3D(d2.x,d2.y,d2.z);
如果(m1!=n1)返回(m1
talonmies 已经回答了我的问题,指出重复元素必须是连续的,这一点我昨天忽略了。

下面是我修改后的代码:使用 Morton 编码,先后依据 v 和 d 引入了双重排序。

#include <iostream>
#include <thrust/tuple.h>
#include <thrust/device_vector.h>
#include <thrust/unique.h>
#include <thrust/sort.h>

/*********************************/
/* EQUALITY BETWEEN TWO FLOAT3'S */
/*********************************/
// Exact, component-wise equality of two float3 values (no tolerance is applied).
__host__ __device__ __forceinline__ bool operator==(const float3 &a, const float3 &b) {
    // Bail out on the first differing component; all three must match.
    if (a.x != b.x) return false;
    if (a.y != b.y) return false;
    return a.z == b.z;
}

/*************************************/
/* BINARY PREDICATE FOR A TUPLE PAIR */
/*************************************/
// tuple_t bundles two float3 values; tupleEqual reports whether two such
// bundles are identical, comparing element 0 with element 0 and element 1
// with element 1 through the float3 operator==.
using tuple_t = thrust::tuple<float3, float3>;
struct tupleEqual
{
    // Const-qualified and taking const references (like customizedComparator)
    // so the predicate can be called on a const functor object and does not
    // copy both tuples per call.
    __host__ __device__
        bool operator()(const tuple_t &x, const tuple_t &y) const
    {
        return (thrust::get<0>(x) == thrust::get<0>(y)) &&
               (thrust::get<1>(x) == thrust::get<1>(y));
    }
};

/**********************************/
/* MORTON ENCODER KERNEL FUNCTION */
/**********************************/
// --- Spreads the bits of a 10-bit integer over 30 bits: input bit i ends up
// --- at output bit 3*i, leaving two zero bits between consecutive input bits.
__host__ __device__ __forceinline__ unsigned int expandBits(unsigned int v)
{
    // Each step duplicates the value at a shifted position (v + (v << s) is
    // the same as multiplying by 2^s + 1 in unsigned arithmetic) and then
    // masks away the bits that are not wanted at that stage.
    unsigned int spread = v;
    spread = (spread + (spread << 16)) & 0xFF0000FFu;
    spread = (spread + (spread << 8))  & 0x0F00F00Fu;
    spread = (spread + (spread << 4))  & 0xC30C30C3u;
    spread = (spread + (spread << 2))  & 0x49249249u;
    return spread;
}

// --- Builds a 30-bit Morton code for a 3D point expected to lie in the unit cube [0,1].
// --- Each coordinate is scaled by 1024, clamped to [0, 1023], truncated to an
// --- integer and bit-expanded; the three results are then interleaved with x
// --- occupying the highest bit of every triple, y the middle one and z the lowest.
__host__ __device__ __forceinline__ unsigned int morton3D(float x, float y, float z)
{
    // Quantize every axis to a 10-bit grid index (kept as float until the cast).
    const float qx = min(max(x * 1024.0f, 0.0f), 1023.0f);
    const float qy = min(max(y * 1024.0f, 0.0f), 1023.0f);
    const float qz = min(max(z * 1024.0f, 0.0f), 1023.0f);

    const unsigned int ex = expandBits(static_cast<unsigned int>(qx));
    const unsigned int ey = expandBits(static_cast<unsigned int>(qy));
    const unsigned int ez = expandBits(static_cast<unsigned int>(qz));

    // Shift x by two and y by one so the expanded bit patterns interleave.
    return (ex << 2) + (ey << 1) + ez;
}

/*************************/
/* CUSTOMIZED COMPARATOR */
/*************************/
// Strict "less than" ordering for (v, d) tuples, meant for sorting before
// duplicate removal. Tuples are ordered by the Morton code of v, then by the
// Morton code of d, and finally by the raw float components.
//
// The final tie-break matters: Morton codes are computed from quantized,
// clamped coordinates, so two different tuples can share both codes. Without
// it such tuples would compare as equivalent and a sort could leave exact
// duplicates separated by a different tuple, where a consecutive-duplicate
// pass would not remove them.
struct customizedComparator {
    __host__ __device__
        bool operator()(const tuple_t &t1, const tuple_t &t2) const {

        const float3 v1 = thrust::get<0>(t1);
        const float3 v2 = thrust::get<0>(t2);

        // Primary key: Morton code of the first element.
        const unsigned int mv1 = morton3D(v1.x, v1.y, v1.z);
        const unsigned int mv2 = morton3D(v2.x, v2.y, v2.z);
        if (mv1 != mv2) return (mv1 < mv2);

        const float3 d1 = thrust::get<1>(t1);
        const float3 d2 = thrust::get<1>(t2);

        // Secondary key: Morton code of the second element (only computed
        // when the primary keys tie).
        const unsigned int md1 = morton3D(d1.x, d1.y, d1.z);
        const unsigned int md2 = morton3D(d2.x, d2.y, d2.z);
        if (md1 != md2) return (md1 < md2);

        // Both codes tie: fall back to an exact lexicographic comparison so
        // that only identical tuples are treated as equivalent.
        if (v1.x != v2.x) return (v1.x < v2.x);
        if (v1.y != v2.y) return (v1.y < v2.y);
        if (v1.z != v2.z) return (v1.z < v2.z);
        if (d1.x != d2.x) return (d1.x < d2.x);
        if (d1.y != d2.y) return (d1.y < d2.y);
        return (d1.z < d2.z);
    }
};

/********/
/* MAIN */
/********/
// Builds six (v, d) float3 pairs on the device, sorts them as zipped pairs
// with customizedComparator, runs thrust::unique with tupleEqual over the
// sorted range, and prints the pairs that remain in front of new_end.
int main(void)
{
    const int N = 6;

    thrust::device_vector<float3> v(N), d(N);

    v[0] = make_float3(.2f, .5f, .09f);     d[0] = make_float3(0.2f, 0.3f, 0.1f);
    v[1] = make_float3(.3f, .2f, .1f);      d[1] = make_float3(.2f, .5f, .09f);
    v[2] = make_float3(.2f, .5f, .09f);     d[2] = make_float3(0.2f, 0.3f, 0.1f);
    v[3] = make_float3(0.2f, 0.3f, 0.1f);   d[3] = make_float3(.2f, .5f, .09f);
    v[4] = make_float3(0.2f, 0.3f, 0.1f);   d[4] = make_float3(.1f, .1f, .1f);
    v[5] = make_float3(.2f, .5f, .09f);     d[5] = make_float3(0.2f, 0.3f, 0.1f);

    // View v and d as one sequence of (v[i], d[i]) pairs; build the zip
    // iterators once and reuse them for every algorithm call.
    auto first = thrust::make_zip_iterator(thrust::make_tuple(v.begin(), d.begin()));
    auto last  = first + N;

    // Sort first, with the intent of placing duplicate pairs next to each
    // other, because thrust::unique only removes consecutive duplicates.
    thrust::sort(first, last, customizedComparator());

    auto new_end = thrust::unique(first, last, tupleEqual());

    const size_t Nnew = new_end - first;

    // Nnew and k are size_t, so %zu is the matching conversion specifier.
    printf("Nnew = %zu\n", Nnew);
    for (size_t k = 0; k < Nnew; ++k) {
        float3 vtemp = v[k];
        float3 dtemp = d[k];
        printf("%zu %f %f %f %f %f %f\n", k, vtemp.x, vtemp.y, vtemp.z, dtemp.x, dtemp.y, dtemp.z);
    }

    return 0;
}
#包括
#包括
#包括
#包括
#包括
/*********************************/
/*两个浮点数3之间的相等*/
/*********************************/
__主机设备用于C