Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/cplusplus/142.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C++ 利用推力产生均匀分布的随机数_C++_Cuda_Thrust - Fatal编程技术网

C++ 使用 Thrust 生成均匀分布的随机数

C++ 利用推力产生均匀分布的随机数,c++,cuda,thrust,C++,Cuda,Thrust,我需要使用推力生成一个随机数介于0.0和1.0之间的向量。我能找到的唯一一个有文档记录的示例生成了非常大的随机数(推力::生成(myvector.begin()、myvector.end()、rand)。 我相信答案很简单,但我希望得到任何建议。推力有随机发生器,可以用来生成随机数序列。要将它们与设备向量一起使用,需要创建一个函子,返回随机发生器序列的不同元素。最简单的方法是使用trans计数迭代器的形成。一个非常简单的完整示例(在本例中,生成1.0到2.0之间的随机单精度数)如下所示: #in

我需要使用 Thrust 生成一个向量,其中的随机数介于 0.0 和 1.0 之间。我能找到的唯一一个有文档记录的示例生成的是非常大的随机数(thrust::generate(myvector.begin(), myvector.end(), rand))。
我相信答案很简单,但我希望得到任何建议。

Thrust 提供了随机数生成器,可以用来生成随机数序列。要将它们用于设备向量(device_vector),需要创建一个函子,使其返回随机数生成器序列中的不同元素。最直接的做法是对计数迭代器(counting_iterator)进行变换(transform)。下面是一个非常简单的完整示例(本例生成 1.0 到 2.0 之间的单精度随机数):

#include <thrust/random.h>
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/iterator/counting_iterator.h>
#include <iostream>

// Functor that maps an index n to a pseudo-random float between a and b.
// Every call starts from a default-constructed engine and discards n values,
// so different indices yield different elements of the same sequence.
struct prg
{
    float a, b;  // bounds handed to the uniform distribution

    // Defaults give the range 0.0 .. 1.0.
    __host__ __device__
    prg(float lo = 0.f, float hi = 1.f) : a(lo), b(hi) {}

    // n: position in the engine's output sequence to draw from.
    __host__ __device__
    float operator()(const unsigned int n) const
    {
        thrust::default_random_engine engine;
        engine.discard(n);  // skip ahead so index n gets its own element

        thrust::uniform_real_distribution<float> range(a, b);
        return range(engine);
    }
};


int main(void)
{
    const int N = 20;

    thrust::device_vector<float> numbers(N);
    thrust::counting_iterator<unsigned int> index_sequence_begin(0);

    thrust::transform(index_sequence_begin,
            index_sequence_begin + N,
            numbers.begin(),
            prg(1.f,2.f));

    for(int i = 0; i < N; i++)
    {
        std::cout << numbers[i] << std::endl;
    }

    return 0;
}
#包括
#包括
#包括
#包括
#包括
结构prg
{
浮子a、b;
__主机设备__
prg(float_a=0.f,float_b=1.f):a(_a),b(_b){};
__主机设备__
浮点运算符()(常量无符号整数n)常量
{
推力:默认随机发动机转速;
推力:均匀实分布距离(a,b);
废弃(n);
返回距离(rng);
}
};
内部主(空)
{
常数int N=20;
推力:设备_矢量号(N);
计数迭代器索引序列开始(0);
推力::变换(索引序列开始,
索引\u序列\u开始+N,
number.begin(),
prg(1.f,2.f));
对于(int i=0;istd::cout

@talonmies 建议的方法有许多有用的特性。下面是另一种方法,它模仿了您引用的示例:

#include <thrust/host_vector.h>
#include <thrust/generate.h>
#include <iostream>
#define DSIZE 5


// Returns rand() divided by RAND_MAX as a float, i.e. a value in [0, 1].
__host__ static __inline__ float rand_01()
{
    const float sample = static_cast<float>(rand());
    return sample / RAND_MAX;
}

int main(){
  thrust::host_vector<float> h_1(DSIZE);

  thrust::generate(h_1.begin(), h_1.end(), rand_01);
  std::cout<< "Values generated: " << std::endl;
  for (unsigned i=0; i<DSIZE; i++)
    std::cout<< h_1[i] << " : ";
  std::cout<<std::endl;
return 0;
}
#包括
#包括
#包括
#定义DSIZE 5
__主机\uuuuuuuu静态\uuuuuuu内联\uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu
{
返回((浮动)rand()/rand_MAX);
}
int main(){
推力:主机向量h_1(DSIZE);
生成(h_1.begin(),h_1.end(),rand_01);

std::cout

这可能不是对您问题的直接回答,但 cuRAND 库在这方面非常强大。您既可以在 GPU 上也可以在 CPU 上生成随机数,并且它包含许多分布函数(正态分布等)。

在此链接中搜索标题:“NVIDIA CURAND 实现”:


需要注意的是,不要一次又一次地生成生成器,它会进行一些预计算。调用curandGenerateUniform相当快,生成的值介于0.0和1.0之间。

这个问题已经有了令人满意的答案。特别是,OP 和 Robert Crovella 讨论了 thrust::generate,而 talonmies 建议使用 thrust::transform。

我认为还有另一种可能,即使用 thrust::for_each,因此我在此给出一个使用该原语的完整示例,仅供记录。

我还对不同的解决方案进行了计时。

代码

#include <iostream>

#include <thrust\host_vector.h>
#include <thrust\generate.h>
#include <thrust\for_each.h>
#include <thrust\execution_policy.h>
#include <thrust\random.h>

#include "TimingCPU.h"

/**************************************************/
/* RANDOM NUMBERS GENERATION STRUCTS AND FUNCTION */
/**************************************************/
// Unary functor (used with thrust::transform below): ignores its argument
// and returns rand() / RAND_MAX converted to T.
template<typename T>
struct rand_01 {
    __host__ T operator()(T&) const
    {
        const T draw = static_cast<T>(rand());
        return draw / RAND_MAX;
    }
};

// Functor for thrust::for_each: overwrites the referenced element with
// rand() / RAND_MAX converted to T.
template<typename T>
struct rand_01_for_each {
    __host__ void operator()(T& VecElem) const
    {
        const T draw = static_cast<T>(rand());
        VecElem = draw / RAND_MAX;
    }
};

// Argument-free generator (used with thrust::generate below): returns
// rand() / RAND_MAX converted to T.
template<typename T>
__host__ T rand_01_fcn()
{
    const T draw = static_cast<T>(rand());
    return draw / RAND_MAX;
}

// Functor that maps an index n to a pseudo-random float between a and b.
// Every call starts from a default-constructed engine and discards n values,
// so different indices yield different elements of the same sequence.
struct prg
{
    float a, b;  // bounds handed to the uniform distribution

    // Defaults give the range 0.0 .. 1.0.
    __host__ __device__
    prg(float lo = 0.f, float hi = 1.f) : a(lo), b(hi) {}

    // n: position in the engine's output sequence to draw from.
    __host__ __device__
    float operator()(const unsigned int n) const
    {
        thrust::default_random_engine engine;
        engine.discard(n);  // skip ahead so index n gets its own element

        thrust::uniform_real_distribution<float> range(a, b);
        return range(engine);
    }
};

/********/
/* MAIN */
/********/
int main() {

    TimingCPU timerCPU;

    const int N = 2 << 18;          
    //const int N = 64;

    const int numIters = 50;

    thrust::host_vector<double>     h_v1(N);
    thrust::host_vector<double>     h_v2(N);
    thrust::host_vector<double>     h_v3(N);
    thrust::host_vector<double>     h_v4(N);

    printf("N = %d\n", N);

    double timing = 0.;
    for (int k = 0; k < numIters; k++) {
        timerCPU.StartCounter();
        thrust::transform(thrust::host, h_v1.begin(), h_v1.end(), h_v1.begin(), rand_01<double>());
        timing = timing + timerCPU.GetCounter();
    }
    printf("Timing using transform = %f\n", timing / numIters);

    timing = 0.;
    for (int k = 0; k < numIters; k++) {
        timerCPU.StartCounter();
        thrust::counting_iterator<unsigned int> index_sequence_begin(0);
        thrust::transform(index_sequence_begin,
            index_sequence_begin + N,
            h_v2.begin(),
            prg(0.f, 1.f));
        timing = timing + timerCPU.GetCounter();
    }
    printf("Timing using transform and internal Thrust random generator = %f\n", timing / numIters);

    timing = 0.;
    for (int k = 0; k < numIters; k++) {
        timerCPU.StartCounter();
        thrust::for_each(h_v3.begin(), h_v3.end(), rand_01_for_each<double>());
        timing = timing + timerCPU.GetCounter();
    }
    timerCPU.StartCounter();
    printf("Timing using for_each = %f\n", timing / numIters);

    //std::cout << "Values generated: " << std::endl;
    //for (int k = 0; k < N; k++)
    //  std::cout << h_v3[k] << " : ";
    //std::cout << std::endl;

    timing = 0.;
    for (int k = 0; k < numIters; k++) {
        timerCPU.StartCounter();
        thrust::generate(h_v4.begin(), h_v4.end(), rand_01_fcn<double>);
        timing = timing + timerCPU.GetCounter();
    }
    timerCPU.StartCounter();
    printf("Timing using generate = %f\n", timing / numIters);

    //std::cout << "Values generated: " << std::endl;
    //for (int k = 0; k < N; k++)
    //  std::cout << h_v4[k] << " : ";
    //std::cout << std::endl;

    //std::cout << "Values generated: " << std::endl;
    //for (int k = 0; k < N * 2; k++)
    //  std::cout << h_v[k] << " : ";
    //std::cout << std::endl;

    return 0;
}
除了第二个计时(它使用 Thrust 的内部随机数生成器而不是 rand())之外,其他计时结果基本相当。


请注意,与其他解决方案不同,使用 thrust::generate 的方案更为受限,因为用于生成随机数的函数不能带输入参数。因此,例如,无法按某个常数对输入参数进行缩放。

我认为您的 #include 中的斜杠(/)写反了。
@zardilior 这是由于 Linux 和 Windows 中文件路径的约定不同。
#include <iostream>

#include <thrust\host_vector.h>
#include <thrust\generate.h>
#include <thrust\for_each.h>
#include <thrust\execution_policy.h>
#include <thrust\random.h>

#include "TimingCPU.h"

/**************************************************/
/* RANDOM NUMBERS GENERATION STRUCTS AND FUNCTION */
/**************************************************/
// Unary functor (used with thrust::transform below): ignores its argument
// and returns rand() / RAND_MAX converted to T.
template<typename T>
struct rand_01 {
    __host__ T operator()(T&) const
    {
        const T draw = static_cast<T>(rand());
        return draw / RAND_MAX;
    }
};

// Functor for thrust::for_each: overwrites the referenced element with
// rand() / RAND_MAX converted to T.
template<typename T>
struct rand_01_for_each {
    __host__ void operator()(T& VecElem) const
    {
        const T draw = static_cast<T>(rand());
        VecElem = draw / RAND_MAX;
    }
};

// Argument-free generator (used with thrust::generate below): returns
// rand() / RAND_MAX converted to T.
template<typename T>
__host__ T rand_01_fcn()
{
    const T draw = static_cast<T>(rand());
    return draw / RAND_MAX;
}

// Functor that maps an index n to a pseudo-random float between a and b.
// Every call starts from a default-constructed engine and discards n values,
// so different indices yield different elements of the same sequence.
struct prg
{
    float a, b;  // bounds handed to the uniform distribution

    // Defaults give the range 0.0 .. 1.0.
    __host__ __device__
    prg(float lo = 0.f, float hi = 1.f) : a(lo), b(hi) {}

    // n: position in the engine's output sequence to draw from.
    __host__ __device__
    float operator()(const unsigned int n) const
    {
        thrust::default_random_engine engine;
        engine.discard(n);  // skip ahead so index n gets its own element

        thrust::uniform_real_distribution<float> range(a, b);
        return range(engine);
    }
};

/********/
/* MAIN */
/********/
int main() {

    TimingCPU timerCPU;

    const int N = 2 << 18;          
    //const int N = 64;

    const int numIters = 50;

    thrust::host_vector<double>     h_v1(N);
    thrust::host_vector<double>     h_v2(N);
    thrust::host_vector<double>     h_v3(N);
    thrust::host_vector<double>     h_v4(N);

    printf("N = %d\n", N);

    double timing = 0.;
    for (int k = 0; k < numIters; k++) {
        timerCPU.StartCounter();
        thrust::transform(thrust::host, h_v1.begin(), h_v1.end(), h_v1.begin(), rand_01<double>());
        timing = timing + timerCPU.GetCounter();
    }
    printf("Timing using transform = %f\n", timing / numIters);

    timing = 0.;
    for (int k = 0; k < numIters; k++) {
        timerCPU.StartCounter();
        thrust::counting_iterator<unsigned int> index_sequence_begin(0);
        thrust::transform(index_sequence_begin,
            index_sequence_begin + N,
            h_v2.begin(),
            prg(0.f, 1.f));
        timing = timing + timerCPU.GetCounter();
    }
    printf("Timing using transform and internal Thrust random generator = %f\n", timing / numIters);

    timing = 0.;
    for (int k = 0; k < numIters; k++) {
        timerCPU.StartCounter();
        thrust::for_each(h_v3.begin(), h_v3.end(), rand_01_for_each<double>());
        timing = timing + timerCPU.GetCounter();
    }
    timerCPU.StartCounter();
    printf("Timing using for_each = %f\n", timing / numIters);

    //std::cout << "Values generated: " << std::endl;
    //for (int k = 0; k < N; k++)
    //  std::cout << h_v3[k] << " : ";
    //std::cout << std::endl;

    timing = 0.;
    for (int k = 0; k < numIters; k++) {
        timerCPU.StartCounter();
        thrust::generate(h_v4.begin(), h_v4.end(), rand_01_fcn<double>);
        timing = timing + timerCPU.GetCounter();
    }
    timerCPU.StartCounter();
    printf("Timing using generate = %f\n", timing / numIters);

    //std::cout << "Values generated: " << std::endl;
    //for (int k = 0; k < N; k++)
    //  std::cout << h_v4[k] << " : ";
    //std::cout << std::endl;

    //std::cout << "Values generated: " << std::endl;
    //for (int k = 0; k < N * 2; k++)
    //  std::cout << h_v[k] << " : ";
    //std::cout << std::endl;

    return 0;
}
N = 2097152
Timing using transform = 33.202298
Timing using transform and internal Thrust random generator = 264.508662
Timing using for_each = 33.155237
Timing using generate = 35.309399