C++:从离散分布中进行无放回抽样
我想从一个离散分布中进行无放回(即不重复)抽样。使用 discrete_distribution 可以进行有放回抽样。借助这个函数,我用一种非常粗糙的方式实现了无放回抽样:C++:从离散分布中进行无放回抽样,c++,distribution,sampling,C++,Distribution,Sampling,我想从一个离散分布中进行无放回(即不重复)抽样。使用 discrete_distribution 可以进行有放回抽样。借助这个函数,我用一种非常粗糙的方式实现了无放回抽样: #include <iostream> #include <random> #include <vector> #include <array> int main() { const int sampleSize = 8; // Size of the sample std::
#include <iostream>
#include <random>
#include <vector>
#include <array>
/// Draws `sampleSize` outcomes from a weighted discrete distribution, first
/// with replacement and then without replacement, and prints a star histogram
/// of the drawn outcomes for each run.
///
/// @return 0 on completion.
int main()
{
    const int sampleSize = 8; // Size of the sample
    std::vector<double> weights = {2,2,1,1,2,2,1,1,2,2}; // one weight per possible outcome
    // Derive the number of outcomes from the weights so that editing the
    // weight list cannot desynchronise the histogram size or the print loops.
    const int outcomeCount = static_cast<int>(weights.size());

    std::random_device rd;
    std::mt19937 generator(rd());

    // p[k] counts how many times outcome k was drawn.
    std::vector<int> p(weights.size(), 0);

    /// WITH REPLACEMENT
    {
        std::discrete_distribution<int> distribution(weights.begin(), weights.end());
        for (int i = 0; i < sampleSize; ++i) {
            const int number = distribution(generator);
            ++p[number];
        }
    }
    std::cout << "Discrete_distribution with replacement:" << '\n';
    for (int i = 0; i < outcomeCount; ++i)
        std::cout << i << ": " << std::string(p[i], '*') << '\n';

    /// WITHOUT REPLACEMENT
    p.assign(weights.size(), 0);

    // Number of outcomes that can still be drawn. Once it reaches zero every
    // weight is 0 and a discrete_distribution must not be built from them.
    int remaining = 0;
    for (const double weight : weights) {
        if (weight > 0) {
            ++remaining;
        }
    }

    for (int i = 0; i < sampleSize && remaining > 0; ++i) {
        // The distribution is rebuilt on every draw so that it reflects the
        // weights that have been zeroed out so far.
        std::discrete_distribution<int> distribution(weights.begin(), weights.end());
        const int number = distribution(generator);
        weights[number] = 0; // the weight associated with the sampled value is set to 0
        --remaining;
        ++p[number];
    }
    std::cout << "Discrete_distribution without replacement:" << '\n';
    for (int i = 0; i < outcomeCount; ++i)
        std::cout << i << ": " << std::string(p[i], '*') << '\n';
    return 0;
}
#包括
#包括
#包括
#包括
int main()
{
const int sampleSize=8;//样本大小
std::vector weights={2,2,1,1,2,2,1,1,2,2};//10不同权重的可能结果
std::随机_装置rd;
std::mt19937发生器(rd());
///替换
离散分布(weights.begin(),weights.end());
std::数组p={};
对于(int i=0;i
此解决方案可能会更短一些。不幸的是,它需要在每一步都重新创建一个 discrete_distribution 对象,在抽取大量样本时,这样的开销可能高得令人无法接受。
#include <iostream>
#include <boost/random/discrete_distribution.hpp>
#include <boost/random/mersenne_twister.hpp>
using namespace boost::random;
/// Samples every outcome exactly once, without replacement, by zeroing the
/// weight of each drawn outcome and rebuilding the Boost discrete_distribution
/// from the remaining weights. Prints the outcomes in the order drawn.
///
/// @return 0 on completion.
int main(int, char**) {
    std::vector<double> w = { 2, 2, 1, 1, 2, 2, 1, 1, 2, 2 };
    discrete_distribution<> dist(w);
    // Every weight above is positive, so each outcome can be drawn once.
    const int n = static_cast<int>(w.size());
    boost::random::mt19937 gen; // default-seeded: the output is the same on every run
    std::vector<int> samples;
    samples.reserve(w.size());
    for (int i = 0; i < n; ++i) {
        const int drawn = dist(gen);
        samples.push_back(drawn);
        w[drawn] = 0; // exclude the drawn outcome from later draws
        // Rebuild only while another draw follows. After the final draw all
        // weights are zero, and a distribution must not be built from that.
        if (i + 1 < n) {
            dist = discrete_distribution<>(w);
        }
    }
    for (const int sample : samples) {
        std::cout << sample << " ";
    }
    return 0;
}
#包括
#包括
#包括
使用名称空间boost::random;
int main(int,char**){
向量w={2,2,1,1,2,2,1,1,2,2};
离散分布区(w);
int n=10;
boost::random::mt19937 gen;
std::载体样本;
用于(自动i=0;i std::cout
Aleph0 的现有答案是我测试过的方案中表现最好的。我对三种方案做了基准测试:问题中的原始方案、Aleph0 补充的方案,以及一个新方案——仅当现有 discrete_distribution 中已被抽中的项超过 50% 时才重新创建分布(当分布抽到样本中已有的项时,则重新抽取)。
我在样本量等于总体规模、权重等于下标的条件下进行测试。我认为问题中的原始方案的运行时间为 O(n^2),我的新方案为 O(n log n),而论文中的方案似乎为 O(n)。
代码:
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
const int sampleSize=20000;
使用名称空间boost::random;
静态void BM_重用分布(基准::状态和状态){
向量权重;
权重。调整大小(采样);
用于(自动:状态){
for(int i=0;i距离大小/2){
distSize-=裸露的;
numAdded=0;
分布=
离散分布(weights.begin(),weights.end());
}
int编号=配电(发电机);
如果(!权重[数量]){
i-=1;
继续;
}否则{
权重[数字]=0;
o[i]=数字;
裸体+=1;
}
}
}
}
基准(BM_再利用分布);
静态void BM_NewDistribution(基准::状态和状态){
向量权重;
权重。调整大小(采样);
用于(自动:状态){
for(int i=0;iy.first;});
std::载体样本;
转换(valIndices.begin(),valIndices.end(),
标准:背面插入器(样品),
[](自动v){返回v.second;});
}
}
基准(BM_SomePaper);
基准_MAIN();
下面的代码可能会有帮助。在底部有一个有趣的算法,可以从这样的分布中进行采样。但是我没有发现任何库提供你想要的函数,至少没有 C++ 库。谢谢你的建议。我想知道是否有办法避免在每一步都创建离散分布,但这可能没那么容易…太好了,我会看一看。非常感谢!谢谢你的编辑。我尝试了代码,但它没有对原始向量的子集进行采样。如果我把向量 w 的最后一个元素写成“2”,我并不会得到 2 个元素的样本。我必须说,对于像我这样的新手来说,代码非常复杂。你能帮我理解一下这是怎么回事吗?谢谢。好的,我刚刚发现我必须取向量 samples 的前几个元素,是吗?你是对的。如果你把最后一个权重改成一个较大的值,比如 100,那么样本 9 很有可能排在列表的第一位。如果我去掉 STL 函数,代码可能会更可读?
#include <iostream>
#include <iterator>
#include <boost/random/uniform_01.hpp>
#include <boost/random/mersenne_twister.hpp>
using namespace boost::random;
/// Orders all outcomes as a weighted sample without replacement: each outcome
/// i receives the key u_i^(1/w_i), with u_i drawn from uniform_01, and the
/// outcomes are printed in order of decreasing key.
///
/// @return 0 on completion.
int main(int, char**) {
    const std::vector<double> weights = { 2, 2, 1, 1, 2, 2, 1, 1, 2, 10 };
    uniform_01<> dist;
    boost::random::mt19937 gen; // default-seeded: the output is the same on every run

    // One uniform draw per outcome, then raised to the power 1 / weight.
    std::vector<double> keys;
    std::generate_n(std::back_inserter(keys), weights.size(), [&dist, &gen]() { return dist(gen); });
    std::transform(keys.begin(), keys.end(), weights.begin(), keys.begin(),
                   [](double u, double weight) { return std::pow(u, 1. / weight); });

    // Pair every key with its outcome index. A plain loop is used because
    // std::transform does not promise to call its functor in element order,
    // so a counting lambda could attach the wrong index to a key.
    std::vector<std::pair<double, int>> valIndices;
    valIndices.reserve(keys.size());
    int index = 0;
    for (const double key : keys) {
        valIndices.emplace_back(key, index);
        ++index;
    }

    // Largest key first.
    std::sort(valIndices.begin(), valIndices.end(),
              [](const auto& x, const auto& y) { return x.first > y.first; });

    std::vector<int> samples;
    samples.reserve(valIndices.size());
    for (const auto& entry : valIndices) {
        samples.push_back(entry.second);
    }
    for (const int sample : samples) {
        std::cout << sample << " ";
    }
    return 0;
}
#include <iostream>
#include <iterator>
#include <boost/random/uniform_01.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <algorithm>
using namespace boost::random;
/// Draws a weighted sample of up to 8 outcomes without replacement: each
/// outcome i receives the key u_i^(1/w_i), with u_i drawn from uniform_01,
/// and the outcomes with the largest keys are printed, largest key first.
///
/// @return 0 on completion.
int main(int, char**) {
    const std::vector<double> w = { 2, 2, 1, 1, 2, 2, 1, 1, 2, 1000 };
    uniform_01<> dist;
    boost::random::mt19937 gen(342575235); // fixed seed: the output is the same on every run

    // (outcome index, key) pairs. The index is kept as size_t so it is not
    // narrowed on the way in or widened again on the way out.
    std::vector<std::pair<size_t, double>> valsWithIndices;
    valsWithIndices.reserve(w.size());
    for (size_t index = 0; index < w.size(); ++index) {
        valsWithIndices.emplace_back(index, std::pow(dist(gen), 1. / w[index]));
    }

    // Sort by key, largest first, carrying the indices along.
    std::sort(valsWithIndices.begin(), valsWithIndices.end(),
              [](const auto& x, const auto& y) { return x.second > y.second; });

    // Never read past the population: a sample without replacement cannot be
    // larger than the number of outcomes.
    const size_t requestedSize = 8;
    const size_t sampleSize =
        requestedSize < valsWithIndices.size() ? requestedSize : valsWithIndices.size();

    std::vector<size_t> samples;
    samples.reserve(sampleSize);
    for (size_t rank = 0; rank < sampleSize; ++rank) {
        samples.push_back(valsWithIndices[rank].first);
    }
    for (const size_t sample : samples) {
        std::cout << sample << " ";
    }
    return 0;
}
-------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------
BM_Reuse 25252721 ns 25251731 ns 26
BM_NewDistribution 17338706125 ns 17313620000 ns 1
BM_SomePaper 6789525 ns 6779400 ns 100
#include <array>
#include <benchmark/benchmark.h>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_01.hpp>
#include <iostream>
#include <iterator>
#include <random>
#include <vector>
const int sampleSize = 20000;
using namespace boost::random;
/// Benchmark: sampling without replacement while reusing the
/// discrete_distribution between draws.
///
/// The distribution is rebuilt only once more than half of the outcomes it was
/// built from have been drawn. In between, a draw that lands on an outcome
/// whose weight is already zero is discarded and repeated.
///
/// @param state Google Benchmark state driving the timed loop.
static void BM_ReuseDistribution(benchmark::State &state) {
  std::vector<double> weights;
  weights.resize(sampleSize);
  for (auto _ : state) {
    // Reset the weights to 1..sampleSize for this iteration.
    for (int i = 0; i < sampleSize; i++) {
      weights[i] = i + 1;
    }
    std::random_device rd;
    std::mt19937 generator(rd());
    int o[sampleSize];
    // Let the result buffer escape so the stores into it count as observable
    // and the sampling work cannot be optimised away.
    int *sink = o;
    benchmark::DoNotOptimize(sink);
    std::discrete_distribution<int> distribution(weights.begin(),
                                                 weights.end());
    int numAdded = 0;           // outcomes drawn since the last rebuild
    int distSize = sampleSize;  // live outcomes at the last rebuild
    int drawn = 0;              // outcomes accepted so far
    while (drawn < sampleSize) {
      if (numAdded > distSize / 2) {
        distSize -= numAdded;
        numAdded = 0;
        distribution =
            std::discrete_distribution<int>(weights.begin(), weights.end());
      }
      const int number = distribution(generator);
      if (weights[number] == 0.0) {
        // The stale distribution produced an outcome that was already
        // taken; draw again without recording anything.
        continue;
      }
      weights[number] = 0;
      o[drawn] = number;
      ++drawn;
      ++numAdded;
    }
    benchmark::ClobberMemory();
  }
}
BENCHMARK(BM_ReuseDistribution);
/// Benchmark: sampling without replacement by building a fresh
/// discrete_distribution before every single draw and zeroing the weight of
/// the drawn outcome afterwards.
///
/// @param state Google Benchmark state driving the timed loop.
static void BM_NewDistribution(benchmark::State &state) {
  std::vector<double> weights;
  weights.resize(sampleSize);
  for (auto _ : state) {
    // Reset the weights to 1..sampleSize for this iteration.
    for (int i = 0; i < sampleSize; i++) {
      weights[i] = i + 1;
    }
    std::random_device rd;
    std::mt19937 generator(rd());
    int o[sampleSize];
    // Let the result buffer escape so the stores into it count as observable
    // and the sampling work cannot be optimised away.
    int *sink = o;
    benchmark::DoNotOptimize(sink);
    for (int i = 0; i < sampleSize; ++i) {
      // Rebuilt on every draw so it reflects the weights zeroed so far.
      std::discrete_distribution<int> distribution(weights.begin(),
                                                   weights.end());
      const int number = distribution(generator);
      weights[number] = 0;
      o[i] = number;
    }
    benchmark::ClobberMemory();
  }
}
BENCHMARK(BM_NewDistribution);
/// Benchmark: sampling without replacement by giving each outcome i the key
/// u_i^(1/w_i), with u_i drawn from uniform_01, and sorting the outcomes by
/// decreasing key.
///
/// @param state Google Benchmark state driving the timed loop.
static void BM_SomePaper(benchmark::State &state) {
  std::vector<double> w;
  w.resize(sampleSize);
  for (auto _ : state) {
    // Reset the weights to 1..sampleSize for this iteration.
    for (int i = 0; i < sampleSize; i++) {
      w[i] = i + 1;
    }
    uniform_01<> dist;
    boost::random::mt19937 gen;
    std::vector<double> vals;
    std::generate_n(std::back_inserter(vals), w.size(),
                    [&dist, &gen]() { return dist(gen); });
    std::transform(vals.begin(), vals.end(), w.begin(), vals.begin(),
                   [](double r, double weight) { return std::pow(r, 1. / weight); });
    // Pair every key with its outcome index. A plain loop is used because
    // std::transform does not promise to call its functor in element order,
    // so a counting lambda could attach the wrong index to a key.
    std::vector<std::pair<double, int>> valIndices;
    int index = 0;
    for (const double v : vals) {
      valIndices.push_back(std::pair<double, int>(v, index));
      ++index;
    }
    // Largest key first.
    std::sort(valIndices.begin(), valIndices.end(),
              [](const auto &x, const auto &y) { return x.first > y.first; });
    std::vector<int> samples;
    std::transform(valIndices.begin(), valIndices.end(),
                   std::back_inserter(samples),
                   [](const auto &v) { return v.second; });
    // Mark the result as observed so the work above cannot be optimised away.
    int *sink = samples.data();
    benchmark::DoNotOptimize(sink);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(BM_SomePaper);
BENCHMARK_MAIN();