Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/algorithm/10.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C++ “优化”;二元“U折”;算法并使其左(或右)关联_C++_Algorithm_Optimization_C++14_Template Meta Programming - Fatal编程技术网

C++ 优化“二元折叠”(binary fold)算法并使其左(或右)结合

C++ “优化”;二元“U折”;算法并使其左(或右)关联,c++,algorithm,optimization,c++14,template-meta-programming,C++,Algorithm,Optimization,C++14,Template Meta Programming,以下是我为C++14提出的一些建议解决方案,并考虑了这些解决方案: #include <algorithm> #include <exception> #include <iterator> #include <cstddef> template<class It, class Func> auto binary_fold(It begin, It end, Func op) -> decltype(op(*begin, *e

在我最初的问题之后,并参考了其中一些建议的解决方案,我为 C++14 写出了如下实现:

#include <algorithm>
#include <exception>
#include <iterator>
#include <cstddef>

/// Folds the range [begin, end) with a binary operation by splitting it in two,
/// folding each part recursively and combining them as op(left, right).
///
/// The split point is half the range length, rounded up to an even number.
/// The iterators must support `end - begin` and `begin + 1`.
///
/// @param begin  iterator to the first element
/// @param end    iterator one past the last element
/// @param op     binary operation applied to elements and to partial results
/// @return the folded value
/// @throws std::out_of_range if the range is empty
template<class It, class Func>
auto binary_fold(It begin, It end, Func op) ->  decltype(op(*begin, *end)) {
  const std::ptrdiff_t diff = end - begin;
  switch (diff) {
    case 0: throw std::out_of_range("binary fold on empty container");
    case 1: return *begin;
    case 2: return op(*begin, *(begin + 1));
    default: {
      // Stay in ptrdiff_t: narrowing the distance to int would truncate the
      // offset for very long ranges.
      const std::ptrdiff_t half = diff / 2;
      // Round half up to the next even count.
      const std::ptrdiff_t offset = (half % 2 == 1) ? (half + 1) : half;
      It mid{begin};
      std::advance(mid, offset);
      return op( binary_fold(begin,mid,op), binary_fold(mid,end,op) );
    }
  }
}
在某些情况下(如上述情况),该算法是左结合的,但在其他情况下(如以下情况),它是右结合的:

  std::vector<int> v = {7,4,9,2,6,8};
  auto result = mar::binary_fold(v.cbegin(), v.cend(), std::minus<int>());
我想知道如何进一步优化该算法,以便:

a. 它确定地是左结合的或右结合的

b。它尽可能快(这将被放在openGL绘图循环中,所以它必须非常快)

c。制作一个TMP版本,当容器的大小已知时,它将在编译时计算偏移量(这对于我的应用程序来说不是必需的,但我只是好奇如何做到这一点)

我对 b 的最初想法是:迭代版本可能会更快,并且偏移量的计算还可以进一步优化(也许用一些位运算技巧?)。不过我还是卡住了。

我编写了一个“总是左关联”的迭代版本,其中包含一些您也可以使用的计时运行。在启用编译器优化之前,它的性能稍差

10000次迭代的总运行时间,有5000个值

g++ --std=c++11 main.cpp && ./a.out
Recursive elapsed:9642msec
Iterative elapsed:10189msec

$ g++ --std=c++11 -O1 main.cpp && ./a.out
Recursive elapsed:3468msec
Iterative elapsed:3098msec
Iterative elapsed:3359msec # another run
Recursive elapsed:3668msec

$ g++ --std=c++11 -O2 main.cpp && ./a.out
Recursive elapsed:3193msec
Iterative elapsed:2763msec
Recursive elapsed:3184msec # another run
Iterative elapsed:2696msec

$ g++ --std=c++11 -O3 main.cpp && ./a.out
Recursive elapsed:3183msec
Iterative elapsed:2685msec
Recursive elapsed:3106msec # another run
Iterative elapsed:2681msec
Recursive elapsed:3054msec # another run
Iterative elapsed:2653msec
编译器可以比递归更容易地优化循环

#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstddef>
#include <functional>
#include <initializer_list>
#include <iostream>
#include <iterator>
#include <numeric>
#include <random>
#include <stdexcept>
#include <vector>

/// Recursive fold of [begin, end): splits the range in two, folds each part
/// and combines them as op(left, right).
///
/// The split point is half the range length, rounded up to an even number.
/// The iterators must support `end - begin` and `begin + 1`.
///
/// @param begin  iterator to the first element
/// @param end    iterator one past the last element
/// @param op     binary operation applied to elements and to partial results
/// @return the folded value
/// @throws std::out_of_range if the range is empty
template<class It, class Func>
auto binary_fold_rec(It begin, It end, Func op) ->  decltype(op(*begin, *end)) {
  const std::ptrdiff_t diff = end - begin;
  switch (diff) {
    case 0: throw std::out_of_range("binary fold on empty container");
    case 1: return *begin;
    case 2: return op(*begin, *(begin + 1));
    default: {
      // Stay in ptrdiff_t: narrowing the distance to int would truncate the
      // offset for very long ranges.
      const std::ptrdiff_t half = diff / 2;
      // Round half up to the next even count.
      const std::ptrdiff_t offset = (half % 2 == 1) ? (half + 1) : half;
      It mid{begin};
      std::advance(mid, offset);
      return op( binary_fold_rec(begin,mid,op), binary_fold_rec(mid,end,op) );
    }
  }
}


// left-associative
/// Iterative pairwise fold of [begin, end).
///
/// Each round combines neighbouring elements with `op` (an unpaired last
/// element is carried over unchanged) until a single value remains.
///
/// The first round reads from the input range and every later round works
/// purely on the scratch buffer, so `It` does not have to be convertible
/// from the scratch buffer's iterator type (plain pointers work, for example).
///
/// @param begin  iterator to the first element (must support `end - begin`, `+`, `+=`)
/// @param end    iterator one past the last element
/// @param op     binary operation applied to elements and to partial results
/// @return the folded value; for a one-element range, that element
/// @throws std::out_of_range if the range is empty
template<class It, class Func>
auto binary_fold_it(It begin, It end, Func op) -> decltype(op(*begin, *end)) {
    using Result = decltype(op(*begin, *end));
    using Index = typename std::vector<Result>::size_type;

    const std::ptrdiff_t diff = end - begin;
    if(diff == 0) {
        // Checked before any allocation happens.
        throw std::out_of_range("binary fold on empty container.");
    }
    if(diff == 1) {
        // Nothing to combine: the result is the element itself.
        return *begin;
    }

    // Round one: input range -> scratch. Only ceil(diff / 2) slots are ever needed.
    std::vector<Result> scratch;
    scratch.reserve(static_cast<Index>((diff + 1) / 2));
    const It paired_end = (diff & 1) ? end - 1 : end;
    for(It in = begin; in != paired_end; in += 2) {
        scratch.push_back(op(*in, *(in + 1)));
    }
    if(paired_end != end) {
        scratch.push_back(*paired_end);  // odd element out, carried to the next round
    }

    // Remaining rounds compact the scratch buffer in place. The write index
    // never overtakes the read index, so no unread value is overwritten.
    Index live = scratch.size();
    while(live > 1) {
        Index out = 0;
        Index in = 0;
        for(; in + 1 < live; in += 2) {
            scratch[out] = op(scratch[in], scratch[in + 1]);
            ++out;
        }
        if(in < live) {
            scratch[out] = scratch[in];
            ++out;
        }
        live = out;
    }
    return scratch[0];
}


/// Folds `elems` with subtraction via binary_fold_it, prints the result and
/// asserts that folding the same values again yields `expected`.
void run(std::initializer_list<int> elems, int expected) {
    std::vector<int> values(elems.begin(), elems.end());
    const auto folded = binary_fold_it(values.begin(), values.end(), std::minus<int>());
    std::cout << folded << std::endl;
    assert(binary_fold_it(values.begin(), values.end(), std::minus<int>()) == expected);
}

constexpr int rolls = 10000;
constexpr int min_val = -1000;
constexpr int max_val = 1000;
constexpr int num_vals = 5000;

/// Returns a vector of `num_vals` integers drawn from a uniform distribution
/// over [min_val, max_val]. The engine is default-constructed on every call
/// and is never explicitly seeded.
std::vector<int> random_vector() {
    // Thanks http://stackoverflow.com/questions/21516575/fill-a-vector-with-random-numbers-c
    // for saving me time.
    std::default_random_engine engine;
    std::uniform_int_distribution<int> pick(min_val, max_val);
    std::vector<int> values(num_vals);
    for (auto& slot : values) {
        slot = pick(engine);
    }
    return values;
}

template<typename It, typename Func>
void evaluate(void(*func)(It, It, Func), const char* message) {
    auto start = std::chrono::high_resolution_clock::now();
    for(int i=0; i<rolls; i++) {
        auto data = random_vector();
        func(data.begin(), data.end(), std::minus<int>());
    }
    auto end = std::chrono::high_resolution_clock::now();
    std::cout << message << std::chrono::duration_cast<std::chrono::milliseconds>(end-start).count() << "msec\n";
}


void evaluate(void(*func)(), const char* message) {
    auto start = std::chrono::high_resolution_clock::now();
    for(int i=0; i<rolls; i++) {
        func();
    }
    auto end = std::chrono::high_resolution_clock::now();
    std::cout << message << std::chrono::duration_cast<std::chrono::milliseconds>(end-start).count() << "msec\n";
}


/// One benchmark step for the iterative fold: generate a random vector and
/// fold it with subtraction; the result is discarded.
void time_it() {
    std::vector<int> input = random_vector();
    binary_fold_it(input.begin(), input.end(), std::minus<int>{});
}


/// One benchmark step for the recursive fold: generate a random vector and
/// fold it with subtraction; the result is discarded.
void time_rec() {
    std::vector<int> input = random_vector();
    binary_fold_rec(input.begin(), input.end(), std::minus<int>{});
}


/// Benchmarks the recursive fold first, then the iterative one, printing one
/// timing line for each.
int main() {
    evaluate(&time_rec, "Recursive elapsed:");
    evaluate(&time_it, "Iterative elapsed:");
    return 0;
}
#包括
#包括
#包括
#包括
#包括
#包括
模板
自动二进制(开始、结束、函数运算)->decltype(运算(*开始、*结束)){
std::ptrdiff_t diff=结束-开始;
开关(差分){
案例0:抛出std::超出范围(“空容器上的二进制折叠”);
案例1:返回*开始;
案例2:返回op(*begin,*(begin+1));
默认值:{//第一轮取最接近的2的倍数,然后前进
它在{begin}中间;
int div=diff/2;
int offset=(div%2==1)?(div+1):div;//四舍五入到两个(向上)中最接近的倍数
标准:前进(中间,偏移);
返回op(二进制折叠rec(开始,中间,op),二进制折叠rec(中间,结束,op));
}
}
}
//左联想
模板
自动二进制(开始、结束、函数运算)->decltype(运算(*开始、*结束)){
//分配足够的刮痕开始,我们不需要再弄乱。
std::ptrdiff_t diff=结束-开始;
标准::矢量划痕(静态_铸造(差异));
自动刮伤\当前=刮伤。开始();
如果(差异==0){
抛出std::超出范围(“空容器上的二进制折叠”);
}
而(差异>1){
自动假结束=(差异&1)?结束-1:结束;
while(开始!=假结束){
(*scratch_current++)=op(*begin,*(begin+1));
开始+ = 2;//愚蠢的C++不能保证++顺序,所以在这里增加。
}
如果(假结束!=结束){
*划痕_当前+=*开始;
}
结束=刮伤电流;
begin=scratch_current=scratch.begin();
diff=结束-开始;
};
返回划痕[0];
}
无效运行(标准::初始值设定项\列表元素,预期为int){
std::向量v(elems);
自动结果=binary_fold_it(v.begin()、v.end()、std::minus());

我有两个TMP版本。我想哪一个更好,取决于数据类型:

解决方案A:

首先,让我们为分割点找到一个合适的偏移量(二的幂看起来是很好的选择):

我使用与MtRoad相同的程序进行测量。在我的机器上,差异没有MtRoad报告的那么大。使用
-O3
解决方案A和B似乎比MtRoad的版本稍快,但实际上,您需要使用您的类型和数据进行测试


备注:我没有对我的版本进行太严格的测试。

好的。我有一个迭代版本,它是左关联的,我可以推广(我希望)对于TMP。你想让它对奇数个元素右联想,还是总是左联想或右联想?出于好奇:今天早上,赏金又运行了三天。这是个坏消息吗?我打算写另一个答案,我想我还有很多时间。我对赏金很陌生,不知道它到底是怎么工作的。我看到了一个很好的相关回答,就在那里给出了。好吧,我还是添加了我的TMP变体。就我个人而言,我认为你应该等到这段时间结束后再颁发奖金。但我不知道这是否有一个公认的礼仪。@Rumburak,谢谢,非常感谢。我用
boost::circular_buffer
尝试了这一点我在这一行得到一个
no-available重载'='
错误:
end=scratch_current
。我认为
end
有一个类型
It
scratch_current
类型
std::vector::iterator
。在您的示例中,这两个类型是一致的,因为您使用的是
std::vector
(由于
std::minus<int> 的结果类型也是
int
)。解决这个问题的方法是什么?很好!您可以使用“scratch type”模板参数将向量替换为您期望的容器类型。或者修改函数,使其通过给定的迭代器就地写入,从而避免复制(没有尝试,但应该会成功)。除非有办法从迭代器推断容器的类型。或者可能有?
(7-4) - (9-2) - (6-8) = -2
g++ --std=c++11 main.cpp && ./a.out
Recursive elapsed:9642msec
Iterative elapsed:10189msec

$ g++ --std=c++11 -O1 main.cpp && ./a.out
Recursive elapsed:3468msec
Iterative elapsed:3098msec
Iterative elapsed:3359msec # another run
Recursive elapsed:3668msec

$ g++ --std=c++11 -O2 main.cpp && ./a.out
Recursive elapsed:3193msec
Iterative elapsed:2763msec
Recursive elapsed:3184msec # another run
Iterative elapsed:2696msec

$ g++ --std=c++11 -O3 main.cpp && ./a.out
Recursive elapsed:3183msec
Iterative elapsed:2685msec
Recursive elapsed:3106msec # another run
Iterative elapsed:2681msec
Recursive elapsed:3054msec # another run
Iterative elapsed:2653msec
#include <algorithm>
#include <functional>
#include <iostream>
#include <numeric>
#include <random>
#include <vector>    

/// Recursive fold of [begin, end): splits the range in two, folds each part
/// and combines them as op(left, right).
///
/// The split point is half the range length, rounded up to an even number.
/// The iterators must support `end - begin` and `begin + 1`.
///
/// @param begin  iterator to the first element
/// @param end    iterator one past the last element
/// @param op     binary operation applied to elements and to partial results
/// @return the folded value
/// @throws std::out_of_range if the range is empty
template<class It, class Func>
auto binary_fold_rec(It begin, It end, Func op) ->  decltype(op(*begin, *end)) {
  const std::ptrdiff_t diff = end - begin;
  switch (diff) {
    case 0: throw std::out_of_range("binary fold on empty container");
    case 1: return *begin;
    case 2: return op(*begin, *(begin + 1));
    default: {
      // Stay in ptrdiff_t: narrowing the distance to int would truncate the
      // offset for very long ranges.
      const std::ptrdiff_t half = diff / 2;
      // Round half up to the next even count.
      const std::ptrdiff_t offset = (half % 2 == 1) ? (half + 1) : half;
      It mid{begin};
      std::advance(mid, offset);
      return op( binary_fold_rec(begin,mid,op), binary_fold_rec(mid,end,op) );
    }
  }
}


// left-associative
/// Iterative pairwise fold of [begin, end).
///
/// Each round combines neighbouring elements with `op` (an unpaired last
/// element is carried over unchanged) until a single value remains.
///
/// The first round reads from the input range and every later round works
/// purely on the scratch buffer, so `It` does not have to be convertible
/// from the scratch buffer's iterator type (plain pointers work, for example).
///
/// @param begin  iterator to the first element (must support `end - begin`, `+`, `+=`)
/// @param end    iterator one past the last element
/// @param op     binary operation applied to elements and to partial results
/// @return the folded value; for a one-element range, that element
/// @throws std::out_of_range if the range is empty
template<class It, class Func>
auto binary_fold_it(It begin, It end, Func op) -> decltype(op(*begin, *end)) {
    using Result = decltype(op(*begin, *end));
    using Index = typename std::vector<Result>::size_type;

    const std::ptrdiff_t diff = end - begin;
    if(diff == 0) {
        // Checked before any allocation happens.
        throw std::out_of_range("binary fold on empty container.");
    }
    if(diff == 1) {
        // Nothing to combine: the result is the element itself.
        return *begin;
    }

    // Round one: input range -> scratch. Only ceil(diff / 2) slots are ever needed.
    std::vector<Result> scratch;
    scratch.reserve(static_cast<Index>((diff + 1) / 2));
    const It paired_end = (diff & 1) ? end - 1 : end;
    for(It in = begin; in != paired_end; in += 2) {
        scratch.push_back(op(*in, *(in + 1)));
    }
    if(paired_end != end) {
        scratch.push_back(*paired_end);  // odd element out, carried to the next round
    }

    // Remaining rounds compact the scratch buffer in place. The write index
    // never overtakes the read index, so no unread value is overwritten.
    Index live = scratch.size();
    while(live > 1) {
        Index out = 0;
        Index in = 0;
        for(; in + 1 < live; in += 2) {
            scratch[out] = op(scratch[in], scratch[in + 1]);
            ++out;
        }
        if(in < live) {
            scratch[out] = scratch[in];
            ++out;
        }
        live = out;
    }
    return scratch[0];
}


/// Folds `elems` with subtraction via binary_fold_it, prints the result and
/// asserts that folding the same values again yields `expected`.
void run(std::initializer_list<int> elems, int expected) {
    std::vector<int> values(elems.begin(), elems.end());
    const auto folded = binary_fold_it(values.begin(), values.end(), std::minus<int>());
    std::cout << folded << std::endl;
    assert(binary_fold_it(values.begin(), values.end(), std::minus<int>()) == expected);
}

constexpr int rolls = 10000;
constexpr int min_val = -1000;
constexpr int max_val = 1000;
constexpr int num_vals = 5000;

/// Returns a vector of `num_vals` integers drawn from a uniform distribution
/// over [min_val, max_val]. The engine is default-constructed on every call
/// and is never explicitly seeded.
std::vector<int> random_vector() {
    // Thanks http://stackoverflow.com/questions/21516575/fill-a-vector-with-random-numbers-c
    // for saving me time.
    std::default_random_engine engine;
    std::uniform_int_distribution<int> pick(min_val, max_val);
    std::vector<int> values(num_vals);
    for (auto& slot : values) {
        slot = pick(engine);
    }
    return values;
}

template<typename It, typename Func>
void evaluate(void(*func)(It, It, Func), const char* message) {
    auto start = std::chrono::high_resolution_clock::now();
    for(int i=0; i<rolls; i++) {
        auto data = random_vector();
        func(data.begin(), data.end(), std::minus<int>());
    }
    auto end = std::chrono::high_resolution_clock::now();
    std::cout << message << std::chrono::duration_cast<std::chrono::milliseconds>(end-start).count() << "msec\n";
}


void evaluate(void(*func)(), const char* message) {
    auto start = std::chrono::high_resolution_clock::now();
    for(int i=0; i<rolls; i++) {
        func();
    }
    auto end = std::chrono::high_resolution_clock::now();
    std::cout << message << std::chrono::duration_cast<std::chrono::milliseconds>(end-start).count() << "msec\n";
}


/// One benchmark step for the iterative fold: generate a random vector and
/// fold it with subtraction; the result is discarded.
void time_it() {
    std::vector<int> input = random_vector();
    binary_fold_it(input.begin(), input.end(), std::minus<int>{});
}


/// One benchmark step for the recursive fold: generate a random vector and
/// fold it with subtraction; the result is discarded.
void time_rec() {
    std::vector<int> input = random_vector();
    binary_fold_rec(input.begin(), input.end(), std::minus<int>{});
}


/// Benchmarks the recursive fold first, then the iterative one, printing one
/// timing line for each.
int main() {
    evaluate(&time_rec, "Recursive elapsed:");
    evaluate(&time_it, "Iterative elapsed:");
    return 0;
}
/// Compile-time split point for a range of `diff` elements.
///
/// Starting from V = 2, V is doubled while 2 * V < diff - 1; `value` is the
/// first V for which that no longer holds. Doubling stops at 1 << 16 at the
/// latest. Ranges of 0, 1 or 2 elements get a split point of 0.
template <std::ptrdiff_t diff, std::ptrdiff_t V = 2>
struct offset
{
  // Both arms of the conditional name a type, so the next doubling step is
  // always instantiated; the <diff, 1 << 16> specialization ends that chain.
  static constexpr std::ptrdiff_t value =
      (diff - 1 > 2 * V) ? offset<diff, 2 * V>::value : V;
};

// Upper bound for the doubling: terminates the recursion.
template <std::ptrdiff_t diff>
struct offset<diff, 1 << 16>
{
  static constexpr std::ptrdiff_t value = 1 << 16;
};

// Ranges too short to split.
template <>
struct offset<2, 2>
{
  static constexpr std::ptrdiff_t value = 0;
};

template <>
struct offset<1, 2>
{
  static constexpr std::ptrdiff_t value = 0;
};

template <>
struct offset<0, 2>
{
  static constexpr std::ptrdiff_t value = 0;
};
/// Folds exactly `diff` elements, with the split points taken from
/// offset<diff> at compile time.
///
/// @tparam diff  number of elements in [begin, end); checked with assert
/// @param begin  iterator to the first element
/// @param end    iterator one past the last element
/// @param op     binary operation applied to elements and to partial results
/// @return the folded value; diff == 0 is treated as a programming error
template <std::ptrdiff_t diff, class It, class Func>
auto binary_fold_tmp(It begin, It end, Func op)
    -> decltype(op(*begin, *end))
{
  assert(end - begin == diff);

  if (diff == 0)
  {
    assert(false);
    return 0;  // This will never happen
  }
  if (diff == 1)
  {
    return *begin;
  }
  if (diff == 2)
  {
    return op(*begin, *(begin + 1));
  }

  // Split at the precomputed offset and fold both parts with their own
  // compile-time lengths.
  It split{begin};
  std::advance(split, offset<diff>::value);
  auto lhs = binary_fold_tmp<offset<diff>::value>(begin, split, op);
  auto rhs = binary_fold_tmp<diff - offset<diff>::value>(split, end, op);
  return op(lhs, rhs);
}
/// Runtime front end for binary_fold_tmp.
///
/// Ranges of 1 to 8 elements are dispatched to the fixed-size fold of that
/// length. Longer ranges fold a fixed-size prefix of 8, 16 or 32 elements and
/// recurse on the remainder.
///
/// @param begin  iterator to the first element
/// @param end    iterator one past the last element
/// @param op     binary operation applied to elements and to partial results
/// @return the folded value; the range must not be empty (checked with assert)
template <class It, class Func>
auto binary_fold(It begin, It end, Func op)
    -> decltype(op(*begin, *end))
{
  const auto diff = end - begin;
  assert(diff > 0);
  switch (diff)
  {
    case 1:
      return binary_fold_tmp<1>(begin, end, op);
    case 2:
      return binary_fold_tmp<2>(begin, end, op);
    case 3:
      return binary_fold_tmp<3>(begin, end, op);
    case 4:
      return binary_fold_tmp<4>(begin, end, op);
    case 5:
      return binary_fold_tmp<5>(begin, end, op);
    case 6:
      return binary_fold_tmp<6>(begin, end, op);
    case 7:
      return binary_fold_tmp<7>(begin, end, op);
    case 8:
      return binary_fold_tmp<8>(begin, end, op);
    default:
      // The bounds are inclusive so that the remainder is never empty: with a
      // strict '<', a range of exactly 16 (or 32) elements would peel off all
      // of its elements and recurse on an empty range.
      if (diff <= 16)
        return op(binary_fold_tmp<8>(begin, begin + 8, op),
                  binary_fold(begin + 8, end, op));
      else if (diff <= 32)
        return op(binary_fold_tmp<16>(begin, begin + 16, op),
                  binary_fold(begin + 16, end, op));
      else
        return op(binary_fold_tmp<32>(begin, begin + 32, op),
                  binary_fold(begin + 32, end, op));
  }
}
/// One level of a pairwise fold over `diff` elements starting at `begin`.
///
/// For every index i in `Is`, elements 2*i and 2*i+1 are combined with `op`
/// and stored in a local array; the function then recurses on that array
/// until diff == 2, where the single combined value is returned.
///
/// `end` is not read here: only `begin` and `Is` decide which elements are
/// accessed.
/// NOTE(review): presumably meant for `diff` a power of two >= 2 with
/// Is = 0 .. diff/2 - 1 — confirm against callers.
template <std::ptrdiff_t diff, class It, class Func, size_t... Is>
auto binary_fold_pairs_impl(It begin,
                            It end,
                            Func op,
                            const std::index_sequence<Is...>&)
    -> decltype(op(*begin, *end))
{
  // Both branches of the `if` below are instantiated, so this template is
  // also instantiated for diff < 2 (never executed). Clamp the array length
  // to 1 there: ISO C++ does not allow zero-length arrays.
  constexpr std::ptrdiff_t pair_count = (diff / 2 > 0) ? diff / 2 : 1;
  std::decay_t<decltype(*begin)> pairs[pair_count] = {
      op(*(begin + 2 * Is), *(begin + 2 * Is + 1))...};

  if (diff == 2)
    return pairs[0];
  else
    return binary_fold_pairs_impl<diff / 2>(
        &pairs[0],
        &pairs[0] + diff / 2,
        op,
        std::make_index_sequence<diff / 4>{});
}

/// Entry point for the pairwise fold over `diff` elements: builds the index
/// sequence 0 .. diff/2 - 1 and forwards to binary_fold_pairs_impl.
template <std::ptrdiff_t diff, class It, class Func>
auto binary_fold_pairs(It begin, It end, Func op) -> decltype(op(*begin, *end))
{
  using pair_indices = std::make_index_sequence<diff / 2>;
  return binary_fold_pairs_impl<diff>(begin, end, op, pair_indices{});
}
/// Runtime front end for the pairwise fold.
///
/// Lengths 2, 4 and 8 are folded directly with binary_fold_pairs. Other
/// lengths fold a fixed-size prefix with binary_fold_pairs and combine it
/// with the remaining element(s): a single element is used as is, a longer
/// remainder is folded separately.
///
/// @param begin  iterator to the first element
/// @param end    iterator one past the last element
/// @param op     binary operation applied to elements and to partial results
/// @return the folded value; the range must not be empty (checked with assert)
template <class It, class Func>
auto binary_fold_mix(It begin, It end, Func op) -> decltype(op(*begin, *end))
{
  const auto diff = end - begin;
  assert(diff > 0);
  switch (diff)
  {
    case 1:
      return *begin;
    case 2:
      return binary_fold_pairs<2>(begin, end, op);
    case 3:
      // The two-element prefix ends at begin + 2.
      return op(binary_fold_pairs<2>(begin, begin + 2, op),
                *(begin + (diff - 1)));
    case 4:
      return binary_fold_pairs<4>(begin, end, op);
    case 5:
      return op(binary_fold_pairs<4>(begin, begin + 4, op),
                *(begin + (diff - 1)));
    case 6:
      // Only two elements remain after the first four, so the tail is a
      // 2-element fold; a 4-element fold would read past the end of the range.
      return op(binary_fold_pairs<4>(begin, begin + 4, op),
                binary_fold_pairs<2>(begin + 4, begin + 6, op));
    case 7:
      return op(binary_fold_pairs<4>(begin, begin + 4, op),
                binary_fold_mix(begin + 4, begin + 7, op));
    case 8:
      return binary_fold_pairs<8>(begin, end, op);
    default:
      // Inclusive bounds keep the remainder non-empty.
      if (diff <= 16)
        return op(binary_fold_pairs<8>(begin, begin + 8, op),
                  binary_fold_mix(begin + 8, end, op));
      else if (diff <= 32)
        return op(binary_fold_pairs<16>(begin, begin + 16, op),
                  binary_fold_mix(begin + 16, end, op));
      else
        return op(binary_fold_pairs<32>(begin, begin + 32, op),
                  binary_fold_mix(begin + 32, end, op));
  }
}