Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/cplusplus/135.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C++ Boost多阵列中最快的元素访问方法_C++_Performance_Boost_Boost Multi Array - Fatal编程技术网

C++ Boost多阵列中最快的元素访问方法

C++ Boost多阵列中最快的元素访问方法,c++,performance,boost,boost-multi-array,C++,Performance,Boost,Boost Multi Array,什么更快?使用元素选择操作符访问多数组的元素,或者使用迭代器遍历多数组 在我的例子中,我每次都需要对多数组的所有元素进行完整的遍历。访问boost::multi_数组的每个元素的最快方法是通过data()和num_elements() 使用data() 一个简单的测试给出: g++ -O3 -fomit-frame-pointer -march=native (GCC v4.8.2) Writing (index): 9.70651 Writing (data): 2.22353 Read

什么更快?使用元素选择操作符访问多数组的元素,或者使用迭代器遍历多数组


在我的例子中,我每次都需要对多数组的所有元素进行完整的遍历。

访问 `boost::multi_array` 的每个元素的最快方法是通过 `data()` 和 `num_elements()`。

使用 `data()` 可以直接拿到指向底层存储的指针,从首元素一直遍历到 `data() + num_elements()` 即可,无需逐维计算索引。

一个简单的测试给出:

g++ -O3 -fomit-frame-pointer -march=native   (GCC v4.8.2)
Writing (index): 9.70651
Writing (data):  2.22353
Reading (index): 4.5973 (found 1)
Reading (data):  3.53811 (found 1)

clang++ -O3 -fomit-frame-pointer -march=native   (CLANG v3.3)
Writing (index): 5.49858
Writing (data):  2.13678
Reading (index): 5.07324 (found 1)
Reading (data):  2.55109 (found 1)
默认情况下,Boost 的索引访问方法会执行范围检查:如果提供的索引超出了数组定义的范围,断言会中止程序。要禁用范围检查,可以在包含 `multi_array.hpp` 之前定义预处理器宏 `BOOST_DISABLE_ASSERTS`。

这将大大减少性能差异:

g++ -O3 -fomit-frame-pointer -march=native   (GCC v4.8.2)
Writing (index): 3.15244
Writing (data):  2.23002
Reading (index): 1.89553 (found 1)
Reading (data):  1.54427 (found 1)

clang++ -O3 -fomit-frame-pointer -march=native   (CLANG v3.3)
Writing (index): 2.24831
Writing (data):  2.12853
Reading (index): 2.59164 (found 1)
Reading (data):  2.52141 (found 1)
在以下情况下性能差异会变大(即 `data()` 的优势更明显):

  • 数组的维度数更多
  • 元素数量较少(当元素很多时,元素访问本身的开销远不如把这些元素加载进 CPU 缓存所带来的缓存压力重要;预取器会不停地尝试加载这些元素,这会占用大量时间)
无论如何,这种优化不太可能在实际程序中产生可测量的差异。除非通过广泛的测试最终确定它是某种瓶颈的来源,否则您不应该担心这一点

资料来源:

#include <chrono>
#include <iostream>

// #define BOOST_DISABLE_ASSERTS
#include <boost/multi_array.hpp>

int main()
{
  using array3 = boost::multi_array<unsigned, 3>;
  using index = array3::index;

  using clock = std::chrono::high_resolution_clock;
  using duration = std::chrono::duration<double>;

  constexpr unsigned d1(300), d2(400), d3(200), sup(100);

  array3 A(boost::extents[d1][d2][d3]);

  // Writing via index
  const auto t_begin1(clock::now());
  unsigned values1(0);
  for (unsigned n(0); n < sup; ++n)
    for (index i(0); i != d1; ++i)
      for (index j(0); j != d2; ++j)
        for (index k(0); k != d3; ++k)
          A[i][j][k] = ++values1;
  const auto t_end1(clock::now());

  // Writing directly
  const auto t_begin2(clock::now());
  unsigned values2(0);
  for (unsigned n(0); n < sup; ++n)
  {
    const auto sup(A.data() + A.num_elements());

    for (auto i(A.data()); i != sup; ++i)
      *i = ++values2;
  }
  const auto t_end2(clock::now());

  // Reading via index
  const auto t_begin3(clock::now());
  bool found1(false);
  for (unsigned n(0); n < sup; ++n)
    for (index i(0); i != d1; ++i)
      for (index j(0); j != d2; ++j)
        for (index k(0); k != d3; ++k)
          if (A[i][j][k] == values1)
            found1 = true;
  const auto t_end3(clock::now());

  // Reading directly
  const auto t_begin4(clock::now());
  bool found2(false);
  for (unsigned n(0); n < sup; ++n)
  {
    const auto sup(A.data() + A.num_elements());

    for (auto i(A.data()); i != sup; ++i)
      if (*i == values2)
        found2 = true;
  }
  const auto t_end4(clock::now());

  std::cout << "Writing (index): "
            << std::chrono::duration_cast<duration>(t_end1 - t_begin1).count()
            << std::endl
            << "Writing (data):  "
            << std::chrono::duration_cast<duration>(t_end2 - t_begin2).count()
            << std::endl
            << "Reading (index): "
            << std::chrono::duration_cast<duration>(t_end3 - t_begin3).count()
            << " (found " << found1 << ")" << std::endl
            << "Reading (data):  "
            << std::chrono::duration_cast<duration>(t_end4 - t_begin4).count()
            << " (found " << found2 << ")" << std::endl;

  return 0;
}
#包括
#包括
//#定义BOOST_DISABLE_断言
#包括
int main()
{
使用array3=boost::multi_数组;
使用index=array3::index;
使用时钟=标准::时钟::高分辨率时钟;
使用duration=std::chrono::duration;
constexpr无符号d1(300)、d2(400)、d3(200)、sup(100);
阵列3a(boost::区段[d1][d2][d3]);
//通过索引写入
const auto t_begin1(clock::now());
无符号值1(0);
for(无符号n(0);nstd::cout访问
`boost::multi_array` 的每个元素的最快方法是通过 `data()` 和 `num_elements()`。

使用 `data()` 可以直接拿到指向底层存储的指针,从首元素一直遍历到 `data() + num_elements()` 即可,无需逐维计算索引。

一个简单的测试给出:

g++ -O3 -fomit-frame-pointer -march=native   (GCC v4.8.2)
Writing (index): 9.70651
Writing (data):  2.22353
Reading (index): 4.5973 (found 1)
Reading (data):  3.53811 (found 1)

clang++ -O3 -fomit-frame-pointer -march=native   (CLANG v3.3)
Writing (index): 5.49858
Writing (data):  2.13678
Reading (index): 5.07324 (found 1)
Reading (data):  2.55109 (found 1)
默认情况下,Boost 的索引访问方法会执行范围检查:如果提供的索引超出了数组定义的范围,断言会中止程序。要禁用范围检查,可以在包含 `multi_array.hpp` 之前定义预处理器宏 `BOOST_DISABLE_ASSERTS`。

这将大大减少性能差异:

g++ -O3 -fomit-frame-pointer -march=native   (GCC v4.8.2)
Writing (index): 3.15244
Writing (data):  2.23002
Reading (index): 1.89553 (found 1)
Reading (data):  1.54427 (found 1)

clang++ -O3 -fomit-frame-pointer -march=native   (CLANG v3.3)
Writing (index): 2.24831
Writing (data):  2.12853
Reading (index): 2.59164 (found 1)
Reading (data):  2.52141 (found 1)
在以下情况下性能差异会变大(即 `data()` 的优势更明显):

  • 数组的维度数更多
  • 元素数量较少(当元素很多时,元素访问本身的开销远不如把这些元素加载进 CPU 缓存所带来的缓存压力重要;预取器会不停地尝试加载这些元素,这会占用大量时间)
无论如何,这种优化不太可能在实际程序中产生可测量的差异。除非通过广泛的测试最终确定它是某种瓶颈的来源,否则您不应该担心这一点

资料来源:

#include <chrono>
#include <iostream>

// #define BOOST_DISABLE_ASSERTS
#include <boost/multi_array.hpp>

int main()
{
  using array3 = boost::multi_array<unsigned, 3>;
  using index = array3::index;

  using clock = std::chrono::high_resolution_clock;
  using duration = std::chrono::duration<double>;

  constexpr unsigned d1(300), d2(400), d3(200), sup(100);

  array3 A(boost::extents[d1][d2][d3]);

  // Writing via index
  const auto t_begin1(clock::now());
  unsigned values1(0);
  for (unsigned n(0); n < sup; ++n)
    for (index i(0); i != d1; ++i)
      for (index j(0); j != d2; ++j)
        for (index k(0); k != d3; ++k)
          A[i][j][k] = ++values1;
  const auto t_end1(clock::now());

  // Writing directly
  const auto t_begin2(clock::now());
  unsigned values2(0);
  for (unsigned n(0); n < sup; ++n)
  {
    const auto sup(A.data() + A.num_elements());

    for (auto i(A.data()); i != sup; ++i)
      *i = ++values2;
  }
  const auto t_end2(clock::now());

  // Reading via index
  const auto t_begin3(clock::now());
  bool found1(false);
  for (unsigned n(0); n < sup; ++n)
    for (index i(0); i != d1; ++i)
      for (index j(0); j != d2; ++j)
        for (index k(0); k != d3; ++k)
          if (A[i][j][k] == values1)
            found1 = true;
  const auto t_end3(clock::now());

  // Reading directly
  const auto t_begin4(clock::now());
  bool found2(false);
  for (unsigned n(0); n < sup; ++n)
  {
    const auto sup(A.data() + A.num_elements());

    for (auto i(A.data()); i != sup; ++i)
      if (*i == values2)
        found2 = true;
  }
  const auto t_end4(clock::now());

  std::cout << "Writing (index): "
            << std::chrono::duration_cast<duration>(t_end1 - t_begin1).count()
            << std::endl
            << "Writing (data):  "
            << std::chrono::duration_cast<duration>(t_end2 - t_begin2).count()
            << std::endl
            << "Reading (index): "
            << std::chrono::duration_cast<duration>(t_end3 - t_begin3).count()
            << " (found " << found1 << ")" << std::endl
            << "Reading (data):  "
            << std::chrono::duration_cast<duration>(t_end4 - t_begin4).count()
            << " (found " << found2 << ")" << std::endl;

  return 0;
}
#包括
#包括
//#定义BOOST_DISABLE_断言
#包括
int main()
{
使用array3=boost::multi_数组;
使用index=array3::index;
使用时钟=标准::时钟::高分辨率时钟;
使用duration=std::chrono::duration;
constexpr无符号d1(300)、d2(400)、d3(200)、sup(100);
阵列3a(boost::区段[d1][d2][d3]);
//通过索引写入
const auto t_begin1(clock::now());
无符号值1(0);
for(无符号n(0);n