C++ 为什么从二进制存档反序列化到std::map时会有空间开销

C++ 为什么从二进制存档反序列化到std::map时会有空间开销,c++,memory,data-structures,boost-serialization,overhead,C++,Memory,Data Structures,Boost Serialization,Overhead,这是我的节目: void loadB(map<unsigned int,myParam> & myParams) { std::ifstream ifs("/tmp/all_params", std::ios::in | std::ios::binary); if( ifs.good() ){ try{ boost::archive::binary_iarchive ia(ifs); ia >

这是我的程序:

/// Loads the parameter map from the Boost binary archive at /tmp/all_params.
///
/// @param myParams  Destination map, filled through the archive's operator>>.
///
/// Problems are reported via syslog instead of being dropped silently: a
/// stream that is not good() after opening, and any
/// boost::archive::archive_exception raised while reading. Other exception
/// types are not caught here.
void loadB(map<unsigned int,myParam> & myParams)
{
    std::ifstream ifs("/tmp/all_params", std::ios::in | std::ios::binary);
    if (!ifs.good()) {
        syslog(LOG_NOTICE, "Unable to open /tmp/all_params for deserializing params");
        return;
    }

    try {
        boost::archive::binary_iarchive ia(ifs);
        ia >> myParams;
    } catch (const boost::archive::archive_exception& ex) {
        // Keep the archive's own diagnostic; the bare message hid the cause.
        syslog(LOG_NOTICE, "Archive Exception during deserializing params: %s", ex.what());
    }
    // No explicit close(): the ifstream destructor releases the file.
}

这当然有道理

例如,当
/tmp/all_params
是使用以下程序生成的文件时:

#include <boost/serialization/map.hpp>
#include <boost/archive/binary_oarchive.hpp>
#include <boost/archive/binary_iarchive.hpp>
#include <boost/random.hpp>
#include <boost/bind.hpp>

/// Map payload: a single string handled by Boost.Serialization.
struct myParam {
    std::string data;

    /// Passes `data` to the archive; the version argument is ignored.
    template <typename Archive>
    void serialize(Archive& archive, const unsigned int /*version*/)
    {
        archive & data;
    }
};

/// Produces a string of random bytes whose length is itself random (0..255).
///
/// One function-local, default-constructed boost::mt19937 supplies both the
/// length and every byte.
///
/// @return The freshly generated string.
static inline std::string generate_value() {
    static auto rand_char = boost::bind(boost::uniform_int<unsigned char>(0,255), boost::mt19937{});

    // Draw the length in its own statement so it is sequenced before the
    // bytes are generated.
    const std::string::size_type length = rand_char();

    std::string s;
    s.reserve(length);
    // std::generate_n takes its generator by value. Passing rand_char directly
    // would copy the engine, leaving the static one un-advanced, so successive
    // strings would replay overlapping parts of the same stream. The lambda
    // forwards every draw to the single static generator instead.
    std::generate_n(back_inserter(s), length, [] { return rand_char(); });
    return s;
}

/// Container under test: unsigned key mapped to a myParam payload.
using Map = std::map<unsigned int,myParam>;

/// Builds a map with the keys 0 .. n-1, each paired with a payload obtained
/// from generate_value().
///
/// @param n  Number of entries to create.
/// @return   The populated map.
Map generate_data(unsigned n) {
    Map result;

    unsigned key = 0;
    while (key != n) {
        result.emplace(key, myParam { generate_value() });
        ++key;
    }

    return result;
}

#include <fstream>
#include <iostream>

int main() {
    {
        std::ofstream ofs("/tmp/all_params", std::ios::binary);
        boost::archive::binary_oarchive oa(ofs);

        auto data = generate_data(10ul<<19);
        oa << data;
        std::cout << "Serialized " << data.size() << " entries\n";
    }
}
峰值内存使用量快照为 1.2 GiB:

当然,您可以进一步优化内存布局,例如使用 boost::container::flat_map(配合
ordered_unique_range_t
插入重载!)以及自定义分配器(例如用于其中的字符串)。这将减少/消除开销:

经过调整的代码:

#include <boost/serialization/map.hpp>
#include <boost/serialization/collections_load_imp.hpp>
#include <boost/serialization/collections_save_imp.hpp>
#include <boost/container/flat_map.hpp>
#include <boost/archive/binary_oarchive.hpp>
#include <boost/archive/binary_iarchive.hpp>
#include <boost/random.hpp>
#include <boost/bind.hpp>
#include <boost/utility/string_ref.hpp>
#include <cassert>

/// Minimal append-only character pool for the demo.
///
/// All strings are copied back to back into one pre-reserved vector and
/// handed out as non-owning boost::string_ref views into that storage.
namespace string_pool {
    // Backing storage. The capacity is reserved once up front; the views
    // returned by add() stay valid only as long as the vector never
    // reallocates.
    static auto pool = []{
        std::vector<char> init;
        init.reserve(700ul<<20); // 700MiB
        return init;
    }();

    using entry = boost::string_ref;

    /// Appends a copy of `s` to the pool and returns a view onto that copy.
    ///
    /// @param s  Characters to store; may be empty.
    /// @return   View of s.size() characters located inside the pool.
    ///
    /// The capacity check is an assert only, so it disappears under NDEBUG.
    /// Exceeding the reserved capacity then reallocates the vector and leaves
    /// every previously returned entry dangling.
    entry add(std::string const& s) {
        assert((pool.capacity() >= (pool.size() + s.size())));

        // Remember the position as an offset. An end() iterator taken before
        // the insert is invalidated by it, and dereferencing end() is
        // undefined even for an empty string.
        auto const offset = pool.size();
        pool.insert(pool.end(), s.begin(), s.end());
        return { pool.data() + offset, s.size() };
    }

    /// Adds a random byte string of random length (0..255) to the pool.
    ///
    /// @return View onto the newly pooled bytes.
    static inline entry generate_random() {
        static auto rand_char = boost::bind(boost::uniform_int<unsigned char>(0,255), boost::mt19937{});

        static std::string s; // non-reentrant, but for lazy demo
        s.resize(rand_char());
        // std::generate_n copies its generator. The lambda forwards to the one
        // static engine so that its state advances, instead of each call
        // replaying an overlapping part of the same stream.
        std::generate_n(s.begin(), s.size(), [] { return rand_char(); });
        return add(s);
    }
}

/// Map payload whose characters live in string_pool; only a view is stored.
struct myParam {
    string_pool::entry data;

    /// Writes the viewed characters to the archive as a std::string.
    template <typename Archive>
    void save(Archive& archive, const unsigned int /*version*/) const {
        std::string text = data.to_string();
        archive << text;
    }

    /// Reads a std::string from the archive, copies it into the pool and
    /// keeps the view returned by string_pool::add.
    template <typename Archive>
    void load(Archive& archive, const unsigned int /*version*/) {
        std::string text;
        archive >> text;
        data = string_pool::add(text);
    }

    BOOST_SERIALIZATION_SPLIT_MEMBER()
};

// flat map serialization
namespace boost { 
namespace serialization {

    template<class Archive, typename...TArgs>
    inline void save(
        Archive & ar,
        const boost::container::flat_map<TArgs...> &t,
        const unsigned int /* file_version */
    ){
        boost::serialization::stl::save_collection<
            Archive, 
            boost::container::flat_map<TArgs...> 
        >(ar, t);
    }

    template<class Archive, typename...TArgs>
    inline void load(Archive & ar, boost::container::flat_map<TArgs...> &t, const unsigned int /* file_version */) {
        boost::serialization::stl::load_collection<Archive, boost::container::flat_map<TArgs...>,
            boost::serialization::stl::archive_input_map<Archive, boost::container::flat_map<TArgs...> >, 
            boost::serialization::stl::reserve_imp   <boost::container::flat_map<TArgs...> >
        >(ar, t);
    }

    // split non-intrusive serialization function member into separate
    // non intrusive save/load member functions
    template<class Archive, typename...TArgs>
    inline void serialize(Archive & ar, boost::container::flat_map<TArgs...> &t, const unsigned int file_version) {
        boost::serialization::split_free(ar, t, file_version);
    }
}
}

/// Container under test: flat_map from unsigned key to pooled myParam.
using Map = boost::container::flat_map<unsigned int,myParam>;

/// Builds a flat_map with the keys 0 .. n-1, each paired with a payload from
/// string_pool::generate_random().
///
/// Prints the map's capacity before and after filling, followed by the summed
/// length of all payloads.
///
/// @param n  Number of entries to create; also the capacity reserved up front.
/// @return   The populated map.
Map generate_data(unsigned n) {
    Map result;
    result.reserve(n);
    std::cout << "Capacity: " << result.capacity() << "\n";

    unsigned key = 0;
    while (key != n) {
        result.emplace(key, myParam { string_pool::generate_random() });
        ++key;
    }

    std::cout << "Capacity: " << result.capacity() << "\n";

    // Sum the payload lengths in an unsigned long accumulator.
    unsigned long total_length = 0ul;
    for (Map::value_type const& item : result)
        total_length += item.second.data.size();
    std::cout << "Total length: " << total_length << "\n";

    return result;
}

#include <fstream>
#include <iostream>

int main() {
    {
        std::ofstream ofs("/tmp/all_params", std::ios::binary);
        boost::archive::binary_oarchive oa(ofs);

        auto data = generate_data(10ul<<19);
        oa << data;
        std::cout << "Serialized " << data.size() << " entries\n";
    }
}
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
命名空间字符串池{
静态自动池=[]{
std::向量初始化;
初始储备(700ul(ar,t);
}
模板
内联无效加载(归档&ar,boost::容器::平面映射&t,常量unsigned int/*文件版本*/){
boost::serialization::stl::load_集合(ar,t);
}
//将非侵入式序列化函数成员拆分为单独的
//非侵入式保存/加载成员函数
模板
内联void序列化(归档&ar,boost::容器::平面映射&t,常量unsigned int file\u版本){
boost::serialization::split_free(ar、t、file_版本);
}
}
}
使用Map=boost::container::flat\u Map;
映射生成_数据(无符号n){
地图;
保护区地图(n);

std::cout To SSCCE cave…是的。在没有Boost序列化的情况下,可能会发生内存泄漏。这使得使用内存变得很容易。啊。问题实际上不是内存泄漏。这只是一个转移视线的问题。问题是关于标准堆分配器的(潜在)低效率问题“到SSCCE洞穴…”提供了一个经过调整的版本,使用了
boost::string_ref 和 boost::container::flat_map
,结果显示:在预先 reserve 足够容量的情况下,内存占用减少了 32%。在实际应用中,您会需要一个更智能的字符串池,但作为回答您问题的演示,这应该已经足够了。
==27420== Memcheck, a memory error detector
==27420== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.
==27420== Using Valgrind-3.10.0.SVN and LibVEX; rerun with -h for copyright info
==27420== Command: ./test
==27420== 
Serialized 5242880 entries
==27420== 
==27420== HEAP SUMMARY:
==27420==     in use at exit: 0 bytes in 0 blocks
==27420==   total heap usage: 47,021,247 allocs, 47,021,247 frees, 3,069,877,283 bytes allocated
==27420== 
==27420== All heap blocks were freed -- no leaks are possible
==27420== 
#include <boost/serialization/map.hpp>
#include <boost/serialization/collections_load_imp.hpp>
#include <boost/serialization/collections_save_imp.hpp>
#include <boost/container/flat_map.hpp>
#include <boost/archive/binary_oarchive.hpp>
#include <boost/archive/binary_iarchive.hpp>
#include <boost/random.hpp>
#include <boost/bind.hpp>
#include <boost/utility/string_ref.hpp>
#include <cassert>

/// Minimal append-only character pool for the demo.
///
/// All strings are copied back to back into one pre-reserved vector and
/// handed out as non-owning boost::string_ref views into that storage.
namespace string_pool {
    // Backing storage. The capacity is reserved once up front; the views
    // returned by add() stay valid only as long as the vector never
    // reallocates.
    static auto pool = []{
        std::vector<char> init;
        init.reserve(700ul<<20); // 700MiB
        return init;
    }();

    using entry = boost::string_ref;

    /// Appends a copy of `s` to the pool and returns a view onto that copy.
    ///
    /// @param s  Characters to store; may be empty.
    /// @return   View of s.size() characters located inside the pool.
    ///
    /// The capacity check is an assert only, so it disappears under NDEBUG.
    /// Exceeding the reserved capacity then reallocates the vector and leaves
    /// every previously returned entry dangling.
    entry add(std::string const& s) {
        assert((pool.capacity() >= (pool.size() + s.size())));

        // Remember the position as an offset. An end() iterator taken before
        // the insert is invalidated by it, and dereferencing end() is
        // undefined even for an empty string.
        auto const offset = pool.size();
        pool.insert(pool.end(), s.begin(), s.end());
        return { pool.data() + offset, s.size() };
    }

    /// Adds a random byte string of random length (0..255) to the pool.
    ///
    /// @return View onto the newly pooled bytes.
    static inline entry generate_random() {
        static auto rand_char = boost::bind(boost::uniform_int<unsigned char>(0,255), boost::mt19937{});

        static std::string s; // non-reentrant, but for lazy demo
        s.resize(rand_char());
        // std::generate_n copies its generator. The lambda forwards to the one
        // static engine so that its state advances, instead of each call
        // replaying an overlapping part of the same stream.
        std::generate_n(s.begin(), s.size(), [] { return rand_char(); });
        return add(s);
    }
}

/// Map payload whose characters live in string_pool; only a view is stored.
struct myParam {
    string_pool::entry data;

    /// Writes the viewed characters to the archive as a std::string.
    template <typename Archive>
    void save(Archive& archive, const unsigned int /*version*/) const {
        std::string text = data.to_string();
        archive << text;
    }

    /// Reads a std::string from the archive, copies it into the pool and
    /// keeps the view returned by string_pool::add.
    template <typename Archive>
    void load(Archive& archive, const unsigned int /*version*/) {
        std::string text;
        archive >> text;
        data = string_pool::add(text);
    }

    BOOST_SERIALIZATION_SPLIT_MEMBER()
};

// flat map serialization
namespace boost { 
namespace serialization {

    template<class Archive, typename...TArgs>
    inline void save(
        Archive & ar,
        const boost::container::flat_map<TArgs...> &t,
        const unsigned int /* file_version */
    ){
        boost::serialization::stl::save_collection<
            Archive, 
            boost::container::flat_map<TArgs...> 
        >(ar, t);
    }

    template<class Archive, typename...TArgs>
    inline void load(Archive & ar, boost::container::flat_map<TArgs...> &t, const unsigned int /* file_version */) {
        boost::serialization::stl::load_collection<Archive, boost::container::flat_map<TArgs...>,
            boost::serialization::stl::archive_input_map<Archive, boost::container::flat_map<TArgs...> >, 
            boost::serialization::stl::reserve_imp   <boost::container::flat_map<TArgs...> >
        >(ar, t);
    }

    // split non-intrusive serialization function member into separate
    // non intrusive save/load member functions
    template<class Archive, typename...TArgs>
    inline void serialize(Archive & ar, boost::container::flat_map<TArgs...> &t, const unsigned int file_version) {
        boost::serialization::split_free(ar, t, file_version);
    }
}
}

/// Container under test: flat_map from unsigned key to pooled myParam.
using Map = boost::container::flat_map<unsigned int,myParam>;

/// Builds a flat_map with the keys 0 .. n-1, each paired with a payload from
/// string_pool::generate_random().
///
/// Prints the map's capacity before and after filling, followed by the summed
/// length of all payloads.
///
/// @param n  Number of entries to create; also the capacity reserved up front.
/// @return   The populated map.
Map generate_data(unsigned n) {
    Map result;
    result.reserve(n);
    std::cout << "Capacity: " << result.capacity() << "\n";

    unsigned key = 0;
    while (key != n) {
        result.emplace(key, myParam { string_pool::generate_random() });
        ++key;
    }

    std::cout << "Capacity: " << result.capacity() << "\n";

    // Sum the payload lengths in an unsigned long accumulator.
    unsigned long total_length = 0ul;
    for (Map::value_type const& item : result)
        total_length += item.second.data.size();
    std::cout << "Total length: " << total_length << "\n";

    return result;
}

#include <fstream>
#include <iostream>

int main() {
    {
        std::ofstream ofs("/tmp/all_params", std::ios::binary);
        boost::archive::binary_oarchive oa(ofs);

        auto data = generate_data(10ul<<19);
        oa << data;
        std::cout << "Serialized " << data.size() << " entries\n";
    }
}