Warning: file_get_contents(/data/phpspider/zhask/data//catemap/1/list/4.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C++ 如何最好地写出std::vector<;标准::字符串>;HDF5数据集的容器?_C++_Stl_Hdf5 - Fatal编程技术网

C++ 如何最好地将 std::vector<std::string> 容器写入 HDF5 数据集?

C++ 如何最好地写出std::vector<;标准::字符串>;HDF5数据集的容器?,c++,stl,hdf5,C++,Stl,Hdf5,给定字符串向量,将其写入HDF5数据集的最佳方式是什么?目前,我正在做如下工作: const unsigned int MaxStrLength = 512; struct TempContainer { char string[MaxStrLength]; }; void writeVector (hid_t group, std::vector<std::string> const & v) { // // Firstly

给定字符串向量,将其写入HDF5数据集的最佳方式是什么?目前,我正在做如下工作:

  // Size, in bytes, of the fixed-length buffer that holds one string.
  const unsigned int MaxStrLength = 512;

  // Fixed-size record wrapping a single character buffer.
  struct TempContainer {
    char string[MaxStrLength];  // MaxStrLength bytes of character storage
  };

  // Writes every string of `v` into a new dataset called "files" inside
  // `group`. Each string becomes one compound record with a single
  // fixed-length member "string" of MaxStrLength bytes.
  //
  // group: identifier of the HDF5 group that receives the dataset.
  // v:     strings to store; anything longer than MaxStrLength - 1
  //        characters is truncated so the record stays NUL-terminated.
  void writeVector (hid_t group, std::vector<std::string> const & v)
  {
    //
    // Firstly copy the contents of the vector into a temporary container
    std::vector<TempContainer> tc;
    tc.reserve (v.size ());
    for (std::vector<std::string>::const_iterator i = v.begin ()
                                              , end = v.end ()
      ; i != end
      ; ++i)
    {
      // Value-initialisation zero-fills the buffer; copying at most
      // MaxStrLength - 1 characters keeps the final byte as the terminator
      // (strncpy alone does not terminate when the source fills the buffer).
      TempContainer t = TempContainer ();
      strncpy (t.string, i->c_str (), MaxStrLength - 1);
      tc.push_back (t);
    }


    //
    // Write the temporary container to a dataset
    hsize_t     dims[] = { static_cast<hsize_t> (tc.size ()) } ;
    hid_t dataspace = H5Screate_simple(sizeof(dims)/sizeof(*dims)
                               , dims
                               , NULL);

    hid_t strtype = H5Tcopy (H5T_C_S1);
    H5Tset_size (strtype, MaxStrLength);

    // The compound type must be as large as one in-memory record.
    hid_t datatype = H5Tcreate (H5T_COMPOUND, sizeof (TempContainer));
    H5Tinsert (datatype
      , "string"
      , HOFFSET(TempContainer, string)
      , strtype);

    hid_t dataset = H5Dcreate1 (group
                          , "files"
                          , datatype
                          , dataspace
                          , H5P_DEFAULT);

    // &tc[0] is undefined for an empty vector, so there is nothing to write
    // in that case; the dataset is still created.
    if (!tc.empty ())
    {
      H5Dwrite (dataset, datatype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &tc[0] );
    }

    H5Dclose (dataset);
    H5Sclose (dataspace);
    H5Tclose (strtype);
    H5Tclose (datatype);
  }
// Create the datatype as follows:
// a copy of H5T_C_S1 whose size is set to H5T_VARIABLE.
hid_t datatype = H5Tcopy (H5T_C_S1);
H5Tset_size (datatype, H5T_VARIABLE);

// 
// Pass the string to be written to H5Dwrite
// using the address of the pointer!
// NOTE(review): `v` and `dataset` are not declared in this fragment; `v` is
// presumably a single std::string and `dataset` an existing dataset - confirm.
const char * s = v.c_str ();
H5Dwrite (dataset
  , datatype
  , H5S_ALL
  , H5S_ALL
  , H5P_DEFAULT
  , &s );
const unsigned int MaxStrLength=512;
结构临时容器{
字符字符串[MaxStrLength];
};
void writeVector(隐藏组,标准::向量常量和向量)
{
//
//首先将向量的内容复制到临时容器中
std::向量tc;
对于(std::vector::const_迭代器i=v.begin()
,end=v.end()
结束
++i)
{
临时容器;
strncpy(t.string,i->c_str(),MaxStrLength);
tc.推回(t);
}
//
//将临时容器写入数据集
hsize_t dims[]={tc.size()};
hid_t数据空间=H5Screate_simple(sizeof(dims)/sizeof(*dims)
,暗淡
,空);
hid_t strtype=H5Tcopy(H5T_C_S1);
H5t_尺寸(标准类型,最大长度);
hid_t数据类型=H5Tcreate(H5T_化合物,sizeof(TempConainer));
H5Tinsert(数据类型
,“字符串”
,HOFFSET(临时容器,字符串)
,strtype);
hid_t数据集=H5Dcreate1(组
,“文件”
,数据类型
,数据空间
,H5P_违约);
H5Dwrite(数据集、数据类型、H5S_-ALL、H5S_-ALL、H5P_-DEFAULT和tc[0]);
H5Dclose(数据集);
H5Sclose(数据空间);
H5Tclose(strtype);
H5Tclose(数据类型);
}
至少,我真的希望更改上述内容,以便:

  • 它使用可变长度字符串
  • 我不需要临时的容器
  • 我对如何存储数据没有任何限制,例如,如果有更好的方法,它不必是复合数据类型

    <> > >编辑:只是为了缩小问题的范围,我比较熟悉C++方面的数据,这是我需要最多帮助的HDF5方面。
    感谢您的帮助。

    如果您正在查看更干净的代码:我建议您创建一个函子,该函子将获取一个字符串并将其保存到HDF5容器中(以所需的模式)。理查德,我用错了算法,请重新检查

    std::for_each(v.begin(), v.end(), write_hdf5);
    
    // Function-object sketch meant for std::for_each over a container of
    // strings: each call is supposed to append one string to an HDF5 dataset.
    // The HDF5 calls themselves are still placeholders.
    struct hdf5 {
        // Typedefs formerly provided by the std::unary_function base, which
        // was deprecated in C++11 and removed in C++17.
        typedef std::string argument_type;
        typedef void result_type;

        // TODO: open / initialise the HDF5 dataset here (the original sketch
        // had a `_dataset(...)` placeholder at this point).
        hdf5() {}

        // TODO: close the HDF5 dataset here. A destructor cannot carry a
        // member-initialiser list, so the placeholder lives in this comment.
        ~hdf5() {}

        // Called once per element. Takes a const reference so the functor
        // also works on const containers and temporaries.
        void operator()(const std::string& s) {
                // append
                // use s.c_str() ?
                (void)s;  // nothing is written yet
        }
    };
    
    std::for_each(v.begin(),v.end(),write_hdf5);
    struct hdf5:public std::一元函数{
    hdf5():_数据集(…){}//初始化hdf5数据库
    ~hdf5():_dataset(…){}//关闭hdf5数据库
    void运算符(std::string&s){
    //附加
    //使用s.c_str()?
    }
    };
    

    这有助于开始吗

    我不知道HDF5,但您可以使用

    // Record holding a pointer to a C string instead of a fixed-size array.
    struct TempContainer {
        char* string;  // plain pointer: copying the struct copies only this pointer
    };
    
    然后以这种方式复制字符串:

    TempContainer t;
    // strdup() returns a newly allocated copy of the string; the copy has to
    // be released with free() once it is no longer needed.
    t.string = strdup(i->c_str());
    tc.push_back (t);
    
    这样会为每个字符串分配大小恰好合适的内存,并且在向容器插入或从容器读取时效率也会高得多(在您的示例中复制的是整个数组,而这里只复制一个指针)。您也可以直接使用 std::vector<char *>:

    // Pointer-only variant: the vector stores the strdup()'ed copies directly.
    // Every stored pointer must eventually be released with free().
    std::vector<char *> tc;
    ...
    tc.push_back(strdup(i->c_str()));
    
    std::向量tc;
    ...
    向后推(strdup(i->c_str());
    
    您可以使用简单的 std::vector<char>(也可以将其模板化为 T,以匹配 std::basic_string<T>)来代替 TempContainer。大概是这样的:

    #include <algorithm>
    #include <cstring>
    #include <functional>
    #include <string>
    #include <vector>
    
    // Function object that converts a std::string into a NUL-terminated
    // std::vector<char>.
    class StringToVector {
    public:
      // Adaptor typedefs (argument first, result second). They replace the
      // std::unary_function base, which was removed in C++17 and had its
      // template arguments in the wrong order here.
      typedef std::string argument_type;
      typedef std::vector<char> result_type;

      // Returns the characters of `s` up to its first NUL, plus that NUL.
      std::vector<char> operator()(const std::string &s) const {
        // assumes you want a NUL-terminated string
        const char* str = s.c_str();
        // strlen stops at the first NUL, so for a string with embedded NULs
        // s.size() != strlen(s.c_str()); the +1 keeps the terminator.
        const std::size_t size = 1 + std::strlen(str);
        return std::vector<char>(str, str + size);
      }
    };
    
    // Converts every string of `vi` into a NUL-terminated character vector
    // and stores the results, in order, in `vo`.
    //
    // vi: input strings.
    // vo: output; resized to vi.size() and overwritten element by element.
    void conv(const std::vector<std::string> &vi,
              std::vector<std::vector<char> > &vo)
    {
      // std::transform writes through vo.begin() and never grows the
      // destination, so establish vo.size() == vi.size() before the call.
      vo.resize(vi.size());
      std::transform(vi.begin(), vi.end(),
                     vo.begin(),
                     StringToVector());
    }
    
    #包括
    #包括
    #包括
    #包括
    类StringToVector
    :std::一元函数{
    公众:
    std::vector操作符()(常量std::string&s)常量{
    //假设您需要以NUL结尾的字符串
    const char*str=s.c_str();
    std::size\u t size=1+std::strlen(str);
    //s.size()!=strlen(s.c_str())
    std::vector buf(&str[0],&str[size]);
    返回buf;
    }
    };
    void conv(const std::vector&vi、,
    std::vector和vo)
    {
    //断言vo.size()==vi.size()
    转换(vi.begin(),vi.end(),
    vo.begin(),
    StringToVector());
    }
    
    [非常感谢他在回答这一问题时提供的帮助。]

    要在HDF5中写入可变长度字符串,请使用以下命令:

      // Size, in bytes, of the fixed-length buffer that holds one string.
      const unsigned int MaxStrLength = 512;
    
      // Fixed-size record wrapping a single character buffer.
      struct TempContainer {
        char string[MaxStrLength];  // MaxStrLength bytes of character storage
      };
    
      // Writes every string of `v` into a new dataset called "files" inside
      // `group`. Each string becomes one compound record with a single
      // fixed-length member "string" of MaxStrLength bytes.
      //
      // group: identifier of the HDF5 group that receives the dataset.
      // v:     strings to store; anything longer than MaxStrLength - 1
      //        characters is truncated so the record stays NUL-terminated.
      void writeVector (hid_t group, std::vector<std::string> const & v)
      {
        //
        // Firstly copy the contents of the vector into a temporary container
        std::vector<TempContainer> tc;
        tc.reserve (v.size ());
        for (std::vector<std::string>::const_iterator i = v.begin ()
                                                  , end = v.end ()
          ; i != end
          ; ++i)
        {
          // Value-initialisation zero-fills the buffer; copying at most
          // MaxStrLength - 1 characters keeps the final byte as the
          // terminator (strncpy alone does not terminate a full buffer).
          TempContainer t = TempContainer ();
          strncpy (t.string, i->c_str (), MaxStrLength - 1);
          tc.push_back (t);
        }


        //
        // Write the temporary container to a dataset
        hsize_t     dims[] = { static_cast<hsize_t> (tc.size ()) } ;
        hid_t dataspace = H5Screate_simple(sizeof(dims)/sizeof(*dims)
                                   , dims
                                   , NULL);

        hid_t strtype = H5Tcopy (H5T_C_S1);
        H5Tset_size (strtype, MaxStrLength);

        // The compound type must be as large as one in-memory record.
        hid_t datatype = H5Tcreate (H5T_COMPOUND, sizeof (TempContainer));
        H5Tinsert (datatype
          , "string"
          , HOFFSET(TempContainer, string)
          , strtype);

        hid_t dataset = H5Dcreate1 (group
                              , "files"
                              , datatype
                              , dataspace
                              , H5P_DEFAULT);

        // &tc[0] is undefined for an empty vector, so there is nothing to
        // write in that case; the dataset is still created.
        if (!tc.empty ())
        {
          H5Dwrite (dataset, datatype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &tc[0] );
        }

        H5Dclose (dataset);
        H5Sclose (dataspace);
        H5Tclose (strtype);
        H5Tclose (datatype);
      }
    
    // Create the datatype as follows:
    // a copy of H5T_C_S1 whose size is set to H5T_VARIABLE.
    hid_t datatype = H5Tcopy (H5T_C_S1);
    H5Tset_size (datatype, H5T_VARIABLE);
    
    // 
    // Pass the string to be written to H5Dwrite
    // using the address of the pointer!
    // NOTE(review): `v` and `dataset` are not declared in this fragment; `v` is
    // presumably a single std::string and `dataset` an existing dataset - confirm.
    const char * s = v.c_str ();
    H5Dwrite (dataset
      , datatype
      , H5S_ALL
      , H5S_ALL
      , H5P_DEFAULT
      , &s );
    
    编写容器的一种解决方案是单独编写每个元素

    例如:

    class WriteString
    {
    public:
      WriteString (hid_t dataset, hid_t datatype
          , hid_t dataspace, hid_t memspace)
        : m_dataset (dataset), m_datatype (datatype)
        , m_dataspace (dataspace), m_memspace (memspace)
        , m_pos () {}
    
    private:
      hid_t m_dataset;
      hid_t m_datatype;
      hid_t m_dataspace;
      hid_t m_memspace;
      int m_pos;
    
    //

    公共:
    void运算符()(std::vector::value\u type const&v)
    {
    //选择文件位置,1条记录位于位置“pos”
    hsize_t count[]={1};
    hsize_t offset[]={m_pos++};
    H5S选择超实验室(m_数据空间
    ,H5S_选择_集
    ,偏移量
    无效的
    计数
    ,空);
    const char*s=v.c_str();
    H5Dwrite(m_)数据集
    ,m_数据类型
    ,m_memspace
    ,m_数据空间
    ,H5P_默认值
    (s),;
    }    
    };
    
    //

    // Writes the strings of `v` as a one-dimensional dataset of
    // variable-length strings called "files" inside `group`, one element per
    // H5Dwrite call (performed by the WriteString function object).
    //
    // group: identifier of the HDF5 group that receives the dataset.
    // v:     strings to store.
    void writeVector (hid_t group, std::vector<std::string> const & v)
    {
      // File dataspace: one slot per string of the vector being written.
      hsize_t     dims[] = { static_cast<hsize_t> (v.size ()) } ;
      hid_t dataspace = H5Screate_simple(sizeof(dims)/sizeof(*dims)
                                        , dims, NULL);

      // Memory dataspace: exactly one element.
      dims[0] = 1;
      hid_t memspace = H5Screate_simple(sizeof(dims)/sizeof(*dims)
                                        , dims, NULL);

      hid_t datatype = H5Tcopy (H5T_C_S1);
      H5Tset_size (datatype, H5T_VARIABLE);

      hid_t dataset = H5Dcreate1 (group, "files", datatype
                                 , dataspace, H5P_DEFAULT);

      // 
      // Select the "memory" to be written out - just 1 record.
      hsize_t offset[] = { 0 } ;
      hsize_t count[] = { 1 } ;
      H5Sselect_hyperslab( memspace, H5S_SELECT_SET, offset
                         , NULL, count, NULL );

      // The functor class is named WriteString (singular).
      std::for_each (v.begin ()
          , v.end ()
          , WriteString (dataset, datatype, dataspace, memspace));

      H5Dclose (dataset);
      H5Sclose (dataspace);
      H5Sclose (memspace);
      H5Tclose (datatype);
    }      
    
    void writeVector(隐藏组,标准::向量常量&v)
    {
    hsize_t dims[]={m_files.size()};
    hid_t数据空间=H5Screate_simple(sizeof(dims)/sizeof(*dims)
    ,暗显,空);
    dims[0]=1;
    hid_t memspace=H5Screate_simple(尺寸(dims)/sizeof(*dims)
    ,暗显,空);
    hid_t数据类型=H5Tcopy(H5T_C_S1);
    H5Tset_大小(数据类型,H5T_变量);
    hid_t dataset=H5Dcreate1(组,“文件”,数据类型
    ,数据空间,H5P_默认值);
    // 
    //选择要写入的“内存”-仅1条记录。
    hsize_t offset[]={0};
    hsize_t count[]={1};
    H5S选择超实验室(内存空间、H5S选择集、偏移
    ,NULL,count,NULL);
    std::对于每个(v.begin()
    ,v.end()
    ,WriteStrings(数据集、数据类型、数据空间、memspace));
    H5Dclose(数据集);
    H5Sclose(数据空间);
    H5Sclose(memspace);
    H5Tclose(数据类型);
    }      
    
    下面是使用HDF5C++ API编写可变长度字符串向量的一些工作代码。 我在其他帖子中加入了一些建议:

  • 使用 H5T_C_S1 和 H5T_VARIABLE
  • 使用
    string::c_str()
    获取指向字符串的指针
  • 将指针放入
    char*
    向量中,并传递给HDF5API
  • 这是不必要的
    H5::Attribute Foo = file.openAttribute("Foo");
    std::vector<std::string> foos
    Foo >> foos;
    
      // Reads a one-dimensional attribute of variable-length strings into
      // `array`.
      //
      // attr0: attribute to read.
      // array: receives one std::string per attribute element; resized to
      //        the attribute's extent.
      // Returns attr0 so that reads can be chained.
      // Throws PBException when the attribute is not rank 1, its extent
      // cannot be queried, or its element size is not that of a pointer;
      // throws std::runtime_error when the HDF5 C++ API raises H5::Exception.
      const H5::Attribute& operator>>(const H5::Attribute& attr0, std::vector<std::string>& array)
      {
          // Closes an HDF5 identifier when the scope is left, so the early
          // throws below do not leak the type and dataspace identifiers.
          struct IdGuard
          {
              hid_t id;
              herr_t (*closer)(hid_t);
              IdGuard(hid_t i, herr_t (*c)(hid_t)) : id(i), closer(c) {}
              ~IdGuard() { if (id >= 0) closer(id); }
          };

          H5::Exception::dontPrint();

          try
          {
              hid_t attr = attr0.getId();

              IdGuard atype(H5Aget_type(attr), H5Tclose);
              IdGuard aspace(H5Aget_space(attr), H5Sclose);
              int rank = H5Sget_simple_extent_ndims(aspace.id);
              if (rank != 1) throw PBException("Attribute " + attr0.getName() + " is not a string array");

              hsize_t sdim[1];
              if (H5Sget_simple_extent_dims(aspace.id, sdim, NULL) < 0)
              {
                  throw PBException("Attribute " + attr0.getName() + " has an unreadable dataspace");
              }
              size_t size = H5Tget_size (atype.id);
              if (size != sizeof(void*))
              {
                  throw PBException("Internal inconsistency. Expected pointer size element");
              }

              // HDF5 only understands vector of char* :-(
              std::vector<char*> arr_c_str(sdim[0]);

              H5::StrType stringType(H5::PredType::C_S1, H5T_VARIABLE);
              attr0.read(stringType, arr_c_str.data());
              array.resize(sdim[0]);
              for (std::size_t i = 0; i < arr_c_str.size(); ++i)
              {
                  if (arr_c_str[i] != NULL)
                  {
                      array[i] = arr_c_str[i];
                      // The read call allocated this string; release it now
                      // that it has been copied.
                      free(arr_c_str[i]);
                  }
                  else
                  {
                      // A std::string must not be assigned a null pointer.
                      array[i].clear();
                  }
              }

          }
          catch (const H5::Exception& err)
          {
              throw std::runtime_error(std::string("HDF5 Error in ")
                                        + err.getFuncName()
                                        + ": "
                                        + err.getDetailMsg());
          }

          return attr0;
      }
    
    #include <vector>
    #include <string>
    #include "H5Cpp.h"
    
    int main(int argc, char* argv[]) {
      // Part 0: make up some data
      std::vector<std::string> strings;
      for (int iii = 0; iii < 10; iii++) {
        strings.push_back("this is " + std::to_string(iii));
      }
    
      // Part 1: grab pointers to the chars
      std::vector<const char*> chars;
      for (const auto& str: strings) {
        chars.push_back(str.data());
      }
    
      // Part 2: create the variable length type
      hvl_t hdf_buffer;
      hdf_buffer.p = chars.data();
      hdf_buffer.len = chars.size();
    
      // Part 3: create the type
      auto s_type = H5::StrType(H5::PredType::C_S1, H5T_VARIABLE);
      s_type.setCset(H5T_CSET_UTF8); // just for fun, you don't need this
      auto svec_type = H5::VarLenType(&s_type);
    
      // Part 4: write the output to a scalar dataset
      H5::H5File out_file("vtest.h5", H5F_ACC_EXCL);
      H5::DataSet dataset(
        out_file.createDataSet("the_ds", svec_type, H5S_SCALAR));
      dataset.write(&hdf_buffer, svec_type);
    
      return 0;
    }
    
    void write_varnames( const std::string& dsetname, const std::vector<std::string>& strings, H5::H5File& f)
      {
        H5::Exception::dontPrint();
    
        try
          {
            // HDF5 only understands vector of char* :-(
            std::vector<const char*> arr_c_str;
            for (size_t ii = 0; ii < strings.size(); ++ii)
          {
            arr_c_str.push_back(strings[ii].c_str());
          }
    
            //
            //  one dimension
            // 
            hsize_t     str_dimsf[1] {arr_c_str.size()};
            H5::DataSpace   dataspace(1, str_dimsf);
    
            // Variable length string
            H5::StrType datatype(H5::PredType::C_S1, H5T_VARIABLE); 
            H5::DataSet str_dataset = f.createDataSet(dsetname, datatype, dataspace);
    
            str_dataset.write(arr_c_str.data(), datatype);
          }
        catch (H5::Exception& err)
          {
            throw std::runtime_error(std::string("HDF5 Error in ")  
                     + err.getFuncName()
                     + ": "
                     + err.getDetailMsg());
    
    
          }
      }
    
    // Reads the one-dimensional dataset of variable-length strings named
    // `dsname` from file `f` and returns its elements as std::strings.
    // Progress is printed to stdout.
    //
    // Throws H5::DataSpaceIException when the dataset is not rank 1; other
    // H5::Exception types raised by the HDF5 C++ API propagate unchanged.
    std::vector<std::string> read_string_dset( const std::string& dsname, H5::H5File& f )
    {
      H5::DataSet cdataset = f.openDataSet( dsname );

      H5::DataSpace space = cdataset.getSpace();

      // dims_out has room for exactly one extent, so reject any other rank
      // before getSimpleExtentDims fills it.
      const int rank = space.getSimpleExtentNdims();
      if (rank != 1)
      {
        throw H5::DataSpaceIException("read_string_dset", "dataset is not one-dimensional");
      }

      hsize_t dims_out[1];
      space.getSimpleExtentDims( dims_out, NULL);

      const size_t length = dims_out[0];

      // Non-const pointers: the read call fills these with buffers that have
      // to be handed back to the library afterwards.
      std::vector<char*> tmpvect( length, NULL );

      fprintf(stdout, "In read STRING dataset, got number of strings: [%zu]\n", length );

      std::vector<std::string> strs(length);
      H5::StrType datatype(H5::PredType::C_S1, H5T_VARIABLE); 
      cdataset.read( tmpvect.data(), datatype);

      for(size_t x=0; x<tmpvect.size(); ++x)
      {
        // A null entry can be neither printed with %s nor assigned to a
        // std::string; leave that element empty.
        if (tmpvect[x] == NULL)
        {
          continue;
        }
        fprintf(stdout, "GOT STRING [%s]\n", tmpvect[x] );
        strs[x] = tmpvect[x];
      }

      // Give the variable-length buffers allocated by the read back to HDF5.
      if (!tmpvect.empty())
      {
        H5::DataSet::vlenReclaim(tmpvect.data(), datatype, space);
      }

      return strs;
    }