C++ 如何最好地将 std::vector<std::string> 容器写入 HDF5 数据集?
给定字符串向量,将其写入HDF5数据集的最佳方式是什么?目前,我正在做如下工作:C++ 如何最好地将 std::vector<std::string> 容器写入 HDF5 数据集?,c++,stl,hdf5,C++,Stl,Hdf5,给定字符串向量,将其写入HDF5数据集的最佳方式是什么?目前,我正在做如下工作: const unsigned int MaxStrLength = 512; struct TempContainer { char string[MaxStrLength]; }; void writeVector (hid_t group, std::vector<std::string> const & v) { // // Firstly
// Capacity, in bytes, of the character buffer held by one TempContainer.
const unsigned int MaxStrLength = 512;

// One fixed-width record: a character buffer of MaxStrLength bytes.
struct TempContainer
{
    char string[MaxStrLength];
};
// Writes the strings in `v` to a one-dimensional dataset named "files"
// under `group`, one fixed-width record of MaxStrLength bytes per string.
//
// group : identifier of the HDF5 location that receives the dataset.
// v     : strings to store; a string of MaxStrLength or more characters is
//         truncated to MaxStrLength - 1 so every record is NUL-terminated.
//
// Nothing is returned. If any handle cannot be created the write is skipped;
// every handle that was successfully opened is closed before returning.
void writeVector (hid_t group, std::vector<std::string> const & v)
{
  //
  // Firstly copy the contents of the vector into a temporary container
  std::vector<TempContainer> tc;
  tc.reserve (v.size ());
  for (std::vector<std::string>::const_iterator i = v.begin ()
         , end = v.end ()
         ; i != end
         ; ++i)
  {
    TempContainer t;
    // strncpy zero-fills the unused tail of the buffer, but it leaves the
    // buffer unterminated when the source has MaxStrLength or more
    // characters, so the last byte is forced to NUL afterwards.
    strncpy (t.string, i->c_str (), MaxStrLength);
    t.string[MaxStrLength - 1] = '\0';
    tc.push_back (t);
  }
  //
  // Describe the data: a 1-D extent and a compound type with one member.
  hsize_t dims[1];
  dims[0] = tc.size ();
  hid_t dataspace = H5Screate_simple (sizeof(dims)/sizeof(*dims), dims, NULL);
  hid_t strtype = H5Tcopy (H5T_C_S1);
  hid_t datatype = H5Tcreate (H5T_COMPOUND, sizeof (TempContainer));
  hid_t dataset = -1;
  if (dataspace >= 0 && strtype >= 0 && datatype >= 0
      && H5Tset_size (strtype, MaxStrLength) >= 0
      && H5Tinsert (datatype
                   , "string"
                   , HOFFSET(TempContainer, string)
                   , strtype) >= 0)
  {
    dataset = H5Dcreate1 (group
                         , "files"
                         , datatype
                         , dataspace
                         , H5P_DEFAULT);
  }
  //
  // Write the temporary container to the dataset. An empty container has no
  // first element to take the address of, so there is nothing to write.
  if (dataset >= 0 && !tc.empty ())
  {
    H5Dwrite (dataset, datatype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &tc[0] );
  }
  if (dataset >= 0) H5Dclose (dataset);
  if (dataspace >= 0) H5Sclose (dataspace);
  if (strtype >= 0) H5Tclose (strtype);
  if (datatype >= 0) H5Tclose (datatype);
}
// Build a variable-length string type by copying H5T_C_S1 and setting its
// size to H5T_VARIABLE.
hid_t datatype = H5Tcopy (H5T_C_S1);
H5Tset_size (datatype, H5T_VARIABLE);
//
// The buffer argument of H5Dwrite is the address of the char pointer,
// not the pointer itself.
const char * s = v.c_str ();
H5Dwrite (dataset, datatype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &s);
const unsigned int MaxStrLength=512;
结构临时容器{
字符字符串[MaxStrLength];
};
void writeVector(隐藏组,标准::向量常量和向量)
{
//
//首先将向量的内容复制到临时容器中
std::向量tc;
对于(std::vector::const_迭代器i=v.begin()
,end=v.end()
结束
++i)
{
临时容器;
strncpy(t.string,i->c_str(),MaxStrLength);
tc.推回(t);
}
//
//将临时容器写入数据集
hsize_t dims[]={tc.size()};
hid_t数据空间=H5Screate_simple(sizeof(dims)/sizeof(*dims)
,暗淡
,空);
hid_t strtype=H5Tcopy(H5T_C_S1);
H5t_尺寸(标准类型,最大长度);
hid_t数据类型=H5Tcreate(H5T_化合物,sizeof(TempConainer));
H5Tinsert(数据类型
,“字符串”
,HOFFSET(临时容器,字符串)
,strtype);
hid_t数据集=H5Dcreate1(组
,“文件”
,数据类型
,数据空间
,H5P_违约);
H5Dwrite(数据集、数据类型、H5S_-ALL、H5S_-ALL、H5P_-DEFAULT和tc[0]);
H5Dclose(数据集);
H5Sclose(数据空间);
H5Tclose(strtype);
H5Tclose(数据类型);
}
至少,我真的希望更改上述内容,以便:
感谢您的帮助。
如果您想要更简洁的代码:我建议您创建一个函子(functor),该函子接收一个字符串并将其保存到HDF5容器中(以所需的模式)。理查德,我之前用错了算法,请重新查看:
std::for_each(v.begin(), v.end(), write_hdf5);
// Function object intended to be passed to std::for_each over a range of
// strings; operator() is the per-string hook. The bodies are placeholders
// left to be filled in with the actual HDF5 calls.
//
// NOTE(review): std::for_each takes its function object by value, so if the
// destructor ends up closing a resource, the copies need a sharing strategy
// (e.g. hold a non-owning handle) — confirm when implementing.
struct hdf5 {
  // The nested names std::unary_function<std::string, void> used to supply;
  // spelled out because that base class no longer exists in C++17.
  typedef std::string argument_type;
  typedef void result_type;

  hdf5() {}   // initialize the HDF5 db (set up the dataset member here)
  ~hdf5() {}  // close the HDF5 db (a destructor cannot have an initializer list)

  // Called once per element. Takes a const reference so elements of a const
  // container can be passed as well.
  void operator()(const std::string& s) {
    // append
    // use s.c_str() ?
    (void)s;  // silences the unused-parameter warning until implemented
  }
};
std::for_each(v.begin(),v.end(),write_hdf5);
struct hdf5:public std::一元函数{
hdf5():_数据集(…){}//初始化hdf5数据库
~hdf5():_dataset(…){}//关闭hdf5数据库
void运算符(std::string&s){
//附加
//使用s.c_str()?
}
};
这有助于开始吗?
我不了解HDF5,但您可以使用:
// Record that stores a pointer to a character string instead of an
// inline character array.
struct TempContainer
{
    char * string;
};
然后以这种方式复制字符串:
// strdup returns a malloc'ed duplicate of the string; the pointer stored in
// the container has to be released with free() when it is no longer needed.
TempContainer t = { strdup(i->c_str()) };
tc.push_back (t);
这将分配一个具有精确大小的字符串,并且在插入或读取容器时也会有很大的改进(在您的示例中,复制了一个数组,在本例中仅复制了一个指针)。您还可以使用std::vector:
// The container holds bare pointers; each element pushed below is a
// strdup() duplicate that has to be released with free() later.
std::vector<char *> tc;
...
tc.push_back(strdup(i->c_str()));
std::向量tc;
...
向后推(strdup(i->c_str());
您可以使用简单的std::vector(您也可以将其模板化以匹配T->basic_字符串),而不是TempContainer。
大概是这样的:
#include <algorithm>
#include <vector>
#include <string>
#include <functional>
// Function object that converts a std::string into a std::vector<char>
// holding the string's characters followed by a terminating NUL.
//
// The copy stops at the first NUL character, so a string with an embedded
// NUL is truncated there (s.size() may differ from the C-string length).
//
// The former std::unary_function base is gone: it was inherited privately
// (so callers could not see its typedefs), had its template arguments in
// the wrong order, and no longer exists in C++17.
class StringToVector {
public:
    // Returns the characters of `s` up to and including the first NUL.
    std::vector<char> operator()(const std::string &s) const {
        // assumes you want a NUL-terminated string
        const char* str = s.c_str();
        // char_traits::length is the <string> equivalent of strlen, which
        // avoids depending on <cstring>.
        const std::size_t size = 1 + std::char_traits<char>::length(str);
        // str + size is one past the terminating NUL: a valid end pointer.
        return std::vector<char>(str, str + size);
    }
};
// Converts every string in `vi` with StringToVector and stores the results
// in `vo`, element i of `vi` going to element i of `vo`.
//
// vi : input strings.
// vo : output buffers. It is grown to vi.size() when it is shorter; when it
//      is already long enough its size is left alone and any elements past
//      vi.size() keep their previous contents.
void conv(const std::vector<std::string> &vi,
          std::vector<std::vector<char> > &vo)
{
    // std::transform writes through vo.begin() without growing the
    // container, so make sure there is a slot for every input first.
    if (vo.size() < vi.size()) {
        vo.resize(vi.size());
    }
    std::transform(vi.begin(), vi.end(),
                   vo.begin(),
                   StringToVector());
}
#包括
#包括
#包括
#包括
类StringToVector
:std::一元函数{
公众:
std::vector操作符()(常量std::string&s)常量{
//假设您需要以NUL结尾的字符串
const char*str=s.c_str();
std::size\u t size=1+std::strlen(str);
//s.size()!=strlen(s.c_str())
std::vector buf(&str[0],&str[size]);
返回buf;
}
};
void conv(const std::vector&vi、,
std::vector和vo)
{
//断言vo.size()==vi.size()
转换(vi.begin(),vi.end(),
vo.begin(),
StringToVector());
}
[非常感谢他在回答这一问题时提供的帮助。]
要在HDF5中写入可变长度字符串,请使用以下命令:
// Capacity, in bytes, of the character buffer held by one TempContainer.
const unsigned int MaxStrLength = 512;

// One fixed-width record: a character buffer of MaxStrLength bytes.
struct TempContainer
{
    char string[MaxStrLength];
};
// Writes the strings in `v` to a one-dimensional dataset named "files"
// under `group`, one fixed-width record of MaxStrLength bytes per string.
//
// group : identifier of the HDF5 location that receives the dataset.
// v     : strings to store; a string of MaxStrLength or more characters is
//         truncated to MaxStrLength - 1 so every record is NUL-terminated.
//
// Nothing is returned. If any handle cannot be created the write is skipped;
// every handle that was successfully opened is closed before returning.
void writeVector (hid_t group, std::vector<std::string> const & v)
{
  //
  // Firstly copy the contents of the vector into a temporary container
  std::vector<TempContainer> tc;
  tc.reserve (v.size ());
  for (std::vector<std::string>::const_iterator i = v.begin ()
         , end = v.end ()
         ; i != end
         ; ++i)
  {
    TempContainer t;
    // strncpy zero-fills the unused tail of the buffer, but it leaves the
    // buffer unterminated when the source has MaxStrLength or more
    // characters, so the last byte is forced to NUL afterwards.
    strncpy (t.string, i->c_str (), MaxStrLength);
    t.string[MaxStrLength - 1] = '\0';
    tc.push_back (t);
  }
  //
  // Describe the data: a 1-D extent and a compound type with one member.
  hsize_t dims[1];
  dims[0] = tc.size ();
  hid_t dataspace = H5Screate_simple (sizeof(dims)/sizeof(*dims), dims, NULL);
  hid_t strtype = H5Tcopy (H5T_C_S1);
  hid_t datatype = H5Tcreate (H5T_COMPOUND, sizeof (TempContainer));
  hid_t dataset = -1;
  if (dataspace >= 0 && strtype >= 0 && datatype >= 0
      && H5Tset_size (strtype, MaxStrLength) >= 0
      && H5Tinsert (datatype
                   , "string"
                   , HOFFSET(TempContainer, string)
                   , strtype) >= 0)
  {
    dataset = H5Dcreate1 (group
                         , "files"
                         , datatype
                         , dataspace
                         , H5P_DEFAULT);
  }
  //
  // Write the temporary container to the dataset. An empty container has no
  // first element to take the address of, so there is nothing to write.
  if (dataset >= 0 && !tc.empty ())
  {
    H5Dwrite (dataset, datatype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &tc[0] );
  }
  if (dataset >= 0) H5Dclose (dataset);
  if (dataspace >= 0) H5Sclose (dataspace);
  if (strtype >= 0) H5Tclose (strtype);
  if (datatype >= 0) H5Tclose (datatype);
}
// Build a variable-length string type by copying H5T_C_S1 and setting its
// size to H5T_VARIABLE.
hid_t datatype = H5Tcopy (H5T_C_S1);
H5Tset_size (datatype, H5T_VARIABLE);
//
// The buffer argument of H5Dwrite is the address of the char pointer,
// not the pointer itself.
const char * s = v.c_str ();
H5Dwrite (dataset, datatype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &s);
编写容器的一种解决方案是单独编写每个元素
例如:
class WriteString
{
public:
WriteString (hid_t dataset, hid_t datatype
, hid_t dataspace, hid_t memspace)
: m_dataset (dataset), m_datatype (datatype)
, m_dataspace (dataspace), m_memspace (memspace)
, m_pos () {}
private:
hid_t m_dataset;
hid_t m_datatype;
hid_t m_dataspace;
hid_t m_memspace;
int m_pos;
//
公共:
void运算符()(std::vector::value\u type const&v)
{
//选择文件位置,1条记录位于位置“pos”
hsize_t count[]={1};
hsize_t offset[]={m_pos++};
H5S选择超实验室(m_数据空间
,H5S_选择_集
,偏移量
无效的
计数
,空);
const char*s=v.c_str();
H5Dwrite(m_)数据集
,m_数据类型
,m_memspace
,m_数据空间
,H5P_默认值
(s),;
}
};
//
// Writes the strings in `v` to a one-dimensional dataset named "files"
// under `group`, using a variable-length string type and writing the
// elements one at a time through a WriteString function object.
//
// group : identifier of the HDF5 location that receives the dataset.
// v     : strings to store, one dataset element per string.
void writeVector (hid_t group, std::vector<std::string> const & v)
{
  // File dataspace: one element per string in v.
  hsize_t dims[1];
  dims[0] = v.size ();
  hid_t dataspace = H5Screate_simple(sizeof(dims)/sizeof(*dims)
                                    , dims, NULL);

  // Memory dataspace: a single element, reused for every write.
  dims[0] = 1;
  hid_t memspace = H5Screate_simple(sizeof(dims)/sizeof(*dims)
                                    , dims, NULL);

  hid_t datatype = H5Tcopy (H5T_C_S1);
  H5Tset_size (datatype, H5T_VARIABLE);

  hid_t dataset = H5Dcreate1 (group, "files", datatype
                             , dataspace, H5P_DEFAULT);

  //
  // Select the "memory" to be written out - just 1 record.
  hsize_t offset[] = { 0 } ;
  hsize_t count[] = { 1 } ;
  H5Sselect_hyperslab( memspace, H5S_SELECT_SET, offset
                     , NULL, count, NULL );

  // The function object advances its own file position after each element.
  std::for_each (v.begin ()
      , v.end ()
      , WriteString (dataset, datatype, dataspace, memspace));

  H5Dclose (dataset);
  H5Sclose (dataspace);
  H5Sclose (memspace);
  H5Tclose (datatype);
}
void writeVector(隐藏组,标准::向量常量&v)
{
hsize_t dims[]={m_files.size()};
hid_t数据空间=H5Screate_simple(sizeof(dims)/sizeof(*dims)
,暗显,空);
dims[0]=1;
hid_t memspace=H5Screate_simple(尺寸(dims)/sizeof(*dims)
,暗显,空);
hid_t数据类型=H5Tcopy(H5T_C_S1);
H5Tset_大小(数据类型,H5T_变量);
hid_t dataset=H5Dcreate1(组,“文件”,数据类型
,数据空间,H5P_默认值);
//
//选择要写入的“内存”-仅1条记录。
hsize_t offset[]={0};
hsize_t count[]={1};
H5S选择超实验室(内存空间、H5S选择集、偏移
,NULL,count,NULL);
std::对于每个(v.begin()
,v.end()
,WriteStrings(数据集、数据类型、数据空间、memspace));
H5Dclose(数据集);
H5Sclose(数据空间);
H5Sclose(memspace);
H5Tclose(数据类型);
}
下面是使用HDF5C++ API编写可变长度字符串向量的一些工作代码。
我在其他帖子中加入了一些建议:
- 使用 H5T_C_S1 和 H5T_VARIABLE
- 使用 string::c_str() 获取指向字符串的指针
- 将指针放入 char* 的向量中,并传递给 HDF5 API
这是不必要的
H5::Attribute Foo = file.openAttribute("Foo");
std::vector<std::string> foos
Foo >> foos;
// Reads a one-dimensional attribute of variable-length strings into `array`.
//
// attr0 : attribute to read.
// array : resized to the attribute's element count and filled with copies of
//         the strings; a NULL element is stored as an empty string.
//
// Returns attr0, so reads can be chained.
//
// Throws PBException when the attribute is not rank 1 or its element size is
// not that of a pointer, and std::runtime_error when an HDF5 call fails.
const H5::Attribute& operator>>(const H5::Attribute& attr0, std::vector<std::string>& array)
{
    H5::Exception::dontPrint();
    try
    {
        // The wrapper objects close their identifiers in their destructors,
        // including when one of the exceptions below is thrown.
        H5::DataType atype = attr0.getDataType();
        H5::DataSpace aspace = attr0.getSpace();

        const int rank = aspace.getSimpleExtentNdims();
        if (rank != 1) throw PBException("Attribute " + attr0.getName() + " is not a string array");

        hsize_t sdim[1];
        aspace.getSimpleExtentDims(sdim, NULL);

        const size_t size = atype.getSize();
        if (size != sizeof(void*))
        {
            throw PBException("Internal inconsistency. Expected pointer size element");
        }

        array.resize(sdim[0]);
        if (sdim[0] != 0)
        {
            // HDF5 only understands vector of char* :-(
            std::vector<char*> arr_c_str(sdim[0], static_cast<char*>(NULL));

            H5::StrType stringType(H5::PredType::C_S1, H5T_VARIABLE);
            attr0.read(stringType, arr_c_str.data());

            for (hsize_t i = 0; i < sdim[0]; ++i)
            {
                // A NULL pointer cannot be assigned to std::string.
                array[i] = arr_c_str[i] ? arr_c_str[i] : "";
            }

            // The strings were allocated by the library during the read, so
            // they are handed back to the library rather than to free().
            H5::DataSet::vlenReclaim(arr_c_str.data(), stringType, aspace);
        }
    }
    catch (H5::Exception& err)
    {
        throw std::runtime_error(std::string("HDF5 Error in " )
                                  + err.getFuncName()
                                  + ": "
                                  + err.getDetailMsg());
    }

    return attr0;
}
#include <vector>
#include <string>
#include "H5Cpp.h"
int main(int argc, char* argv[]) {
// Part 0: make up some data
std::vector<std::string> strings;
for (int iii = 0; iii < 10; iii++) {
strings.push_back("this is " + std::to_string(iii));
}
// Part 1: grab pointers to the chars
std::vector<const char*> chars;
for (const auto& str: strings) {
chars.push_back(str.data());
}
// Part 2: create the variable length type
hvl_t hdf_buffer;
hdf_buffer.p = chars.data();
hdf_buffer.len = chars.size();
// Part 3: create the type
auto s_type = H5::StrType(H5::PredType::C_S1, H5T_VARIABLE);
s_type.setCset(H5T_CSET_UTF8); // just for fun, you don't need this
auto svec_type = H5::VarLenType(&s_type);
// Part 4: write the output to a scalar dataset
H5::H5File out_file("vtest.h5", H5F_ACC_EXCL);
H5::DataSet dataset(
out_file.createDataSet("the_ds", svec_type, H5S_SCALAR));
dataset.write(&hdf_buffer, svec_type);
return 0;
}
void write_varnames( const std::string& dsetname, const std::vector<std::string>& strings, H5::H5File& f)
{
H5::Exception::dontPrint();
try
{
// HDF5 only understands vector of char* :-(
std::vector<const char*> arr_c_str;
for (size_t ii = 0; ii < strings.size(); ++ii)
{
arr_c_str.push_back(strings[ii].c_str());
}
//
// one dimension
//
hsize_t str_dimsf[1] {arr_c_str.size()};
H5::DataSpace dataspace(1, str_dimsf);
// Variable length string
H5::StrType datatype(H5::PredType::C_S1, H5T_VARIABLE);
H5::DataSet str_dataset = f.createDataSet(dsetname, datatype, dataspace);
str_dataset.write(arr_c_str.data(), datatype);
}
catch (H5::Exception& err)
{
throw std::runtime_error(std::string("HDF5 Error in ")
+ err.getFuncName()
+ ": "
+ err.getDetailMsg());
}
}
// Reads the one-dimensional dataset `dsname` of variable-length strings from
// file `f` and returns its elements as std::string values.
//
// dsname : name of the dataset to open.
// f      : open file containing the dataset.
//
// Returns one string per dataset element; a NULL element is returned as an
// empty string. The element count and every string are also printed to
// stdout.
//
// Throws H5::DataSpaceIException when the dataset is not rank 1; exceptions
// raised by the HDF5 calls themselves propagate unchanged.
std::vector<std::string> read_string_dset( const std::string& dsname, H5::H5File& f )
{
    H5::DataSet cdataset = f.openDataSet( dsname );

    H5::DataSpace space = cdataset.getSpace();

    // dims_out has room for exactly one extent, so any other rank must be
    // rejected before the extents are fetched.
    const int rank = space.getSimpleExtentNdims();
    if (rank != 1)
    {
        throw H5::DataSpaceIException("read_string_dset", "dataset is not one-dimensional");
    }

    hsize_t dims_out[1];
    space.getSimpleExtentDims( dims_out, NULL);

    const size_t length = dims_out[0];

    // %zu is the conversion that matches size_t.
    fprintf(stdout, "In read STRING dataset, got number of strings: [%zu]\n", length );

    std::vector<std::string> strs(length);
    if (length == 0)
    {
        return strs;
    }

    // The read fills this with pointers to strings allocated by the library.
    std::vector<char*> tmpvect( length, static_cast<char*>(NULL) );
    H5::StrType datatype(H5::PredType::C_S1, H5T_VARIABLE);
    cdataset.read( tmpvect.data(), datatype);

    for(size_t x=0; x<tmpvect.size(); ++x)
    {
        // Neither printf's %s nor std::string accepts a NULL pointer.
        const char* text = tmpvect[x] ? tmpvect[x] : "";
        fprintf(stdout, "GOT STRING [%s]\n", text );
        strs[x] = text;
    }

    // Give the library-allocated strings back now that they are copied.
    H5::DataSet::vlenReclaim(tmpvect.data(), datatype, space);

    return strs;
}