使用 TinyXML 在 C++ 中解析 XML 文件
我有一个 XML 文件,该文件由块(block)、行(line)、单词(word)、字符(char)组成(标签:c++、xml、tinyxml):
<block id="48" left="2532" top="108" right="2896" bottom="137">
<line id="49" left="2532" top="108" right="2896" bottom="137">
<word id="50" left="2532" top="108" right="2616" bottom="137" value='Date"d'' confidence="69" font="MP" type="-1">
<char id="51" left="2532" top="115" right="2550" bottom="137" value="D" confidence="92" />
<char id="52" left="2551" top="120" right="2565" bottom="137" value="a" confidence="51" />
<char id="53" left="2566" top="116" right="2574" bottom="137" value="t" confidence="33" />
<char id="54" left="2574" top="120" right="2589" bottom="136" value="e" confidence="100" />
<char id="55" left="2589" top="108" right="2592" bottom="112" value='"' confidence="39" />
<char id="56" left="2597" top="115" right="2611" bottom="136" value="d" confidence="76" />
<char id="57" left="2612" top="115" right="2616" bottom="123" value="'" confidence="100" />
</word>
- 每个块由 1,…,n 行组成
- 每行由 1,…,k 个单词组成
- 每个单词由 1,…,l 个字符组成
Block(int top, int left, int bottom, int right, vector<Lines>)
Line(int top, int left, int bottom, int right, vector<words>)
Word(int top, int left, int bottom, int right, vector<characters>)
块(int top, int left, int bottom, int right, vector<行>)
行(int top, int left, int bottom, int right, vector<单词>)
单词(int top, int left, int bottom, int right, vector<字符>)
我在 C++ 中使用 TinyXML,但无法把这些对象关联起来:我的代码一次只能取出一个对象(块、行、单词或字符),却不知道如何把某个块下的所有行收集到该块的向量中。
/// Visits `child`, all of its following siblings and all of their descendants
/// in document order, handling every element named "block".
///
/// For each <block> the element name is printed to stdout, then its bounding
/// box ("left", "top", "right", "bottom") and "id" attributes are read and the
/// centre and size are derived from them. A <block> that lacks any of these
/// attributes (or carries a non-numeric value) is printed but otherwise
/// skipped, instead of passing a null pointer to atoi().
///
/// @param child First element to visit; a null pointer makes this a no-op.
void Keywords::checkChild(TiXmlElement *child)
{
    // Siblings are walked with a loop and only children are recursed into, so
    // the recursion depth follows the nesting depth of the document rather
    // than the number of sibling elements. The visiting order is unchanged.
    for (TiXmlElement *node = child; node; node = node->NextSiblingElement())
    {
        if (string(node->Value()) == "block")
        {
            cout << node->Value() << endl;

            double x1 = 0.0;
            double y1 = 0.0;
            double x2 = 0.0;
            double y2 = 0.0;
            int bid = 0;

            // Query*Attribute reports missing or malformed attributes through
            // its return code instead of handing back a null string.
            const bool hasGeometry =
                node->QueryDoubleAttribute("left", &x1) == TIXML_SUCCESS &&
                node->QueryDoubleAttribute("top", &y1) == TIXML_SUCCESS &&
                node->QueryDoubleAttribute("right", &x2) == TIXML_SUCCESS &&
                node->QueryDoubleAttribute("bottom", &y2) == TIXML_SUCCESS &&
                node->QueryIntAttribute("id", &bid) == TIXML_SUCCESS;

            if (hasGeometry)
            {
                // These values feed the (still commented-out) Block
                // construction below.
                const double xcenter = (x1 + x2) / 2.0;
                const double ycenter = (y1 + y2) / 2.0;
                const double hauteur = y2 - y1; // height
                const double largeur = x2 - x1; // width
                //LineList is a vector, and I can't find a way to fill the vector
                // blockList.push_back(new Block(y1,x1,y2,x2,xcenter,ycenter,largeur,hauteur,xmlFile,lineList));
            }
        }
        checkChild(node->FirstChildElement());
    }
}
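void Keywords::checkChild(TiXmlElement *child)
{
if(child)
{
if((string)child->Value() == "block")
{
cout << child->Value()<<endl;
double x1 = atoi(child->Attribute("left"));
double y1 = atoi(child->Attribute("top"));
double x2 = atoi(child->Attribute("right"));
double y2= atoi(child->Attribute("bottom"));
int bid = atoi(child->Attribute("id"));
double xcenter = (x1 + x2)/2.0;
double ycenter = (y1 + y2)/2.0;
double hauteur = y2-y1;
double largeur = x2-x1;
//LineList is a vector, and I can't find a way to fill the vector
// blockList.push_back(new Block(y1,x1,y2,x2,xcenter,ycenter,largeur,hauteur,xmlFile,lineList));
}
checkChild(child->FirstChildElement());
checkChild(child->NextSiblingElement());
}///end if child
}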
与其用一个通用的递归遍历函数去拼凑出这棵树,不如按文档的层级结构逐层解析:为每一层(块、行、单词、字符)各写一个解析函数,上层函数遍历自己的子元素、调用下层函数,并把返回的对象收集到自己的向量中:
void parseFile(TiXmlElement* document, vector<Block*>& blocks)
{
for (TiXmlElement* sub = document->GetFirstChildElement("block"); sub; sub = sub->GetNextSiblingElement("block"))
blocks.push_back(parseBlock(sub));
}
Block* parseBlock(TiXmlElement* element)
{
double x1 = atof(element->Attribute("left"));
// ...
vector<Line*> lines;
for (TiXmlElement* sub = element->GetFirstChildElement("line"); sub; sub = sub->GetNextSiblingElement("line"))
lines.push_back(parseLine(sub));
return new Block(x1, ..., lines);
}
Line* parseLine(TiXmlElement* element)
{
double x1 = atof(element->Attribute("left"));
// ...
vector<Word*> words;
for (TiXmlElement* sub = element->GetFirstChildElement("word"); sub; sub = sub->GetNextSiblingElement("word"))
words.push_back(parseWord(sub));
return new Line(x1, ..., words);
}
Word* parseWord(TiXmlElement* element)
{
double x1 = atof(element->Attribute("left"));
// ...
vector<Char*> chars;
for (TiXmlElement* sub = element->GetFirstChildElement("char"); sub; sub = sub->GetNextSiblingElement("char"))
chars.push_back(parseChar(sub));
return new Word(x1, ..., chars);
}
/// Builds a heap-allocated Char from a <char> element by reading the
/// element's own attributes; a <char> has no child elements to descend into.
///
/// @param element The <char> element to parse; must not be null.
/// @return A Char created with `new`.
Char* parseChar(TiXmlElement* element)
{
    // QueryDoubleAttribute leaves x1 at 0.0 when "left" is missing or not
    // numeric, instead of handing a null pointer to atof().
    double x1 = 0.0;
    element->QueryDoubleAttribute("left", &x1);
    // ... (the remaining attributes are elided in this snippet)
    return new Char(x1, ...);
}
void parseFile(TiXmlElement* document, vector<Block*>& blocks)
{
for (TiXmlElement* sub = document->GetFirstChildElement("block"); sub; sub = sub->GetNextSiblingElement("block"))
blocks.push_back(parseBlock(sub));
}
Block* parseBlock(TiXmlElement* element)
{
double x1 = atof(element->Attribute("left"));
// ...
vector<Line*> lines;
for (TiXmlElement* sub = element->GetFirstChildElement("line"); sub; sub = sub->GetNextSiblingElement("line"))
lines.push_back(parseLine(sub));
return new Block(x1, ..., lines);
}
Line* parseLine(TiXmlElement* element)
{
double x1 = atof(element->Attribute("left"));
// ...
vector<Word*> words;
for (TiXmlElement* sub = element->GetFirstChildElement("word"); sub; sub = sub->GetNextSiblingElement("word"))
words.push_back(parseWord(sub));
return new Line(x1, ..., words);
}
Word* parseWord(TiXmlElement* element)
{
double x1 = atof(element->Attribute("left"));
// ...
vector<Char*> chars;
for (TiXmlElement* sub = element->GetFirstChildElement("char"); sub; sub = sub->GetNextSiblingElement("char"))
chars.push_back(parseChar(sub));
return new Word(x1, ..., chars);
}
Char* parseChar(TiXmlElement* element)
{
double x1 = atof(element->Attribute("left"));
// ...
return new Char(x1, ...);
}
什么不能链接在一起?——我的意思是,我可以单独提取块、行、单词,但找不到方法来创建这样的对象:例如 Block(int, int, int, int, vector<Line>),其中的向量包含该块内的所有行;同样,我也找不到方法来创建 Line(int, int, int, int, vector<Word>),其中的向量包含该行内的所有单词,依此类推。也就是说,我不知道该如何遍历 XML 文件才能做到这一点。我会把这个问题的算法贴出来,希望现在表述得更清楚了。