C# 使用 Html Agility Pack 在 C# 中解析表格和单元格
我需要解析Html代码。更具体地说,解析所有表中每行的每个单元格。每行表示一个对象,每个单元格表示不同的属性。我想解析这些,以便能够编写一个包含所有数据的XML文件(没有无用的HTML代码)。我已经成功地解析了HTML文件中的每一列,但现在我不知道将其写入XML文件的选项是什么。我很困惑。 HTML:C# 使用 Html Agility Pack 在 C# 中解析表格和单元格,c#,html-agility-pack,xml-parsing,html-parsing,C#,Html Agility Pack,Xml Parsing,Html Parsing,我需要解析Html代码。更具体地说,解析所有表中每行的每个单元格。每行表示一个对象,每个单元格表示不同的属性。我想解析这些,以便能够编写一个包含所有数据的XML文件(没有无用的HTML代码)。我已经成功地解析了HTML文件中的每一列,但现在我不知道将其写入XML文件的选项是什么。我很困惑。 HTML: 1. PIT C 39 32 33 65 20 29 10 1. 3. 0 154 20.8 21:54 22.6 55.7 C#: using HtmlAgilityPack; namespace Stats { class
1.
PIT
C
39
32
33
65
20
29
10
1.
3.
0
154
20.8
21:54
22.6
55.7
C#:
使用HtmlAgilityPack;
命名空间统计信息
{
类StatsParser
{
私有字符串htmlCode;
私有静态字符串fileName=“[”+DateTime.Now.ToSortDateString()+“NHL Stats].xml”;
公共StatsParser(字符串htmlCode)
{
this.htmlCode=htmlCode;
this.ParseHtml();
}
公共空解析HTML()
{
HtmlDocument doc=新的HtmlDocument();
doc.LoadHtml(htmlCode);
尝试
{
//获取文档中的所有表
HtmlNodeCollection tables=doc.DocumentNode.SelectNodes(“//表”);
//迭代第一个表中的所有行
HtmlNodeCollection行=表[0]。选择节点(“.//tr”);
对于(int i=0;i
XML:
Sidney Crosby
PIT
C
39
32
33
在查看了MSDN之后,我终于找到了解决问题的实施方案:
using System;
using HtmlAgilityPack;
using System.Xml;
namespace HockeyStats
{
    /// <summary>
    /// Parses an HTML page with HtmlAgilityPack and writes the contents of its
    /// <c>statBox</c> table cells to an XML file, one <c>Player</c> element per row.
    /// </summary>
    class StatsParser
    {
        // Element names for the cells that follow the first (rank) cell, in column order.
        private static readonly string[] ElementNames =
        {
            "Name", "Team", "Pos", "GP", "G", "A", "PlusMinus", "PIM", "PP",
            "SH", "GW", "OT", "Shots", "ShotPctg", "TOIPerGame", "ShiftsPerGame", "FOWinPctg"
        };

        private string htmlCode;

        // The short date pattern is culture dependent and may contain characters such as '/'
        // that are not allowed in a file name, so those characters are replaced with '-'.
        private static string fileName = "[" + ToFileNameSafe(DateTime.Now.ToShortDateString()) + " NHL Stats].xml";

        /// <summary>
        /// Stores the HTML markup and immediately parses it, which writes the XML file.
        /// </summary>
        /// <param name="htmlCode">The HTML markup to parse.</param>
        public StatsParser(string htmlCode)
        {
            this.htmlCode = htmlCode;
            this.ParseHtml();
        }

        /// <summary>
        /// Loads the stored HTML, selects every row matched by <c>.//tr/tr</c> and writes
        /// a <c>Stats</c> document containing one <c>Player</c> element per row that has
        /// <c>statBox</c> cells. The file is created two directories above the current
        /// working directory.
        /// </summary>
        public void ParseHtml()
        {
            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(htmlCode);

            // Create an XmlWriterSettings object with the correct options.
            XmlWriterSettings settings = new XmlWriterSettings();
            settings.Indent = true;
            settings.IndentChars = (" ");
            settings.OmitXmlDeclaration = false;

            // The using statement closes the writer even when an exception is thrown.
            using (XmlWriter writer = XmlWriter.Create(@"..\..\" + fileName, settings))
            {
                writer.WriteStartElement("Stats");
                writer.WriteAttributeString("Date", DateTime.Now.ToShortDateString());

                // SelectNodes returns null (not an empty collection) when nothing matches.
                HtmlNodeCollection rows = doc.DocumentNode.SelectNodes(".//tr/tr");
                if (rows != null)
                {
                    foreach (HtmlNode row in rows)
                    {
                        WritePlayer(writer, row.SelectNodes(".//td[@class='statBox']"));
                    }
                }

                writer.WriteEndElement();
                writer.Flush();
            }
        }

        // Writes one Player element for a row; rows without statBox cells are skipped.
        private static void WritePlayer(XmlWriter writer, HtmlNodeCollection cols)
        {
            if (cols == null || cols.Count == 0)
            {
                return;
            }

            writer.WriteStartElement("Player");
            writer.WriteAttributeString("Rank", cols[0].InnerText.Trim());

            // Never read past the cells that exist or the names that are defined.
            int count = Math.Min(cols.Count - 1, ElementNames.Length);
            for (int k = 0; k < count; ++k)
            {
                // Every value is trimmed; previously only the first columns were.
                writer.WriteElementString(ElementNames[k], cols[k + 1].InnerText.Trim());
            }

            writer.WriteEndElement();
        }

        // Replaces every character that is invalid in a file name with '-'.
        private static string ToFileNameSafe(string text)
        {
            return string.Join("-", text.Split(System.IO.Path.GetInvalidFileNameChars()));
        }
    }
}
使用系统;
使用HtmlAgilityPack;
使用System.Xml;
命名空间hockeysts
{
类StatsParser
{
私有字符串htmlCode;
私有静态字符串fileName=“[”+DateTime.Now.ToSortDateString()+“NHL Stats].xml”;
公共StatsParser(字符串htmlCode)
{
this.htmlCode=htmlCode;
this.ParseHtml();
}
公共空解析HTML()
{
HtmlDocument doc=新的HtmlDocument();
doc.LoadHtml(htmlCode);
XmlWriter=null;
尝试
{
//使用正确的选项创建XmlWriterSettings对象。
XmlWriterSettings=新的XmlWriterSettings();
settings.Indent=true;
settings.IndentChars=(“”);
settings.OmitXmlDeclaration=false;
//创建XmlWriter对象并编写一些内容。
writer=XmlWriter.Create(@“.\..\”+文件名,设置);
WriteStarteElement(“统计数据”);
WriteAttributeString(“Date”,DateTime.Now.ToSortDateString());
//迭代另一行中的所有行
HtmlNodeCollection rows=doc.DocumentNode.SelectNodes(“.//tr/tr”);
对于(int i=0;i
using HtmlAgilityPack;
namespace Stats
{
    /// <summary>
    /// Parses an HTML page with HtmlAgilityPack and shows the text of every non-empty
    /// <c>statBox</c> cell of the first table in a message box.
    /// </summary>
    class StatsParser
    {
        private string htmlCode;

        // System.DateTime is fully qualified because no "using System;" is visible for this snippet.
        private static string fileName = "[" + System.DateTime.Now.ToShortDateString() + " NHL Stats].xml";

        /// <summary>
        /// Stores the HTML markup and immediately parses it.
        /// </summary>
        /// <param name="htmlCode">The HTML markup to parse.</param>
        public StatsParser(string htmlCode)
        {
            this.htmlCode = htmlCode;
            this.ParseHtml();
        }

        /// <summary>
        /// Walks every row of the first table in the document and displays the text of
        /// each non-empty <c>statBox</c> cell. Shows the "Exception!!" message when the
        /// document has no table or the first table has no rows.
        /// </summary>
        public void ParseHtml()
        {
            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(htmlCode);

            // SelectNodes returns null when nothing matches, so test for null explicitly
            // instead of catching NullReferenceException.
            HtmlNodeCollection tables = doc.DocumentNode.SelectNodes("//table");
            HtmlNodeCollection rows = tables == null ? null : tables[0].SelectNodes(".//tr");
            if (rows == null)
            {
                System.Windows.Forms.MessageBox.Show("Exception!!");
                return;
            }

            foreach (HtmlNode row in rows)
            {
                HtmlNodeCollection cols = row.SelectNodes(".//td[@class='statBox']");
                if (cols == null)
                {
                    // A row without statBox cells (e.g. a header row) no longer aborts the whole walk.
                    continue;
                }

                foreach (HtmlNode col in cols)
                {
                    // Get the value of the column and display it.
                    // NOTE(review): this call uses System.Windows.MessageBox while the error path
                    // uses System.Windows.Forms.MessageBox - confirm which one the project references.
                    string value = col.InnerText;
                    if (!string.IsNullOrEmpty(value))
                    {
                        System.Windows.MessageBox.Show(value);
                    }
                }
            }
        }
    }
}
<?xml version="1.0" encoding="utf-8" ?>
<Stats Date="2011-01-01">
<Player Rank="1">
<Name>Sidney Crosby</Name>
<Team>PIT</Team>
<Position>C</Position>
<GamesPlayed>39</GamesPlayed>
<Goals>32</Goals>
<Assists>33</Assists>
</Player>
</Stats>
using System;
using HtmlAgilityPack;
using System.Xml;
namespace HockeyStats
{
    /// <summary>
    /// Parses an HTML page with HtmlAgilityPack and writes the contents of its
    /// <c>statBox</c> table cells to an XML file, one <c>Player</c> element per row.
    /// </summary>
    class StatsParser
    {
        // Element names for the cells that follow the first (rank) cell, in column order.
        private static readonly string[] ElementNames =
        {
            "Name", "Team", "Pos", "GP", "G", "A", "PlusMinus", "PIM", "PP",
            "SH", "GW", "OT", "Shots", "ShotPctg", "TOIPerGame", "ShiftsPerGame", "FOWinPctg"
        };

        private string htmlCode;

        // The short date pattern is culture dependent and may contain characters such as '/'
        // that are not allowed in a file name, so those characters are replaced with '-'.
        private static string fileName = "[" + ToFileNameSafe(DateTime.Now.ToShortDateString()) + " NHL Stats].xml";

        /// <summary>
        /// Stores the HTML markup and immediately parses it, which writes the XML file.
        /// </summary>
        /// <param name="htmlCode">The HTML markup to parse.</param>
        public StatsParser(string htmlCode)
        {
            this.htmlCode = htmlCode;
            this.ParseHtml();
        }

        /// <summary>
        /// Loads the stored HTML, selects every row matched by <c>.//tr/tr</c> and writes
        /// a <c>Stats</c> document containing one <c>Player</c> element per row that has
        /// <c>statBox</c> cells. The file is created two directories above the current
        /// working directory.
        /// </summary>
        public void ParseHtml()
        {
            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(htmlCode);

            // Create an XmlWriterSettings object with the correct options.
            XmlWriterSettings settings = new XmlWriterSettings();
            settings.Indent = true;
            settings.IndentChars = (" ");
            settings.OmitXmlDeclaration = false;

            // The using statement closes the writer even when an exception is thrown.
            using (XmlWriter writer = XmlWriter.Create(@"..\..\" + fileName, settings))
            {
                writer.WriteStartElement("Stats");
                writer.WriteAttributeString("Date", DateTime.Now.ToShortDateString());

                // SelectNodes returns null (not an empty collection) when nothing matches.
                HtmlNodeCollection rows = doc.DocumentNode.SelectNodes(".//tr/tr");
                if (rows != null)
                {
                    foreach (HtmlNode row in rows)
                    {
                        WritePlayer(writer, row.SelectNodes(".//td[@class='statBox']"));
                    }
                }

                writer.WriteEndElement();
                writer.Flush();
            }
        }

        // Writes one Player element for a row; rows without statBox cells are skipped.
        private static void WritePlayer(XmlWriter writer, HtmlNodeCollection cols)
        {
            if (cols == null || cols.Count == 0)
            {
                return;
            }

            writer.WriteStartElement("Player");
            writer.WriteAttributeString("Rank", cols[0].InnerText.Trim());

            // Never read past the cells that exist or the names that are defined.
            int count = Math.Min(cols.Count - 1, ElementNames.Length);
            for (int k = 0; k < count; ++k)
            {
                // Every value is trimmed; previously only the first columns were.
                writer.WriteElementString(ElementNames[k], cols[k + 1].InnerText.Trim());
            }

            writer.WriteEndElement();
        }

        // Replaces every character that is invalid in a file name with '-'.
        private static string ToFileNameSafe(string text)
        {
            return string.Join("-", text.Split(System.IO.Path.GetInvalidFileNameChars()));
        }
    }
}
<?xml version="1.0" encoding="utf-8" ?>
<Stats Date="2011-01-01">
<Player Rank="1">
<Name>Sidney Crosby</Name>
<Team>PIT</Team>
<Pos>C</Pos>
<GP>39</GP>
<G>32</G>
<A>33</A>
<PlusMinus>20</PlusMinus>
<PIM>29</PIM>
<PP>10</PP>
<SH>1</SH>
<GW>3</GW>
<Shots>0</Shots>
<ShotPctg>154</ShotPctg>
<TOIPerGame>20.8</TOIPerGame>
<ShiftsPerGame>21:54</ShiftsPerGame>
<FOWinPctg>22.6</FOWinPctg>
</Player>
</Stats>
/// <summary>
/// Loads the stored HTML and saves a <c>Stats</c> XML document that contains one
/// <c>Player</c> element for every <c>//tr/tr</c> row that has <c>statBox</c> cells.
/// The first cell of a row becomes the <c>Rank</c> attribute; the remaining cells are
/// paired with the element names in column order, and empty values are omitted.
/// </summary>
public void ParseHtml()
{
    var htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(htmlCode);

    var elementNames = new[] { "Name", "Team", "Pos", "GP", "G", "A", "PlusMinus", "PIM", "PP", "SH", "GW", "OT", "Shots", "ShotPctg", "TOIPerGame", "ShiftsPerGame", "FOWinPctg", "UnknownField" };

    var xmlDoc = new XElement("Stats", new XAttribute("Date", DateTime.Now.ToShortDateString()));

    // SelectNodes returns null when nothing matches; in that case an empty Stats element is saved.
    var rows = htmlDoc.DocumentNode.SelectNodes(@"//tr/tr");
    if (rows != null)
    {
        foreach (HtmlNode row in rows)
        {
            // Cells are read per row so that each row yields its own Player element
            // instead of only the first row's cells being used.
            var cellNodes = row.SelectNodes(@"./td[@class='statBox']");
            if (cellNodes == null)
            {
                continue;
            }

            var cells = cellNodes
                .Select(node => node.InnerText.Trim())
                .ToList();

            xmlDoc.Add(
                new XElement("Player", new XAttribute("Rank", cells.First()),
                    // generate the elements based on the parsed cells
                    cells.Skip(1)
                         .Zip(elementNames, (Value, Name) => new XElement(Name, Value))
                         .Where(element => !String.IsNullOrEmpty(element.Value))
                )
            );
        }
    }

    // save to your file
    xmlDoc.Save(filepath);
}
<?xml version="1.0" encoding="utf-8"?>
<Stats Date="1/3/2011">
<Player Rank="1">
<Name>Sidney Crosby</Name>
<Team>PIT</Team>
<Pos>C</Pos>
<GP>39</GP>
<G>32</G>
<A>33</A>
<PlusMinus>20</PlusMinus>
<PIM>29</PIM>
<PP>10</PP>
<SH>1</SH>
<GW>3</GW>
<Shots>0</Shots>
<ShotPctg>154</ShotPctg>
<TOIPerGame>20.8</TOIPerGame>
<ShiftsPerGame>21:54</ShiftsPerGame>
<FOWinPctg>22.6</FOWinPctg>
<UnknownField>55.7</UnknownField>
</Player>
</Stats>