C# 防止XmlDocument解释HTML代码
标签:c#, xml-parsing
我构建了一个代理来动态修改XML格式的数据。问题是,一些XML文档在属性中包含HTML十进制字符代码(即数字字符引用,例如 &#8364;),而XmlDocument.LoadXml方法会在加载时对它们进行解释。输入文档示例:
<config>
<var name="CultureCode" value="fr-FR" />
<var name="CurrencyFormatting">
<var name="Symbol" value="&#8364;" />
<var name="Code" value="EUR"/>
<var name="ThousandSeparator" value=" " />
<var name="DecimalSeparator" value="," />
</var>
</config>
但问题是,生成的XML中不再包含 &#8364; 这个代码,而是变成了 € 符号,而下游流程无法正确解析这个符号:
<config>
<var name="CultureCode" value="fr-FR" />
<var name="CurrencyFormatting">
<var name="Symbol" value="€" /> <!-- Value has changed -->
<var name="Code" value="EUR"/>
<var name="ThousandSeparator" value=" " />
<var name="DecimalSeparator" value="." />
</var>
</config>
如何防止这种行为?是否有其他包在默认情况下不会解释HTML代码
我可以在结果字符串中把'€'替换回去,但我永远无法确定是否还有其他HTML十进制代码也被解释了,所以这并不理想。

正如我所说,这很麻烦。XmlReader 和 XmlTextReader(它们是 XmlDocument 以及 .NET 中几乎所有其他“高级”XML类所使用的读取器)会自动对遇到的所有XML字符实体进行反转义(unescape),而且无法禁用此功能。因此,除非重写 XmlReader 的大部分代码,否则不可能保留原始的XML字符实体。我们能做的是在 XmlWriter 这一端进行处理。这仍然很麻烦:
/// <summary>
/// An <see cref="XmlWriter"/> decorator that forwards every call to an inner
/// writer, except that text written through <see cref="WriteString"/> is
/// escaped by this class and then emitted with <c>WriteRaw</c>. Every character
/// below 0x20 or above 0x7F is written as a numeric character reference
/// (for example <c>&amp;#8364;</c>) instead of as the literal character.
/// </summary>
public class EscapedXmlWriter : XmlWriter
{
    /// <summary>
    /// When <c>true</c>, character references are written in hexadecimal
    /// (<c>&amp;#x20AC;</c>); when <c>false</c> (the default) in decimal
    /// (<c>&amp;#8364;</c>).
    /// </summary>
    // Note that normally XmlWriter uses hex escapes
    public bool UseHexCharEntities { get; set; } = false;

    /// <summary>The inner writer that receives all output.</summary>
    protected XmlWriter Writer;

    /// <summary>Wraps <paramref name="writer"/>.</summary>
    /// <param name="writer">Inner writer; it is disposed together with this instance.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="writer"/> is null.</exception>
    public EscapedXmlWriter(XmlWriter writer)
    {
        if (writer == null)
        {
            // Fail here rather than with a NullReferenceException on first use.
            throw new System.ArgumentNullException(nameof(writer));
        }

        Writer = writer;
    }

    // ----- Plain pass-through members: forwarded unchanged to the inner writer -----

    public override System.Xml.WriteState WriteState => Writer.WriteState;
    public override void Flush() => Writer.Flush();
    public override string LookupPrefix(string ns) => Writer.LookupPrefix(ns);
    public override void WriteBase64(byte[] buffer, int index, int count) => Writer.WriteBase64(buffer, index, count);
    public override void WriteCData(string text) => Writer.WriteCData(text);
    public override void WriteCharEntity(char ch) => Writer.WriteCharEntity(ch);
    public override void WriteChars(char[] buffer, int index, int count) => Writer.WriteChars(buffer, index, count);
    // We could give to comments the same treatment of attributes and content
    public override void WriteComment(string text) => Writer.WriteComment(text);
    public override void WriteDocType(string name, string pubid, string sysid, string subset) => Writer.WriteDocType(name, pubid, sysid, subset);
    public override void WriteEndAttribute() => Writer.WriteEndAttribute();
    public override void WriteEndDocument() => Writer.WriteEndDocument();
    public override void WriteEndElement() => Writer.WriteEndElement();
    public override void WriteEntityRef(string name) => Writer.WriteEntityRef(name);
    public override void WriteFullEndElement() => Writer.WriteFullEndElement();
    public override void WriteProcessingInstruction(string name, string text) => Writer.WriteProcessingInstruction(name, text);
    public override void WriteRaw(char[] buffer, int index, int count) => Writer.WriteRaw(buffer, index, count);
    public override void WriteRaw(string data) => Writer.WriteRaw(data);
    public override void WriteStartAttribute(string prefix, string localName, string ns) => Writer.WriteStartAttribute(prefix, localName, ns);
    public override void WriteStartDocument() => Writer.WriteStartDocument();
    public override void WriteStartDocument(bool standalone) => Writer.WriteStartDocument(standalone);
    public override void WriteStartElement(string prefix, string localName, string ns) => Writer.WriteStartElement(prefix, localName, ns);

    /// <summary>
    /// Escapes <paramref name="text"/> and writes the result to the inner writer
    /// with <c>WriteRaw</c>, so the inner writer does not escape it a second time.
    /// </summary>
    /// <param name="text">Attribute value or element content; null is treated as empty.</param>
    public override void WriteString(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            // Still go through the inner writer so its state advances exactly
            // as it does for non-empty text.
            Writer.WriteRaw(string.Empty);
            return;
        }

        // Quotes and whitespace are only special inside an attribute value.
        bool isAttribute = Writer.WriteState == System.Xml.WriteState.Attribute;
        var sb = new StringBuilder(text.Length);

        for (int i = 0; i < text.Length; i++)
        {
            char ch = text[i];

            if (ch == '"' && isAttribute)
            {
                sb.Append("&quot;");
            }
            else if (ch == '\'' && isAttribute)
            {
                // XmlWriter doesn't escape ' in attributes because it always uses the form "xyz" instead of the form 'xyz'
                sb.Append("&apos;");
            }
            else if (ch == '&')
            {
                sb.Append("&amp;");
            }
            else if (ch == '<')
            {
                sb.Append("&lt;");
            }
            else if (ch == '>')
            {
                sb.Append("&gt;");
            }
            else if (i + 1 < text.Length && char.IsHighSurrogate(ch) && char.IsLowSurrogate(text[i + 1]))
            {
                // A valid surrogate pair becomes ONE reference to the combined code point.
                AppendCharEntity(sb, char.ConvertToUtf32(ch, text[i + 1]));
                i++; // the low surrogate has been consumed
            }
            else if (ch < 0x20 || ch > 0x7f)
            {
                if ((ch == '\r' || ch == '\n' || ch == '\t') && !isAttribute)
                {
                    // Line breaks and tabs stay literal in element content;
                    // in attributes they are written as references.
                    sb.Append(ch);
                }
                else
                {
                    AppendCharEntity(sb, ch);
                }
            }
            else
            {
                sb.Append(ch);
            }
        }

        Writer.WriteRaw(sb.ToString());
    }

    public override void WriteSurrogateCharEntity(char lowChar, char highChar) => Writer.WriteSurrogateCharEntity(lowChar, highChar);
    public override void WriteWhitespace(string ws) => Writer.WriteWhitespace(ws);

    /// <summary>Disposes the inner writer when called from <c>Dispose()</c>.</summary>
    protected override void Dispose(bool disposing)
    {
        if (disposing)
        {
            Writer.Dispose();
        }

        base.Dispose(disposing);
    }

    /// <summary>
    /// Appends a numeric character reference for <paramref name="codePoint"/>,
    /// in hexadecimal or decimal according to <see cref="UseHexCharEntities"/>.
    /// </summary>
    private void AppendCharEntity(StringBuilder sb, int codePoint)
    {
        // Invariant culture keeps the digits independent of the current thread culture.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        if (UseHexCharEntities)
        {
            sb.Append("&#x").Append(codePoint.ToString("X", invariant));
        }
        else
        {
            sb.Append("&#").Append(codePoint.ToString(invariant));
        }

        sb.Append(';');
    }
}
公共类转义XmlWriter:XmlWriter
{
//注意,XmlWriter通常使用十六进制转义
public bool UseHexCharEntities{get;set;}=false;
受保护的XmlWriter;
公共转义XmlWriter(XmlWriter)
{
作家=作家;
}
public override System.Xml.WriteState WriteState=>Writer.WriteState;
public override void Flush()=>Writer.Flush();
公共重写字符串LookupPrefix(字符串ns)=>Writer.LookupPrefix(ns);
public override void WriteBase64(byte[]buffer,int index,int count)=>Writer.WriteBase64(buffer,index,count);
public override void WriteCData(字符串文本)=>Writer.WriteCData(文本);
public override void WriteCharEntity(char ch)=>Writer.WriteCharEntity(ch);
公共重写void WriteChars(char[]缓冲区,int索引,int计数)=>Writer.WriteChars(缓冲区,索引,计数);
//我们可以对评论的属性和内容进行同样的处理
public override void WriteComment(字符串文本)=>Writer.WriteComment(文本);
public override void WriteDocType(字符串名称、字符串pubid、字符串sysid、字符串子集)=>Writer.WriteDocType(名称、pubid、sysid、子集);
公共覆盖无效WriteEndAttribute()=>Writer.WriteEndAttribute();
public override void WriteEndDocument()=>Writer.WriteEndDocument();
public override void WriteEndElement()=>Writer.WriteEndElement();
public override void WriteEntityRef(字符串名称)=>Writer.WriteEntityRef(名称);
public override void writeFileLendElement()=>Writer.writeFileLendElement();
public override void WriteProcessingInstruction(字符串名称,字符串文本)=>Writer.WriteProcessingInstruction(名称,文本);
public override void WriteRaw(char[]buffer,int index,int count)=>Writer.WriteRaw(buffer,index,count);
公共重写void WriteRaw(字符串数据)=>Writer.WriteRaw(数据);
public override void WriteStartAttribute(字符串前缀,字符串localName,字符串ns)=>Writer.WriteStartAttribute(前缀,localName,ns);
公共覆盖无效WriteStartDocument()=>Writer.WriteStartDocument();
public override void WriteStartDocument(bool standalone)=>Writer.WriteStartDocument(standalone);
public override void writeStarteElement(字符串前缀,字符串localName,字符串ns)=>Writer.writeStarteElement(前缀,localName,ns);
公共覆盖无效写入字符串(字符串文本)
{
Trace.WriteLine($“{Writer.WriteState}:{text}”);
var isAttribute=Writer.WriteState==System.Xml.WriteState.Attribute;
var sb=新的StringBuilder();
for(int i=0;i0x7f)
{
如果((ch='\r'| | ch='\n'| | ch='\t')&&&!isAttribute)
{
某人附加(ch);
}
其他的
{
如果(使用实体)
{
某人加上“”);
某人附加((int)ch.ToString(“X”));
}
其他的
{
某人加上()号;
sb.附加((int)ch);
}
某人附加(“;”);
}
}
其他的
{
某人附加(ch);
}
}
字符串text2=sb.ToString();
Writer.WriteRaw(text2);
}
public override void WriteSurrogateCharEntity(char lowChar,char highChar)=>Writer.WriteSurrogateCharEn
/// <summary>
/// An <see cref="XmlWriter"/> decorator that forwards every call to an inner
/// writer, except that text written through <see cref="WriteString"/> is
/// escaped by this class and then emitted with <c>WriteRaw</c>. Every character
/// below 0x20 or above 0x7F is written as a numeric character reference
/// (for example <c>&amp;#8364;</c>) instead of as the literal character.
/// </summary>
public class EscapedXmlWriter : XmlWriter
{
    /// <summary>
    /// When <c>true</c>, character references are written in hexadecimal
    /// (<c>&amp;#x20AC;</c>); when <c>false</c> (the default) in decimal
    /// (<c>&amp;#8364;</c>).
    /// </summary>
    // Note that normally XmlWriter uses hex escapes
    public bool UseHexCharEntities { get; set; } = false;

    /// <summary>The inner writer that receives all output.</summary>
    protected XmlWriter Writer;

    /// <summary>Wraps <paramref name="writer"/>.</summary>
    /// <param name="writer">Inner writer; it is disposed together with this instance.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="writer"/> is null.</exception>
    public EscapedXmlWriter(XmlWriter writer)
    {
        if (writer == null)
        {
            // Fail here rather than with a NullReferenceException on first use.
            throw new System.ArgumentNullException(nameof(writer));
        }

        Writer = writer;
    }

    // ----- Plain pass-through members: forwarded unchanged to the inner writer -----

    public override System.Xml.WriteState WriteState => Writer.WriteState;
    public override void Flush() => Writer.Flush();
    public override string LookupPrefix(string ns) => Writer.LookupPrefix(ns);
    public override void WriteBase64(byte[] buffer, int index, int count) => Writer.WriteBase64(buffer, index, count);
    public override void WriteCData(string text) => Writer.WriteCData(text);
    public override void WriteCharEntity(char ch) => Writer.WriteCharEntity(ch);
    public override void WriteChars(char[] buffer, int index, int count) => Writer.WriteChars(buffer, index, count);
    // We could give to comments the same treatment of attributes and content
    public override void WriteComment(string text) => Writer.WriteComment(text);
    public override void WriteDocType(string name, string pubid, string sysid, string subset) => Writer.WriteDocType(name, pubid, sysid, subset);
    public override void WriteEndAttribute() => Writer.WriteEndAttribute();
    public override void WriteEndDocument() => Writer.WriteEndDocument();
    public override void WriteEndElement() => Writer.WriteEndElement();
    public override void WriteEntityRef(string name) => Writer.WriteEntityRef(name);
    public override void WriteFullEndElement() => Writer.WriteFullEndElement();
    public override void WriteProcessingInstruction(string name, string text) => Writer.WriteProcessingInstruction(name, text);
    public override void WriteRaw(char[] buffer, int index, int count) => Writer.WriteRaw(buffer, index, count);
    public override void WriteRaw(string data) => Writer.WriteRaw(data);
    public override void WriteStartAttribute(string prefix, string localName, string ns) => Writer.WriteStartAttribute(prefix, localName, ns);
    public override void WriteStartDocument() => Writer.WriteStartDocument();
    public override void WriteStartDocument(bool standalone) => Writer.WriteStartDocument(standalone);
    public override void WriteStartElement(string prefix, string localName, string ns) => Writer.WriteStartElement(prefix, localName, ns);

    /// <summary>
    /// Escapes <paramref name="text"/> and writes the result to the inner writer
    /// with <c>WriteRaw</c>, so the inner writer does not escape it a second time.
    /// </summary>
    /// <param name="text">Attribute value or element content; null is treated as empty.</param>
    public override void WriteString(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            // Still go through the inner writer so its state advances exactly
            // as it does for non-empty text.
            Writer.WriteRaw(string.Empty);
            return;
        }

        // Quotes and whitespace are only special inside an attribute value.
        bool isAttribute = Writer.WriteState == System.Xml.WriteState.Attribute;
        var sb = new StringBuilder(text.Length);

        for (int i = 0; i < text.Length; i++)
        {
            char ch = text[i];

            if (ch == '"' && isAttribute)
            {
                sb.Append("&quot;");
            }
            else if (ch == '\'' && isAttribute)
            {
                // XmlWriter doesn't escape ' in attributes because it always uses the form "xyz" instead of the form 'xyz'
                sb.Append("&apos;");
            }
            else if (ch == '&')
            {
                sb.Append("&amp;");
            }
            else if (ch == '<')
            {
                sb.Append("&lt;");
            }
            else if (ch == '>')
            {
                sb.Append("&gt;");
            }
            else if (i + 1 < text.Length && char.IsHighSurrogate(ch) && char.IsLowSurrogate(text[i + 1]))
            {
                // A valid surrogate pair becomes ONE reference to the combined code point.
                AppendCharEntity(sb, char.ConvertToUtf32(ch, text[i + 1]));
                i++; // the low surrogate has been consumed
            }
            else if (ch < 0x20 || ch > 0x7f)
            {
                if ((ch == '\r' || ch == '\n' || ch == '\t') && !isAttribute)
                {
                    // Line breaks and tabs stay literal in element content;
                    // in attributes they are written as references.
                    sb.Append(ch);
                }
                else
                {
                    AppendCharEntity(sb, ch);
                }
            }
            else
            {
                sb.Append(ch);
            }
        }

        Writer.WriteRaw(sb.ToString());
    }

    public override void WriteSurrogateCharEntity(char lowChar, char highChar) => Writer.WriteSurrogateCharEntity(lowChar, highChar);
    public override void WriteWhitespace(string ws) => Writer.WriteWhitespace(ws);

    /// <summary>Disposes the inner writer when called from <c>Dispose()</c>.</summary>
    protected override void Dispose(bool disposing)
    {
        if (disposing)
        {
            Writer.Dispose();
        }

        base.Dispose(disposing);
    }

    /// <summary>
    /// Appends a numeric character reference for <paramref name="codePoint"/>,
    /// in hexadecimal or decimal according to <see cref="UseHexCharEntities"/>.
    /// </summary>
    private void AppendCharEntity(StringBuilder sb, int codePoint)
    {
        // Invariant culture keeps the digits independent of the current thread culture.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        if (UseHexCharEntities)
        {
            sb.Append("&#x").Append(codePoint.ToString("X", invariant));
        }
        else
        {
            sb.Append("&#").Append(codePoint.ToString(invariant));
        }

        sb.Append(';');
    }
}