C# 防止XmlDocument解释HTML代码

C# 防止XmlDocument解释HTML代码,c#,xml-parsing,C#,Xml Parsing,我构建了一个代理来动态修改XML格式的数据 问题是一些XML文档在属性中包含HTML十进制代码,并由XmlDocument.LoadXml方法进行解释 输入文档示例: <config> <var name="CultureCode" value="fr-FR" /> <var name="CurrencyFormatting"> <var name="Symbo

我构建了一个代理来动态修改XML格式的数据

问题是一些XML文档在属性中包含HTML十进制代码,并由XmlDocument.LoadXml方法进行解释

输入文档示例:

<config>
   <var name="CultureCode" value="fr-FR" />
   <var name="CurrencyFormatting">
      <var name="Symbol"               value="&#8364;" />
      <var name="Code"                 value="EUR"/>
      <var name="ThousandSeparator"    value=" " />
      <var name="DecimalSeparator"    value="," />
   </var>  
</config>
但是,问题是生成的XML不再包含 &#8364;,
而是包含 € 符号,
该符号被下游流程误解了:

<config>
   <var name="CultureCode" value="fr-FR" />
   <var name="CurrencyFormatting">
      <var name="Symbol"               value="€" /> <!-- Value has changed -->
      <var name="Code"                 value="EUR"/>
      <var name="ThousandSeparator"    value=" " />
      <var name="DecimalSeparator"    value="." />
   </var>  
</config>

如何防止这种行为?是否有其他包在默认情况下不会解释HTML代码?


我可以从结果字符串中替换'€',但我永远无法确定是否已经解释了另一段HTML十进制代码,这并不理想。

正如我在评论中所说,这很麻烦。
XmlReader
/
XmlTextReader
(这是
XmlDocument
和.NET的几乎所有其他“高级”xml类所使用的读取器)会自动对它们遇到的所有xml字符实体进行反转义(unescape),并且无法禁用此功能。因此,除非重写大块的
XmlReader
,否则不可能保留原始的xml字符实体。我们可以做的是在
XmlWriter
端处理。这仍然很麻烦:

/// <summary>
/// An <see cref="XmlWriter"/> decorator that forwards every call to an inner writer,
/// except <see cref="WriteString"/>, which escapes the text itself and emits it through
/// <c>WriteRaw</c>. Markup characters become named entities and every character outside
/// printable ASCII becomes a numeric character reference (decimal by default).
/// </summary>
public class EscapedXmlWriter : XmlWriter
{
    /// <summary>
    /// When <c>true</c>, numeric character references are written as <c>&amp;#xHHHH;</c>;
    /// when <c>false</c> (the default) they are written as <c>&amp;#DDDD;</c>.
    /// </summary>
    // Note that normally XmlWriter uses hex escapes
    public bool UseHexCharEntities { get; set; } = false;

    /// <summary>The wrapped writer that receives all output.</summary>
    protected XmlWriter Writer;

    /// <summary>Creates a decorator around <paramref name="writer"/>.</summary>
    /// <param name="writer">The writer every call is forwarded to; disposed together with this instance.</param>
    public EscapedXmlWriter(XmlWriter writer)
    {
        Writer = writer;
    }

    public override System.Xml.WriteState WriteState => Writer.WriteState;

    public override void Flush() => Writer.Flush();

    public override string LookupPrefix(string ns) => Writer.LookupPrefix(ns);

    public override void WriteBase64(byte[] buffer, int index, int count) => Writer.WriteBase64(buffer, index, count);

    public override void WriteCData(string text) => Writer.WriteCData(text);

    public override void WriteCharEntity(char ch) => Writer.WriteCharEntity(ch);

    public override void WriteChars(char[] buffer, int index, int count) => Writer.WriteChars(buffer, index, count);

    // We could give to comments the same treatment of attributes and content
    public override void WriteComment(string text) => Writer.WriteComment(text);

    public override void WriteDocType(string name, string pubid, string sysid, string subset) => Writer.WriteDocType(name, pubid, sysid, subset);

    public override void WriteEndAttribute() => Writer.WriteEndAttribute();

    public override void WriteEndDocument() => Writer.WriteEndDocument();

    public override void WriteEndElement() => Writer.WriteEndElement();

    public override void WriteEntityRef(string name) => Writer.WriteEntityRef(name);

    public override void WriteFullEndElement() => Writer.WriteFullEndElement();

    public override void WriteProcessingInstruction(string name, string text) => Writer.WriteProcessingInstruction(name, text);

    public override void WriteRaw(char[] buffer, int index, int count) => Writer.WriteRaw(buffer, index, count);

    public override void WriteRaw(string data) => Writer.WriteRaw(data);

    public override void WriteStartAttribute(string prefix, string localName, string ns) => Writer.WriteStartAttribute(prefix, localName, ns);

    public override void WriteStartDocument() => Writer.WriteStartDocument();

    public override void WriteStartDocument(bool standalone) => Writer.WriteStartDocument(standalone);

    public override void WriteStartElement(string prefix, string localName, string ns) => Writer.WriteStartElement(prefix, localName, ns);

    /// <summary>
    /// Escapes <paramref name="text"/> and writes the result to the inner writer with
    /// <c>WriteRaw</c>, so the inner writer does not re-escape it.
    /// </summary>
    /// <param name="text">The attribute value or element content to write; <c>null</c> writes nothing.</param>
    public override void WriteString(string text)
    {
        // Nothing to escape or emit; also avoids dereferencing null below.
        if (text == null)
        {
            return;
        }

        Trace.WriteLine($"{Writer.WriteState}: {text}");

        // Quotes and whitespace control characters are only escaped inside attribute values.
        var isAttribute = Writer.WriteState == System.Xml.WriteState.Attribute;

        var sb = new StringBuilder(text.Length);

        for (int i = 0; i < text.Length; i++)
        {
            char ch = text[i];
            char low;

            if (ch == '"' && isAttribute)
            {
                sb.Append("&quot;");
            }
            else if (ch == '\'' && isAttribute)
            {
                // XmlWriter doesn't escape ' in attributes because it always uses the form "xyz" instead of the form 'xyz'
                sb.Append("&apos;");
            }
            else if (ch == '&')
            {
                sb.Append("&amp;");
            }
            else if (ch == '<')
            {
                sb.Append("&lt;");
            }
            else if (ch == '>')
            {
                sb.Append("&gt;");
            }
            else if (i + 1 < text.Length && char.IsHighSurrogate(ch) && char.IsLowSurrogate(low = text[i + 1]))
            {
                // A valid surrogate pair is written as one reference to the combined code point.
                AppendCharEntity(sb, char.ConvertToUtf32(ch, low));

                // Skip the low surrogate that was just consumed.
                i++;
            }
            else if (ch < 0x20 || ch > 0x7f)
            {
                if ((ch == '\r' || ch == '\n' || ch == '\t') && !isAttribute)
                {
                    // Line breaks and tabs are kept literal in element content.
                    sb.Append(ch);
                }
                else
                {
                    // An unpaired surrogate also ends up here and is written by its UTF-16 code unit value.
                    AppendCharEntity(sb, ch);
                }
            }
            else
            {
                sb.Append(ch);
            }
        }

        Writer.WriteRaw(sb.ToString());
    }

    // Appends a numeric character reference for codePoint, hex or decimal per UseHexCharEntities.
    private void AppendCharEntity(StringBuilder sb, int codePoint)
    {
        if (UseHexCharEntities)
        {
            sb.Append("&#x");
            sb.Append(codePoint.ToString("X"));
        }
        else
        {
            sb.Append("&#");
            sb.Append(codePoint);
        }

        sb.Append(';');
    }

    public override void WriteSurrogateCharEntity(char lowChar, char highChar) => Writer.WriteSurrogateCharEntity(lowChar, highChar);

    public override void WriteWhitespace(string ws) => Writer.WriteWhitespace(ws);

    /// <summary>Disposes the inner writer when called from <c>Dispose()</c>, then the base class.</summary>
    protected override void Dispose(bool disposing)
    {
        if (disposing)
        {
            Writer.Dispose();
        }

        base.Dispose(disposing);
    }
}
公共类转义XmlWriter:XmlWriter
{
//注意,XmlWriter通常使用十六进制转义
public bool UseHexCharEntities{get;set;}=false;
受保护的XmlWriter;
公共转义XmlWriter(XmlWriter)
{
作家=作家;
}
public override System.Xml.WriteState WriteState=>Writer.WriteState;
public override void Flush()=>Writer.Flush();
公共重写字符串LookupPrefix(字符串ns)=>Writer.LookupPrefix(ns);
public override void WriteBase64(byte[]buffer,int index,int count)=>Writer.WriteBase64(buffer,index,count);
public override void WriteCData(字符串文本)=>Writer.WriteCData(文本);
public override void WriteCharEntity(char ch)=>Writer.WriteCharEntity(ch);
公共重写void WriteChars(char[]缓冲区,int索引,int计数)=>Writer.WriteChars(缓冲区,索引,计数);
//我们可以对评论的属性和内容进行同样的处理
public override void WriteComment(字符串文本)=>Writer.WriteComment(文本);
public override void WriteDocType(字符串名称、字符串pubid、字符串sysid、字符串子集)=>Writer.WriteDocType(名称、pubid、sysid、子集);
公共覆盖无效WriteEndAttribute()=>Writer.WriteEndAttribute();
public override void WriteEndDocument()=>Writer.WriteEndDocument();
public override void WriteEndElement()=>Writer.WriteEndElement();
public override void WriteEntityRef(字符串名称)=>Writer.WriteEntityRef(名称);
public override void writeFileLendElement()=>Writer.writeFileLendElement();
public override void WriteProcessingInstruction(字符串名称,字符串文本)=>Writer.WriteProcessingInstruction(名称,文本);
public override void WriteRaw(char[]buffer,int index,int count)=>Writer.WriteRaw(buffer,index,count);
公共重写void WriteRaw(字符串数据)=>Writer.WriteRaw(数据);
public override void WriteStartAttribute(字符串前缀,字符串localName,字符串ns)=>Writer.WriteStartAttribute(前缀,localName,ns);
公共覆盖无效WriteStartDocument()=>Writer.WriteStartDocument();
public override void WriteStartDocument(bool standalone)=>Writer.WriteStartDocument(standalone);
public override void writeStarteElement(字符串前缀,字符串localName,字符串ns)=>Writer.writeStarteElement(前缀,localName,ns);
公共覆盖无效写入字符串(字符串文本)
{
Trace.WriteLine($“{Writer.WriteState}:{text}”);
var isAttribute=Writer.WriteState==System.Xml.WriteState.Attribute;
var sb=新的StringBuilder();
for(int i=0;i0x7f)
{
如果((ch='\r'| | ch='\n'| | ch='\t')&&&!isAttribute)
{
某人附加(ch);
}
其他的
{
如果(使用实体)
{
某人加上“&#x”);
某人附加((int)ch.ToString(“X”));
}
其他的
{
某人加上(&#)号;
sb.附加((int)ch);
}
某人附加(“;”);
}
}
其他的
{
某人附加(ch);
}
}
字符串text2=sb.ToString();
Writer.WriteRaw(text2);
}
public override void WriteSurrogateCharEntity(char lowChar,char highChar)=>Writer.WriteSurrogateCharEn
/// <summary>
/// An <see cref="XmlWriter"/> decorator that forwards every call to an inner writer,
/// except <see cref="WriteString"/>, which escapes the text itself and emits it through
/// <c>WriteRaw</c>. Markup characters become named entities and every character outside
/// printable ASCII becomes a numeric character reference (decimal by default).
/// </summary>
public class EscapedXmlWriter : XmlWriter
{
    /// <summary>
    /// When <c>true</c>, numeric character references are written as <c>&amp;#xHHHH;</c>;
    /// when <c>false</c> (the default) they are written as <c>&amp;#DDDD;</c>.
    /// </summary>
    // Note that normally XmlWriter uses hex escapes
    public bool UseHexCharEntities { get; set; } = false;

    /// <summary>The wrapped writer that receives all output.</summary>
    protected XmlWriter Writer;

    /// <summary>Creates a decorator around <paramref name="writer"/>.</summary>
    /// <param name="writer">The writer every call is forwarded to; disposed together with this instance.</param>
    public EscapedXmlWriter(XmlWriter writer)
    {
        Writer = writer;
    }

    public override System.Xml.WriteState WriteState => Writer.WriteState;

    public override void Flush() => Writer.Flush();

    public override string LookupPrefix(string ns) => Writer.LookupPrefix(ns);

    public override void WriteBase64(byte[] buffer, int index, int count) => Writer.WriteBase64(buffer, index, count);

    public override void WriteCData(string text) => Writer.WriteCData(text);

    public override void WriteCharEntity(char ch) => Writer.WriteCharEntity(ch);

    public override void WriteChars(char[] buffer, int index, int count) => Writer.WriteChars(buffer, index, count);

    // We could give to comments the same treatment of attributes and content
    public override void WriteComment(string text) => Writer.WriteComment(text);

    public override void WriteDocType(string name, string pubid, string sysid, string subset) => Writer.WriteDocType(name, pubid, sysid, subset);

    public override void WriteEndAttribute() => Writer.WriteEndAttribute();

    public override void WriteEndDocument() => Writer.WriteEndDocument();

    public override void WriteEndElement() => Writer.WriteEndElement();

    public override void WriteEntityRef(string name) => Writer.WriteEntityRef(name);

    public override void WriteFullEndElement() => Writer.WriteFullEndElement();

    public override void WriteProcessingInstruction(string name, string text) => Writer.WriteProcessingInstruction(name, text);

    public override void WriteRaw(char[] buffer, int index, int count) => Writer.WriteRaw(buffer, index, count);

    public override void WriteRaw(string data) => Writer.WriteRaw(data);

    public override void WriteStartAttribute(string prefix, string localName, string ns) => Writer.WriteStartAttribute(prefix, localName, ns);

    public override void WriteStartDocument() => Writer.WriteStartDocument();

    public override void WriteStartDocument(bool standalone) => Writer.WriteStartDocument(standalone);

    public override void WriteStartElement(string prefix, string localName, string ns) => Writer.WriteStartElement(prefix, localName, ns);

    /// <summary>
    /// Escapes <paramref name="text"/> and writes the result to the inner writer with
    /// <c>WriteRaw</c>, so the inner writer does not re-escape it.
    /// </summary>
    /// <param name="text">The attribute value or element content to write; <c>null</c> writes nothing.</param>
    public override void WriteString(string text)
    {
        // Nothing to escape or emit; also avoids dereferencing null below.
        if (text == null)
        {
            return;
        }

        Trace.WriteLine($"{Writer.WriteState}: {text}");

        // Quotes and whitespace control characters are only escaped inside attribute values.
        var isAttribute = Writer.WriteState == System.Xml.WriteState.Attribute;

        var sb = new StringBuilder(text.Length);

        for (int i = 0; i < text.Length; i++)
        {
            char ch = text[i];
            char low;

            if (ch == '"' && isAttribute)
            {
                sb.Append("&quot;");
            }
            else if (ch == '\'' && isAttribute)
            {
                // XmlWriter doesn't escape ' in attributes because it always uses the form "xyz" instead of the form 'xyz'
                sb.Append("&apos;");
            }
            else if (ch == '&')
            {
                sb.Append("&amp;");
            }
            else if (ch == '<')
            {
                sb.Append("&lt;");
            }
            else if (ch == '>')
            {
                sb.Append("&gt;");
            }
            else if (i + 1 < text.Length && char.IsHighSurrogate(ch) && char.IsLowSurrogate(low = text[i + 1]))
            {
                // A valid surrogate pair is written as one reference to the combined code point.
                AppendCharEntity(sb, char.ConvertToUtf32(ch, low));

                // Skip the low surrogate that was just consumed.
                i++;
            }
            else if (ch < 0x20 || ch > 0x7f)
            {
                if ((ch == '\r' || ch == '\n' || ch == '\t') && !isAttribute)
                {
                    // Line breaks and tabs are kept literal in element content.
                    sb.Append(ch);
                }
                else
                {
                    // An unpaired surrogate also ends up here and is written by its UTF-16 code unit value.
                    AppendCharEntity(sb, ch);
                }
            }
            else
            {
                sb.Append(ch);
            }
        }

        Writer.WriteRaw(sb.ToString());
    }

    // Appends a numeric character reference for codePoint, hex or decimal per UseHexCharEntities.
    private void AppendCharEntity(StringBuilder sb, int codePoint)
    {
        if (UseHexCharEntities)
        {
            sb.Append("&#x");
            sb.Append(codePoint.ToString("X"));
        }
        else
        {
            sb.Append("&#");
            sb.Append(codePoint);
        }

        sb.Append(';');
    }

    public override void WriteSurrogateCharEntity(char lowChar, char highChar) => Writer.WriteSurrogateCharEntity(lowChar, highChar);

    public override void WriteWhitespace(string ws) => Writer.WriteWhitespace(ws);

    /// <summary>Disposes the inner writer when called from <c>Dispose()</c>, then the base class.</summary>
    protected override void Dispose(bool disposing)
    {
        if (disposing)
        {
            Writer.Dispose();
        }

        base.Dispose(disposing);
    }
}