C# 在html字符串中拼接html标记

C# 在html字符串中拼接html标记,c#,.net,string,c#-4.0,html-agility-pack,C#,.net,String,C# 4.0,Html Agility Pack,我正在使用htmlagility pack删除起始和结束位置的标记,但下面的代码正在从所有位置删除 HTML字符串: <p><br><span>MERV 9 Cartridge<b><br>&nbsp;</b>Prefilters </span><br></p> 我希望结果字符串如下所示 <p><span>MERV 9 Cartridge<b&g

我正在使用htmlagility pack删除起始和结束位置的

标记,但下面的代码正在从所有位置删除

HTML字符串:

 <p><br><span>MERV 9 Cartridge<b><br>&nbsp;</b>Prefilters </span><br></p>
我希望结果字符串如下所示

 <p><span>MERV 9 Cartridge<b><br>&nbsp;</b>Prefilters </span></p>

请任何人帮助我如何删除
br
标记,只在字符串的开头和结尾,而不是在字符串之间,我正在使用HTMLAgility pack library

我不确定您的HTML是否总是在
元素中,或者

元素的数量是否因情况而异。如果它没有不同,并且您可以依赖于外部元素是相同的,那么您可以使用它来获取第一个和最后一个

元素

选项#1-当父元素(
p
在本例中)已知且
br
元素的数量已知时(本例中为3个)

输出:

MERV 9盒式磁带
预滤器


选项#3-考虑第一个和最后一个文本节点的索引,并删除位于“外部”的所有
br
元素。忽略包含空白或全空白值的文本节点

// removes all br tags with an index before the first text node and
// all br tags with an index after the end of the last text node,
// any br tags between are not removed
private string RemoveStartAndEndBrTags(string html)
{
    if (string.IsNullOrEmpty(html)) return html;
    var document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(html);
    var rootNode = document.DocumentNode;
    // get first and last text nodes, excluding any only containing white-space
    var allNonEmptyTextNodes = rootNode.SelectNodes("//text()[not(self::text()[not(normalize-space())])]");
    if (allNonEmptyTextNodes == null || allNonEmptyTextNodes.Count == 0) return html;
    var firstTextNode = allNonEmptyTextNodes[0];
    var lastTextNode = allNonEmptyTextNodes[allNonEmptyTextNodes.Count - 1];
    // get the parent node of the first br element, it will be used when we remove the br elements,
    // this will allow for different parent elements other than the `p` element
    var parentNode = rootNode.SelectSingleNode("//br/parent::*");
    if (parentNode == null) return html;
    var allBrNodes = rootNode.SelectNodes($"//{parentNode.Name}/br");
    foreach (var brNode in allBrNodes)
    {
        if (brNode == null) continue;
        // check index of br nodes against first and last text nodes
        // and remove br nodes that sit outside text nodes
        if (brNode.OuterStartIndex <= firstTextNode.OuterStartIndex
            || brNode.OuterStartIndex >= lastTextNode.OuterStartIndex + lastTextNode.OuterLength)
        { 
            brNode.Remove();
        }
    }
    return document.DocumentNode.OuterHtml;
}
//删除在第一个文本节点和
//在最后一个文本节点结束后具有索引的所有br标记,
//之间的任何br标记都不会被删除
私有字符串RemoveStartAndEndBrTags(字符串html)
{
if(string.IsNullOrEmpty(html))返回html;
var document=new HtmlAgilityPack.HtmlDocument();
document.LoadHtml(html);
var rootNode=document.DocumentNode;
//获取第一个和最后一个文本节点,不包括任何仅包含空白的节点
var allNonEmptyTextNodes=rootNode.SelectNodes(//text()[not(self::text()[not(normalize-space())]])”;
if(allNonEmptyTextNodes==null | | allNonEmptyTextNodes.Count==0)返回html;
var firstTextNode=allNonEmptyTextNodes[0];
var lastTextNode=allNonEmptyTextNodes[allNonEmptyTextNodes.Count-1];
//获取第一个br元素的父节点,当我们删除br元素时将使用该节点,
//这将允许除'p'元素之外的其他父元素
var parentNode=rootNode.SelectSingleNode(“//br/父节点::*”);
if(parentNode==null)返回html;
var allBrNodes=rootNode.SelectNodes($“/{parentNode.Name}/br”);
foreach(所有brnodes中的var brNode)
{
如果(brNode==null)继续;
//对照第一个和最后一个文本节点检查br节点的索引
//并删除位于文本节点外部的br节点
if(brNode.OuterStartIndex=lastTextNode.OuterStartIndex+lastTextNode.OuterLength)
{ 
brNode.Remove();
}
}
return document.DocumentNode.OuterHtml;
}
测试HTML输入:

<p><br><span>MERV 9 Cartridge<b><br>&nbsp;</b>Prefilters </span><br></p>
<p><span>MERV 9 Cartridge<b><br>&nbsp;</b>Prefilters </span></p>
<p><span>MERV 9 <br>Cartridge<b><br>&nbsp;</b>Prefilters </span></p>
<p><span>MERV 9 Cartridge<b><br>&nbsp;</b>Prefilters<br> </span></p>
<p><span>MERV 9 Cartridge<b><br>&nbsp;</b>Prefilters<br></span></p>

MERV 9盒式磁带
预滤器

MERV 9滤芯
预滤器

MERV 9
盒式
预滤器

MERV 9盒式磁带
预滤器

MERV 9盒式磁带
预滤器

测试HTML输出:

<p><span>MERV 9 Cartridge<b><br>&nbsp;</b>Prefilters </span></p>
<p><span>MERV 9 Cartridge<b><br>&nbsp;</b>Prefilters </span></p>
<p><span>MERV 9 <br>Cartridge<b><br>&nbsp;</b>Prefilters </span></p>
<p><span>MERV 9 Cartridge<b><br>&nbsp;</b>Prefilters </span></p
<p><span>MERV 9 Cartridge<b><br>&nbsp;</b>Prefilters</span></p>
MERV 9盒式磁带
预滤器

MERV 9滤芯
预滤器

MERV 9
盒式
预滤器


MERV 9盒带
首先移除的预过滤器
,在检查第一个标签并搜索关闭标签(例如)后,检查此标签的索引并用下一个标签对抗标签的索引,如果closetag是次要的,则
您将删除
。我提到的字符串中不会有结束标记,并且所有标记都是

自己的标记您将阅读文档中的所有行或一行多次?首先阅读所有行,然后识别节点并删除已识别的节点(
br
),但这里我不想删除html字符串之间的标记。我只需要删除字符串开始位置和结束位置的标记(
br
),谢谢您的建议,如果
不存在,并且字符串之间只有
标记,则可能不会放置所有的箱子,并且这会起作用。抱歉,它不适用于此字符串
MERV 9盒式
预滤器
再次抱歉,我正在寻找在字符串中间的<代码> BR>代码>标签,只想在HTML字符串开始时删除<代码> BR<代码>,在HTML结束时,String选项2应该做你想做的事情。它适用于您发布的两个HTML示例。很高兴您能使用它:)
string html = "<p><br><span>MERV 9 Cartridge<b><br>&nbsp;</b>Prefilters </span><br></p>";
// string html = "<p><span>MERV 9 Cartridge<b><br>&nbsp;</b>Prefilters </span></p>";
string outHtml = string.Empty;
var document = new HtmlAgilityPack.HtmlDocument();
document.LoadHtml(html);
var rootNode = document.DocumentNode;
// count all br nodes so we can bypass removal of br if there is only one in HTML
var brNodeCount = rootNode.SelectNodes("//br") == null ? 0 : rootNode.SelectNodes("//br").Count;
// get the parent node of the br element to be used in the xpath when we remove
// the br elements this will allow for different parent elements other than the `p` element
var parentNode = rootNode.SelectSingleNode("//br/parent::*");
// only removes br elements if more than one in HTML, assumes if 1 br element is present it's in the middle and will not be removed
if (brNodeCount > 1)
{ 
    var firstBrNode = rootNode.SelectSingleNode($"//{parentNode.Name}/br[1]");
    var lastBrNode = rootNode.SelectSingleNode($"//{parentNode.Name}/br[last()]");
    firstBrNode?.Remove();
    lastBrNode?.Remove();
}
outHtml = document.DocumentNode.OuterHtml;
// removes all br tags with an index before the first text node and
// all br tags with an index after the end of the last text node,
// any br tags between are not removed
private string RemoveStartAndEndBrTags(string html)
{
    if (string.IsNullOrEmpty(html)) return html;
    var document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(html);
    var rootNode = document.DocumentNode;
    // get first and last text nodes, excluding any only containing white-space
    var allNonEmptyTextNodes = rootNode.SelectNodes("//text()[not(self::text()[not(normalize-space())])]");
    if (allNonEmptyTextNodes == null || allNonEmptyTextNodes.Count == 0) return html;
    var firstTextNode = allNonEmptyTextNodes[0];
    var lastTextNode = allNonEmptyTextNodes[allNonEmptyTextNodes.Count - 1];
    // get the parent node of the first br element, it will be used when we remove the br elements,
    // this will allow for different parent elements other than the `p` element
    var parentNode = rootNode.SelectSingleNode("//br/parent::*");
    if (parentNode == null) return html;
    var allBrNodes = rootNode.SelectNodes($"//{parentNode.Name}/br");
    foreach (var brNode in allBrNodes)
    {
        if (brNode == null) continue;
        // check index of br nodes against first and last text nodes
        // and remove br nodes that sit outside text nodes
        if (brNode.OuterStartIndex <= firstTextNode.OuterStartIndex
            || brNode.OuterStartIndex >= lastTextNode.OuterStartIndex + lastTextNode.OuterLength)
        { 
            brNode.Remove();
        }
    }
    return document.DocumentNode.OuterHtml;
}
<p><br><span>MERV 9 Cartridge<b><br>&nbsp;</b>Prefilters </span><br></p>
<p><span>MERV 9 Cartridge<b><br>&nbsp;</b>Prefilters </span></p>
<p><span>MERV 9 <br>Cartridge<b><br>&nbsp;</b>Prefilters </span></p>
<p><span>MERV 9 Cartridge<b><br>&nbsp;</b>Prefilters<br> </span></p>
<p><span>MERV 9 Cartridge<b><br>&nbsp;</b>Prefilters<br></span></p>
<p><span>MERV 9 Cartridge<b><br>&nbsp;</b>Prefilters </span></p>
<p><span>MERV 9 Cartridge<b><br>&nbsp;</b>Prefilters </span></p>
<p><span>MERV 9 <br>Cartridge<b><br>&nbsp;</b>Prefilters </span></p>
<p><span>MERV 9 Cartridge<b><br>&nbsp;</b>Prefilters </span></p
<p><span>MERV 9 Cartridge<b><br>&nbsp;</b>Prefilters</span></p>