Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/csharp/272.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C# 有没有比我试图编写代码更容易擦除为html节点设置的所有固定大小的方法?_C#_Html_.net_String_Replace - Fatal编程技术网

C# 有没有比我试图编写代码更容易擦除为html节点设置的所有固定大小的方法?

C# 有没有比我试图编写代码更容易擦除为html节点设置的所有固定大小的方法?,c#,html,.net,string,replace,C#,Html,.net,String,Replace,我有这样的输入字符串: <span id = 'RTF_Text_101' style="font-family:'Arial';font-size:12pt;text-align:left;"> <p lang="en-US" style="margin-top:0pt;margin-bottom:0pt;"><span style="font-size:10pt;">aeqw</span></p> <p lang="en-U

我有这样的输入字符串:

<span id = 'RTF_Text_101'  style="font-family:'Arial';font-size:12pt;text-align:left;">
<p lang="en-US" style="margin-top:0pt;margin-bottom:0pt;"><span style="font-size:10pt;">aeqw</span></p>
<p lang="en-US" style="margin-top:0pt;margin-bottom:0pt;"><span style="font-size:10pt;">qwe</span></p>
<p lang="en-US" style="margin-top:0pt;margin-bottom:0pt;"><span style="font-size:10pt;">qwe</span></p>
<table cellspacing="0" cellpadding="0pt" style="width:498.2pt;border-collapse:collapse;">
<colgroup>
<col width="332" />
<col width="332" />
</colgroup>
<tr align="left" valign="top">
<td style="width:244pt; padding-right:1.8pt; padding-left:1.8pt; border-top: 1pt solid #000000; border-right: 1pt solid #000000; border-bottom: 1pt solid #000000; border-left: 1pt solid #000000;">
<p lang="en-US" style="margin-top:0pt;margin-bottom:0pt;"><span style="font-size:10pt;">asd1</span></p>
</td>
<td style="width:244pt; padding-right:1.8pt; padding-left:1.8pt; border-top: 1pt solid #000000; border-right: 1pt solid #000000; border-bottom: 1pt solid #000000; border-left: 1pt solid #000000;">
<p lang="en-US" style="margin-top:0pt;margin-bottom:0pt;"><span style="font-size:10pt;">asd3</span></p>
</td>
</tr>
<tr align="left" valign="top">
<td style="width:244pt; padding-right:1.8pt; padding-left:1.8pt; border-top: 1pt solid #000000; border-right: 1pt solid #000000; border-bottom: 1pt solid #000000; border-left: 1pt solid #000000;">
<p lang="en-US" style="margin-top:0pt;margin-bottom:0pt;"><span style="font-size:10pt;">asd2</span></p>
</td>
<td style="width:244pt; padding-right:1.8pt; padding-left:1.8pt; border-top: 1pt solid #000000; border-right: 1pt solid #000000; border-bottom: 1pt solid #000000; border-left: 1pt solid #000000;">
<p lang="en-US" style="margin-top:0pt;margin-bottom:0pt;"><span style="font-size:10pt;">asd4</span></p>
</td>
</tr>
</table>
</span>

aeqw

qwe

qwe

asd1

asd3

asd2

asd4

这些是由rtf到html转换工具生成的。问题是,它使表具有固定的大小,而我需要它是动态的

因此,我需要替换这个字符串中属于表相关标记(table、tr、td)的所有大小(宽度、高度)

我开始编写代码:

/// <summary>
/// Strips the <c>width</c> and <c>height</c> settings from every opening
/// <paramref name="tag"/> element found in an HTML fragment.
/// </summary>
/// <param name="str">
/// The HTML fragment. It is replaced with the cleaned fragment when at least one
/// matching tag was changed; otherwise it is left untouched.
/// </param>
/// <param name="tag">Tag name without angle brackets, e.g. <c>table</c>, <c>tr</c> or <c>td</c>.</param>
/// <remarks>
/// The scan is purely textual: a <c>&gt;</c> character inside a quoted attribute value is
/// treated as the end of the tag. Use a real HTML/XML parser when that matters.
/// </remarks>
public void RemoveSizesFromStringForTag(ref string str, string tag)
{
    if (string.IsNullOrEmpty(str) || string.IsNullOrEmpty(tag))
    {
        return;
    }

    string opening = "<" + tag;
    int searchPos = 0; // position from which the next tag is searched

    while (true)
    {
        // Tag names are case-insensitive in HTML.
        int tagStart = str.IndexOf(opening, searchPos, System.StringComparison.OrdinalIgnoreCase);
        if (tagStart < 0)
        {
            break;
        }

        int nameEnd = tagStart + opening.Length;
        int tagEnd = str.IndexOf('>', nameEnd);
        if (tagEnd < 0)
        {
            // Unterminated tag at the end of the fragment: nothing sensible to rewrite.
            break;
        }

        // Reject longer tag names that merely start with the requested one
        // (e.g. "<t" must not match "<table"). nameEnd <= tagEnd, so the index is valid.
        char afterName = str[nameEnd];
        if (!char.IsWhiteSpace(afterName) && afterName != '>' && afterName != '/')
        {
            searchPos = nameEnd;
            continue;
        }

        // Text of the opening tag without its closing '>'.
        string originalPart = str.Substring(tagStart, tagEnd - tagStart);
        string cleanedPart = originalPart;

        RemoveAttributeFromString(ref cleanedPart, "width");
        RemoveAttributeFromString(ref cleanedPart, "height");

        if (!string.Equals(cleanedPart, originalPart, System.StringComparison.Ordinal))
        {
            // Strings are immutable: Remove/Insert return new instances that must be assigned back.
            str = str.Remove(tagStart, originalPart.Length).Insert(tagStart, cleanedPart);
        }

        // Continue right after the (possibly shortened) tag text.
        searchPos = tagStart + cleanedPart.Length;
    }
}

/// <summary>
/// Removes every occurrence of the attribute <paramref name="attr"/> from the text of a
/// single opening tag, whether it is written as an HTML attribute
/// (<c>width=10px</c>, <c>width = '10%'</c>, <c>width="10"</c>) or as a CSS declaration
/// inside a quoted <c>style</c> attribute (<c>style="width:10%; ..."</c>).
/// </summary>
/// <param name="str">Text of one opening tag; replaced with the cleaned text.</param>
/// <param name="attr">Attribute / CSS property name to remove, e.g. <c>width</c>.</param>
/// <remarks>
/// Matching is textual and case-insensitive. Names that merely end with
/// <paramref name="attr"/> (<c>data-width</c>, <c>min-width</c>, <c>line-height</c>) are kept.
/// Text that looks like the attribute inside another attribute's value
/// (e.g. <c>alt="width=5"</c>) is not distinguished from a real attribute.
/// </remarks>
public void RemoveAttributeFromString(ref string str, string attr)
{
    if (string.IsNullOrEmpty(str) || string.IsNullOrEmpty(attr))
    {
        return;
    }

    const System.Text.RegularExpressions.RegexOptions options =
        System.Text.RegularExpressions.RegexOptions.IgnoreCase |
        System.Text.RegularExpressions.RegexOptions.CultureInvariant |
        System.Text.RegularExpressions.RegexOptions.Singleline;

    string name = System.Text.RegularExpressions.Regex.Escape(attr);

    // "name: value;" inside a style value. The look-behind keeps compound properties
    // such as "min-width" intact; the trailing ';' is optional for the last declaration.
    string cssDeclaration = @"(?<![\w-])" + name + @"\s*:[^;]*;?\s*";

    // Step 1: clean the value of every quoted style attribute, keeping its quote character.
    str = System.Text.RegularExpressions.Regex.Replace(
        str,
        @"(?<![\w-])(style\s*=\s*)([""'])(.*?)\2",
        match =>
        {
            string quote = match.Groups[2].Value;
            string cleanedValue = System.Text.RegularExpressions.Regex
                .Replace(match.Groups[3].Value, cssDeclaration, string.Empty, options)
                .TrimEnd();
            return match.Groups[1].Value + quote + cleanedValue + quote;
        },
        options);

    // Step 2: remove the HTML attribute itself (double-quoted, single-quoted or unquoted value),
    // together with the whitespace that precedes it.
    string htmlAttribute = @"\s*(?<![\w-])" + name + @"\s*=\s*(?:""[^""]*""|'[^']*'|[^\s>""']+)";
    str = System.Text.RegularExpressions.Regex.Replace(str, htmlAttribute, string.Empty, options);
}
//此函数的输入参数是一大段 html 以及需要去掉大小设置的标记
public void RemoveSizesFromStringForTag(ref string str, string tag)
{
    int tag_start_index = -1; //当前找到的标记开始索引
    int curr_search_pos = 0; //开始搜索下一个标记的当前位置

    while ((tag_start_index = str.IndexOf("<" + tag, curr_search_pos)) >= 0) //只要还能找到标记
    {
        int tag_end_index = str.IndexOf(">", tag_start_index); //获取它的结束索引
        string temp_part = str.Substring(tag_start_index, tag_end_index - tag_start_index); //截取形如 "<table ... >" 的标记子字符串

        bool replace_needed = false; //用作标志

        if (temp_part.ToLower().Contains("width")) //如果子字符串包含 width
        {
            //未实现
            RemoveAttributeFromString(ref temp_part, "width"); //则从该字符串中删除该属性
            replace_needed = true; //并标记稍后需要替换
        }

        if (temp_part.ToLower().Contains("height"))
        {
            RemoveAttributeFromString(ref temp_part, "height");
            replace_needed = true;
        }

        if (replace_needed) //如果需要替换
        {
            str.Remove(tag_start_index, tag_end_index - tag_start_index); //删除带大小的字符串
            str.Insert(tag_start_index, temp_part); //并插入不带大小的字符串
        }

        curr_search_pos = tag_start_index + temp_part.Length; //修正当前搜索位置
    }
}

public void RemoveAttributeFromString(ref string str, string attr)
{
    int attr_start_index = -1;
    int curr_search_pos = 0;

    while ((attr_start_index = str.IndexOf(attr, curr_search_pos)) >= 0)
    {
        //老实说,我写到这里就停下了,想弄清楚下一步该怎么做,
        //因为可能出现的情况太多,我处理不过来。

        //它可能是:id = 'asd'width='10%'height=5px,可能带空格,可能用 " 引号,也可能通过 style 设置 width:10%
    }
}
写到这里我发现这种方法非常困难,因为大小可以通过多种方式设置(width=10px、width = 10px、width='10px'、id='asd'width="10px" style='…')。情况太多,很难确定要删除的属性的边界。


那么,有没有更简单的方法呢?

这可以理解为格式良好的xml(至少是您发布的示例):

编辑

更容易阅读:

// Elements whose size settings are stripped. Add more names to treat them the same way.
private static readonly string[] TableRelatedTags = { "table", "tr", "td", "th" };

// Attributes that carry a fixed size.
private static readonly string[] SizeAttributes = { "width", "height" };

// Matches a whole "width: ...;" / "height: ...;" CSS declaration. The look-behind keeps
// compound properties (min-width, border-width, line-height) intact, and the trailing ';'
// is optional so the last declaration of a style value is matched too.
private static readonly Regex SizeDeclaration = new Regex(
    @"(?<![\w-])(?:width|height)\s*:[^;]*;?\s*",
    RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

/// <summary>
/// Removes fixed widths and heights from the table-related elements
/// (<c>table</c>, <c>tr</c>, <c>td</c>, <c>th</c>) of a well-formed XML/XHTML fragment.
/// Both the <c>width</c>/<c>height</c> attributes and the corresponding declarations
/// inside <c>style</c> attributes are removed; a <c>style</c> attribute left empty is dropped.
/// </summary>
/// <param name="original">Fragment with a single root element that parses as XML.</param>
/// <returns>The fragment serialized by <see cref="XElement.ToString()"/> after the sizes were removed.</returns>
/// <exception cref="System.Xml.XmlException">The fragment is not well-formed XML.</exception>
private static string RemoveHeightsAndWidths(string original)
{
    XElement element = XElement.Parse(original);

    // DescendantsAndSelf: the root itself may be a <table>. Materialized before editing.
    var tableRelatedElements = element
        .DescendantsAndSelf()
        .Where(e => TableRelatedTags.Contains(e.Name.LocalName, System.StringComparer.OrdinalIgnoreCase))
        .ToList();

    foreach (XElement item in tableRelatedElements)
    {
        XAttribute style = item
            .Attributes()
            .FirstOrDefault(a => string.Equals(a.Name.LocalName, "style", System.StringComparison.OrdinalIgnoreCase));

        if (style != null)
        {
            string cleaned = SizeDeclaration.Replace(style.Value, string.Empty).Trim();
            if (cleaned.Length == 0)
            {
                style.Remove();
            }
            else
            {
                style.Value = cleaned;
            }
        }

        // Extensions.Remove snapshots the sequence, so removing while filtering is safe.
        item.Attributes()
            .Where(a => SizeAttributes.Contains(a.Name.LocalName, System.StringComparer.OrdinalIgnoreCase))
            .Remove();
    }

    return element.ToString();
}

如果你有一个完整的 HTML 文档,最好使用 HtmlAgilityPack……可以通过 NuGet 获取。它更安全,也更容易使用。
@FlorianSchmidinger - 对不起,我不明白你的意思。我只有从 <span> 开始的某些 HTML 片段。
所有宽度和高度都应设置为相同的值吗?
@FlorianSchmidinger - 它们应该被删除,就像从未设置过一样。仅针对与表相关的标记(table、tr、td)。
谢谢。这是一个有趣的解决方案。我会测试它的效果。实际上,大小也可以通过标记属性设置(width=10px)。
您还需要删除 <col> 标记中的 width 属性。
谢谢您指出这一点,不过这更简单:我会直接删除整个 <colgroup>。
看起来它没有从 table 标记中删除这些值:
@Kosmos 更新了该方法,使您可以更清楚地看到发生了什么。现在它只会删除 table/td/tr/th 的内容……