C# 使用 Html Agility Pack 获取脚本的输出

C# 使用 Html Agility Pack 获取脚本的输出,c#,html-agility-pack,C#,Html Agility Pack,我有一个问题:下面的脚本会在页面加载时生成电子邮件地址,我想解析出这个地址,该怎么做? <tr> <td align='right' class='generalinfo_left' >Email Address:</td> <td class='generalinfo_right'><script type="text/javascript"> //<![CDATA[ var o3752aaa9bb29d9

我有一个问题:下面的脚本会在页面加载时生成电子邮件地址,我想解析出这个地址,该怎么做?

<tr> 
    <td align='right' class='generalinfo_left' >Email Address:</td> 
    <td class='generalinfo_right'><script type="text/javascript">

//<![CDATA[    
var o3752aaa9bb29d904adeb88838117fd7c = String.fromCharCode(109);var f03de7e643c296e211edddbc3197b33f6 = String.fromCharCode(97);var k7c3bf82468602c0f8dff4950e4b6ff1e = String.fromCharCode(105);var b3eaa633e44451be8df1fa47d75149934 = 'l';var ma2fa16c3a3f532b780aaf0fa5a5b75c6 = 't';var re0c13fc69c03925782867a0540f8c084 = 'o';var j335f1365672123d1fcaf9a83b76f1b7b = String.fromCharCode(58);var f32820e1c54cbc3fa0d418cd1c195eaec = String.fromCharCode(105);var y8c24ea00a7a1edf1c01f794d487697e3 = String.fromCharCode(110);var bcc0ad4f628e703f9ff6e25b87b77ec34 = 'f';var c985c961c7ee85fe6a25d5a66fb421745 = String.fromCharCode(111);var z5ab4e3bdc353d621cea5babcc5dca417 = String.fromCharCode(64);var s4e087167cd0bac466344e72016511172 = String.fromCharCode(97);var re26f6ae180723793af62bc36d5ab2530 = String.fromCharCode(108);var ye1b53d01de118079a38de5e951586731 = 'c';var g9fc5710c9266ce08afbe4da24702dfdd = String.fromCharCode(105);var k5cd5ea1bac40fdbb8b133b7e356809c6 = String.fromCharCode(118);var fcd6e4771e956e270c6897d24ca51c256 = String.fromCharCode(97);var y9d7854a5921fa2be88c8cd72c7e2884e = String.fromCharCode(114);var xa58bea1ecad6fe7d2c736aab1df2df44 = '.';var e4569f6c98804675f7117a84abb0b8d5c = 'c';var o4d2081e2344020922dcb924690c9972e = 'o';var af150185e5eef8ecd8dc1b0a4977c7d55 = String.fromCharCode(109);document.write("<a href='" + o3752aaa9bb29d904adeb88838117fd7c + f03de7e643c296e211edddbc3197b33f6 + k7c3bf82468602c0f8dff4950e4b6ff1e + b3eaa633e44451be8df1fa47d75149934 + ma2fa16c3a3f532b780aaf0fa5a5b75c6 + re0c13fc69c03925782867a0540f8c084 + j335f1365672123d1fcaf9a83b76f1b7b  + f32820e1c54cbc3fa0d418cd1c195eaec + y8c24ea00a7a1edf1c01f794d487697e3 + bcc0ad4f628e703f9ff6e25b87b77ec34 + c985c961c7ee85fe6a25d5a66fb421745 + z5ab4e3bdc353d621cea5babcc5dca417 + s4e087167cd0bac466344e72016511172 + re26f6ae180723793af62bc36d5ab2530 + ye1b53d01de118079a38de5e951586731 + g9fc5710c9266ce08afbe4da24702dfdd + k5cd5ea1bac40fdbb8b133b7e356809c6 + fcd6e4771e956e270c6897d24ca51c256 + 
y9d7854a5921fa2be88c8cd72c7e2884e + xa58bea1ecad6fe7d2c736aab1df2df44 + e4569f6c98804675f7117a84abb0b8d5c + o4d2081e2344020922dcb924690c9972e + af150185e5eef8ecd8dc1b0a4977c7d55  + "'>" + f32820e1c54cbc3fa0d418cd1c195eaec + y8c24ea00a7a1edf1c01f794d487697e3 + bcc0ad4f628e703f9ff6e25b87b77ec34 + c985c961c7ee85fe6a25d5a66fb421745 + z5ab4e3bdc353d621cea5babcc5dca417 + s4e087167cd0bac466344e72016511172 + re26f6ae180723793af62bc36d5ab2530 + ye1b53d01de118079a38de5e951586731 + g9fc5710c9266ce08afbe4da24702dfdd + k5cd5ea1bac40fdbb8b133b7e356809c6 + fcd6e4771e956e270c6897d24ca51c256 + y9d7854a5921fa2be88c8cd72c7e2884e + xa58bea1ecad6fe7d2c736aab1df2df44 + e4569f6c98804675f7117a84abb0b8d5c + o4d2081e2344020922dcb924690c9972e + af150185e5eef8ecd8dc1b0a4977c7d55  + "</a>")

//]]>;

</script></td> 
</tr>
电邮地址:
//;
输出是这样的

<td class="generalinfo_right">
<script type="text/javascript">
same above script plus following Line
</script><a href="mailto:someID@email.com">someID@email.com</a></td>

与上面相同的脚本,再加上下面这一行

这段代码根据字符码点逐个字符地生成电子邮件地址,然后再把这些字符拼接起来。我猜这是为了防止垃圾邮件。视你的具体需求而定,最简单的办法可能是用 jQuery 或类似的工具从链接中提取电子邮件地址,例如 $('a[href^=mailto]').attr('href').substring(7) 之类的写法应该就可以。

我编写了自己的自定义解析器,可以读取脚本并解析其中的电子邮件。代码如下

如果这个代码可以优化或可以写得更整齐,请让我知道

/// <summary>
/// Recovers the e-mail address that an obfuscation script would print through
/// <c>document.write</c>, without executing any JavaScript.
/// </summary>
/// <remarks>
/// The script is expected to declare one variable per character, either as
/// <c>var name = String.fromCharCode(105);</c> or as <c>var name = 'l';</c>, and then to
/// write <c>"&lt;a href='" + ... + "'&gt;" + v1 + v2 + ... + "&lt;/a&gt;"</c>. The variables
/// concatenated between <c>'&gt;</c> and <c>&lt;/a&gt;</c> are the link text, and their
/// decoded values, joined in order, are what this method returns.
/// A character literal that is itself <c>;</c> or <c>=</c> is not supported, because
/// declarations are separated on those characters.
/// </remarks>
/// <param name="EmailScript">Text of the obfuscation script.</param>
/// <returns>The decoded link text (the e-mail address).</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="EmailScript"/> is null.</exception>
/// <exception cref="System.ArgumentOutOfRangeException">
/// The script does not have the expected shape: <c>document.write</c>, the <c>'&gt;</c> or
/// <c>&lt;/a&gt;</c> marker, or the declaration of a referenced variable is missing.
/// </exception>
private string ReadEmail(string EmailScript)
{
    if (EmailScript == null)
    {
        throw new System.ArgumentNullException("EmailScript");
    }

    // Double quotes only delimit the JavaScript string pieces; removing them lets the
    // structural markers ('> and </a>) be located as plain text.
    string script = EmailScript.Replace("\"", "");

    int writeStart = script.IndexOf("document.write", System.StringComparison.Ordinal);
    if (writeStart < 0)
    {
        throw MalformedEmailScript("no document.write call was found");
    }

    // Searching from document.write onwards skips anything that precedes the call.
    int textStart = script.IndexOf("'>", writeStart, System.StringComparison.Ordinal);
    if (textStart < 0)
    {
        throw MalformedEmailScript("the end of the opening <a> tag ('>) was not found");
    }

    int textEnd = script.IndexOf("</a>", textStart, System.StringComparison.Ordinal);
    if (textEnd < 0)
    {
        throw MalformedEmailScript("the closing </a> tag was not found");
    }

    Dictionary<string, string> declarations = ParseScriptVariables(script.Substring(0, writeStart));

    int expressionStart = textStart + 2; // skip the "'>" marker itself
    string linkExpression = script.Substring(expressionStart, textEnd - expressionStart);

    System.Text.StringBuilder email = new System.Text.StringBuilder();
    foreach (string token in linkExpression.Split('+'))
    {
        // Trimming makes the lookup independent of the spacing and line breaks around '+'.
        string variableName = token.Trim();
        if (variableName.Length == 0)
        {
            // The '+' signs next to the markers produce empty tokens.
            continue;
        }

        string rawValue;
        if (!declarations.TryGetValue(variableName, out rawValue))
        {
            throw MalformedEmailScript("variable '" + variableName + "' is used but never declared");
        }

        email.Append(DecodeScriptValue(rawValue));
    }

    return email.ToString();
}

/// <summary>
/// Maps each variable assigned before <c>document.write</c> to the raw text of its initializer.
/// </summary>
private static Dictionary<string, string> ParseScriptVariables(string declarations)
{
    Dictionary<string, string> values = new Dictionary<string, string>(System.StringComparer.Ordinal);

    foreach (string statement in declarations.Split(';'))
    {
        int equalsAt = statement.IndexOf('=');
        if (equalsAt <= 0)
        {
            // Not an assignment (for example the CDATA marker or trailing whitespace).
            continue;
        }

        // The identifier is the last word before '='; this skips "var" and any leading comment text.
        string[] leftSide = statement.Substring(0, equalsAt)
            .Split((char[])null, System.StringSplitOptions.RemoveEmptyEntries);
        if (leftSide.Length == 0)
        {
            continue;
        }

        string name = leftSide[leftSide.Length - 1];
        if (!values.ContainsKey(name))
        {
            // Keep the first declaration when a name is declared more than once.
            values.Add(name, statement.Substring(equalsAt + 1).Trim());
        }
    }

    return values;
}

/// <summary>
/// Turns one initializer, <c>String.fromCharCode(n)</c> or a quoted literal, into its text.
/// </summary>
private string DecodeScriptValue(string rawValue)
{
    const string charCodeCall = "String.fromCharCode(";

    if (rawValue.StartsWith(charCodeCall, System.StringComparison.Ordinal))
    {
        int closeAt = rawValue.IndexOf(')', charCodeCall.Length);
        string code = closeAt < 0
            ? rawValue.Substring(charCodeCall.Length)
            : rawValue.Substring(charCodeCall.Length, closeAt - charCodeCall.Length);
        return GetCharacterFromASCII(code.Trim());
    }

    // A plain literal such as 'l': only the surrounding single quotes need to go.
    return rawValue.Replace("'", "");
}

/// <summary>
/// Builds the exception reported for a script that does not match the expected layout.
/// </summary>
private static System.ArgumentOutOfRangeException MalformedEmailScript(string detail)
{
    // ArgumentOutOfRangeException is what an unchecked Substring call raises for a missing
    // marker, so callers that catch that type keep working; the message now names the cause.
    return new System.ArgumentOutOfRangeException(
        "EmailScript",
        "The e-mail script is not in the expected format: " + detail + ".");
}
 /// <summary>
 /// Converts a decimal character code, given as text, into the corresponding one-character string.
 /// </summary>
 /// <remarks>
 /// Despite the name, the value is passed to <c>char.ConvertFromUtf32</c>, so any code that
 /// method accepts is allowed, not only the ASCII range.
 /// </remarks>
 /// <param name="value">Decimal character code, e.g. <c>"109"</c>.</param>
 /// <returns>The character for <paramref name="value"/>, as a string.</returns>
 /// <exception cref="System.FormatException">
 /// <paramref name="value"/> is not a decimal integer. Failing here keeps a NUL character
 /// from being silently returned for unparsable input.
 /// </exception>
 private string GetCharacterFromASCII(string value)
 {
    int codePoint;

    // Invariant culture: the digits come from JavaScript source, not from user-formatted text.
    bool parsed = int.TryParse(
        value,
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture,
        out codePoint);

    if (!parsed)
    {
        throw new System.FormatException("'" + value + "' is not a decimal character code.");
    }

    return char.ConvertFromUtf32(codePoint);
 }
private string ReadEmail(string EmailScript)
{
  string EncriptedEmail = "";
  string dataPart = "";
  dataPart = EmailScript.Substring(0, EmailScript.IndexOf("document.write")).Replace("//<![CDATA[\r", "").Replace("\"", "").Replace("\r\n","");
  EncriptedEmail = EmailScript.Replace("\"","");
  EncriptedEmail = EncriptedEmail.Substring(EncriptedEmail.IndexOf("'> + "), EncriptedEmail.IndexOf(" + </a>") - EncriptedEmail.IndexOf("'> +")).Replace("'> +", "").Trim();
  string[] requiredVariables = EncriptedEmail.Split('+');
  List<string> ExtractedDataFromRaw = new List<string>();
  string email = "";
  foreach (string variable in requiredVariables)
  {
    string temp = dataPart.Substring(dataPart.IndexOf(variable),dataPart.Length-dataPart.IndexOf(variable)).Replace(" ","");
    string tempValueofVariable = temp.Substring(0, temp.IndexOf(";"));
    tempValueofVariable = tempValueofVariable.Substring(tempValueofVariable.IndexOf("="), tempValueofVariable.Length - temp.IndexOf("=")).Replace("=","");
    if (tempValueofVariable.Contains("String.fromCharCode"))
    {
      tempValueofVariable = GetCharacterFromASCII(tempValueofVariable.Replace("String.fromCharCode(", "").Replace(")", ""));
    }
    ExtractedDataFromRaw.Add(tempValueofVariable.Replace("'",""));
    email += tempValueofVariable.Replace("'", "");
  }
  return email;
}
private string GetCharacterFromASCII(string value)
{
  int result = 0;
  int.TryParse(value, out result);
  return char.ConvertFromUtf32(result);
}

为什么需要这样做?这个脚本本应生成电子邮件地址,可为什么 Html Agility Pack 显示的是脚本本身,而不是生成的电子邮件地址?——这个脚本是为了防止垃圾邮件。我还是想知道,你为什么要绕过它?如果网站不希望你采集这些数据,这样做就显得有些可疑。——我获取它是出于合法目的,不是为了滥发邮件。——“合法的目的”?你是指法律上允许的目的吗?最简单的办法是直接询问数据的所有者,他也许会以你最喜欢的格式把数据提供给你,这样就不必爬取网站了 ;)——我用的是 C# 桌面应用程序,jQuery 能在那里运行吗?