C#HTML Agility Pack从SQL Server数据库的字符串中加载HTML_C#_Html_.net_Html Agility Pack

C# HTML Agility Pack：从 SQL Server 数据库中的字符串加载 HTML

c# html .net

C#HTML Agility Pack从SQL Server数据库的字符串中加载HTML,c#,html,.net,html-agility-pack,C#,Html,.net,Html Agility Pack,我有一个满是网页的数据库，我想使用HTML agility pack从中提取信息。我已经构建了一个函数，当我从富文本框加载文本时，它可以获取我想要的信息。但是，当我从SQL数据库的字符串中加载HTML时，它无法正确地从节点获取所有文本。因此，没有给我所有的信息，我需要从网页 private static string[] Data(string strWebpage,string strURL, int iID) { //Declair and load

我有一个存满网页的数据库，我想使用 HTML Agility Pack 从中提取信息。我已经编写了一个函数，当我从富文本框加载文本时，它可以获取我想要的信息。但是，当我从 SQL 数据库中的字符串加载 HTML 时，它无法正确地从节点中获取全部文本，因此没有给出我需要从网页中获取的全部信息。

/// <summary>
/// Extracts product fields from the HTML of a product page into a 17-slot string array.
/// </summary>
/// <remarks>
/// Slot layout of the returned array:
/// 0 category path, 1 list price, 2 availability flag ("1"/"0"), 3 language,
/// 4 arrangement, 5 skill level, 6 publisher, 7 catalogue no., 8 description,
/// 9 published date (yyyy-MM-dd, or "0000-00-00" when unparseable), 10 format,
/// 11 pages, 12 ISBN, 13 title, 14 product image URL, 15 source URL, 16 raw page id.
/// Slots whose source node or label is not present in the page are left null
/// (slots 0 and 8 are left as empty strings) instead of throwing.
/// </remarks>
/// <param name="strWebpage">Full HTML of the page to parse.</param>
/// <param name="strURL">URL the page was fetched from; copied to slot 15.</param>
/// <param name="iID">Identifier of the raw page record; copied to slot 16.</param>
/// <returns>The populated 17-element array described above.</returns>
private static string[] Data(string strWebpage,string strURL, int iID)
{
    // Declare and load the HTML Agility Pack document.
    HtmlAgilityPack.HtmlDocument HPD = new HtmlAgilityPack.HtmlDocument();
    HPD.LoadHtml(strWebpage);

    string[] strData = new string[17]; // Return string array

    // Locate the nodes of interest. SelectSingleNode/SelectNodes return null when
    // nothing matches, so every result is null-checked before use below.
    HtmlAgilityPack.HtmlNode HDNA = HPD.DocumentNode.SelectSingleNode("//div[@class='product_info']"); // Top product information
    HtmlAgilityPack.HtmlNode HDNB = HPD.DocumentNode.SelectSingleNode("//table[@width='300px']"); // Bottom product information
    HtmlAgilityPack.HtmlNode HDNC = HPD.DocumentNode.SelectSingleNode("//h2[@class='name']"); // Product title
    HtmlAgilityPack.HtmlNode HDND = HPD.DocumentNode.SelectSingleNode("//div[@class='product_image']"); // Product image URL
    HtmlAgilityPack.HtmlNode HDNE = HPD.DocumentNode.SelectSingleNode("//div[@class='contentwrapper']"); // Product description
    HtmlAgilityPack.HtmlNodeCollection HDNF = HPD.DocumentNode.SelectNodes("//div[@class='conttopright']//a[@class='uponelevel']"); // Product category links

    // Temporary "label<TAB>value" entries gathered from both information sections.
    List<string> strElimination = new List<string>();
    AppendLabelValuePairs(strElimination, HDNA);
    AppendLabelValuePairs(strElimination, HDNB);

    if (HDNC != null)
    {
        strData[13] = HDNC.InnerText.Trim(); // Title
    }

    if (HDND != null)
    {
        // Strip the fixed <img> markup around the src attribute, leaving only the URL.
        strData[14] = HDND.InnerHtml.Replace("\\", "\\\\").Replace("<img id=\"ctl00_ContentPlaceHolder1_ProductImage\" src=\"", "").Replace("\" alt=\"Product Image\" style=\"border-width:0px;\">", "").Trim();
    }

    strData[15] = strURL; // Page source URL
    strData[16] = iID.ToString(); // Raw page id

    // Build the category path as "A-B-C", dropping the leading "Home-" crumb.
    string strCategory = "";
    if (HDNF != null)
    {
        foreach (var vCat in HDNF)
        {
            strCategory += "-" + vCat.InnerText;
        }
    }
    strData[0] = strCategory.Trim('-').Trim().Replace("Home-", "");

    // Extract the description from the content wrapper text.
    strData[8] = HDNE != null ? ExtractDescription(TextToNArray(HDNE.InnerText)) : "";

    // Sort the label/value entries into their array slots.
    foreach (string strInfo in strElimination)
    {
        string[] strParts = strInfo.Split('\t');
        string strValue = strParts[1];

        // Invariant lower-casing keeps the label match independent of the machine's
        // culture (e.g. Turkish 'I' -> dotless 'i' would break "list price*").
        switch (strParts[0].Trim().ToLowerInvariant())
        {
            case "list price*":
                strData[1] = ParseListPrice(strValue).ToString();
                break;
            case "availability":
                // Determine whether the book is available.
                strData[2] = string.Equals(strValue, "available", StringComparison.OrdinalIgnoreCase) ? "1" : "0";
                break;
            case "language":
                strData[3] = strValue;
                break;
            case "arrangement":
                strData[4] = strValue;
                break;
            case "skill level":
                strData[5] = strValue;
                break;
            case "publisher":
                strData[6] = strValue;
                break;
            case "catalogue no.":
                strData[7] = strValue;
                break;
            case "published on":
                // Turn the date into the yyyy-MM-dd format the database understands.
                DateTime dtPublished;
                strData[9] = DateTime.TryParse(strValue, out dtPublished)
                    ? dtPublished.ToString("yyyy-MM-dd")
                    : "0000-00-00"; // Date could not be parsed
                break;
            case "format":
                strData[10] = strValue;
                break;
            case "pages":
                strData[11] = strValue;
                break;
            case "isbn":
                strData[12] = strValue;
                break;
        }
    }

    // Return data found
    return strData;
}

/// <summary>
/// Splits a node's text with TextToNArray and appends each consecutive
/// (label, value) pair to <paramref name="lstPairs"/> as "label\tvalue".
/// A missing node adds nothing; a trailing element without a partner is ignored.
/// </summary>
private static void AppendLabelValuePairs(List<string> lstPairs, HtmlAgilityPack.HtmlNode hnSection)
{
    if (hnSection == null)
    {
        return;
    }

    string[] strBits = TextToNArray(hnSection.InnerText);
    if (strBits == null)
    {
        return;
    }

    // "i + 1 < Length" guards the value lookup when the element count is odd.
    for (int i = 0; i + 1 < strBits.Length; i += 2)
    {
        lstPairs.Add(strBits[i].Trim() + "\t" + strBits[i + 1].Trim());
    }
}

/// <summary>
/// Concatenates (trimmed, as one line) every element that follows a "Description"
/// element up to the next "More Product Details" element or the end of the array.
/// </summary>
private static string ExtractDescription(string[] strLines)
{
    string strDescription = "";
    if (strLines == null)
    {
        return strDescription;
    }

    for (int i = 0; i < strLines.Length; i++)
    {
        if (strLines[i].Trim() == "Description")
        {
            i++;
            // Bounds check stops the scan when the end marker is missing.
            while (i < strLines.Length && strLines[i].Trim() != "More Product Details")
            {
                strDescription += strLines[i].Trim();
                i++;
            }
        }
    }

    return strDescription;
}

/// <summary>
/// Parses a price after skipping a one-character, then a two-character, leading
/// prefix (presumably a currency symbol). Returns 0.0 when neither attempt parses.
/// </summary>
private static double ParseListPrice(string strRaw)
{
    for (int iSkip = 1; iSkip <= 2; iSkip++)
    {
        double dPrice;
        if (strRaw.Length >= iSkip && double.TryParse(strRaw.Substring(iSkip), out dPrice))
        {
            return dPrice;
        }
    }

    return 0.0;
}

private静态字符串[]数据（字符串strWebpage，字符串strURL，int-iID）
{
//声明并加载HTML敏捷包
HtmlAgilityPack.HtmlDocument HPD=新的HtmlAgilityPack.HtmlDocument（）；
加载HTML（strWebpage）；
string[]strData=新字符串[17]；//返回字符串数组
//从html节点获取文本
HtmlAgilityPack.HtmlNode HDNA=HPD.DocumentNode.SelectSingleNode（“//div[@class='product\u info']”）；//顶级产品信息
HtmlAgilityPack.HtmlNode HDNB=HPD.DocumentNode.SelectSingleNode（“//表[@width='300px']”）；//底部产品信息
HtmlAgilityPack.HtmlNode HDNC=HPD.DocumentNode.SelectSingleNode（//h2[@class='name']）；//产品名称
HtmlAgilityPack.HtmlNode HDND=HPD.DocumentNode.SelectSingleNode（“//div[@class='product\u image']”）；//产品URL
HtmlAgilityPack.HtmlNode HDNE=HPD.DocumentNode.SelectSingleNode（“//div[@class='contentwrapper']”）；//产品说明
HtmlAgilityPack.HtmlNodeCollection HDNF=HPD.DocumentNode.SelectNodes（//div[@class='conttopright']//a[@class='uponelevel']）；//获取产品类别
//存储准备好处理的临时数据，并确定是否有用
List strElimination=新列表（）；
字符串[]strBits=TextToNArray（HDNA.InnerText）；
对于（int i=0；i