Asp.net 使用C#从html锚标记获取href值_Asp.net_C# 4.0

Asp.net 使用C#从html锚标记获取href值

asp.net c#-4.0

Asp.net 从html锚定标记C获取href值,asp.net,c#-4.0,Asp.net,C# 4.0,如何使用C仅从html锚标记获取href值多谢各位 string ref="<a href="http://www.google.com"></a>"; //i want get result from //string ref like //http://www.google.com 您可以使用HTML解析库，例如。例如： using System; using HtmlAgilityPack; class Program { static void

如何使用C#仅从html锚标记中获取href值？多谢各位。

// "ref" is a reserved C# keyword, so the identifier needs the verbatim prefix (@ref);
// the quotes inside the markup are escaped so the literal is one valid string.
string @ref = "<a href=\"http://www.google.com\"></a>";
// Desired result, extracted from the string above:
// http://www.google.com

您可以使用HTML解析库，例如HtmlAgilityPack。例如：

using System;
using HtmlAgilityPack;

/// <summary>
/// Console sample: parses an HTML fragment with HtmlAgilityPack and prints the
/// href value of every anchor element that carries one.
/// </summary>
class Program
{
    static void Main()
    {
        var doc = new HtmlDocument();
        doc.LoadHtml("<a href=\"http://www.google.com\"></a>");

        // "//a[@href]" searches the whole tree. A bare "a[@href]" is evaluated relative to
        // the document node and therefore only finds anchors that are its direct children.
        var nodes = doc.DocumentNode.SelectNodes("//a[@href]");

        // SelectNodes returns null (not an empty collection) when nothing matches.
        if (nodes == null)
        {
            return;
        }

        foreach (var node in nodes)
        {
            Console.WriteLine(node.Attributes["href"].Value);
        }
    }
}

使用

如果要在不使用HtmlAgilityPack的情况下执行此操作，则可以使用正则表达式：

希望它能帮助你。

如果你既需要锚链接又需要锚文本，可以使用下面的函数：它返回一个字符串列表，其中每一项包含HTML字符串中一个锚的URL及其文本（以分号分隔）。

 /// <summary>
 /// Collects every <c>href</c> value found in <paramref name="htmlString"/> together with
 /// the text of the anchor it appears in.
 /// </summary>
 /// <param name="htmlString">HTML markup to scan. Null or empty input yields an empty list.</param>
 /// <returns>
 /// Distinct entries in order of appearance, each formatted as <c>url;text</c>. The text part
 /// is HTML-decoded and is empty when no surrounding <c>&lt;a ... &lt;/a&gt;</c> pair can be located.
 /// </returns>
 public static List<string> ExtractLinks(string htmlString)
 {
     List<string> list = new List<string>();
     if (string.IsNullOrEmpty(htmlString))
     {
         return list;
     }

     // Inside a character class '|' is a literal pipe, not alternation, so it is left out:
     // otherwise a URL containing '|' would be cut short.
     Regex regex = new Regex("(?:href)=[\"']?(.*?)[\"'>]+", RegexOptions.Singleline | RegexOptions.CultureInvariant);

     // Tracks entries already emitted so de-duplication does not rescan the list each time.
     HashSet<string> seen = new HashSet<string>(StringComparer.Ordinal);

     foreach (Match match in regex.Matches(htmlString))
     {
         Group url = match.Groups[1];

         // Use the position of this match, not the first textual occurrence of the URL,
         // so repeated URLs are paired with their own anchor text.
         string entry = url.Value + ";" + GetAnchorText(htmlString, url.Index); // URL and text, semicolon-separated.

         if (seen.Add(entry))
         {
             list.Add(entry);
         }
     }

     return list;
 }

 /// <summary>
 /// Returns the decoded text between the last '&gt;' and the closing <c>&lt;/a&gt;</c> of the
 /// anchor that starts at the nearest "&lt;a" before <paramref name="hrefIndex"/>, or an empty
 /// string when any of those delimiters is missing.
 /// </summary>
 private static string GetAnchorText(string html, int hrefIndex)
 {
     int anchorStartIndex = html.LastIndexOf("<a", hrefIndex, StringComparison.OrdinalIgnoreCase);
     if (anchorStartIndex < 0)
     {
         return string.Empty;
     }

     int anchorEndIndex = html.IndexOf("</a>", anchorStartIndex, StringComparison.OrdinalIgnoreCase);
     if (anchorEndIndex < 0)
     {
         return string.Empty;
     }

     // The anchor text sits between the last '>' before "</a>" and "</a>" itself.
     int textStartIndex = html.LastIndexOf('>', anchorEndIndex);
     if (textStartIndex < anchorStartIndex)
     {
         return string.Empty;
     }

     string rawText = html.Substring(textStartIndex + 1, anchorEndIndex - textStartIndex - 1);
     return HttpUtility.HtmlDecode(rawText);
 }

您是在解析原始html响应，还是在使用某个库？如何查找锚标记的InnerHTML？注意：必须将前面带有两个斜杠的 //a[@href] 传递给SelectNodes方法才能使其工作。

 // "ref" is a reserved C# keyword, so the identifier needs the verbatim prefix (@ref).
 string @ref = @"<a href=""http://www.google.com"">test</a>";
 // Both quoting styles are wrapped in ONE group so each must follow "<a ... href=".
 // Without the grouping, the right-hand alternative matched any double-quoted string
 // anywhere in the input (e.g. a class="..." attribute).
 var regex = new Regex("<a\\s[^>]*?href\\s*=\\s*(?:'(?<href>[^']*)'|\"(?<href>[^\"]*)\")", RegexOptions.IgnoreCase);
 // SingleOrDefault gives null when no anchor matches and throws InvalidOperationException
 // when the input contains more than one matching anchor.
 var urls = regex.Matches(@ref).OfType<Match>().Select(m => m.Groups["href"].Value).SingleOrDefault();

 /// <summary>
 /// Collects every <c>href</c> value found in <paramref name="htmlString"/> together with
 /// the text of the anchor it appears in.
 /// </summary>
 /// <param name="htmlString">HTML markup to scan. Null or empty input yields an empty list.</param>
 /// <returns>
 /// Distinct entries in order of appearance, each formatted as <c>url;text</c>. The text part
 /// is HTML-decoded and is empty when no surrounding <c>&lt;a ... &lt;/a&gt;</c> pair can be located.
 /// </returns>
 public static List<string> ExtractLinks(string htmlString)
 {
     List<string> list = new List<string>();
     if (string.IsNullOrEmpty(htmlString))
     {
         return list;
     }

     // Inside a character class '|' is a literal pipe, not alternation, so it is left out:
     // otherwise a URL containing '|' would be cut short.
     Regex regex = new Regex("(?:href)=[\"']?(.*?)[\"'>]+", RegexOptions.Singleline | RegexOptions.CultureInvariant);

     // Tracks entries already emitted so de-duplication does not rescan the list each time.
     HashSet<string> seen = new HashSet<string>(StringComparer.Ordinal);

     foreach (Match match in regex.Matches(htmlString))
     {
         Group url = match.Groups[1];

         // Use the position of this match, not the first textual occurrence of the URL,
         // so repeated URLs are paired with their own anchor text.
         string entry = url.Value + ";" + GetAnchorText(htmlString, url.Index); // URL and text, semicolon-separated.

         if (seen.Add(entry))
         {
             list.Add(entry);
         }
     }

     return list;
 }

 /// <summary>
 /// Returns the decoded text between the last '&gt;' and the closing <c>&lt;/a&gt;</c> of the
 /// anchor that starts at the nearest "&lt;a" before <paramref name="hrefIndex"/>, or an empty
 /// string when any of those delimiters is missing.
 /// </summary>
 private static string GetAnchorText(string html, int hrefIndex)
 {
     int anchorStartIndex = html.LastIndexOf("<a", hrefIndex, StringComparison.OrdinalIgnoreCase);
     if (anchorStartIndex < 0)
     {
         return string.Empty;
     }

     int anchorEndIndex = html.IndexOf("</a>", anchorStartIndex, StringComparison.OrdinalIgnoreCase);
     if (anchorEndIndex < 0)
     {
         return string.Empty;
     }

     // The anchor text sits between the last '>' before "</a>" and "</a>" itself.
     int textStartIndex = html.LastIndexOf('>', anchorEndIndex);
     if (textStartIndex < anchorStartIndex)
     {
         return string.Empty;
     }

     string rawText = html.Substring(textStartIndex + 1, anchorEndIndex - textStartIndex - 1);
     return HttpUtility.HtmlDecode(rawText);
 }