C# 用C格式化Twitter文本（TweetText）#_C#_Asp.net Mvc_Twitter

用 C# 格式化 Twitter 文本（TweetText）

c# asp.net-mvc twitter

C# 用C格式化Twitter文本（TweetText）#,c#,asp.net-mvc,twitter,C#,Asp.net Mvc,Twitter,有没有更好的方法来格式化来自Twitter的文本，以链接超链接、用户名和标签？我所做的是工作，但我知道这可以做得更好。我对替代技术感兴趣。我将此设置为ASP.NET MVC的HTML帮助程序 using System; using System.Collections.Generic; using System.Text.RegularExpressions; using System.Web; using System.Web.Mvc; namespace Acme.Mvc.Extensio

有没有更好的方法来格式化来自 Twitter 的文本，为其中的超链接、用户名和话题标签（hashtag）加上链接？我目前的做法可以工作，但我知道还可以做得更好。我对其他可选的实现方式感兴趣。我把它实现为 ASP.NET MVC 的 HTML 帮助程序（HTML helper）：

using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.Mvc;

namespace Acme.Mvc.Extensions
{

    /// <summary>
    /// HTML helper extensions that turn the plain text of a tweet into HTML by
    /// wrapping hyperlinks, @screen-names and #hashtags in anchor tags.
    /// </summary>
    /// <remarks>
    /// Text outside the recognised tokens is passed through unchanged; this class
    /// does not HTML-encode it. NOTE(review): the <c>&amp;</c> and <c>;</c> allowed
    /// in the name patterns suggest the input is expected to be HTML-entity-encoded
    /// already - confirm against the caller.
    /// </remarks>
    public static class MvcExtensions
    {
        // Each pattern has exactly one capture group; the combined regex below
        // relies on that to number them 1 (url), 2 (screen name), 3 (hashtag).
        const string ScreenNamePattern = @"@([A-Za-z0-9\-_&;]+)";
        const string HashTagPattern = @"#([A-Za-z0-9\-_&;]+)";
        const string HyperLinkPattern = @"(https?://\S+)";

        // One combined pattern, scanned once. The URL alternative comes first so
        // that any '#' or '@' inside a URL is consumed as part of the URL instead
        // of being linked as a hashtag / screen name.
        private static readonly Regex TweetTokenRegex = new Regex(
            HyperLinkPattern + "|" + ScreenNamePattern + "|" + HashTagPattern,
            RegexOptions.Compiled);

        /// <summary>
        /// View helper wrapper around <see cref="FormatTweetText"/>.
        /// </summary>
        /// <param name="helper">The HTML helper being extended (not used).</param>
        /// <param name="text">Raw tweet text.</param>
        /// <returns>The tweet text with links, screen names and hashtags wrapped in anchors.</returns>
        public static string TweetText(this HtmlHelper helper, string text)
        {
            return FormatTweetText(text);
        }

        /// <summary>
        /// Wraps every hyperlink, @screen-name and #hashtag found in
        /// <paramref name="text"/> in an HTML anchor.
        /// </summary>
        /// <param name="text">Raw tweet text; may be null or empty.</param>
        /// <returns>
        /// The formatted text, or an empty string when <paramref name="text"/> is
        /// null or empty.
        /// </returns>
        public static string FormatTweetText(string text)
        {
            if (String.IsNullOrEmpty(text))
            {
                return String.Empty;
            }

            // A single pass over the ORIGINAL text: generated markup is never
            // re-scanned, so anchors cannot end up nested inside an href, and
            // "@bob" can no longer clobber the prefix of "@bobby".
            return TweetTokenRegex.Replace(text, new MatchEvaluator(LinkToken));
        }

        // Builds the anchor markup for one matched token (url, screen name or hashtag).
        private static string LinkToken(Match match)
        {
            if (match.Groups[1].Success)
            {
                var url = match.Groups[1].Value;
                // \S+ can contain a double quote; escape it so the URL cannot
                // terminate the href attribute early. Only the quote is escaped
                // to avoid double-encoding entities already present in the text.
                return String.Format("<a href=\"{0}\">{1}</a>", url.Replace("\"", "&quot;"), url);
            }

            if (match.Groups[2].Success)
            {
                var screenName = match.Groups[2].Value;
                return String.Format("<a href=\"http://twitter.com/{0}\">@{0}</a>", screenName);
            }

            var hashTag = match.Groups[3].Value;
            return String.Format("<a href=\"http://twitter.com/search?q={0}\">#{1}</a>",
                HttpUtility.UrlEncode("#" + hashTag), hashTag);
        }

    }

}

使用系统；
使用System.Collections.Generic；
使用System.Text.RegularExpressions；
使用System.Web；
使用System.Web.Mvc；
命名空间Acme.Mvc.Extensions
{
公共静态类MvcExtensions
{
常量字符串ScreenNamePattern=@“@（[A-Za-z0-9\-\u&]+）”；
常量字符串HashTagPattern=@“#”（[A-Za-z0-9\-&]+）；
常量字符串HyperLinkPattern=@“（http://\S+）\S？”；
公共静态字符串TweetText（此HtmlHelper帮助程序，字符串文本）
{
返回格式文本（text）；
}
公共静态字符串格式TweetText（字符串文本）
{
字符串结果=文本；
if（result.Contains（“http://”）
{
var links=新列表（）；
foreach（Regex.Matches中的匹配（结果，HyperLinkPattern））
{
var url=match.Groups[1]。值；
如果（！links.Contains（url））
{
添加链接（url）；
result=result.Replace（url，String.Format（“，url））；
}
}
}
if（result.Contains（“@”））
{
变量名称=新列表（）；
foreach（Regex.Matches中的匹配（结果、屏幕名称模式））
{
var screenName=match.Groups[1]。值；
如果（！names.Contains（screenName））
{
名称。添加（屏幕名称）；
结果=结果。替换（“@”+屏幕名称，
格式（“，屏幕名称））；
}
}
}
if（result.Contains（“#”）
{
变量名称=新列表（）；
foreach（Regex.Matches中的匹配（结果，HashTagPattern））
{
var hashTag=match.Groups[1]。值；
如果（！names.Contains（hashTag））
{
name.Add（hashTag）；
结果=结果。替换（“#”+标签，
String.Format（“”，
HttpUtility.UrlEncode（“#”+hashTag），hashTag））；
}
}
}
返回结果；
}
}
}

这与我写的在博客上显示我的Twitter状态的代码非常相似。我做的唯一进一步的事情就是

1）查找

@name

并将其替换为

2）一行中的多个

@name

，如果没有逗号，则得到逗号

3）以

@name

开头的推文格式为“To@name:”

我不认为这是一种低效的解析推文的方法，因为推文的格式非常一致（很适合用正则表达式处理），而且在大多数情况下，其速度（毫秒级）是可以接受的

编辑：

放在堆栈溢出答案中有点太长了。它需要一条tweet，比如：

@user1@user2查看我从@user3:#coollinks获得的这个很酷的链接

并将其转化为：

<span class="salutation">
    To <a href="http://twitter.com/user1">Real Name</a>,
    <a href="http://twitter.com/user2">Real Name</a>:
</span> check out this cool link I got from
<span class="salutation">
    <a href="http://www.twitter.com/user3">Real Name</a>
</span>:
<a href="http://site.com/page.htm#anchor">http://site.com/...</a>
<a href="http://twitter.com/#search?q=%23coollinks">#coollinks</a>

这使得tweet抓取程序可以作为JS异步运行，如果Twitter停止或变慢，它将不会影响我的站点的页面加载时间。

我创建了helper方法，将包含url的文本缩短到140个字符。您可以将共享长度设置为0，以从tweet中排除url

 /// <summary>
 /// Shortens <paramref name="text"/> to whole words so that it fits into a
 /// 140-character tweet alongside the share link built from
 /// <paramref name="shareurl"/>.
 /// </summary>
 /// <param name="text">Text to shorten; null or empty yields an empty string.</param>
 /// <param name="shareurl">Path appended to <c>http://url.com/</c> to form the share link.</param>
 /// <returns>
 /// The longest leading run of whole words (joined by single spaces) that fits
 /// the budget, followed by <c>".."</c> when at least one word was dropped.
 /// The share link itself is not appended.
 /// </returns>
 public static string FormatTwitterText(this string text, string shareurl)
    {
        if (string.IsNullOrEmpty(text))
            return string.Empty;

        string sharepath = string.Format("http://url.com/{0}", shareurl);

        // Split on any whitespace so that line breaks separate words instead of
        // gluing the neighbouring words together.
        string[] words = text.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);

        // Reserve room for the ".." marker; presumably the third character is
        // the separator placed before the link - TODO confirm with the caller.
        int extraChars = 3;
        int finalLength = 140 - sharepath.Length - extraChars;

        // Greedily keep leading words while the joined text stays within budget,
        // and stop at the first word that does not fit so the result is always a
        // contiguous prefix of the input.
        int keptWords = 0;
        int keptLength = 0;
        while (keptWords < words.Length)
        {
            int separator = keptWords > 0 ? 1 : 0;
            int candidateLength = keptLength + separator + words[keptWords].Length;
            if (candidateLength > finalLength)
                break;

            keptLength = candidateLength;
            keptWords++;
        }

        string finaltext = string.Join(" ", words, 0, keptWords);

        // Mark truncation whenever any word was dropped.
        return keptWords < words.Length ? finaltext + ".." : finaltext;
    }

公共静态字符串格式TwitterText（此字符串文本，字符串共享URL）
{
if（string.IsNullOrEmpty（text））
返回字符串。空；
string finaltext=string.Empty；
string sharepath=string.Format（“http://url.com/{0}，共享URL）；
//所有单词列表，已修剪并删除新空格
List textlist=text.Split（“”）。选择（txt=>Regex.Replace（txt，@“\n”，“”）。Trim（）
.Where（FormattedText=>！string.IsNullOrEmpty（FormattedText））
.ToList（）；
int extraChars=3；//用于解释两个点“.”
int finalLength=140-共享路径。长度-外部路径；
int runningLengthCount=0；
int collectionCount=textlist.Count；
整数计数=0；
foreach（字符串eachwordformatted in textlist
.Select（eachword=>string.Format（“{0}”，eachword）））
{
计数++；
int textlength=eachwordformatted.Length；
runningLengthCount+=文本长度；
int nextcount=计数+1；
var nextTextlength=nextcountfinalLength？finaltext.Trim（）+“。”：finaltext.Trim（）；
}

有一个很好的资源可以解析Twitter消息。这个链接对我很有用：

如何在C#3.0中解析Twitter用户名、哈希标签和URL

它包含对以下各项的支持：

网址
#标签
@用户名

顺便说一句：ParseURL（）方法中的Regex需要检查，它将股票符号（BARC.L）解析为链接

如果URL有哈希字符，我的代码就有问题。我尝试使用\b定义单词边界，但不起作用。我不确定Django示例在C#中是否适用，但我正在尝试。@Brennan据我所知，Hashtags可以是字母数字。首先捕获URL（这样您就可以捕获带有#的任何URL），然后在未捕获的片段上运行hashtag regex

 /// <summary>
 /// Shortens <paramref name="text"/> to whole words so that it fits into a
 /// 140-character tweet alongside the share link built from
 /// <paramref name="shareurl"/>.
 /// </summary>
 /// <param name="text">Text to shorten; null or empty yields an empty string.</param>
 /// <param name="shareurl">Path appended to <c>http://url.com/</c> to form the share link.</param>
 /// <returns>
 /// The longest leading run of whole words (joined by single spaces) that fits
 /// the budget, followed by <c>".."</c> when at least one word was dropped.
 /// The share link itself is not appended.
 /// </returns>
 public static string FormatTwitterText(this string text, string shareurl)
    {
        if (string.IsNullOrEmpty(text))
            return string.Empty;

        string sharepath = string.Format("http://url.com/{0}", shareurl);

        // Split on any whitespace so that line breaks separate words instead of
        // gluing the neighbouring words together.
        string[] words = text.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);

        // Reserve room for the ".." marker; presumably the third character is
        // the separator placed before the link - TODO confirm with the caller.
        int extraChars = 3;
        int finalLength = 140 - sharepath.Length - extraChars;

        // Greedily keep leading words while the joined text stays within budget,
        // and stop at the first word that does not fit so the result is always a
        // contiguous prefix of the input.
        int keptWords = 0;
        int keptLength = 0;
        while (keptWords < words.Length)
        {
            int separator = keptWords > 0 ? 1 : 0;
            int candidateLength = keptLength + separator + words[keptWords].Length;
            if (candidateLength > finalLength)
                break;

            keptLength = candidateLength;
            keptWords++;
        }

        string finaltext = string.Join(" ", words, 0, keptWords);

        // Mark truncation whenever any word was dropped.
        return keptWords < words.Length ? finaltext + ".." : finaltext;
    }