Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/string/5.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C# 查找字符串中出现频率最高的单词_C#_String_Algorithm - Fatal编程技术网

C# 查找字符串中出现频率最高的单词

C# 查找字符串中出现频率最高的单词,c#,string,algorithm,C#,String,Algorithm,我试图找出字符串中出现频率最高的单词,例如:Hello World This is a great world, This World is simply great。根据上面的字符串,我试图计算如下结果: world, 3 great, 2 hello, 1 this, 2 但要忽略长度小于 3 个字符的单词,例如出现了两次的 is。我试过使用 Dictionary<> 键值对,也试过 LINQ 的 GroupBy 扩展方法。我知道解决方案就在这两者之间,但我就是想不出具体的算法以及该如何实现。使用LINQ和Regex Regex.Split("Hello World This is a great world, This World is simply great".ToL

我试图找出字符串中出现频率最高的单词

例如:"Hello World This is a great world, This World is simply great"

根据上面的字符串,我试图计算如下结果:

  • world, 3
  • great, 2
  • hello, 1
  • this, 2
但要忽略长度小于 3 个字符的单词,例如出现了两次的 is。

我试过使用 Dictionary<> 键值对,也试过 LINQ 的 GroupBy 扩展方法。我知道解决方案就在这两者之间,但我就是想不出具体的算法以及该如何实现。

使用LINQ和Regex

// Lower-case the text so "World" and "world" are counted together, split on runs of
// non-word characters (which also strips punctuation), keep words of at least
// 3 characters, then rank the distinct words by how often they occur.
// The projection gives each entry the Word/Count members used when reading the result.
var results = Regex.Split("Hello World This is a great world, This World is simply great".ToLower(), @"\W+")
    .Where(s => s.Length >= 3)
    .GroupBy(s => s)
    .Select(g => new { Word = g.Key, Count = g.Count() })
    .OrderByDescending(r => r.Count)
    .ToList();
要获取出现次数最多的单词,请执行以下操作:

results.First().Word

const string input = "Hello World This is a great world, This World is simply great";

// Split on spaces, keep words of at least 3 characters, and group identical words
// (case-sensitively, punctuation included) so the most frequent group comes first.
var words = input
    .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
    .Where(w => w.Length >= 3)
    .GroupBy(w => w)
    .OrderByDescending(g => g.Count());

// Each element is a group: Key is the word, Count() is how many times it occurred.
foreach (var word in words)
{
    Console.WriteLine("{0}x {1}", word.Count(), word.Key);
}

// 2x World
// 2x This
// 2x great
// 1x Hello
// 1x world,
// 1x simply

这并不完美,因为它没有修剪逗号,但它至少向您展示了如何进行分组和筛选。

我会避免使用 LINQ 和 Regex 之类的工具,因为听起来您是想找到一种算法并真正理解它,而不是调用某个现成的函数来替您完成。

这并不是说那些解决方案无效。它们当然是有效的。

试试这样的

// Maps each word to the number of times it has been seen (keys are case-sensitive).
Dictionary<string, int> dictionary = new Dictionary<string, int>();

string sInput = "Hello World, This is a great World. I love this great World";
sInput = sInput.Replace(",", ""); //Just cleaning up a bit
sInput = sInput.Replace(".", ""); //Just cleaning up a bit
string[] arr = sInput.Split(' '); //Create an array of words

foreach (string word in arr) //let's loop over the words
{
    if (word.Length < 3) //skip anything below our minimum of 3 letters
    {
        continue;
    }

    // One lookup instead of ContainsKey + indexer: count is left at 0 when the
    // word is not in the dictionary yet, so the first occurrence is stored as 1.
    int count;
    dictionary.TryGetValue(word, out count);
    dictionary[word] = count + 1;
}

foreach (KeyValuePair<string, int> pair in dictionary) //loop through the dictionary
{
    Response.Write(string.Format("Key: {0}, Pair: {1}<br />",pair.Key,pair.Value));
}
Dictionary Dictionary=newdictionary();
string sInput=“你好,世界,这是一个伟大的世界,我爱这个伟大的世界”;
sInput=sInput.Replace(“,”,”)//只是清理一下而已
sInput=sInput.Replace(“.”,”)//只是清理一下而已
字符串[]arr=sInput.Split(“”)//创建一个单词数组
foreach(arr中的字符串单词)//让我们在单词上循环
{
if(word.Length>=3)//如果它符合我们至少3个字母的标准
{
if(dictionary.ContainsKey(word))//如果它在字典中
dictionary[word]=dictionary[word]+1;//递增计数
其他的
dictionary[word]=1;//将其放入字典中,计数为1
}
}
foreach(dictionary中的KeyValuePair)//在字典中循环
Write(string.Format(“Key:{0},Pair:{1}
”,Pair.Key,Pair.Value));
我编写了一个字符串处理器类。您可以使用它

例如:

metaKeywords = bodyText.Process(blackListWords: prepositions).OrderByDescending().TakeTop().GetWords().AsString();
类:

 /// <summary>
 /// Extension methods for counting word frequencies in a text and turning the
 /// result into a ranked, printable keyword list.
 /// </summary>
 public static class StringProcessor
{
    // Punctuation and markup fragments removed from every word before it is counted.
    // They are applied in this order, one after another.
    private static readonly string[] NoiseTokens =
    {
        ".", "(", ")", "?", "!", ",", "<br>", ":", ";", "،", "-", "\n"
    };

    /// <summary>
    /// Replaces U+06A9 with U+0643 and U+06CC with U+064A so that both spellings of
    /// these two letters compare equal.
    /// </summary>
    /// <param name="strText">Text to normalize; may be null.</param>
    /// <returns>The normalized text, or an empty string when the input is null or empty.</returns>
    public static string ToNormalString(this string strText)
    {
        if (String.IsNullOrEmpty(strText)) return String.Empty;

        const char normalKaf = '\u0643';     // (char)1603
        const char normalYah = '\u064A';     // (char)1610
        const char nonNormalKaf = '\u06A9';  // (char)1705
        const char nonNormalYah = '\u06CC';  // (char)1740

        return strText.Replace(nonNormalKaf, normalKaf)
                      .Replace(nonNormalYah, normalYah);
    }

    /// <summary>
    /// Splits the text on <paramref name="splitor"/>, cleans each word and counts how
    /// often every remaining word occurs.
    /// </summary>
    /// <param name="bodyText">Text to analyse; null is treated as empty text.</param>
    /// <param name="blackListWords">Words to leave out of the result; null means none.
    /// Entries are lower-cased too when <paramref name="perWordIsLowerCase"/> is true,
    /// so they match the lower-cased words.</param>
    /// <param name="minimumWordLength">Shortest cleaned word that is still counted.</param>
    /// <param name="splitor">Character that separates words.</param>
    /// <param name="perWordIsLowerCase">When true, words are lower-cased (invariant
    /// culture) before counting, which makes the count case-insensitive.</param>
    /// <returns>One pair per distinct word: the word and its number of occurrences.</returns>
    public static List<KeyValuePair<String, Int32>> Process(this String bodyText,
        List<String> blackListWords = null,
        int minimumWordLength = 3,
        char splitor = ' ',
        bool perWordIsLowerCase = true)
    {
        HashSet<String> blackList = BuildBlackList(blackListWords, perWordIsLowerCase);
        Dictionary<String, Int32> wordsDic = new Dictionary<String, Int32>();

        foreach (string word in bodyText.ToNormalString().Split(splitor))
        {
            string normalWord = StripNoise(perWordIsLowerCase ? word.ToLowerInvariant() : word);

            // Empty fragments are never words, whatever minimum the caller passes.
            if (normalWord.Length == 0 || normalWord.Length < minimumWordLength)
            {
                continue;
            }
            if (blackList.Contains(normalWord))
            {
                continue;
            }

            // cnt stays 0 when the word has not been seen yet.
            int cnt;
            wordsDic.TryGetValue(normalWord, out cnt);
            wordsDic[normalWord] = cnt + 1;
        }

        return wordsDic.ToList();
    }

    /// <summary>
    /// Returns a new list sorted in descending order.
    /// </summary>
    /// <param name="list">Word/frequency pairs to sort.</param>
    /// <param name="isBasedOnFrequency">True to sort by frequency, false to sort by word.</param>
    /// <returns>A sorted copy; <paramref name="list"/> itself is not modified.</returns>
    public static List<KeyValuePair<String, Int32>> OrderByDescending(this List<KeyValuePair<String, Int32>> list, bool isBasedOnFrequency = true)
    {
        return isBasedOnFrequency
            ? list.OrderByDescending(q => q.Value).ToList()
            : list.OrderByDescending(q => q.Key).ToList();
    }

    /// <summary>
    /// Returns the first <paramref name="n"/> entries of the list.
    /// </summary>
    /// <param name="list">Word/frequency pairs, usually already sorted.</param>
    /// <param name="n">Maximum number of entries to keep.</param>
    /// <returns>A new list with at most <paramref name="n"/> entries.</returns>
    public static List<KeyValuePair<String, Int32>> TakeTop(this List<KeyValuePair<String, Int32>> list, Int32 n = 10)
    {
        return list.Take(n).ToList();
    }

    /// <summary>
    /// Extracts the words (keys) of the pairs, keeping their order.
    /// </summary>
    /// <param name="list">Word/frequency pairs.</param>
    /// <returns>The words only.</returns>
    public static List<String> GetWords(this List<KeyValuePair<String, Int32>> list)
    {
        return list.Select(item => item.Key).ToList();
    }

    /// <summary>
    /// Extracts the frequencies (values) of the pairs, keeping their order.
    /// </summary>
    /// <param name="list">Word/frequency pairs.</param>
    /// <returns>The frequencies only.</returns>
    public static List<Int32> GetFrequency(this List<KeyValuePair<String, Int32>> list)
    {
        return list.Select(item => item.Value).ToList();
    }

    /// <summary>
    /// Joins the items into one string, placing the separator between items only
    /// (no trailing separator).
    /// </summary>
    /// <param name="list">Items to join.</param>
    /// <param name="seprator">Text placed between two consecutive items.</param>
    /// <returns>The joined text, or an empty string for an empty list.</returns>
    public static String AsString<T>(this List<T> list, string seprator = ", ")
    {
        return string.Join(seprator, list);
    }

    // Normalizes the black list the same way the words are normalized and stores it
    // in a set so that each membership test is a single hash lookup.
    private static HashSet<String> BuildBlackList(List<String> blackListWords, bool lowerCase)
    {
        HashSet<String> result = new HashSet<String>();
        if (blackListWords == null) return result;

        foreach (string entry in blackListWords)
        {
            string normalized = entry.ToNormalString();
            result.Add(lowerCase ? normalized.ToLowerInvariant() : normalized);
        }
        return result;
    }

    // Removes every noise token from the word and trims surrounding whitespace.
    private static string StripNoise(string word)
    {
        foreach (string token in NoiseTokens)
        {
            word = word.Replace(token, "");
        }
        return word.Trim();
    }
}
公共静态类StringProcessor
{
私有静态列表介词列表;
公共静态字符串ToNormalString(此字符串strText)
{
if(String.IsNullOrEmpty(strText))返回String.Empty;
char chNormalKaf=(char)1603;
char chNormalYah=(char)1610;
char chNonNormalKaf=(char)1705;
char chNonNormalYah=(char)1740;
字符串结果=strText.Replace(chNonNormalKaf,chNormalKaf);
结果=结果.替换(chNonNormalYah,chNormalYah);
返回结果;
}
公共静态列表进程(此字符串为bodyText,
List blackListWords=null,
int minimumWordLength=3,
字符拆分器=“”,
bool perWordIsLowerCase=true)
{
字符串[]btArray=bodyText.ToNormalString().Split(拆分器);
long numberOfWords=btArray.LongLength;
字典单词dic=新字典(1);
foreach(数组中的字符串字)
{
if(word!=null)
{
字符串下限=字;
if(perWordIsLowerCase)
lowerWord=单词.ToLower();
var normalWord=lowerWord.Replace(“.”,“”)。Replace(“(”,“”)。Replace(“),“”)
.Replace(“?”,“”)。Replace(“!”,“”)。Replace(“,”,“”)
.Replace(“
”,“”)。Replace(“:”,“”)。Replace(“;”,“”) .Replace(“،”和“).Replace(“-”和“).Replace(“\n”和“).Trim(); if((normalWord.Length>minimumWordLength&!normalWord.IsMemberOfBlackListWords(黑名单字))) { if(wordsDic.ContainsKey(normalWord)) { var cnt=wordsDic[normalWord]; wordsDic[normalWord]=++cnt; } 其他的 { wordsDic.Add(normalWord,1); } } } } 列表关键字=wordsDic.ToList(); 返回关键字; } 公共静态列表OrderByDescending(此列表,bool isBasedOnFrequency=true) { 列表结果=空; 如果(IsBasedOn频率) 结果=list.OrderByDescending(q=>q.Value).ToList(); 其他的 结果=list.OrderByDescending(q=>q.Key).ToList(); 返回结果; } 公共静态列表TakeTop(此列表,Int32 n=10) { 列表结果=List.Take(n.ToList(); 返回结果; } 公共静态列表GetWords(此列表) { 列表结果=新列表(); foreach(列表中的变量项) { 结果.添加(项.键); } 返回结果; } 公共静态列表GetFrequency(此列表) { 列表结果=新列表(); foreach(列表中的变量项) { 结果.增加(项目.值); } 返回结果; } 公共静态字符串AsString(此列表,字符串seperator=“,”) { 字符串结果=String.Empty; foreach(列表中的变量项) { result+=string.Format(“{0}{1}”,项,seperator); } 返回结果; } 私有静态bool IsMemberOfBlackListWords(此字符串字,列表黑名单字) { 布尔结果=假; if(blackListWords==null)返回false; foreach(blackListWords中的var w) { if(w.ToNormalString().Equals(word)) { 结果=真; 打破 } } 返回结果; } }
您应该能够使用Linq完成此操作

 // Group the space-separated tokens of actualString by their exact text and
 // report how many times each distinct token occurs.
            var occurrences = actualString.Split(' ').GroupBy(token => token);
            foreach (var occurrence in occurrences)
            {
                int times = occurrence.Count();
                Console.WriteLine("This " + occurrence.Key + " appeared " + times + "  times in a string.");
            }


这可以用许多不同的方法来解决。仅供参考。

这与此类似:@dasblinkenlight-不,这不是家庭作业,我正在尝试提取元关键字并将每个记录保存在数据库中。答案很好,但我不推荐此解决方案,它会
 /// <summary>
 /// Extension methods for counting word frequencies in a text and turning the
 /// result into a ranked, printable keyword list.
 /// </summary>
 public static class StringProcessor
{
    // Punctuation and markup fragments removed from every word before it is counted.
    // They are applied in this order, one after another.
    private static readonly string[] NoiseTokens =
    {
        ".", "(", ")", "?", "!", ",", "<br>", ":", ";", "،", "-", "\n"
    };

    /// <summary>
    /// Replaces U+06A9 with U+0643 and U+06CC with U+064A so that both spellings of
    /// these two letters compare equal.
    /// </summary>
    /// <param name="strText">Text to normalize; may be null.</param>
    /// <returns>The normalized text, or an empty string when the input is null or empty.</returns>
    public static string ToNormalString(this string strText)
    {
        if (String.IsNullOrEmpty(strText)) return String.Empty;

        const char normalKaf = '\u0643';     // (char)1603
        const char normalYah = '\u064A';     // (char)1610
        const char nonNormalKaf = '\u06A9';  // (char)1705
        const char nonNormalYah = '\u06CC';  // (char)1740

        return strText.Replace(nonNormalKaf, normalKaf)
                      .Replace(nonNormalYah, normalYah);
    }

    /// <summary>
    /// Splits the text on <paramref name="splitor"/>, cleans each word and counts how
    /// often every remaining word occurs.
    /// </summary>
    /// <param name="bodyText">Text to analyse; null is treated as empty text.</param>
    /// <param name="blackListWords">Words to leave out of the result; null means none.
    /// Entries are lower-cased too when <paramref name="perWordIsLowerCase"/> is true,
    /// so they match the lower-cased words.</param>
    /// <param name="minimumWordLength">Shortest cleaned word that is still counted.</param>
    /// <param name="splitor">Character that separates words.</param>
    /// <param name="perWordIsLowerCase">When true, words are lower-cased (invariant
    /// culture) before counting, which makes the count case-insensitive.</param>
    /// <returns>One pair per distinct word: the word and its number of occurrences.</returns>
    public static List<KeyValuePair<String, Int32>> Process(this String bodyText,
        List<String> blackListWords = null,
        int minimumWordLength = 3,
        char splitor = ' ',
        bool perWordIsLowerCase = true)
    {
        HashSet<String> blackList = BuildBlackList(blackListWords, perWordIsLowerCase);
        Dictionary<String, Int32> wordsDic = new Dictionary<String, Int32>();

        foreach (string word in bodyText.ToNormalString().Split(splitor))
        {
            string normalWord = StripNoise(perWordIsLowerCase ? word.ToLowerInvariant() : word);

            // Empty fragments are never words, whatever minimum the caller passes.
            if (normalWord.Length == 0 || normalWord.Length < minimumWordLength)
            {
                continue;
            }
            if (blackList.Contains(normalWord))
            {
                continue;
            }

            // cnt stays 0 when the word has not been seen yet.
            int cnt;
            wordsDic.TryGetValue(normalWord, out cnt);
            wordsDic[normalWord] = cnt + 1;
        }

        return wordsDic.ToList();
    }

    /// <summary>
    /// Returns a new list sorted in descending order.
    /// </summary>
    /// <param name="list">Word/frequency pairs to sort.</param>
    /// <param name="isBasedOnFrequency">True to sort by frequency, false to sort by word.</param>
    /// <returns>A sorted copy; <paramref name="list"/> itself is not modified.</returns>
    public static List<KeyValuePair<String, Int32>> OrderByDescending(this List<KeyValuePair<String, Int32>> list, bool isBasedOnFrequency = true)
    {
        return isBasedOnFrequency
            ? list.OrderByDescending(q => q.Value).ToList()
            : list.OrderByDescending(q => q.Key).ToList();
    }

    /// <summary>
    /// Returns the first <paramref name="n"/> entries of the list.
    /// </summary>
    /// <param name="list">Word/frequency pairs, usually already sorted.</param>
    /// <param name="n">Maximum number of entries to keep.</param>
    /// <returns>A new list with at most <paramref name="n"/> entries.</returns>
    public static List<KeyValuePair<String, Int32>> TakeTop(this List<KeyValuePair<String, Int32>> list, Int32 n = 10)
    {
        return list.Take(n).ToList();
    }

    /// <summary>
    /// Extracts the words (keys) of the pairs, keeping their order.
    /// </summary>
    /// <param name="list">Word/frequency pairs.</param>
    /// <returns>The words only.</returns>
    public static List<String> GetWords(this List<KeyValuePair<String, Int32>> list)
    {
        return list.Select(item => item.Key).ToList();
    }

    /// <summary>
    /// Extracts the frequencies (values) of the pairs, keeping their order.
    /// </summary>
    /// <param name="list">Word/frequency pairs.</param>
    /// <returns>The frequencies only.</returns>
    public static List<Int32> GetFrequency(this List<KeyValuePair<String, Int32>> list)
    {
        return list.Select(item => item.Value).ToList();
    }

    /// <summary>
    /// Joins the items into one string, placing the separator between items only
    /// (no trailing separator).
    /// </summary>
    /// <param name="list">Items to join.</param>
    /// <param name="seprator">Text placed between two consecutive items.</param>
    /// <returns>The joined text, or an empty string for an empty list.</returns>
    public static String AsString<T>(this List<T> list, string seprator = ", ")
    {
        return string.Join(seprator, list);
    }

    // Normalizes the black list the same way the words are normalized and stores it
    // in a set so that each membership test is a single hash lookup.
    private static HashSet<String> BuildBlackList(List<String> blackListWords, bool lowerCase)
    {
        HashSet<String> result = new HashSet<String>();
        if (blackListWords == null) return result;

        foreach (string entry in blackListWords)
        {
            string normalized = entry.ToNormalString();
            result.Add(lowerCase ? normalized.ToLowerInvariant() : normalized);
        }
        return result;
    }

    // Removes every noise token from the word and trims surrounding whitespace.
    private static string StripNoise(string word)
    {
        foreach (string token in NoiseTokens)
        {
            word = word.Replace(token, "");
        }
        return word.Trim();
    }
}
 // Group the space-separated tokens of actualString by their exact text and
 // report how many times each distinct token occurs.
            var occurrences = actualString.Split(' ').GroupBy(token => token);
            foreach (var occurrence in occurrences)
            {
                int times = occurrence.Count();
                Console.WriteLine("This " + occurrence.Key + " appeared " + times + "  times in a string.");
            }