如何在c#中找出句子中的下一个单词?

如何在c#中找出句子中的下一个单词?,c#,.net,regex,C#,.net,Regex,我有一根绳子 “球棒和球不是笔或船不是电话” 我想选择与不相邻的单词 例如--“非笔”、“非电话” 但我做不到?我试图通过使用索引和子字符串来提取单词,但这是不可能的 tempTerm = tempTerm.Trim().Substring(0, tempTerm.Length - (orterm.Length + 1)).ToString(); 我想说的是,首先将字符串拆分成一个数组——这将使这类事情变得更加简单。string[]parts=myStr.Split(“”); Strin

我有一个字符串

“bat and ball not pen or boat not phone”

我想选出紧跟在“not”后面的单词

例如:“not pen”、“not phone”

但我做不到。我试图通过使用索引和子字符串来提取单词,但没有成功:

   tempTerm = tempTerm.Trim().Substring(0, tempTerm.Length - (orterm.Length + 1)).ToString();

我想说的是,首先将字符串拆分成一个数组——这将使这类事情变得更加简单。

string[]parts=myStr.Split(“”);
// Break the sentence into space-separated tokens.
String[] parts = myStr.Split(' ');

// Walk the tokens starting at the second one and look back one position:
// whenever the previous token is "not", the current token is the word we want.
for (int pos = 1; pos < parts.Length; pos++)
{
    if (parts[pos - 1] == "not")
    {
        someList.Add(parts[pos]);
    }
}
对于(int i=0;i

这样就能找出所有紧跟在“not”后面的单词;如有需要,也可以改用不区分大小写的比较。

你可以拆分句子,然后循环查找“not”:

string-sense=“球棒和球不是笔或船不是电话”;
string[]words=句子.Split(新字符[]{''});
List wordsBesideNot=新列表();
for(int i=0;i
可以使用正则表达式(Regex),

大致如下:

// Sample sentence to scan.
string s = "bat and ball not pen or boat not phone";

// \b anchors "not" at a word boundary so that words merely ending in "not"
// (for example "cannot") are not treated as the marker word, and \s+ accepts
// any run of white space between "not" and the word that follows it.
Regex reg = new Regex(@"\bnot\s+\w+");
MatchCollection matches = reg.Matches(s);
foreach (Match match in matches)
{
    // Each match is the whole phrase, marker word included, e.g. "not pen".
    string sub = match.Value;
}

有关更多详细信息,请参见正则表达式的相关文档。

您可以使用此正则表达式:
not\s\w+\b
它将匹配所需的短语:

  • not pen
  • not phone

在 C# 中,我会这样做:

            // Original string. It is only read, never modified, so no copy is needed.
            string s = "bat and ball not pen or boat not phone";

            // Separator: the marker word followed by a single space.
            string seperate = "not ";

            // Length of the separator
            int length = seperate.Length;

            // Collects the word that follows each occurrence of the separator.
            List<string> stringList = new List<string>();

            // Position in s from which the next separator is searched.
            int searchFrom = 0;

            while (searchFrom < s.Length)
            {
                // Ordinal search: the separator is a fixed token, not linguistic text.
                int index = s.IndexOf(seperate, searchFrom, System.StringComparison.Ordinal);
                if (index == -1)
                {
                    break;
                }

                // Ignore a "not " that is only the tail of a longer word (e.g. "knot ").
                if (index > 0 && s[index - 1] != ' ')
                {
                    searchFrom = index + 1;
                    continue;
                }

                // Step over the separator and any additional spaces after it.
                int wordStart = index + length;
                while (wordStart < s.Length && s[wordStart] == ' ')
                {
                    wordStart++;
                }

                // The word runs up to the next space, or to the end of the string.
                int wordEnd = s.IndexOf(' ', wordStart);
                if (wordEnd == -1)
                {
                    wordEnd = s.Length;
                }

                // A trailing separator has no word after it; do not store an empty entry.
                if (wordEnd > wordStart)
                {
                    stringList.Add(s.Substring(wordStart, wordEnd - wordStart));
                }

                // Resume at the extracted word itself, so that a word which is itself
                // "not" is still recognised as the next separator.
                searchFrom = wordStart;
            }
    
    //原始字符串
    string s=“球棒和球不是笔或船不是电话”;
    //分离器
    字符串separate=“not”;
    //分离器长度
    整数长度=分开的长度;
    //这样你就不会碰原始字符串了
    string=s.ToString();
    //列表来存储单词,如果
    //你数一数“不是”。
    List stringList=新列表();
    //而分隔符(not)存在于字符串中
    while(透视索引of(单独)!=-1)
    {
    //下一个分离器的索引
    int index=0.IndexOf(单独);
    //移除分离器和分离器前的任何东西
    //分离器本身。
    透视=透视子串(索引+长度);
    //如果有多个空格,请删除它们。
    透视=透视。修剪开始(“”);
    //如果有更多的空格或更多的单词
    //然后指定长度
    如果(指数(“”)!=-1)
    {
    //把这个词删掉
    string sub=scope.Substring(0,scope.IndexOf(“”));
    //将单词添加到列表中
    stringList.Add(sub);
    }
    //否则,只需获取字符串的其余部分
    其他的
    {
    //把这个词删掉
    string sub=0.Substring(0);
    //将单词添加到列表中
    stringList.Add(sub);
    }
    }
    int p=0;
    
    列表中的单词是 pen 和 phone。当字符串中出现特殊字符、句号等标点时,此方法将会失败。如果您事先不知道字符串的构成方式,则可能需要更复杂的处理。

    公共类StringHelper
    
    public class StringHelper
    {
        /// <summary>
        /// Finds <paramref name="word"/> in <paramref name="text"/> and returns it together with
        /// up to <paramref name="prev"/> words before it and up to <paramref name="next"/> words after it.
        /// </summary>
        /// <param name="text">The text to search. It is split into words on white space.</param>
        /// <param name="word">The word to look for. It must equal a whole word of the text exactly (case-sensitive).</param>
        /// <param name="prev">The number of preceding words to include. Fewer are returned when the match is close to the start; values below zero are treated as zero.</param>
        /// <param name="next">The number of following words to include. Fewer are returned when the match is close to the end; values below zero are treated as zero.</param>
        /// <param name="all"><c>true</c> to report every occurrence of the word; <c>false</c> to report only the first one.</param>
        /// <returns>A list with one space-joined phrase per reported occurrence, in order of appearance. The list is empty when the word does not occur.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
        public static List<string> GetSurroundingWords(string text, string word, int prev, int next, bool all = false)
        {
            if (text == null)
            {
                throw new ArgumentNullException(nameof(text));
            }

            var phrases = new List<string>();
            var words = text.Split();

            var index = -1;
            while ((index = Array.IndexOf(words, word, index + 1)) != -1)
            {
                // Clamp per occurrence, without touching the parameters, so that the
                // limits of one match never leak into the next one.
                var before = Math.Max(0, Math.Min(prev, index));

                // Only words.Length - 1 - index words follow the match.
                var after = Math.Max(0, Math.Min(next, words.Length - 1 - index));

                phrases.Add(string.Join(" ", words, index - before, before + 1 + after));

                if (!all)
                {
                    break;
                }
            }

            return phrases;
        }
    }
    
    [TestClass]
    public class StringHelperTests
    {
        // Sentence shared by all tests: "class" occurs once in it and "and" occurs twice.
        private const string Text = "Date and Time in C# are handled by DateTime class in C# that provides properties and methods to format dates in different datetime formats.";

        /// <summary>One word before and two words after a single match.</summary>
        [TestMethod]
        public void GetSurroundingWords()
        {
            string[] wanted = { "DateTime class in C#" };

            List<string> found = StringHelper.GetSurroundingWords(Text, "class", 1, 2);

            Assert.AreEqual(wanted.Length, found.Count);
            Assert.AreEqual(wanted[0], found[0]);
        }

        /// <summary>A search word that is not in the text produces no phrases.</summary>
        [TestMethod]
        public void GetSurroundingWords_NoMatch()
        {
            List<string> found = StringHelper.GetSurroundingWords(Text, "classify", 1, 2);

            Assert.AreEqual(0, found.Count);
        }

        /// <summary>Asking for 50 preceding words returns only the nine that exist.</summary>
        [TestMethod]
        public void GetSurroundingWords_MoreSurroundingWordsThanAvailable()
        {
            const string wanted = "Date and Time in C# are handled by DateTime class in C#";

            string found = StringHelper.GetSurroundingWords(Text, "class", 50, 2)[0];

            Assert.AreEqual(wanted.Length, found.Length);
            Assert.AreEqual(wanted, found);
        }

        /// <summary>With no surrounding words requested, only the search word comes back.</summary>
        [TestMethod]
        public void GetSurroundingWords_ZeroSurroundingWords()
        {
            const string wanted = "class";

            string found = StringHelper.GetSurroundingWords(Text, "class", 0, 0)[0];

            Assert.AreEqual(wanted.Length, found.Length);
            Assert.AreEqual(wanted, found);
        }

        /// <summary>With <c>all</c> set, every occurrence of the search word is reported in order.</summary>
        [TestMethod]
        public void GetSurroundingWords_AllInstancesOfSearchWord()
        {
            string[] wanted = { "Date and Time", "properties and methods" };

            List<string> found = StringHelper.GetSurroundingWords(Text, "and", 1, 1, true);

            Assert.AreEqual(wanted.Length, found.Count);
            Assert.AreEqual(wanted[0], found[0]);
            Assert.AreEqual(wanted[1], found[1]);
        }
    }
    
    { /// ///获取给定文本中给定单词的周围单词。 /// ///要在其中搜索给定单词的文本。 ///要在给定文本中搜索的单词。 ///要包含在结果中的先前单词数。 ///要包含在结果中的下一个单词的数目。 ///设置该方法是否返回搜索词的所有实例。 ///由部分文本组成的数组,包括搜索词和周围的词。 公共静态列表GetSurroundingWords(字符串文本、字符串单词、int-prev、int-next、bool-all=false) { var短语=新列表(); var words=text.Split(); var索引=新列表(); var指数=-1; while((index=Array.IndexOf(words,word,index+1))!=-1) { 索引。添加(索引); 如果(!all&&index.Count==1) 打破 } foreach(指数中的var ind) { var-prevActual=ind; 如果(上一个>上一个实际) prev=prev实际值; var nextActual=单词。长度-索引; 如果(下一步>下一步) 下一步=下一步; var picked=新列表();
    对于(var i=1;听起来你有一个合理的解决方案(使用索引和子字符串),但你就是无法使该解决方案起作用。发布你为该解决方案尝试的代码,有人可以帮助指出错误。你是否还想在“not pen”、“not phone”中包含“not”一词是的,看看其他更优雅的解决方案,我觉得有点尴尬(是的,在内核调试了12个小时后,不管怎么说,这都会让你看起来很痛苦,以防你对Split方法有强烈的仇恨。即使是Regex。这也会产生一堆新的字符串对象。别误会,它可以工作,但哎哟。公平地说,separate和scope是不需要的。我只是决定使用一个变量(separate)不是像其他方法那样插入纯文本。但是,是的,我知道…@astander:-谢谢,这是一个非常好的答案
    public class StringHelper
    {
        /// <summary>
        /// Finds <paramref name="word"/> in <paramref name="text"/> and returns it together with
        /// up to <paramref name="prev"/> words before it and up to <paramref name="next"/> words after it.
        /// </summary>
        /// <param name="text">The text to search. It is split into words on white space.</param>
        /// <param name="word">The word to look for. It must equal a whole word of the text exactly (case-sensitive).</param>
        /// <param name="prev">The number of preceding words to include. Fewer are returned when the match is close to the start; values below zero are treated as zero.</param>
        /// <param name="next">The number of following words to include. Fewer are returned when the match is close to the end; values below zero are treated as zero.</param>
        /// <param name="all"><c>true</c> to report every occurrence of the word; <c>false</c> to report only the first one.</param>
        /// <returns>A list with one space-joined phrase per reported occurrence, in order of appearance. The list is empty when the word does not occur.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
        public static List<string> GetSurroundingWords(string text, string word, int prev, int next, bool all = false)
        {
            if (text == null)
            {
                throw new ArgumentNullException(nameof(text));
            }

            var phrases = new List<string>();
            var words = text.Split();

            var index = -1;
            while ((index = Array.IndexOf(words, word, index + 1)) != -1)
            {
                // Clamp per occurrence, without touching the parameters, so that the
                // limits of one match never leak into the next one.
                var before = Math.Max(0, Math.Min(prev, index));

                // Only words.Length - 1 - index words follow the match.
                var after = Math.Max(0, Math.Min(next, words.Length - 1 - index));

                phrases.Add(string.Join(" ", words, index - before, before + 1 + after));

                if (!all)
                {
                    break;
                }
            }

            return phrases;
        }
    }
    
    [TestClass]
    public class StringHelperTests
    {
        // Sentence shared by all tests: "class" occurs once in it and "and" occurs twice.
        private const string Text = "Date and Time in C# are handled by DateTime class in C# that provides properties and methods to format dates in different datetime formats.";

        /// <summary>One word before and two words after a single match.</summary>
        [TestMethod]
        public void GetSurroundingWords()
        {
            string[] wanted = { "DateTime class in C#" };

            List<string> found = StringHelper.GetSurroundingWords(Text, "class", 1, 2);

            Assert.AreEqual(wanted.Length, found.Count);
            Assert.AreEqual(wanted[0], found[0]);
        }

        /// <summary>A search word that is not in the text produces no phrases.</summary>
        [TestMethod]
        public void GetSurroundingWords_NoMatch()
        {
            List<string> found = StringHelper.GetSurroundingWords(Text, "classify", 1, 2);

            Assert.AreEqual(0, found.Count);
        }

        /// <summary>Asking for 50 preceding words returns only the nine that exist.</summary>
        [TestMethod]
        public void GetSurroundingWords_MoreSurroundingWordsThanAvailable()
        {
            const string wanted = "Date and Time in C# are handled by DateTime class in C#";

            string found = StringHelper.GetSurroundingWords(Text, "class", 50, 2)[0];

            Assert.AreEqual(wanted.Length, found.Length);
            Assert.AreEqual(wanted, found);
        }

        /// <summary>With no surrounding words requested, only the search word comes back.</summary>
        [TestMethod]
        public void GetSurroundingWords_ZeroSurroundingWords()
        {
            const string wanted = "class";

            string found = StringHelper.GetSurroundingWords(Text, "class", 0, 0)[0];

            Assert.AreEqual(wanted.Length, found.Length);
            Assert.AreEqual(wanted, found);
        }

        /// <summary>With <c>all</c> set, every occurrence of the search word is reported in order.</summary>
        [TestMethod]
        public void GetSurroundingWords_AllInstancesOfSearchWord()
        {
            string[] wanted = { "Date and Time", "properties and methods" };

            List<string> found = StringHelper.GetSurroundingWords(Text, "and", 1, 1, true);

            Assert.AreEqual(wanted.Length, found.Count);
            Assert.AreEqual(wanted[0], found[0]);
            Assert.AreEqual(wanted[1], found[1]);
        }
    }