如何在c#中找出句子中的下一个单词?
我有一个字符串 “bat and ball not pen or boat not phone” 我想选出紧跟在 not 后面的单词,例如 “not pen”、“not phone”,但我做不到。我试图通过使用索引和子字符串来提取单词,但没有成功。如何在c#中找出句子中的下一个单词?,c#,.net,regex,C#,.net,Regex,我有一个字符串 “bat and ball not pen or boat not phone” 我想选出紧跟在 not 后面的单词,例如 “not pen”、“not phone”,但我做不到。我试图通过使用索引和子字符串来提取单词,但没有成功 tempTerm = tempTerm.Trim().Substring(0, tempTerm.Length - (orterm.Length + 1)).ToString(); 我想说的是,首先将字符串拆分成一个数组——这将使这类事情变得更加简单。string[] parts = myStr.Split(' '); Strin
// Strip surrounding whitespace first so that the length used below is measured
// on the same text Substring operates on. Measuring the untrimmed string can
// request more characters than the trimmed one holds, which makes Substring
// throw ArgumentOutOfRangeException.
tempTerm = tempTerm.Trim();
// Drop the trailing (orterm.Length + 1) characters.
// NOTE(review): presumably this removes orterm plus one separator character - confirm.
// Substring still throws if tempTerm is shorter than orterm.Length + 1.
tempTerm = tempTerm.Substring(0, tempTerm.Length - (orterm.Length + 1));
我想说的是,首先将字符串拆分成一个数组——这将使这类事情变得更加简单。
string[]parts=myStr.Split(“”);
// Break the sentence into tokens on single spaces.
string[] tokens = myStr.Split(' ');
// Stop one short of the end so tokens[pos + 1] always exists.
for (int pos = 0; pos + 1 < tokens.Length; pos++)
{
    // Collect the token that immediately follows every "not" (case-sensitive).
    if (tokens[pos] == "not")
    {
        someList.Add(tokens[pos + 1]);
    }
}
对于(int i=0;i
这会让你找到与not相邻的所有单词,如果需要,你可以与不区分大小写的进行比较。你可以拆分句子,然后循环查找“not”:
string sentence = "bat and ball not pen or boat not phone";
string[] words = sentence.Split(new char[] { ' ' });
List<string> wordsBesideNot = new List<string>();
for(int i=0;i
使用一些
差不多
// Sample sentence to scan.
string s = "bat and ball not pen or boat not phone";
// Pattern: "not", exactly one whitespace character, then a run of word characters.
MatchCollection matches = Regex.Matches(s, "not\\s\\w+");
for (int m = 0; m < matches.Count; m++)
{
    // Each match is the whole phrase, e.g. "not pen".
    string sub = matches[m].Value;
}
有关更多详细信息,请参见您可以使用此正则表达式:
not\s\w+\b
。它将匹配所需的短语:
not pen
not phone
// Sentence to scan; it is never modified.
string s = "bat and ball not pen or boat not phone";
// Marker whose following word is wanted. The trailing space is part of the marker.
string marker = "not ";
// Unconsumed tail of the sentence; it shrinks after every marker that is found.
string remaining = s;
// Words found immediately after each marker, in order of appearance.
List<string> stringList = new List<string>();
int markerAt;
// Keep going while the marker still occurs in the unconsumed tail.
while ((markerAt = remaining.IndexOf(marker)) != -1)
{
    // Discard everything up to and including the marker, then any extra spaces.
    remaining = remaining.Substring(markerAt + marker.Length).TrimStart(' ');
    // The word runs to the next space, or to the end of the text when no space is left.
    int wordEnd = remaining.IndexOf(' ');
    stringList.Add(wordEnd != -1 ? remaining.Substring(0, wordEnd) : remaining);
}
// Unused local carried over from the original snippet.
int p = 0;
//原始字符串
string s=“球棒和球不是笔或船不是电话”;
//分离器
字符串separate=“not”;
//分离器长度
整数长度=分开的长度;
//这样你就不会碰原始字符串了
string=s.ToString();
//列表来存储单词,如果
//你数一数“不是”。
List stringList=新列表();
//而分隔符(not)存在于字符串中
while(透视索引of(单独)!=-1)
{
//下一个分离器的索引
int index=0.IndexOf(单独);
//移除分离器和分离器前的任何东西
//分离器本身。
透视=透视子串(索引+长度);
//如果有多个空格,请删除它们。
透视=透视。修剪开始(“”);
//如果有更多的空格或更多的单词
//然后指定长度
如果(指数(“”)!=-1)
{
//把这个词删掉
string sub=scope.Substring(0,scope.IndexOf(“”));
//将单词添加到列表中
stringList.Add(sub);
}
//否则,只需获取字符串的其余部分
其他的
{
//把这个词删掉
string sub=0.Substring(0);
//将单词添加到列表中
stringList.Add(sub);
}
}
int p=0;
列表中的单词是pen和phone。当字符串中出现特殊字符、句号等情况时,此方法会失败。如果您事先不知道字符串的结构,则可能需要更复杂的处理。公共类StringHelper
public class StringHelper
{
    /// <summary>
    /// Gets the words surrounding a given word in a given text.
    /// The text is split on whitespace and the search word must equal a whole token.
    /// </summary>
    /// <param name="text">The text in which to search for the word.</param>
    /// <param name="word">The word to search for.</param>
    /// <param name="prev">The maximum number of preceding words to include; fewer are returned when the match is near the start.</param>
    /// <param name="next">The maximum number of following words to include; fewer are returned when the match is near the end.</param>
    /// <param name="all">When true, every occurrence of the word is returned; otherwise only the first one.</param>
    /// <returns>A list with one space-joined phrase per reported occurrence; empty when the word is not found.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public static List<string> GetSurroundingWords(string text, string word, int prev, int next, bool all = false)
    {
        if (text == null)
        {
            throw new ArgumentNullException("text");
        }

        var phrases = new List<string>();
        var words = text.Split();
        var index = -1;
        while ((index = Array.IndexOf(words, word, index + 1)) != -1)
        {
            // Clamp into locals for each occurrence: a window shortened by one
            // match near an edge must not shrink the window of later matches.
            var before = Math.Max(0, Math.Min(prev, index));
            // The last valid index is words.Length - 1, so that is the bound
            // for how many words can follow the match.
            var after = Math.Max(0, Math.Min(next, words.Length - 1 - index));

            phrases.Add(string.Join(" ", words, index - before, before + 1 + after));

            if (!all)
            {
                break;
            }
        }

        return phrases;
    }
}
[TestClass]
public class StringHelperTests
{
    // Sample sentence shared by every test in this class.
    private const string Text = "Date and Time in C# are handled by DateTime class in C# that provides properties and methods to format dates in different datetime formats.";

    /// <summary>One word before and two words after the first match are returned.</summary>
    [TestMethod]
    public void GetSurroundingWords()
    {
        // Arrange
        const string searchWord = "class";
        string[] expectedPhrases = { "DateTime class in C#" };

        // Act
        List<string> result = StringHelper.GetSurroundingWords(Text, searchWord, 1, 2);

        // Assert
        Assert.AreEqual(expectedPhrases.Length, result.Count);
        Assert.AreEqual(expectedPhrases[0], result[0]);
    }

    /// <summary>A word that is not a token of the text yields no phrases.</summary>
    [TestMethod]
    public void GetSurroundingWords_NoMatch()
    {
        // Arrange
        const string searchWord = "classify";
        const int expectedCount = 0;

        // Act
        List<string> result = StringHelper.GetSurroundingWords(Text, searchWord, 1, 2);

        // Assert
        Assert.AreEqual(expectedCount, result.Count);
    }

    /// <summary>Asking for more preceding words than exist returns everything from the start of the text.</summary>
    [TestMethod]
    public void GetSurroundingWords_MoreSurroundingWordsThanAvailable()
    {
        // Arrange
        const string searchWord = "class";
        const string expectedPhrase = "Date and Time in C# are handled by DateTime class in C#";

        // Act
        List<string> result = StringHelper.GetSurroundingWords(Text, searchWord, 50, 2);

        // Assert
        Assert.AreEqual(expectedPhrase.Length, result[0].Length);
        Assert.AreEqual(expectedPhrase, result[0]);
    }

    /// <summary>With zero words requested on both sides only the search word comes back.</summary>
    [TestMethod]
    public void GetSurroundingWords_ZeroSurroundingWords()
    {
        // Arrange
        const string searchWord = "class";
        const string expectedPhrase = "class";

        // Act
        List<string> result = StringHelper.GetSurroundingWords(Text, searchWord, 0, 0);

        // Assert
        Assert.AreEqual(expectedPhrase.Length, result[0].Length);
        Assert.AreEqual(expectedPhrase, result[0]);
    }

    /// <summary>With all = true every occurrence of the search word produces its own phrase.</summary>
    [TestMethod]
    public void GetSurroundingWords_AllInstancesOfSearchWord()
    {
        // Arrange
        const string searchWord = "and";
        string[] expectedPhrases = { "Date and Time", "properties and methods" };

        // Act
        List<string> result = StringHelper.GetSurroundingWords(Text, searchWord, 1, 1, true);

        // Assert
        Assert.AreEqual(expectedPhrases.Length, result.Count);
        Assert.AreEqual(expectedPhrases[0], result[0]);
        Assert.AreEqual(expectedPhrases[1], result[1]);
    }
}
{
///
///获取给定文本中给定单词的周围单词。
///
///要在其中搜索给定单词的文本。
///要在给定文本中搜索的单词。
///要包含在结果中的先前单词数。
///要包含在结果中的下一个单词的数目。
///设置该方法是否返回搜索词的所有实例。
///由部分文本组成的数组,包括搜索词和周围的词。
公共静态列表GetSurroundingWords(字符串文本、字符串单词、int-prev、int-next、bool-all=false)
{
var短语=新列表();
var words=text.Split();
var索引=新列表();
var指数=-1;
while((index=Array.IndexOf(words,word,index+1))!=-1)
{
索引。添加(索引);
如果(!all&&index.Count==1)
打破
}
foreach(指数中的var ind)
{
var-prevActual=ind;
如果(上一个>上一个实际)
prev=prev实际值;
var nextActual=单词。长度-索引;
如果(下一步>下一步)
下一步=下一步;
var picked=新列表();
对于(var i=1;听起来你有一个合理的解决方案(使用索引和子字符串),但你就是无法使该解决方案起作用。发布你为该解决方案尝试的代码,有人可以帮助指出错误。你是否还想在“not pen”、“not phone”中包含“not”一词是的,看看其他更优雅的解决方案,我觉得有点尴尬(是的,在内核调试了12个小时后,不管怎么说,这都会让你看起来很痛苦,以防你对Split方法有强烈的仇恨。即使是Regex。这也会产生一堆新的字符串对象。别误会,它可以工作,但哎哟。公平地说,separate和scope是不需要的。我只是决定使用一个变量(separate)不是像其他方法那样插入纯文本。但是,是的,我知道…@astander:-谢谢,这是一个非常好的答案
public class StringHelper
{
    /// <summary>
    /// Gets the words surrounding a given word in a given text.
    /// The text is split on whitespace and the search word must equal a whole token.
    /// </summary>
    /// <param name="text">The text in which to search for the word.</param>
    /// <param name="word">The word to search for.</param>
    /// <param name="prev">The maximum number of preceding words to include; fewer are returned when the match is near the start.</param>
    /// <param name="next">The maximum number of following words to include; fewer are returned when the match is near the end.</param>
    /// <param name="all">When true, every occurrence of the word is returned; otherwise only the first one.</param>
    /// <returns>A list with one space-joined phrase per reported occurrence; empty when the word is not found.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public static List<string> GetSurroundingWords(string text, string word, int prev, int next, bool all = false)
    {
        if (text == null)
        {
            throw new ArgumentNullException("text");
        }

        var phrases = new List<string>();
        var words = text.Split();
        var index = -1;
        while ((index = Array.IndexOf(words, word, index + 1)) != -1)
        {
            // Clamp into locals for each occurrence: a window shortened by one
            // match near an edge must not shrink the window of later matches.
            var before = Math.Max(0, Math.Min(prev, index));
            // The last valid index is words.Length - 1, so that is the bound
            // for how many words can follow the match.
            var after = Math.Max(0, Math.Min(next, words.Length - 1 - index));

            phrases.Add(string.Join(" ", words, index - before, before + 1 + after));

            if (!all)
            {
                break;
            }
        }

        return phrases;
    }
}
[TestClass]
public class StringHelperTests
{
    // Sample sentence shared by every test in this class.
    private const string Text = "Date and Time in C# are handled by DateTime class in C# that provides properties and methods to format dates in different datetime formats.";

    /// <summary>One word before and two words after the first match are returned.</summary>
    [TestMethod]
    public void GetSurroundingWords()
    {
        // Arrange
        const string searchWord = "class";
        string[] expectedPhrases = { "DateTime class in C#" };

        // Act
        List<string> result = StringHelper.GetSurroundingWords(Text, searchWord, 1, 2);

        // Assert
        Assert.AreEqual(expectedPhrases.Length, result.Count);
        Assert.AreEqual(expectedPhrases[0], result[0]);
    }

    /// <summary>A word that is not a token of the text yields no phrases.</summary>
    [TestMethod]
    public void GetSurroundingWords_NoMatch()
    {
        // Arrange
        const string searchWord = "classify";
        const int expectedCount = 0;

        // Act
        List<string> result = StringHelper.GetSurroundingWords(Text, searchWord, 1, 2);

        // Assert
        Assert.AreEqual(expectedCount, result.Count);
    }

    /// <summary>Asking for more preceding words than exist returns everything from the start of the text.</summary>
    [TestMethod]
    public void GetSurroundingWords_MoreSurroundingWordsThanAvailable()
    {
        // Arrange
        const string searchWord = "class";
        const string expectedPhrase = "Date and Time in C# are handled by DateTime class in C#";

        // Act
        List<string> result = StringHelper.GetSurroundingWords(Text, searchWord, 50, 2);

        // Assert
        Assert.AreEqual(expectedPhrase.Length, result[0].Length);
        Assert.AreEqual(expectedPhrase, result[0]);
    }

    /// <summary>With zero words requested on both sides only the search word comes back.</summary>
    [TestMethod]
    public void GetSurroundingWords_ZeroSurroundingWords()
    {
        // Arrange
        const string searchWord = "class";
        const string expectedPhrase = "class";

        // Act
        List<string> result = StringHelper.GetSurroundingWords(Text, searchWord, 0, 0);

        // Assert
        Assert.AreEqual(expectedPhrase.Length, result[0].Length);
        Assert.AreEqual(expectedPhrase, result[0]);
    }

    /// <summary>With all = true every occurrence of the search word produces its own phrase.</summary>
    [TestMethod]
    public void GetSurroundingWords_AllInstancesOfSearchWord()
    {
        // Arrange
        const string searchWord = "and";
        string[] expectedPhrases = { "Date and Time", "properties and methods" };

        // Act
        List<string> result = StringHelper.GetSurroundingWords(Text, searchWord, 1, 1, true);

        // Assert
        Assert.AreEqual(expectedPhrases.Length, result.Count);
        Assert.AreEqual(expectedPhrases[0], result[0]);
        Assert.AreEqual(expectedPhrases[1], result[1]);
    }
}