C# 如何解析大写单词的字符串
我有一个字符串:"Mimi loves Toto and Tata hate Mimi so Toto killed Tata"。我想写一段代码,只打印以大写字母开头的单词,并且不重复输出。(标签:c#, regex)输出应该是:
Mimi
Toto
Tata
我试着这么做,但我确信这是错误的,即使没有显示错误
我写的代码是:
// Entry point of the question's sample: passes a fixed sentence to spliter
// and writes whatever string it returns to the console.
static void Main(string[] args)
{
    const string sentence = "Memi ate Toto and she killed Tata Memi also hate Biso";
    string extracted = spliter(sentence);
    Console.WriteLine(extracted);
}
/// <summary>
/// Extracts the words of <paramref name="s"/> that start with an ASCII
/// uppercase letter, keeping only the first occurrence of each word.
/// </summary>
/// <param name="s">Text to scan.</param>
/// <returns>
/// The distinct capitalized words in order of first appearance, separated by
/// <see cref="Environment.NewLine"/>; an empty string when there are none.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
public static string spliter(string s)
{
    if (s == null)
    {
        throw new ArgumentNullException("s");
    }

    // \b anchors the match at the start of a word, [A-Z] is the capital
    // first letter and \w* consumes the rest of the word. The previous
    // pattern "[A-Z]" matched single letters only.
    Regex exp = new Regex(@"\b[A-Z]\w*");

    // HashSet.Add returns false for a word that was already seen, which is
    // how duplicates are filtered while preserving first-seen order.
    var seen = new System.Collections.Generic.HashSet<string>();
    var output = new System.Text.StringBuilder();

    foreach (Match t in exp.Matches(s))
    {
        if (!seen.Add(t.Value))
        {
            continue;
        }

        if (output.Length > 0)
        {
            output.Append(Environment.NewLine);
        }
        output.Append(t.Value);
    }

    return output.ToString();
}
}
}
想法:
如果我将字符串拆分成一个数组,然后应用正则表达式逐字检查它们,然后打印结果,会怎么样?我不知道-有谁能帮我把这段代码做好吗?我不知道为什么我要发布这段
// Split the sample sentence on single spaces; every token is treated as a word.
string[] tokens = "Mimi loves Toto and Tata hate Mimi so Toto killed Tata".Split(' ');

// A HashSet ignores an Add of a value it already holds, so each word is stored once.
HashSet<string> capitalized = new HashSet<string>();
for (int i = 0; i < tokens.Length; i++)
{
    string token = tokens[i];
    if (!char.IsUpper(token[0]))
    {
        continue; // first character is not an uppercase letter
    }
    capitalized.Add(token);
}

// Print each collected word on its own line.
foreach (string entry in capitalized)
{
    Console.WriteLine(entry);
}
我建议使用 string.Split 将字符串拆分为单词,然后只打印 char.IsUpper(word[0]) 为真的单词。也可以使用类似这样的正则表达式:[A-Z][a-z]+ 说明:
[A-Z] [a-z]+
| |
Single Multiple(+)
| |
C apital -> Capital
下面是一个不使用 regex 的解决方案,请注意其中使用了内置的字符串拆分方法 Split。您可以通过检查第一个字符是否在 'A' 和 'Z' 之间来替换 ToUpper 的做法。删除重复项留给您自己完成,如果需要,可以使用 HashSet。
// Prints every space-separated word of a sample sentence whose first
// character is an uppercase letter, one per line, then waits for a key press.
// Duplicates are printed as often as they occur.
static void Main(string[] args)
{
    string test = " Mimi loves Toto and Tata hate Mimi so Toto killed Tata";
    foreach (string j in test.Split(' '))
    {
        // The leading space yields an empty token, so check the length before
        // indexing. char.IsUpper tests the first character directly: comparing
        // against j.ToUpper() also accepted digits and punctuation (which are
        // unchanged by upper-casing) and allocated a new string per word.
        if (j.Length > 0 && char.IsUpper(j[0]))
        {
            Console.WriteLine(j);
        }
    }
    Console.ReadKey(); //Press any key to continue;
}
我完全不了解 C#/.NET 的正则表达式库,但这个正则表达式模式应该可以:
\b[A-Z][a-z]+
\b表示匹配只能从单词的开头开始。如果要允许使用单字大写,请将+更改为*
编辑:如果您还想匹配 McDonald's 这样的单词:
\b[A-Z][A-Za-z']+
如果您不想匹配仅出现在单词末尾的撇号('),则只需使用以下模式:
\b[A-Z][A-Za-z']+(?<!')
既然其他人已经发布了这么多答案,我觉得我没有违反任何家庭作业规则来表明这一点:
// Text to scan for capitalized words.
string source =
    "First The The Quick Red fox jumped oveR A Red Lazy BRown DOg";

// Pattern: a word boundary, one ASCII uppercase letter, then any run of word characters.
Regex myReg = new Regex(@"(\b[A-Z]\w*)");
MatchCollection found = myReg.Matches(source);

// Project every match to its text, then drop repeated strings.
IEnumerable<string> results =
    (from Match m in found select m.Value).Distinct();

// Write each remaining word on its own line.
foreach (string word in results)
{
    Console.WriteLine(word);
}
要学习Regex类型,您应该开始学习。
要学习Linq内存查询方法,您应该开始。
C3
C2
David B 的答案是最好的,他考虑了单词边界(\b)。我投了赞成票。对他的回答补充一点:
// Returns the distinct capitalized words of `source` joined by single spaces.
// `caseInsensitive` is forwarded to KeisInsensitiveComparer, which decides
// whether two words count as the same one.
Func<string, bool, string> CaptureCaps = (source, caseInsensitive) =>
{
    Regex capitalizedWord = new Regex(@"\b[A-Z]\w*");
    var words = capitalizedWord.Matches(source)
        .OfType<Match>()
        .Select(match => match.Value)
        .Distinct(new KeisInsensitiveComparer(caseInsensitive));
    return string.Join(" ", words.ToArray());
};

// Demo: show both sample sentences, first with caseInsensitive = false, then true.
string[] samples =
{
    "First The The Quick Red fox jumped oveR A Red Lazy BRown DOg",
    "Mimi loves Toto. Tata hate Mimi, so Toto killed TaTa. A bad one!"
};
foreach (bool ignoreCase in new[] { false, true })
{
    foreach (string sample in samples)
    {
        MessageBox.Show(CaptureCaps(sample, ignoreCase));
    }
}
/// <summary>
/// String equality comparer whose case sensitivity is chosen at construction
/// time. Intended for APIs such as <c>Distinct</c> that accept an
/// <see cref="IEqualityComparer{T}"/>.
/// </summary>
class KeisInsensitiveComparer : IEqualityComparer<string>
{
    // Both Equals and GetHashCode delegate to this single comparer, so equal
    // strings are guaranteed to produce equal hash codes.
    private readonly StringComparer _comparer;

    /// <summary>Creates a case-sensitive comparer.</summary>
    public KeisInsensitiveComparer() : this(false) { }

    /// <summary>Creates a comparer with the requested case sensitivity.</summary>
    /// <param name="caseInsensitive">
    /// true to treat strings that differ only in letter case as equal.
    /// </param>
    public KeisInsensitiveComparer(bool caseInsensitive)
    {
        // Ordinal comparison avoids the culture-dependent results that
        // comparing ToUpper() copies can give (and the extra allocations).
        _comparer = caseInsensitive ? StringComparer.OrdinalIgnoreCase : StringComparer.Ordinal;
    }

    /// <summary>Determines whether two strings are equal under the configured rule.</summary>
    /// <returns>
    /// true when both are null or both are equal; false when exactly one is null
    /// or they differ.
    /// </returns>
    public bool Equals(string x, string y)
    {
        return _comparer.Equals(x, y);
    }

    /// <summary>Returns a hash code consistent with <see cref="Equals(string, string)"/>.</summary>
    /// <returns>0 for null; otherwise the configured comparer's hash of <paramref name="s"/>.</returns>
    public int GetHashCode(string s)
    {
        // The previous implementation XOR-ed two identical hashes, which is
        // always 0 and forced every string into the same hash bucket.
        return s == null ? 0 : _comparer.GetHashCode(s);
    }
}
适当的正则表达式:\b\p{Lu}\p{L}*
要打印以大写字母开头的字母吗?我很困惑,我想他指的是以大写字母开头的单词。我编辑了语法。是的,我想打印大写字母,但不重复单词twice。如果Memi吃了Toto,她杀死了Tata,你会期待什么结果?Memi也讨厌Biso作为输入?不,输出应该像Mimi Toto Tata Bisoa,事实上,有点。它将匹配Mc和Donalds。试过了,但没有着色。无论如何,谢谢。如果你摆脱了所有这些额外的html代码,这会有所帮助:系统会为你处理这些代码。在Visual Studio或能够缩进突出显示块的文本编辑器中突出显示你的代码,然后按tab键,然后复制它。然后粘贴到这里,就像上面DavidB为Rizzle指出的那样,虽然越来越近,但它不会正确匹配麦当劳。对于McDonnalds\b[A-Z][A-Za-Z]+可以,我想我喜欢这个,但它会打印重复项。重复项,重复项,重复项如果首字母大写,您的正则表达式将丢失第一个单词。它应该是\b单词边界,而不是\s空格。谢谢。做出了改变。我错过了这个案子,因为我的第一个字被重复了。正则表达式似乎有很多特殊情况。这段代码更好地说明了我在回答中的意思:Char.IsUpper…谁知道?!不是我,但我现在知道了。太好了。这也可能更快。正则表达式引擎功能强大,但它的加载量过大。这不是JavaScript问题。
// Sample sentence shared by both variants below.
string z = "Mimi loves Toto and Tata hate Mimi so Toto killed Tata";

// Variant 1: keep the words whose first character is uppercase, drop repeats,
// and show them as one comma-separated line.
string[] pieces = z.Split(' ');
var wordsWithCapital = (from word in pieces
                        where char.IsUpper(word[0])
                        select word).Distinct();
string joined = string.Join(", ", wordsWithCapital.ToArray());
MessageBox.Show( joined );

// Variant 2: count how many times each capitalized word occurs.
Dictionary<string,int> distinctWords = new Dictionary<string,int>();
string[] wordsWithInitCaps = z.Split(' ');
foreach (string wordX in wordsWithInitCaps)
{
    if (!char.IsUpper(wordX[0]))
    {
        continue;
    }

    // TryGetValue leaves `occurrences` at 0 for a word not stored yet,
    // so the first sighting is recorded as 1.
    int occurrences;
    distinctWords.TryGetValue(wordX, out occurrences);
    distinctWords[wordX] = occurrences + 1;
}

// Show one "word: count" message per distinct word.
foreach (KeyValuePair<string,int> pair in distinctWords)
{
    MessageBox.Show(pair.Key + ": " + pair.Value.ToString());
}
// Returns the distinct capitalized words of `source` joined by single spaces.
// `caseInsensitive` is forwarded to KeisInsensitiveComparer, which decides
// whether two words count as the same one.
Func<string, bool, string> CaptureCaps = (source, caseInsensitive) =>
{
    Regex capitalizedWord = new Regex(@"\b[A-Z]\w*");
    var words = capitalizedWord.Matches(source)
        .OfType<Match>()
        .Select(match => match.Value)
        .Distinct(new KeisInsensitiveComparer(caseInsensitive));
    return string.Join(" ", words.ToArray());
};

// Demo: show both sample sentences, first with caseInsensitive = false, then true.
string[] samples =
{
    "First The The Quick Red fox jumped oveR A Red Lazy BRown DOg",
    "Mimi loves Toto. Tata hate Mimi, so Toto killed TaTa. A bad one!"
};
foreach (bool ignoreCase in new[] { false, true })
{
    foreach (string sample in samples)
    {
        MessageBox.Show(CaptureCaps(sample, ignoreCase));
    }
}
/// <summary>
/// String equality comparer whose case sensitivity is chosen at construction
/// time. Intended for APIs such as <c>Distinct</c> that accept an
/// <see cref="IEqualityComparer{T}"/>.
/// </summary>
class KeisInsensitiveComparer : IEqualityComparer<string>
{
    // Both Equals and GetHashCode delegate to this single comparer, so equal
    // strings are guaranteed to produce equal hash codes.
    private readonly StringComparer _comparer;

    /// <summary>Creates a case-sensitive comparer.</summary>
    public KeisInsensitiveComparer() : this(false) { }

    /// <summary>Creates a comparer with the requested case sensitivity.</summary>
    /// <param name="caseInsensitive">
    /// true to treat strings that differ only in letter case as equal.
    /// </param>
    public KeisInsensitiveComparer(bool caseInsensitive)
    {
        // Ordinal comparison avoids the culture-dependent results that
        // comparing ToUpper() copies can give (and the extra allocations).
        _comparer = caseInsensitive ? StringComparer.OrdinalIgnoreCase : StringComparer.Ordinal;
    }

    /// <summary>Determines whether two strings are equal under the configured rule.</summary>
    /// <returns>
    /// true when both are null or both are equal; false when exactly one is null
    /// or they differ.
    /// </returns>
    public bool Equals(string x, string y)
    {
        return _comparer.Equals(x, y);
    }

    /// <summary>Returns a hash code consistent with <see cref="Equals(string, string)"/>.</summary>
    /// <returns>0 for null; otherwise the configured comparer's hash of <paramref name="s"/>.</returns>
    public int GetHashCode(string s)
    {
        // The previous implementation XOR-ed two identical hashes, which is
        // always 0 and forced every string into the same hash bucket.
        return s == null ? 0 : _comparer.GetHashCode(s);
    }
}
// Matches a whole word made of one ASCII uppercase letter followed only by
// ASCII lowercase letters; words with any other character inside do not match.
static Regex _capitalizedWordPattern = new Regex(
    @"\b[A-Z][a-z]*\b",
    RegexOptions.Multiline | RegexOptions.Compiled);

/// <summary>
/// Finds the words in <paramref name="text"/> that match the capitalized-word
/// pattern and removes repeated ones.
/// </summary>
/// <param name="text">Text to scan.</param>
/// <returns>A lazily evaluated sequence of the distinct matching words.</returns>
public static IEnumerable<string> GetDistinctOnlyCapitalizedWords(string text)
{
    MatchCollection hits = _capitalizedWordPattern.Matches(text);
    IEnumerable<string> values = from Match hit in hits select hit.Value;
    return values.Distinct();
}
/**
 * Reads the `value` of the element returned by $("textAreaId"), collects every
 * substring matching the capitalized-word pattern, and passes the id together
 * with the match result to displayResults.
 *
 * NOTE(review): `$` and `displayResults` are defined elsewhere; `$` is
 * presumably an element-by-id lookup helper — confirm against the page.
 */
function capitalLetters() {
    var targetId = "textAreaId";
    // Word boundary, one uppercase letter, then one or more letters or apostrophes.
    var capitalizedWord = /\b[A-Z][A-Za-z']+/g;
    var text = $(targetId).value;
    displayResults(targetId, text.match(capitalizedWord));
}
// Unicode-aware variant: \p{Lu} is any uppercase letter and \p{L}* any run of
// letters, so non-ASCII capitalized words match as well. No Distinct is
// applied here, so repeated words stay in the sequence.
var result =
    from Match m in Regex.Matches(input, @"\b\p{Lu}\p{L}*")
    select m.Value;