C# 调试单元测试时,执行随机跳转到抛出的异常
我遇到了一个非常奇怪的问题,在调试Visual Studio.NET单元测试时,我的执行从半可预测的位置跳到另一个位置。发生这种奇怪行为的方法是下面的“Parse(…)”。我已经在这个方法中指出了执行将跳转到的位置(“//异常”)。在我的测试中,我还指出了一些地方,当它奇怪地跳转时执行(“//跳转”)。跳转通常会连续几次从同一位置跳转,然后连续几次从新位置跳转。这些执行跳转的地方要么是switch语句的开始,要么是代码块的结束,这向我表明指令指针出现了一些奇怪的情况,但我对.NET不够了解,不知道这可能是什么。如果有什么不同,执行不会跳转到“throw”语句之前,而是跳转到刚刚抛出异常的执行点。很奇怪 根据我的经验,执行跳转只在解析嵌套命名组的内容时发生 下面代码的作用背景:我试图实现的解决方案是一个简单的正则表达式解析器。这不是一个完整的正则表达式解析器。我的需求只是能够在正则表达式中找到特定的命名组,并用其他内容替换某些命名组的内容。所以基本上我只是运行一个正则表达式并跟踪我找到的命名组。我还跟踪未命名的组,因为我需要知道括号匹配和注释,以便注释的括号不会破坏paren匹配。一段单独的(目前尚未实现的)代码将在考虑替换后重构包含正则表达式的字符串 我非常感谢任何关于可能正在进行的工作的建议;我很困惑 示例解决方案 (TAR格式)包含我在下面讨论的所有代码。我在运行这个解决方案时出现了错误(单元测试项目“TestRegexParserLibTest”作为启动项目)。因为这似乎是一个零星的错误,如果其他人遇到同样的问题,我会很感兴趣 代码 我使用一些简单的类来组织结果:C# 调试单元测试时,执行随机跳转到抛出的异常,c#,unit-testing,parsing,exception,C#,Unit Testing,Parsing,Exception,我遇到了一个非常奇怪的问题,在调试Visual Studio.NET单元测试时,我的执行从半可预测的位置跳到另一个位置。发生这种奇怪行为的方法是下面的“Parse(…)”。我已经在这个方法中指出了执行将跳转到的位置(“//异常”)。在我的测试中,我还指出了一些地方,当它奇怪地跳转时执行(“//跳转”)。跳转通常会连续几次从同一位置跳转,然后连续几次从新位置跳转。这些执行跳转的地方要么是switch语句的开始,要么是代码块的结束,这向我表明指令指针出现了一些奇怪的情况,但我对.NET不够了解,不知
/// <summary>
/// The root of the regex being parsed. Holds the top-level structures in
/// <see cref="SubStructures"/>.
/// </summary>
public class RegexGroupStructureRoot : ISuperRegexGroupStructure
{
/// <summary>Child structures of the root; set to an empty list by the constructor.</summary>
public List<RegexGroupStructure> SubStructures { get; set; }
/// <summary>Creates a root whose <see cref="SubStructures"/> list is empty.</summary>
public RegexGroupStructureRoot()
{
SubStructures = new List<RegexGroupStructure>();
}
// NOTE(review): the body is elided ("...") in this listing, so the comparison rules are not
// visible. No GetHashCode override is shown either - confirm one exists alongside Equals.
public override bool Equals(object obj) { ... }
}
/// <summary>
/// Base type for the items stored in a SubStructures list: either a
/// RegexGroupStructureGroup or a RegexGroupStructureRegex.
/// </summary>
/// <remarks>
/// Contained within the SubStructures of both RegexGroupStructureRoot and
/// RegexGroupStructureGroup. Declares no members of its own.
/// </remarks>
public abstract class RegexGroupStructure
{
}
/// <summary>
/// A run of text containing regular expression characters (but not groups).
/// </summary>
public class RegexGroupStructureRegex : RegexGroupStructure
{
/// <summary>The raw regex text of this run.</summary>
public string Regex { get; set; }
// NOTE(review): the body is elided ("...") in this listing, so the comparison rules are not
// visible. No GetHashCode override is shown either - confirm one exists alongside Equals.
public override bool Equals(object obj) { ... }
}
/// <summary>
/// A regular expression group. It is both a structure (it can sit in a parent's
/// SubStructures list) and a container of further structures.
/// </summary>
public class RegexGroupStructureGroup : RegexGroupStructure, ISuperRegexGroupStructure
{
// Name == null indicates an unnamed group
/// <summary>The group's name, or null for an unnamed group.</summary>
public string Name { get; set; }
/// <summary>Child structures of this group; set to an empty list by the constructor.</summary>
public List<RegexGroupStructure> SubStructures { get; set; }
/// <summary>Creates a group whose <see cref="SubStructures"/> list is empty and whose Name is unset.</summary>
public RegexGroupStructureGroup()
{
SubStructures = new List<RegexGroupStructure>();
}
// NOTE(review): the body is elided ("...") in this listing, so the comparison rules are not
// visible. No GetHashCode override is shown either - confirm one exists alongside Equals.
public override bool Equals(object obj) { ... }
}
/// <summary>
/// Items that contain SubStructures: either a RegexGroupStructureGroup or a
/// RegexGroupStructureRoot.
/// </summary>
/// <remarks>
/// Declared without an access modifier, while the classes implementing it are public.
/// </remarks>
interface ISuperRegexGroupStructure
{
/// <summary>The child structures held by this container (read access only through the interface).</summary>
List<RegexGroupStructure> SubStructures { get; }
}
//我们正在分析的正则表达式的根
公共类RegexGroupStructureRoot:ISuperRegexGroupStructure
{
公共列表子结构{get;set;}
公共RegexGroupStructureRoot()
{
子结构=新列表();
}
公共覆盖布尔等于(对象对象对象){…}
}
//RegexGroupStructureGroup或RegexGroupStructureRegex
//包含在RegexGroupStructureRoot和RegexGroupStructureGroup的子结构中
公共抽象类RegexGroupStructure
{
}
//包含正则表达式字符(但不包含组)的文本运行
公共类RegexGroupStructureRegex:RegexGroupStructure
{
公共字符串正则表达式{get;set;}
公共覆盖布尔等于(对象对象对象){…}
}
//正则表达式组
公共类RegexGroupStructureGroup:RegexGroupStructure,ISuperRegexGroupStructure
{
//Name==null表示未命名的组
公共字符串名称{get;set;}
公共列表子结构{get;set;}
公共RegexGroupStructureGroup()
{
子结构=新列表();
}
公共覆盖布尔等于(对象对象对象){…}
}
//包含子结构的项
//RegexGroupStructureGroup或RegexGroupStructureRoot
接口ISuperRegexGroupStructure
{
列表子结构{get;}
}
这里是我实际解析正则表达式的方法(以及关联的枚举/静态成员),返回一个RegexGroupStructureRoot,其中包含找到的所有命名组、未命名组和其他正则表达式字符
using Re = System.Text.RegularExpressions
/// <summary>
/// The states of the character-by-character scan performed by Parse. The scan starts in
/// <see cref="TopLevel"/> and switches state as group-related characters are read.
/// </summary>
enum Mode
{
TopLevel, // Not in any group
BeginGroup, // Just encountered a character beginning a group: "("
BeginGroupTypeControl, // Just encountered a character controlling group type, immediately after beginning a group: "?"
NamedGroupName, // Reading the named group name (must have encountered a character indicating a named group type immediately following a group type control character: "<" after "?")
NamedGroup, // Reading the contents of a named group
UnnamedGroup, // Reading the contents of an unnamed group
}
// Pattern matching one character that is allowed inside a named-group name.
private static readonly string _NamedGroupNameValidCharRePattern = "[A-Za-z0-9_]";

// Regex instance built from the pattern above; assigned once by the static constructor
// and used by Parse to validate each character of a group name.
private static readonly Re.Regex _NamedGroupNameValidCharRe;

/// <summary>
/// Builds the shared name-character regex once for the type. Both fields are readonly
/// because nothing in this listing reassigns them after this point.
/// </summary>
static RegexGroupStructureParser()
{
    _NamedGroupNameValidCharRe = new Re.Regex(_NamedGroupNameValidCharRePattern);
}
/// <summary>
/// Scans a regular-expression pattern one character at a time and splits it into a tree of
/// groups (named and unnamed) and runs of plain regex text.
/// </summary>
/// <param name="regex">The pattern text to scan. Must not be null (it is enumerated directly).</param>
/// <returns>
/// The item left on the parent stack once the scan finishes, cast with <c>as</c> to
/// <see cref="RegexGroupStructureRoot"/>. If that item is not a root (for example because a
/// group was never closed) the cast yields null; in debug builds the assertions below fire first.
/// </returns>
/// <exception cref="InvalidRegexFormatException">
/// A ')' is read at top level, a named group has an empty name, or a named-group name
/// contains a character other than [A-Za-z0-9_].
/// </exception>
/// <exception cref="Exception">The mode holds a value that no case handles.</exception>
public static RegexGroupStructureRoot Parse(string regex)
{
    string newLine = Environment.NewLine;
    // A record of the parent structures that the parser has created
    Stack<ISuperRegexGroupStructure> parentStructures = new Stack<ISuperRegexGroupStructure>();
    // The current text we've encountered
    StringBuilder textConsumer = new StringBuilder();
    // Whether the parser is in an escape sequence
    bool escaped = false;
    // Whether the parser is in an end-of-line comment (such comments run from a hash-sign ('#')
    // to the end of the line). The other type of .NET regular expression comment is the
    // group-comment: (?#This is a comment). That one needs no special handling since it is
    // treated like an unnamed group.
    bool commented = false;
    // The current mode of the parsing process
    Mode mode = Mode.TopLevel;
    // Push a root onto the parents to accept whatever regexes/groups we encounter
    parentStructures.Push(new RegexGroupStructureRoot());

    // A string is directly enumerable as chars; no intermediate array is needed.
    foreach (char chr in regex)
    {
        if (escaped) // JUMP
        {
            // The character after a backslash is taken literally, whatever it is.
            textConsumer.Append(chr);
            escaped = false;
        }
        else if (chr == '#')
        {
            // Any unescaped '#' starts (or continues) an end-of-line comment, in every mode.
            textConsumer.Append(chr);
            commented = true;
        }
        else if (commented)
        {
            textConsumer.Append(chr);
            // The comment ends once the accumulated text ends with a NewLine.
            // EndsWith replaces a hand-computed Substring whose start index was one too small:
            // it compared the wrong window and threw when the text was exactly one NewLine long.
            if (textConsumer.ToString().EndsWith(newLine, StringComparison.Ordinal))
            {
                commented = false;
            }
        }
        else
        {
            switch (mode) // JUMP
            {
                case Mode.TopLevel:
                    switch (chr)
                    {
                        case '\\':
                            textConsumer.Append(chr); // Append the backslash
                            escaped = true;
                            break;
                        case '(':
                            // NOTE(review): beginNewGroup is defined outside this listing; it
                            // presumably switches mode to BeginGroup - confirm.
                            beginNewGroup(parentStructures, ref textConsumer, ref mode);
                            break;
                        case ')':
                            // Can't close a group if we're already at the top-level
                            throw new InvalidRegexFormatException("Too many ')'s.");
                        default:
                            textConsumer.Append(chr);
                            break;
                    }
                    break;

                case Mode.BeginGroup:
                    switch (chr)
                    {
                        case '?':
                            // If it's an unnamed group, we'll re-add the question mark.
                            // If it's a named group, named groups reconstruct question marks so no need to add it.
                            mode = Mode.BeginGroupTypeControl;
                            break;
                        default:
                            // Only a '?' can begin a named group. So anything else begins an unnamed group.
                            // NOTE(review): chr itself is not appended in this branch, and the new
                            // group is pushed without being added to the parent's SubStructures
                            // (the named-group path does add it) - confirm this is intended or
                            // compensated for by beginNewGroup/closeGroup.
                            parentStructures.Peek().SubStructures.Add(new RegexGroupStructureRegex()
                            {
                                Regex = textConsumer.ToString()
                            });
                            textConsumer = new StringBuilder();
                            parentStructures.Push(new RegexGroupStructureGroup()
                            {
                                Name = null, // null indicates an unnamed group
                                SubStructures = new List<RegexGroupStructure>()
                            });
                            mode = Mode.UnnamedGroup;
                            break;
                    }
                    break;

                case Mode.BeginGroupTypeControl:
                    switch (chr)
                    {
                        case '<':
                            mode = Mode.NamedGroupName;
                            break;
                        default:
                            // We previously read a question mark to get here, but the group turned out not to be a named group.
                            // So add back in the question mark, since unnamed groups don't reconstruct with question marks.
                            // Two separate appends: '?' + chr is integer arithmetic on chars and
                            // would append the numeric sum instead of the two characters.
                            textConsumer.Append('?').Append(chr);
                            mode = Mode.UnnamedGroup;
                            break;
                    }
                    break;

                case Mode.NamedGroupName:
                    if (chr == '>')
                    {
                        // '>' closes the named group name. So extract the name
                        string namedGroupName = textConsumer.ToString();
                        if (namedGroupName == String.Empty)
                        {
                            throw new InvalidRegexFormatException("Named group names cannot be empty.");
                        }
                        // Create the new named group
                        RegexGroupStructureGroup newNamedGroup = new RegexGroupStructureGroup()
                        {
                            Name = namedGroupName,
                            SubStructures = new List<RegexGroupStructure>()
                        };
                        // Add this group to the current parent
                        parentStructures.Peek().SubStructures.Add(newNamedGroup);
                        // ...and make it the new parent.
                        parentStructures.Push(newNamedGroup);
                        textConsumer = new StringBuilder();
                        mode = Mode.NamedGroup;
                    }
                    else if (_NamedGroupNameValidCharRe.IsMatch(chr.ToString()))
                    {
                        // Append any valid named group name char to the growing named group name
                        textConsumer.Append(chr);
                    }
                    else
                    {
                        // chr is neither a valid named group name character, nor the character that closes the named group name (">"). Error.
                        throw new InvalidRegexFormatException(String.Format("Invalid named group name character: {0}", chr)); // EXCEPTION
                    }
                    break; // JUMP

                case Mode.NamedGroup:
                case Mode.UnnamedGroup:
                    switch (chr) // JUMP
                    {
                        case '\\':
                            textConsumer.Append(chr);
                            escaped = true;
                            break;
                        case ')':
                            // NOTE(review): closeGroup is defined outside this listing; it
                            // presumably pops the current parent and restores the mode - confirm.
                            closeGroup(parentStructures, ref textConsumer, ref mode);
                            break;
                        case '(':
                            beginNewGroup(parentStructures, ref textConsumer, ref mode);
                            break;
                        default:
                            textConsumer.Append(chr);
                            break;
                    }
                    break;

                default:
                    throw new Exception("Exhausted Modes");
            }
        } // JUMP
    }

    ISuperRegexGroupStructure finalParent = parentStructures.Pop();
    Debug.Assert(parentStructures.Count < 1, "Left parent structures on the stack.");
    Debug.Assert(finalParent.GetType().Equals(typeof(RegexGroupStructureRoot)), "The final parent must be a RegexGroupStructureRoot");

    // Whatever text is still buffered becomes a trailing plain-regex run.
    string finalRegex = textConsumer.ToString();
    if (!String.IsNullOrEmpty(finalRegex))
    {
        finalParent.SubStructures.Add(new RegexGroupStructureRegex()
        {
            Regex = finalRegex
        });
    }

    return finalParent as RegexGroupStructureRoot;
}
使用Re=System.Text.regular表达式
枚举模式
{
TopLevel,//不在任何组中
BeginGroup,//刚遇到一个以组开头的字符:(“
BeginGroupTypeControl,//刚遇到一个字符控制组类型,就在组开始之后:“?”
NamedGroupName,//正在读取命名组名(必须在组类型控制字符“”之后遇到一个指示命名组类型的字符)。错误。
抛出新的InvalidRegexFormatException(String.Format(“无效的命名组名字符:{0}”,chr));//异常
}
break;//跳跃
case Mode.NamedGroup:
案例模式。未命名组:
开关(chr)//跳转
{
案例“\\”:
textConsumer.Append(chr);
逃逸=真;
打破
案例“)”:
closeGroup(父结构、引用文本使用者、引用模式);
打破
格“(”:
beginNewGroup(父结构、引用文本使用者、引用模式);
打破
违约:
textConsumer.Append(chr);
打破
}
打破
违约:
抛出新异常(“耗尽模式”);
}
}//跳跃
}
ISuperRegexGroupStructure finalParent=parentStructures.Pop();
Assert(parentStructures.Count<1,“堆栈上的左父结构”);
Assert(finalParent.GetType().Equals(typeof(RegexGroupStructureRoot)),“最终父级必须是RegexGroupStructureRoot”);
字符串finalRegex=textConsumer.ToString();
如果(!String.IsNullOrEmpty(finalRegex))
鳍
/// <summary>
/// Parses a short pattern containing a named group nested inside another named group and
/// compares the result with a hand-built expected structure.
/// </summary>
/// <remarks>
/// The verbatim string literals are whitespace-sensitive: every line break inside them is
/// part of the compared text, so their contents must not be re-indented.
/// The expected inner group name uses the same spelling as the pattern
/// ("Group1_SubGroup"); the previous "Group1_Subgroup" differed in case.
/// </remarks>
[TestMethod]
public void ParseTest_Short()
{
    string regex = @"
(?<Group1>
,?\s+
(?<Group1_SubGroup>
[\d–-]+ # One or more digits, hyphen, and/or n-dash
)
)
";
    RegexGroupStructureRoot expected = new RegexGroupStructureRoot()
    {
        SubStructures = new List<RegexGroupStructure>()
        {
            new RegexGroupStructureGroup() {
                Name = "Group1",
                SubStructures = new List<RegexGroupStructure> {
                    new RegexGroupStructureRegex() {
                        Regex = @"
,?\s+
"
                    },
                    new RegexGroupStructureGroup() {
                        Name = "Group1_SubGroup",
                        SubStructures = new List<RegexGroupStructure>() {
                            new RegexGroupStructureRegex() {
                                Regex = @"
[\d–-]+ # One or more digits, hyphen, and/or n-dash
"
                            }
                        }
                    },
                    new RegexGroupStructureRegex() {
                        Regex = @"
"
                    }
                }
            },
            new RegexGroupStructureRegex() {
                Regex = @"
"
            },
        }
    };

    RegexGroupStructureRoot actual = RegexGroupStructureParser.Parse(regex);

    Assert.AreEqual(expected, actual);
}
case 1:
do something
break;
case 2:
throw ... //No break required.
case 3: