C# 调试单元测试时，执行随机跳转到抛出的异常_C#_Unit Testing_Parsing_Exception

C# 调试单元测试时，执行随机跳转到抛出的异常

c# unit-testing parsing exception

C# 调试单元测试时，执行随机跳转到抛出的异常,c#,unit-testing,parsing,exception,C#,Unit Testing,Parsing,Exception,我遇到了一个非常奇怪的问题，在调试Visual Studio.NET单元测试时，我的执行从半可预测的位置跳到另一个位置。发生这种奇怪行为的方法是下面的“Parse（…）”。我已经在这个方法中指出了执行将跳转到的位置（“//异常”）。在我的测试中，我还指出了一些地方，当它奇怪地跳转时执行（“//跳转”）。跳转通常会连续几次从同一位置跳转，然后连续几次从新位置跳转。这些执行跳转的地方要么是switch语句的开始，要么是代码块的结束，这向我表明指令指针出现了一些奇怪的情况，但我对.NET不够了解，不知

我遇到了一个非常奇怪的问题：在 Visual Studio 中调试 .NET 单元测试时，程序的执行会从一些半可预测的位置突然跳转到另一个位置。出现这种奇怪行为的方法是下面的“Parse(...)”。我已在该方法中用“// EXCEPTION”标出了执行会跳转到的位置，并用“// JUMP”标出了测试中发生异常跳转时执行所在的若干位置。跳转通常会连续几次从同一位置发生，然后又连续几次从另一个新位置发生。这些跳转的起点要么是 switch 语句的开头，要么是代码块的末尾，这让我觉得指令指针出了某种怪事，但我对 .NET 了解不够，不知道具体可能是什么。补充一点（如果有影响的话）：执行并不是跳转到“throw”语句之前，而是跳转到异常刚刚被抛出的那个执行点。非常奇怪。

根据我的经验，执行跳转只在解析嵌套命名组的内容时发生

下面代码的背景：我要实现的是一个简单的正则表达式解析器。它不是一个完整的正则表达式解析器；我的需求只是能够在正则表达式中找到特定的命名组，并用其他内容替换其中某些命名组的内容。所以基本上我只是逐字符遍历正则表达式，并记录找到的命名组。我同时也跟踪未命名组和注释，因为我需要正确地进行括号匹配，确保注释中的括号不会破坏括号匹配。另有一段单独的（目前尚未实现的）代码会在完成替换之后，重新构造出包含该正则表达式的字符串。

非常感谢任何关于可能原因的建议；我实在是百思不得其解。

示例解决方案 （TAR格式）包含我在下面讨论的所有代码。我在运行这个解决方案时出现了错误（单元测试项目“TestRegexParserLibTest”作为启动项目）。因为这似乎是一个零星的错误，如果其他人遇到同样的问题，我会很感兴趣

代码：我使用几个简单的类来组织解析结果：

/// <summary>
/// The root of the regex being parsed. It holds the top-level sequence of
/// sub-structures (plain regex text runs and groups) in <see cref="SubStructures"/>.
/// </summary>
public class RegexGroupStructureRoot : ISuperRegexGroupStructure
{
    /// <summary>The direct children of the root.</summary>
    public List<RegexGroupStructure> SubStructures { get; set; }

    /// <summary>Creates a root with an empty, non-null child list.</summary>
    public RegexGroupStructureRoot()
    {
        SubStructures = new List<RegexGroupStructure>();
    }

    // The body of Equals is elided in this excerpt.
    // NOTE(review): no GetHashCode override is visible alongside this Equals override - confirm one exists.
    public override bool Equals(object obj) { ... }
}

/// <summary>
/// Common base type for the items stored in a SubStructures list: either a
/// <see cref="RegexGroupStructureGroup"/> or a <see cref="RegexGroupStructureRegex"/>.
/// Instances are contained within the SubStructures of both
/// <see cref="RegexGroupStructureRoot"/> and <see cref="RegexGroupStructureGroup"/>.
/// The type itself declares no members.
/// </summary>
public abstract class RegexGroupStructure
{
}

/// <summary>
/// A run of text containing regular expression characters (but not groups).
/// </summary>
public class RegexGroupStructureRegex : RegexGroupStructure
{
    /// <summary>The raw regex text of this run.</summary>
    public string Regex { get; set; }

    // The body of Equals is elided in this excerpt.
    // NOTE(review): no GetHashCode override is visible alongside this Equals override - confirm one exists.
    public override bool Equals(object obj) { ... }
}

/// <summary>
/// A regular expression group. It is both a child item (a
/// <see cref="RegexGroupStructure"/>) and a container of further children.
/// </summary>
public class RegexGroupStructureGroup : RegexGroupStructure, ISuperRegexGroupStructure
{
    /// <summary>The group name; <c>null</c> indicates an unnamed group.</summary>
    public string Name { get; set; }

    /// <summary>The direct children of this group.</summary>
    public List<RegexGroupStructure> SubStructures { get; set; }

    /// <summary>Creates a group with an empty, non-null child list and no name set.</summary>
    public RegexGroupStructureGroup()
    {
        SubStructures = new List<RegexGroupStructure>();
    }

    // The body of Equals is elided in this excerpt.
    // NOTE(review): no GetHashCode override is visible alongside this Equals override - confirm one exists.
    public override bool Equals(object obj) { ... }
}

/// <summary>
/// Implemented by items that contain SubStructures: either a
/// <see cref="RegexGroupStructureGroup"/> or a <see cref="RegexGroupStructureRoot"/>.
/// </summary>
interface ISuperRegexGroupStructure
{
    /// <summary>The child structures of this item (read access only through this interface).</summary>
    List<RegexGroupStructure> SubStructures { get; }
}

//我们正在分析的正则表达式的根
公共类RegexGroupStructureRoot:ISuperRegexGroupStructure
{
公共列表子结构{get；set；}
公共RegexGroupStructureRoot（）
{
子结构=新列表（）；
}
公共覆盖布尔等于（对象对象对象）{…}
}
//RegexGroupStructureGroup或RegexGroupStructureRegex
//包含在RegexGroupStructureRoot和RegexGroupStructureGroup的子结构中
公共抽象类RegexGroupStructure
{
}
//包含正则表达式字符（但不包含组）的文本运行
公共类RegexGroupStructureRegex:RegexGroupStructure
{
公共字符串正则表达式{get；set；}
公共覆盖布尔等于（对象对象对象）{…}
}
//正则表达式组
公共类RegexGroupStructureGroup:RegexGroupStructure，ISuperRegexGroupStructure
{
//Name==null表示未命名的组
公共字符串名称{get；set；}
公共列表子结构{get；set；}
公共RegexGroupStructureGroup（）
{
子结构=新列表（）；
}
公共覆盖布尔等于（对象对象对象）{…}
}
//包含子结构的项
//RegexGroupStructureGroup或RegexGroupStructureRoot
接口ISuperRegexGroupStructure
{
列表子结构{get；}
}

这里是我实际解析正则表达式的方法（以及关联的枚举/静态成员），返回一个RegexGroupStructureRoot，其中包含找到的所有命名组、未命名组和其他正则表达式字符

using Re = System.Text.RegularExpressions

/// <summary>
/// The states of the character-by-character parser in Parse.
/// </summary>
enum Mode
{
    // Not inside any group.
    TopLevel,
    // Just encountered the character that begins a group: "(".
    BeginGroup,
    // Just encountered the character controlling the group type, immediately after the start of a group: "?".
    BeginGroupTypeControl,
    // Reading a named group's name; entered after "<" follows the "?" group type control character.
    NamedGroupName,
    // Reading the contents of a named group.
    NamedGroup,
    // Reading the contents of an unnamed group.
    UnnamedGroup,
}

// Pattern matching a single character that is allowed in a named group name:
// ASCII letters, digits and underscore.
static string _NamedGroupNameValidCharRePattern = "[A-Za-z0-9_]";
// Regex built from the pattern above; assigned once in the static constructor.
static Re.Regex _NamedGroupNameValidCharRe;

/// <summary>
/// Static constructor: builds the shared regex used to validate named group name characters.
/// </summary>
static RegexGroupStructureParser()
{
    _NamedGroupNameValidCharRe = new Re.Regex(_NamedGroupNameValidCharRePattern);
}

/// <summary>
/// Scans <paramref name="regex"/> one character at a time and builds a tree of the
/// named groups, unnamed groups and plain regex text runs it contains.
/// </summary>
/// <remarks>
/// Escape sequences (a backslash plus the following character) and end-of-line
/// comments ('#' up to and including the next <see cref="Environment.NewLine"/>)
/// are copied into the current text run verbatim, so parentheses inside them do
/// not affect group matching.
/// </remarks>
/// <param name="regex">The regular expression text to scan. Must not be null.</param>
/// <returns>The root structure holding everything that was found.</returns>
/// <exception cref="ArgumentNullException"><paramref name="regex"/> is null.</exception>
/// <exception cref="InvalidRegexFormatException">
/// A ')' appears at the top level, a named group has an empty name, or a named
/// group name contains a character other than [A-Za-z0-9_].
/// </exception>
public static RegexGroupStructureRoot Parse(string regex)
{
    if (regex == null)
        throw new ArgumentNullException("regex");

    string newLine = Environment.NewLine;

    // A record of the parent structures that the parser has created
    Stack<ISuperRegexGroupStructure> parentStructures = new Stack<ISuperRegexGroupStructure>();

    // The current text we've encountered
    StringBuilder textConsumer = new StringBuilder();

    // Whether the parser is in an escape sequence
    bool escaped = false;

    // Whether the parser is in an end-of-line comment (such comments run from a hash-sign ('#') to the end of the line)
    //  The other type of .NET regular expression comment is the group-comment: (?#This is a comment)
    //   We do not need to specially handle this type of comment since it is treated like an unnamed
    //   group.
    bool commented = false;

    // The current mode of the parsing process
    Mode mode = Mode.TopLevel;

    // Push a root onto the parents to accept whatever regexes/groups we encounter
    parentStructures.Push(new RegexGroupStructureRoot());

    // A string enumerates its chars directly; no intermediate array copy is needed.
    foreach (char chr in regex)
    {
        if (escaped)
        {
            // The character following a backslash is always taken literally.
            textConsumer.Append(chr);
            escaped = false;
        }
        else if (chr == '#')
        {
            textConsumer.Append(chr);
            commented = true;
        }
        else if (commented)
        {
            textConsumer.Append(chr);

            // The comment ends once the consumed text ends with a NewLine.
            // EndsWith checks exactly the last newLine.Length characters and is safe
            // when the text is shorter than the newline sequence.
            // NOTE(review): this only recognises Environment.NewLine; input that uses a
            // bare "\n" on a "\r\n" platform would never leave comment mode - confirm
            // which line endings callers pass in.
            if (textConsumer.ToString().EndsWith(newLine, StringComparison.Ordinal))
            {
                commented = false;
            }
        }
        else
        {
            switch (mode)
            {
                case Mode.TopLevel:
                    switch (chr)
                    {
                        case '\\':
                            textConsumer.Append(chr); // Append the backslash
                            escaped = true;
                            break;
                        case '(':
                            beginNewGroup(parentStructures, ref textConsumer, ref mode);
                            break;
                        case ')':
                            // Can't close a group if we're already at the top-level
                            throw new InvalidRegexFormatException("Too many ')'s.");
                        default:
                            textConsumer.Append(chr);
                            break;
                    }
                    break;

                case Mode.BeginGroup:
                    switch (chr)
                    {
                        case '?':
                            // If it's an unnamed group, we'll re-add the question mark.
                            // If it's a named group, named groups reconstruct question marks so no need to add it.
                            mode = Mode.BeginGroupTypeControl;
                            break;
                        default:
                            // Only a '?' can begin a named group.  So anything else begins an unnamed group.
                            // NOTE(review): chr itself is not appended or re-dispatched here, so the
                            // first character of an unnamed group appears to be dropped, and the new
                            // group is pushed without being added to its parent's SubStructures.
                            // Behavior left unchanged because beginNewGroup/closeGroup are not
                            // visible from here - confirm against those helpers.

                            parentStructures.Peek().SubStructures.Add(new RegexGroupStructureRegex()
                            {
                                Regex = textConsumer.ToString()
                            });
                            textConsumer = new StringBuilder();

                            parentStructures.Push(new RegexGroupStructureGroup()
                            {
                                Name = null, // null indicates an unnamed group
                                SubStructures = new List<RegexGroupStructure>()
                            });

                            mode = Mode.UnnamedGroup;
                            break;
                    }
                    break;

                case Mode.BeginGroupTypeControl:
                    switch (chr)
                    {
                        case '<':
                            mode = Mode.NamedGroupName;
                            break;

                        default:
                            // We previously read a question mark to get here, but the group turned out not to be a named group
                            // So add back in the question mark, since unnamed groups don't reconstruct with question marks.
                            // Append the two characters separately: '?' + chr is integer addition in C#
                            // and would append a number instead of the characters.
                            // NOTE(review): no group is pushed onto parentStructures on this path,
                            // unlike the BeginGroup default path - confirm against beginNewGroup/closeGroup.
                            textConsumer.Append('?').Append(chr);
                            mode = Mode.UnnamedGroup;
                            break;
                    }
                    break;

                case Mode.NamedGroupName:
                    if (chr == '>')
                    {
                        // '>' closes the named group name.  So extract the name
                        string namedGroupName = textConsumer.ToString();

                        if (namedGroupName == String.Empty)
                            throw new InvalidRegexFormatException("Named group names cannot be empty.");

                        // Create the new named group
                        RegexGroupStructureGroup newNamedGroup = new RegexGroupStructureGroup() {
                            Name = namedGroupName,
                            SubStructures = new List<RegexGroupStructure>()
                        };

                        // Add this group to the current parent
                        parentStructures.Peek().SubStructures.Add(newNamedGroup);
                        // ...and make it the new parent.
                        parentStructures.Push(newNamedGroup);

                        textConsumer = new StringBuilder();

                        mode = Mode.NamedGroup;
                    }
                    else if (_NamedGroupNameValidCharRe.IsMatch(chr.ToString()))
                    {
                        // Append any valid named group name char to the growing named group name
                        textConsumer.Append(chr);
                    }
                    else
                    {
                        // chr is neither a valid named group name character, nor the character that closes the named group name (">").  Error.
                        throw new InvalidRegexFormatException(String.Format("Invalid named group name character: {0}", chr));
                    }
                    break;

                case Mode.NamedGroup:
                case Mode.UnnamedGroup:
                    switch (chr)
                    {
                        case '\\':
                            textConsumer.Append(chr);
                            escaped = true;
                            break;
                        case ')':
                            closeGroup(parentStructures, ref textConsumer, ref mode);
                            break;
                        case '(':
                            beginNewGroup(parentStructures, ref textConsumer, ref mode);
                            break;
                        default:
                            textConsumer.Append(chr);
                            break;
                    }
                    break;

                default:
                    throw new Exception("Exhausted Modes");
            }
        }
    }

    // After the scan only the root should remain on the stack.
    ISuperRegexGroupStructure finalParent = parentStructures.Pop();
    Debug.Assert(parentStructures.Count < 1, "Left parent structures on the stack.");
    Debug.Assert(finalParent.GetType().Equals(typeof(RegexGroupStructureRoot)), "The final parent must be a RegexGroupStructureRoot");

    // Flush any trailing text as a final regex run.
    string finalRegex = textConsumer.ToString();
    if (!String.IsNullOrEmpty(finalRegex))
        finalParent.SubStructures.Add(new RegexGroupStructureRegex() {
            Regex = finalRegex
        });

    return finalParent as RegexGroupStructureRoot;
}

使用Re=System.Text.regular表达式
枚举模式
{
TopLevel，//不在任何组中
BeginGroup，//刚遇到一个以组开头的字符：（“
BeginGroupTypeControl，//刚遇到一个字符控制组类型，就在组开始之后：“？”
NamedGroupName，//正在读取命名组名（必须在组类型控制字符“”之后遇到一个指示命名组类型的字符）。错误。
抛出新的InvalidRegexFormatException（String.Format（“无效的命名组名字符：{0}”，chr））；//异常
}
break；//跳跃
case Mode.NamedGroup：
案例模式。未命名组：
开关（chr）//跳转
{
案例“\\”：
textConsumer.Append（chr）；
逃逸=真；
打破
案例“）”：
closeGroup（父结构、引用文本使用者、引用模式）；
打破
格“（”：
beginNewGroup（父结构、引用文本使用者、引用模式）；
打破
违约：
textConsumer.Append（chr）；
打破
}
打破
违约：
抛出新异常（“耗尽模式”）；
}
}//跳跃
}
ISuperRegexGroupStructure finalParent=parentStructures.Pop（）；
Assert（parentStructures.Count<1，“堆栈上的左父结构”）；
Assert（finalParent.GetType（）.Equals（typeof（RegexGroupStructureRoot）），“最终父级必须是RegexGroupStructureRoot”）；
字符串finalRegex=textConsumer.ToString（）；
如果（！String.IsNullOrEmpty（finalRegex））
鳍
/// <summary>
/// Parses a short pattern containing a named group with a nested named group and an
/// end-of-line comment, and compares the result with a hand-built expected structure.
/// </summary>
[TestMethod]
public void ParseTest_Short()
{
    string regex = @"
        (?<Group1>
            ,?\s+
            (?<Group1_SubGroup>
                [\d–-]+             # One or more digits, hyphen, and/or n-dash
            )            
        )
    ";

    RegexGroupStructureRoot expected = new RegexGroupStructureRoot()
    {
        SubStructures = new List<RegexGroupStructure>()
        {
            new RegexGroupStructureGroup() {
                Name = "Group1", 
                SubStructures = new List<RegexGroupStructure> {
                    new RegexGroupStructureRegex() {
                        Regex = @"
            ,?\s+
            "
                    }, 
                    new RegexGroupStructureGroup() {
                        // Must match the group name in the pattern above exactly, including case.
                        Name = "Group1_SubGroup", 
                        SubStructures = new List<RegexGroupStructure>() {
                            new RegexGroupStructureRegex() {
                                Regex = @"
                [\d–-]+             # One or more digits, hyphen, and/or n-dash
            "
                            }
                        }
                    }, 
                    new RegexGroupStructureRegex() {
                        Regex = @"            
        "
                    }
                }
            }, 
            new RegexGroupStructureRegex() {
                Regex = @"
        "
            }, 
        }
    };

    RegexGroupStructureRoot actual = RegexGroupStructureParser.Parse(regex);

    Assert.AreEqual(expected, actual);
}

case 1:
   do something
   break;
case 2:
   throw ... //No break required.
case 3: