C# 如何在 C# 中只在第一级解析嵌套括号
我想编写 C# 代码,将嵌套括号解析为数组元素,但只在第一级。举个例子,我想要把这个字符串:
"(example (to (parsing nested paren) but) (first lvl only))"
可以将tp解析为:
["example", "(to (parsing nested paren) but)", "(first lvl only)"]
我曾考虑过使用regex,但如果不从头开始实现这种行为,就无法正确地使用它们
在输入格式错误的情况下,我希望返回一个空数组,或者一个数组 ["error"]。

好的,下面的 regex 可以完成这项工作:
var text = @"(example (to (parsing nested paren) but) (first lvl only))";
var pattern = @"\(([\w\s]+)(\([\w\s]+\([\w\s]+\)[\w\s]+\))(\([\w\s]+\))\)";
try
{
    Regex r = new Regex(pattern, RegexOptions.IgnoreCase);
    Match m = r.Match(text);
    string group_1 = m.Groups[1].Value; // example
    string group_2 = m.Groups[2].Value; // (to (parsing nested paren) but)
    string group_3 = m.Groups[3].Value; // (first lvl only)
    return new string[] { group_1, group_2, group_3 };
}
catch (Exception ex)
{
    return new string[] { "error" };
}
希望这有帮助。可以在这里测试。
编辑:
这可能会让您开始根据所遇到的任何模式构建正确的表达式,并可能构建一个递归函数来将其余部分解析为所需的输出 :)

RegEx 不是递归的,所以要么计算括号层级,要么使用递归。下面是我针对您给出的示例测试过的非递归解析器循环:
// Splits the first-level parenthesized terms of s and returns them as a
// JSON-like list string, e.g. "[\"a \",\"(b c)\"]". Groups nested deeper
// than level 1 are kept verbatim inside their enclosing term.
string SplitFirstLevel(string s)
{
    var terms = new List<string>();
    int start = 0; // start index of the term currently being collected
    int depth = 0; // current parenthesis nesting depth

    for (int i = 0; i < s.Length; i++)
    {
        char c = s[i];
        if (c == '(')
        {
            depth++;
            if (depth == 1)
            {
                // skip the outermost '(' itself
                start = i + 1;
            }
            else if (depth == 2)
            {
                // level-1 text gathered so far becomes its own term;
                // the nested group starting here is the next term
                terms.Add('"' + s.Substring(start, i - start) + '"');
                start = i;
            }
        }
        if (c == ')' && --depth == 0)
        {
            // the outermost ')' closes the final term
            terms.Add('"' + s.Substring(start, i - start) + '"');
        }
    }

    return "[" + String.Join(",", terms) + "]";
}
及
在这两种情况下,"example" 都会得到一个单独的词项,而 "but" 则与第一个词项归为一组。在第一个例子中这是合乎逻辑的,因为它在括号内;但在第二个例子中,这可能是不需要的行为——"but" 或许应该像 "example" 一样单独分开,因为它同样没有括号 (?)

我为您的例子开发了一个解析器,并且还检查了代码中可以看到的其他一些示例:
using System;
using System.Collections;
using System.Collections.Generic;
// Demo driver: parses one of the sample inputs and prints the result.
public class Program
{
    public static void Main()
    {
        // Pick one input; the expected output is shown in the trailing comment.
        string str = "(example (to (parsing nested paren) but) (first lvl only))"; // => [example , (to (parsing nested paren) but) , (first lvl only)]
        //string str = "(first)(second)(third)"; // => [first , second , third]
        //string str = "(first(second)third)"; // => [first , (second) , third]
        //string str = "(first(second)(third)fourth)"; // => [first , (second) , (third) , fourth]
        //string str = "(first((second)(third))fourth)"; // => [first , ((second)(third)) , fourth]
        //string str = "just Text"; // => [ERROR]
        //string str = "start with Text (first , second)"; // => [ERROR]
        //string str = "(first , second) end with text"; // => [ERROR]
        //string str = ""; // => [ERROR]
        //string str = "("; // => [ERROR]
        //string str = "(first()(second)(third))fourth)"; // => [ERROR]
        //string str = "(((extra close pareanthese))))"; // => [ERROR]
        showRes(Parser.parse(str));
    }

    // Renders the parse result as "[a , b , c]" on standard output.
    static void showRes(ArrayList res)
    {
        Console.WriteLine("[" + string.Join(" , ", res.ToArray()) + "]");
    }
}
// Splits the first-level terms of a fully parenthesized expression.
public class Parser
{
    // Allowed-predecessor table: for each token type, the bitmask of token
    // types that may legally appear immediately before it. Built once —
    // the original rebuilt this Dictionary on every isValid() call.
    static readonly Dictionary<TokenType, TokenType> _rules = getRules();

    static Dictionary<TokenType, TokenType> getRules()
    {
        var rules = new Dictionary<TokenType, TokenType>();
        rules.Add(TokenType.OPEN_PARENTHESE, TokenType.START | TokenType.OPEN_PARENTHESE | TokenType.CLOSE_PARENTHESE | TokenType.SIMPLE_TEXT);
        rules.Add(TokenType.CLOSE_PARENTHESE, TokenType.SIMPLE_TEXT | TokenType.CLOSE_PARENTHESE);
        rules.Add(TokenType.SIMPLE_TEXT, TokenType.SIMPLE_TEXT | TokenType.CLOSE_PARENTHESE | TokenType.OPEN_PARENTHESE);
        rules.Add(TokenType.END, TokenType.CLOSE_PARENTHESE);
        return rules;
    }

    // True when cur may follow prev according to the rule table.
    static bool isValid(Token prev, Token cur)
    {
        return _rules.ContainsKey(cur.type) && ((prev.type & _rules[cur.type]) == prev.type);
    }

    // Parses sourceText into an ArrayList of first-level term strings;
    // nested groups are kept verbatim inside their term. Returns the
    // single-element list ["ERROR"] for malformed input (unbalanced
    // parentheses, text outside the outer pair, empty input, ...).
    public static ArrayList parse(string sourceText)
    {
        ArrayList result = new ArrayList();
        int openParenthesesCount = 0; // current nesting depth
        Lexer lexer = new Lexer(sourceText);
        Token prevToken = lexer.getStartToken();
        Token currentToken = lexer.readNextToken();
        string tmpText = ""; // first-level term currently being collected
        while (currentToken.type != TokenType.END)
        {
            if (currentToken.type == TokenType.OPEN_PARENTHESE)
            {
                openParenthesesCount++;
                // the outermost '(' is not part of any term
                if (openParenthesesCount > 1)
                {
                    tmpText += currentToken.token;
                }
            }
            else if (currentToken.type == TokenType.CLOSE_PARENTHESE)
            {
                openParenthesesCount--;
                if (openParenthesesCount < 0)
                {
                    // more ')' than '('
                    return Parser.Error();
                }
                // the outermost ')' is not part of any term
                if (openParenthesesCount > 0)
                {
                    tmpText += currentToken.token;
                }
            }
            else if (currentToken.type == TokenType.SIMPLE_TEXT)
            {
                tmpText += currentToken.token;
            }
            if (!Parser.isValid(prevToken, currentToken))
            {
                return Parser.Error();
            }
            // back at depth 1 means a complete first-level term was read
            if (openParenthesesCount == 1 && tmpText.Trim() != "")
            {
                result.Add(tmpText);
                tmpText = "";
            }
            prevToken = currentToken;
            currentToken = lexer.readNextToken();
        }
        if (openParenthesesCount != 0)
        {
            // unbalanced: some '(' was never closed
            return Parser.Error();
        }
        if (!Parser.isValid(prevToken, currentToken))
        {
            // END must be preceded by the outermost ')'
            return Parser.Error();
        }
        if (tmpText.Trim() != "")
        {
            result.Add(tmpText);
        }
        return result;
    }

    // Single-element ["ERROR"] result used for all malformed inputs.
    static ArrayList Error()
    {
        var er = new ArrayList();
        er.Add("ERROR");
        return er;
    }
}
// Turns the source text into a stream of parenthesis and text tokens.
class Lexer
{
    string _txt;
    int _index;

    public Lexer(string text)
    {
        _txt = text;
        _index = 0;
    }

    // Sentinel token emitted before any input is consumed.
    public Token getStartToken()
    {
        return new Token(-1, TokenType.START, "");
    }

    // Returns the next token, or an END token once the input is exhausted.
    public Token readNextToken()
    {
        if (_index >= _txt.Length)
        {
            return new Token(-1, TokenType.END, "");
        }

        char c = _txt[_index];
        Token t;
        if (c == '(')
        {
            t = new Token(_index, TokenType.OPEN_PARENTHESE, "(");
        }
        else if (c == ')')
        {
            t = new Token(_index, TokenType.CLOSE_PARENTHESE, ")");
        }
        else
        {
            t = new Token(_index, TokenType.SIMPLE_TEXT, _readText());
        }

        _index += t.token.Length;
        return t;
    }

    // Consumes characters up to the next parenthesis or the end of input.
    private string _readText()
    {
        int end = _index;
        while (end < _txt.Length && _txt[end] != '(' && _txt[end] != ')')
        {
            end++;
        }
        return _txt.Substring(_index, end - _index);
    }
}
// Immutable token produced by the Lexer: where it was found, which kind
// it is, and the exact text it covers.
class Token
{
    public int position { get; }      // index in the source text, -1 for START/END
    public TokenType type { get; }    // classification of this token
    public string token { get; }      // literal text ("" for START/END)

    public Token(int position, TokenType type, string token)
    {
        this.position = position;
        this.type = type;
        this.token = token;
    }
}
// Token categories. Declared as [Flags] powers of two so the parser's
// rule table can OR several types into one allowed-predecessor bitmask.
[Flags]
enum TokenType
{
    START = 1,            // synthetic token before any input
    OPEN_PARENTHESE = 2,  // '('
    SIMPLE_TEXT = 4,      // run of characters between parentheses
    CLOSE_PARENTHESE = 8, // ')'
    END = 16              // synthetic token after the last character
}
using System;
using System.Collections;
using System.Collections.Generic;
public class Program
{
    public static void Main()
    {
        string str = "(example (to (parsing nested paren) but) (first lvl only))"; // => [example , (to (parsing nested paren) but) , (first lvl only)]
        //string str = "(first)(second)(third)"; // => [first , second , third]
        //string str = "(first(second)third)"; // => [first , (second) , third]
        //string str = "(first(second)(third)fourth)"; // => [first , (second) , (third) , fourth]
        //string str = "(first((second)(third))fourth)"; // => [first , ((second)(third)) , fourth]
        //string str = "just Text"; // => [ERROR]
        //string str = "start with Text (first , second)"; // => [ERROR]
        //string str = "(first , second) end with text"; // => [ERROR]
        //string str = ""; // => [ERROR]
        //string str = "("; // => [ERROR]
        //string str = "(first()(second)(third))fourth)"; // => [ERROR]
        //string str = "(((extra close pareanthese))))"; // => [ERROR]
        var res = Parser.parse(str);
        showRes(res);
    }
    static void showRes(ArrayList res)
    {
        var strings = res.ToArray();
        var theString = string.Join(" , ", strings);
        Console.WriteLine("[" + theString + "]");
    }
}
public class Parser
{
    static Dictionary<TokenType, TokenType> getRules()
    {
        var rules = new Dictionary<TokenType, TokenType>();
        rules.Add(TokenType.OPEN_PARENTHESE, TokenType.START | TokenType.OPEN_PARENTHESE | TokenType.CLOSE_PARENTHESE | TokenType.SIMPLE_TEXT);
        rules.Add(TokenType.CLOSE_PARENTHESE, TokenType.SIMPLE_TEXT | TokenType.CLOSE_PARENTHESE);
        rules.Add(TokenType.SIMPLE_TEXT, TokenType.SIMPLE_TEXT | TokenType.CLOSE_PARENTHESE | TokenType.OPEN_PARENTHESE);
        rules.Add(TokenType.END, TokenType.CLOSE_PARENTHESE);
        return rules;
    }
    static bool isValid(Token prev, Token cur)
    {
        var rules = Parser.getRules();
        return rules.ContainsKey(cur.type) && ((prev.type & rules[cur.type]) == prev.type);
    }
    public static ArrayList parse(string sourceText)
    {
        ArrayList result = new ArrayList();
        int openParenthesesCount = 0;
        Lexer lexer = new Lexer(sourceText);
        Token prevToken = lexer.getStartToken();
        Token currentToken = lexer.readNextToken();
        string tmpText = "";
        while (currentToken.type != TokenType.END)
        {
            if (currentToken.type == TokenType.OPEN_PARENTHESE)
            {
                openParenthesesCount++;
                if (openParenthesesCount > 1)
                {
                    tmpText += currentToken.token;
                }
            }
            else if (currentToken.type == TokenType.CLOSE_PARENTHESE)
            {
                openParenthesesCount--;
                if (openParenthesesCount < 0)
                {
                    return Parser.Error();
                }
                if (openParenthesesCount > 0)
                {
                    tmpText += currentToken.token;
                }
            }
            else if (currentToken.type == TokenType.SIMPLE_TEXT)
            {
                tmpText += currentToken.token;
            }
            if (!Parser.isValid(prevToken, currentToken))
            {
                return Parser.Error();
            }
            if (openParenthesesCount == 1 && tmpText.Trim() != "")
            {
                result.Add(tmpText);
                tmpText = "";
            }
            prevToken = currentToken;
            currentToken = lexer.readNextToken();
        }
        if (openParenthesesCount != 0)
        {
            return Parser.Error();
        }
        if (!Parser.isValid(prevToken, currentToken))
        {
            return Parser.Error();
        }
        if (tmpText.Trim() != "")
        {
            result.Add(tmpText);
        }
        return result;
    }
    static ArrayList Error()
    {
        var er = new ArrayList();
        er.Add("ERROR");
        return er;
    }
}
(example (to (parsing nested paren)) but (first lvl only))
to:
["example ","(to (parsing nested paren)) but ","(first lvl only)"]
using System;
using System.Collections;
using System.Collections.Generic;
// Demo driver: parses one of the sample inputs and prints the result.
public class Program
{
    public static void Main()
    {
        // Pick one input; the expected output is shown in the trailing comment.
        string str = "(example (to (parsing nested paren) but) (first lvl only))"; // => [example , (to (parsing nested paren) but) , (first lvl only)]
        //string str = "(first)(second)(third)"; // => [first , second , third]
        //string str = "(first(second)third)"; // => [first , (second) , third]
        //string str = "(first(second)(third)fourth)"; // => [first , (second) , (third) , fourth]
        //string str = "(first((second)(third))fourth)"; // => [first , ((second)(third)) , fourth]
        //string str = "just Text"; // => [ERROR]
        //string str = "start with Text (first , second)"; // => [ERROR]
        //string str = "(first , second) end with text"; // => [ERROR]
        //string str = ""; // => [ERROR]
        //string str = "("; // => [ERROR]
        //string str = "(first()(second)(third))fourth)"; // => [ERROR]
        //string str = "(((extra close pareanthese))))"; // => [ERROR]
        showRes(Parser.parse(str));
    }

    // Renders the parse result as "[a , b , c]" on standard output.
    static void showRes(ArrayList res)
    {
        Console.WriteLine("[" + string.Join(" , ", res.ToArray()) + "]");
    }
}
// Splits the first-level terms of a fully parenthesized expression.
public class Parser
{
    // Allowed-predecessor table: for each token type, the bitmask of token
    // types that may legally appear immediately before it. Built once —
    // the original rebuilt this Dictionary on every isValid() call.
    static readonly Dictionary<TokenType, TokenType> _rules = getRules();

    static Dictionary<TokenType, TokenType> getRules()
    {
        var rules = new Dictionary<TokenType, TokenType>();
        rules.Add(TokenType.OPEN_PARENTHESE, TokenType.START | TokenType.OPEN_PARENTHESE | TokenType.CLOSE_PARENTHESE | TokenType.SIMPLE_TEXT);
        rules.Add(TokenType.CLOSE_PARENTHESE, TokenType.SIMPLE_TEXT | TokenType.CLOSE_PARENTHESE);
        rules.Add(TokenType.SIMPLE_TEXT, TokenType.SIMPLE_TEXT | TokenType.CLOSE_PARENTHESE | TokenType.OPEN_PARENTHESE);
        rules.Add(TokenType.END, TokenType.CLOSE_PARENTHESE);
        return rules;
    }

    // True when cur may follow prev according to the rule table.
    static bool isValid(Token prev, Token cur)
    {
        return _rules.ContainsKey(cur.type) && ((prev.type & _rules[cur.type]) == prev.type);
    }

    // Parses sourceText into an ArrayList of first-level term strings;
    // nested groups are kept verbatim inside their term. Returns the
    // single-element list ["ERROR"] for malformed input (unbalanced
    // parentheses, text outside the outer pair, empty input, ...).
    public static ArrayList parse(string sourceText)
    {
        ArrayList result = new ArrayList();
        int openParenthesesCount = 0; // current nesting depth
        Lexer lexer = new Lexer(sourceText);
        Token prevToken = lexer.getStartToken();
        Token currentToken = lexer.readNextToken();
        string tmpText = ""; // first-level term currently being collected
        while (currentToken.type != TokenType.END)
        {
            if (currentToken.type == TokenType.OPEN_PARENTHESE)
            {
                openParenthesesCount++;
                // the outermost '(' is not part of any term
                if (openParenthesesCount > 1)
                {
                    tmpText += currentToken.token;
                }
            }
            else if (currentToken.type == TokenType.CLOSE_PARENTHESE)
            {
                openParenthesesCount--;
                if (openParenthesesCount < 0)
                {
                    // more ')' than '('
                    return Parser.Error();
                }
                // the outermost ')' is not part of any term
                if (openParenthesesCount > 0)
                {
                    tmpText += currentToken.token;
                }
            }
            else if (currentToken.type == TokenType.SIMPLE_TEXT)
            {
                tmpText += currentToken.token;
            }
            if (!Parser.isValid(prevToken, currentToken))
            {
                return Parser.Error();
            }
            // back at depth 1 means a complete first-level term was read
            if (openParenthesesCount == 1 && tmpText.Trim() != "")
            {
                result.Add(tmpText);
                tmpText = "";
            }
            prevToken = currentToken;
            currentToken = lexer.readNextToken();
        }
        if (openParenthesesCount != 0)
        {
            // unbalanced: some '(' was never closed
            return Parser.Error();
        }
        if (!Parser.isValid(prevToken, currentToken))
        {
            // END must be preceded by the outermost ')'
            return Parser.Error();
        }
        if (tmpText.Trim() != "")
        {
            result.Add(tmpText);
        }
        return result;
    }

    // Single-element ["ERROR"] result used for all malformed inputs.
    static ArrayList Error()
    {
        var er = new ArrayList();
        er.Add("ERROR");
        return er;
    }
}
// Turns the source text into a stream of parenthesis and text tokens.
class Lexer
{
    string _txt;
    int _index;

    public Lexer(string text)
    {
        _txt = text;
        _index = 0;
    }

    // Sentinel token emitted before any input is consumed.
    public Token getStartToken()
    {
        return new Token(-1, TokenType.START, "");
    }

    // Returns the next token, or an END token once the input is exhausted.
    public Token readNextToken()
    {
        if (_index >= _txt.Length)
        {
            return new Token(-1, TokenType.END, "");
        }

        char c = _txt[_index];
        Token t;
        if (c == '(')
        {
            t = new Token(_index, TokenType.OPEN_PARENTHESE, "(");
        }
        else if (c == ')')
        {
            t = new Token(_index, TokenType.CLOSE_PARENTHESE, ")");
        }
        else
        {
            t = new Token(_index, TokenType.SIMPLE_TEXT, _readText());
        }

        _index += t.token.Length;
        return t;
    }

    // Consumes characters up to the next parenthesis or the end of input.
    private string _readText()
    {
        int end = _index;
        while (end < _txt.Length && _txt[end] != '(' && _txt[end] != ')')
        {
            end++;
        }
        return _txt.Substring(_index, end - _index);
    }
}
// Immutable token produced by the Lexer: where it was found, which kind
// it is, and the exact text it covers.
class Token
{
    public int position { get; }      // index in the source text, -1 for START/END
    public TokenType type { get; }    // classification of this token
    public string token { get; }      // literal text ("" for START/END)

    public Token(int position, TokenType type, string token)
    {
        this.position = position;
        this.type = type;
        this.token = token;
    }
}
// Token categories. Declared as [Flags] powers of two so the parser's
// rule table can OR several types into one allowed-predecessor bitmask.
[Flags]
enum TokenType
{
    START = 1,            // synthetic token before any input
    OPEN_PARENTHESE = 2,  // '('
    SIMPLE_TEXT = 4,      // run of characters between parentheses
    CLOSE_PARENTHESE = 8, // ')'
    END = 16              // synthetic token after the last character
}