C# 在C#中解析具有已知模式的Lisp S表达式
我正在使用一个服务,该服务以类似Lisp的S表达式字符串的形式提供数据。这些数据到达得又快又密集,我希望尽可能快地处理它们,最好是直接在字节流上(只有单字节字符)处理,而不需要任何回溯。这些字符串可能相当长,我不希望为整个消息分配一个字符串而给GC带来负担。 我当前的实现使用带有语法的CoCo/R,但它有一些问题。由于回溯,它将整个流分配给一个字符串。如果我的代码的用户不得不更改它的话,这也有点麻烦。我想要一个纯C#的解决方案。CoCo/R也不允许重用解析器/扫描器对象,因此我必须为每条消息重新创建它们。 从概念上讲,数据流可以看作是一系列S表达式:C# 在C#中解析具有已知模式的Lisp S表达式,c#,.net,parsing,s-expression,C#,.net,Parsing,S Expression,我正在使用一个服务,该服务以类似Lisp的S表达式字符串的形式提供数据。这些数据到达得又快又密集,我希望尽可能快地处理它们,最好是直接在字节流上(只有单字节字符)处理,而不需要任何回溯。这些字符串可能相当长,我不希望为整个消息分配一个字符串而给GC带来负担。 我当前的实现使用带有语法的CoCo/R,但它有一些问题。由于回溯,它将整个流分配给一个字符串。如果我的代码的用户不得不更改它的话,这也有点麻烦。我想要一个纯C#的解决方案。CoCo/R也不允许重用解析器/扫描器对象,因此我必须为每条消息重新创建它们。 从概念上讲,数
(item 1 apple)(item 2 banana)(item 3 chainsaw)
解析此序列将创建三个对象。每个对象的类型可以通过列表中的第一个值来确定,在上述情况下为“item”。传入流的模式/语法是事先已知的
在开始编码之前,我想知道是否有库已经在做这件事了。我肯定我不是第一个有这个问题的人
编辑:这里补充一些关于我的需求的细节,因为我认为最初的问题可能有点模糊。给定一些S表达式,例如:
(Hear 12.3 HelloWorld)
(HJ LAJ1 -0.42)
(FRP lf (pos 2.3 1.7 0.4))
我想要一个与此等效的对象列表:
{
new HearPerceptorState(12.3, "HelloWorld"),
new HingeJointState("LAJ1", -0.42),
new ForceResistancePerceptorState("lf", new Polar(2.3, 1.7, 0.4))
}
我正在处理的实际数据集是一个。我可能还需要反序列化。考虑使用。它是一个状态机编译器,可以生成相当快的代码
从主页上可能看不出这一点,但Ragel确实有C#支持。
这是一个如何在C#中使用它的简单示例,请查看和
或者,您可以简单地将S表达式转换为XML,让.NET来完成其余的工作。Drew,也许您应该为问题添加一些上下文,否则这个答案对其他用户没有意义,但请尝试以下方法:
CHARACTERS
letter = 'A'..'Z' + 'a'..'z' .
digit = "0123456789" .
messageChar = '\u0020'..'\u007e' - ' ' - '(' - ')' .
TOKENS
double = ['-'] digit { digit } [ '.' digit { digit } ] .
ident = letter { letter | digit | '_' } .
message = messageChar { messageChar } CONTEXT (")") .
哦,我必须指出,'\u0020' 就是Unicode空格字符,而您随后又用 "- ' '" 把它去掉了。哦,如果您不需要多个字符的前瞻,那么可以使用 CONTEXT (')')
FWIW:CONTEXT 不会消耗其中包含的序列,您仍然必须在产生式中消耗它
编辑:
好的,这似乎有效。真的,这次我是认真的:)
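CHARACTERS
letter = 'A'..'Z' + 'a'..'z' .
digit = "0123456789" .
// messageChar = '\u0020'..'\u007e' - ' ' - '(' - ')' .
TOKENS
double = ['-'] digit { digit } [ '.' digit { digit } ] .
ident = letter { letter | digit | '_' } .
// message = letter { messageChar } CONTEXT (')') .
// MessageText = message (. m = t.val; .)
// .
HearExpr = (. TimeSpan time; Angle direction = Angle.NaN; string messageText; .)
"(hear"
TimeSpan
( "self" | AngleInDegrees )
// MessageText // REMOVED
{ ANY } (. messageText = t.val; .) // MOD
')' (. message = new HeardMessage(time, direction, new Message(messageText)); .)
.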
在我看来,解析生成器没有必要解析仅由列表、数字和符号组成的简单S表达式。手工编写的递归下降解析器可能更简单,至少也同样快。一般模式如下所示(在java中,c#应该非常类似):
我用C#编写了一个S表达式解析器。它可以解析您在示例中给出的S表达式,您只需要向解析器添加对十进制数的支持
该代码可以在github上找到,并提供了一篇相关文章。作为替代方案,我建议看一看同样使用OMeta#编写的.NET YAML解析器。这里有一个相对简单(并且希望易于扩展)的解决方案:
/// <summary>
/// Callback attached to a <see cref="Token"/>. The lexer calls it with the token whose
/// pattern matched and the matched text, and uses the returned object as the token's value.
/// </summary>
/// <param name="token">The token definition whose regular expression matched.</param>
/// <param name="match">The text that was matched.</param>
/// <returns>The value to associate with the matched text (may be null).</returns>
public delegate object Acceptor(Token token, string match);
/// <summary>
/// A named atom identified by a string id.
/// </summary>
public class Symbol
{
    /// <summary>
    /// Creates a symbol with the given id. When <paramref name="id"/> is null, a freshly
    /// generated GUID (formatted with the "P" specifier) is used instead.
    /// </summary>
    /// <param name="id">The symbol's identifier, or null to generate one.</param>
    public Symbol(string id)
    {
        if (id == null)
        {
            id = Guid.NewGuid().ToString("P");
        }

        Id = id;
    }

    /// <summary>The identifier assigned at construction time.</summary>
    public string Id { get; private set; }

    /// <summary>Returns <see cref="Id"/>.</summary>
    public override string ToString()
    {
        return Id;
    }
}
/// <summary>
/// A lexical token definition: a <see cref="Symbol"/> whose id is its pattern, together with
/// a compiled regular expression anchored at the start of the input and an
/// <see cref="Acceptor"/> that turns matched text into a value.
/// </summary>
public class Token : Symbol
{
    /// <summary>Creates a pattern-less token carrying only an id.</summary>
    internal Token(string id) : base(id) { }

    /// <summary>
    /// Creates a token from a regular-expression pattern and a value callback.
    /// A null or empty pattern falls back to ".*".
    /// </summary>
    /// <param name="pattern">Regular expression source; wrapped as <c>^(pattern)</c>.</param>
    /// <param name="acceptor">Callback stored in <see cref="ValueOf"/>.</param>
    public Token(string pattern, Acceptor acceptor)
        : base(pattern)
    {
        // Pattern is recorded before the regex is built, so it is set even if the
        // Regex constructor rejects the expression.
        Pattern = pattern;

        var expression = string.IsNullOrEmpty(Pattern) ? ".*" : Pattern;
        Regex = new Regex("^(" + expression + ")", RegexOptions.Compiled);
        ValueOf = acceptor;
    }

    /// <summary>The pattern exactly as passed to the constructor (may be null or empty).</summary>
    public string Pattern { get; private set; }

    /// <summary>The compiled, start-anchored regular expression for this token.</summary>
    public Regex Regex { get; private set; }

    /// <summary>The callback that converts matched text into a value.</summary>
    public Acceptor ValueOf { get; private set; }
}
/// <summary>
/// Regex-driven lexer plus recursive-descent reader for S-expressions.
/// Lists are returned as <c>object[]</c>, quoted forms as a two-element array
/// (quote token value, quoted datum), and every other token as the value produced by
/// its <see cref="Acceptor"/>. Only whitespace, parentheses and the quote character are
/// known out of the box; further tokens are registered with <see cref="Include"/>.
/// </summary>
public class SExpressionSyntax
{
    // Structural tokens; the constructor registers them in the lexicon.
    private readonly Token Space = Token("\\s+", Echo);
    private readonly Token Open = Token("\\(", Echo);
    private readonly Token Close = Token("\\)", Echo);
    private readonly Token Quote = Token("\\'", Echo);

    // Cache for the Comment property.
    private Token comment;

    /// <summary>Builds an exception whose message is <paramref name="message"/> formatted with <paramref name="arguments"/>.</summary>
    private static Exception Error(string message, params object[] arguments) => new Exception(string.Format(message, arguments));

    /// <summary>Acceptor that yields a new pattern-less token carrying the matched token's id.</summary>
    private static object Echo(Token token, string match) => new Token(token.Id);

    /// <summary>Acceptor that turns the matched text into a symbol.</summary>
    private static object Quoting(Token token, string match) => NewSymbol(token, match);

    /// <summary>
    /// Reads one token from the front of <paramref name="input"/> and removes it from the string.
    /// </summary>
    /// <returns>
    /// The token, the matched text and the acceptor's value; null when the input is null or empty.
    /// </returns>
    /// <exception cref="Exception">The input is non-empty but no registered token matches its start.</exception>
    private Tuple<Token, string, object> Read(ref string input)
    {
        if (string.IsNullOrEmpty(input))
        {
            return null;
        }

        var found = null as Match;
        var sofar = input;

        // First lexicon entry with a non-empty match at the start of the input wins;
        // 'found' is left holding that entry's match.
        var tuple = Lexicon.FirstOrDefault(current => (found = current.Item2.Regex.Match(sofar)).Success && (found.Length > 0));
        if (tuple == null)
        {
            // Unlexable text used to be reported as end of input, which produced a misleading
            // "unexpected EOF" inside lists and silently dropped trailing garbage.
            var preview = input.Length > 16 ? input.Substring(0, 16) + "..." : input;
            throw Error("unexpected input near: {0}", preview);
        }

        var token = tuple.Item2;
        var match = found.Value;
        input = input.Substring(match.Length);
        return Tuple.Create(token, match, token.ValueOf(token, match));
    }

    /// <summary>
    /// Reads the next token that is neither whitespace nor a comment.
    /// </summary>
    /// <returns>The token triple, or null at end of input.</returns>
    private Tuple<Token, string, object> Next(ref string input)
    {
        Tuple<Token, string, object> read;
        do
        {
            read = Read(ref input);
        }
        while ((read != null) && ((read.Item1 == Comment) || (read.Item1 == Space)));
        return read;
    }

    /// <summary>
    /// Parses one datum whose first token, <paramref name="next"/>, has already been read.
    /// </summary>
    /// <param name="input">Remaining input; consumed as parsing proceeds.</param>
    /// <param name="next">The already-read first token of the datum.</param>
    /// <returns>An <c>object[]</c> for a list or quoted form, otherwise the token's value.</returns>
    /// <exception cref="Exception"><paramref name="next"/> is null, or a list is not closed before the input ends.</exception>
    public object Parse(ref string input, Tuple<Token, string, object> next)
    {
        if (next == null)
        {
            throw Error("unexpected EOF");
        }

        var token = next.Item1;
        if (token == Open)
        {
            var list = new List<object>();
            while (((next = Next(ref input)) != null) && (next.Item1 != Close))
            {
                list.Add(Parse(ref input, next));
            }
            if (next == null)
            {
                throw Error("unexpected EOF");
            }
            return list.ToArray();
        }

        if (token == Quote)
        {
            var quote = next.Item3;
            next = Next(ref input);
            return new[] { quote, Parse(ref input, next) };
        }

        return next.Item3;
    }

    /// <summary>
    /// Finds the registered token whose acceptor equals <paramref name="acceptor"/>.
    /// </summary>
    /// <returns>The token; null only when the comment token is requested and none is registered.</returns>
    /// <exception cref="Exception">No token uses the acceptor and it is not <see cref="Commenting"/>.</exception>
    protected Token TokenOf(Acceptor acceptor)
    {
        var found = Lexicon.FirstOrDefault(pair => pair.Item2.ValueOf == acceptor);
        var token = found != null ? found.Item2 : null;
        if ((token == null) && (acceptor != Commenting))
        {
            throw Error("missing required token definition: {0}", acceptor.Method.Name);
        }
        return token;
    }

    /// <summary>Registered tokens as (id, token) pairs, tried in registration order.</summary>
    protected IList<Tuple<string, Token>> Lexicon { get; private set; }

    /// <summary>The token registered with the <see cref="Commenting"/> acceptor, or null if there is none.</summary>
    protected Token Comment { get { return comment = comment ?? TokenOf(Commenting); } }

    /// <summary>Shorthand factory for a token definition.</summary>
    public static Token Token(string pattern, Acceptor acceptor) => new Token(pattern, acceptor);

    /// <summary>Acceptor that marks a token as a comment; such tokens are skipped while reading.</summary>
    public static object Commenting(Token token, string match) => Echo(token, match);

    /// <summary>Acceptor that creates a <see cref="Symbol"/> named after the matched text.</summary>
    public static object NewSymbol(Token token, string match) => new Symbol(match);

    /// <summary>Returns <paramref name="value"/> as a symbol, or null if it is not one.</summary>
    public static Symbol Symbol(object value) => value as Symbol;

    /// <summary>Returns the id of <paramref name="value"/> when it is a symbol, otherwise null.</summary>
    public static string Moniker(object value) => Symbol(value) != null ? Symbol(value).Id : null;

    /// <summary>
    /// Renders a parsed value as text: arrays as a parenthesised, space-separated list,
    /// strings in double quotes, booleans in lower case, null as "(null)".
    /// A fixed set of backslash escape sequences in the rendered text is expanded.
    /// </summary>
    public static string ToString(object value)
    {
        if (value is object[])
        {
            var items = (object[])value;
            if (items.Length == 0)
            {
                return "( )";
            }

            var builder = new StringBuilder("(");
            foreach (var item in items)
            {
                builder.AppendFormat(" {0}", ToString(item));
            }
            return builder.Append(" )").ToString();
        }

        if (value == null)
        {
            return "(null)";
        }

        string text;
        if (value is string)
        {
            text = string.Concat('"', (string)value, '"');
        }
        else if (value is bool)
        {
            text = value.ToString().ToLower();
        }
        else
        {
            text = value.ToString();
        }

        return text
            .Replace("\\\r\n", "\r\n")
            .Replace("\\\n", "\n")
            .Replace("\\t", "\t")
            .Replace("\\n", "\n")
            .Replace("\\r", "\r")
            .Replace("\\\"", "\"");
    }

    /// <summary>Creates a syntax that knows only whitespace, parentheses and the quote character.</summary>
    public SExpressionSyntax()
    {
        Lexicon = new List<Tuple<string, Token>>();
        Include(Space, Open, Close, Quote);
    }

    /// <summary>
    /// Appends token definitions to the lexicon. Earlier registrations take precedence when lexing.
    /// </summary>
    /// <returns>This instance, to allow chaining.</returns>
    public SExpressionSyntax Include(params Token[] tokens)
    {
        foreach (var token in tokens)
        {
            Lexicon.Add(new Tuple<string, Token>(token.Id, token));
        }
        return this;
    }

    /// <summary>
    /// Parses <paramref name="input"/> as exactly one datum.
    /// </summary>
    /// <exception cref="Exception">
    /// The input is empty, cannot be tokenised, ends inside a list, or has tokens after the first datum.
    /// </exception>
    public object Parse(string input)
    {
        var next = Next(ref input);
        var value = Parse(ref input, next);
        if ((next = Next(ref input)) != null)
        {
            // The {0} placeholder was missing, so the offending token never reached the message.
            throw Error("unexpected {0}", next.Item1);
        }
        return value;
    }
}
/// <summary>
/// S-expression syntax extended with "//" comments, the literals false/true/null,
/// decimal and integer numbers, double-quoted strings and identifiers.
/// </summary>
public class CustomSExpressionSyntax : SExpressionSyntax
{
    /// <summary>
    /// Registers the additional tokens. Registration order matters: the lexer takes the
    /// first token whose pattern matches, so decimals precede integers and the keyword
    /// literals precede identifiers.
    /// </summary>
    public CustomSExpressionSyntax()
        : base()
    {
        Include
        (
            // "//" comments
            Token("\\/\\/.*", SExpressionSyntax.Commenting),
            // Keyword literals. The negative lookahead keeps them from swallowing the
            // prefix of a longer identifier such as "nullable" or "true_value".
            Token("false(?![_0-9A-Za-z])", (token, match) => false),
            Token("true(?![_0-9A-Za-z])", (token, match) => true),
            Token("null(?![_0-9A-Za-z])", (token, match) => null),
            // Numbers always use '.' as the decimal separator, so parse with the invariant
            // culture instead of whatever the current thread's culture happens to be.
            Token("\\-?[0-9]+\\.[0-9]+", (token, match) => double.Parse(match, System.Globalization.CultureInfo.InvariantCulture)),
            Token("\\-?[0-9]+", (token, match) => int.Parse(match, System.Globalization.CultureInfo.InvariantCulture)),
            // String literals: the value is the text between the quotes, escapes left as written
            Token("\\\"(\\\\\\n|\\\\t|\\\\n|\\\\r|\\\\\\\"|[^\\\"])*\\\"", (token, match) => match.Substring(1, match.Length - 2)),
            // Identifiers
            Token("[_A-Za-z][_0-9A-Za-z]*", NewSymbol)
        );
    }
}
/// <summary>Empty common base class for the state objects built from parsed S-expressions.</summary>
public class Node { }
/// <summary>
/// Node built from a "Hear" record. Test.Main fills <see cref="Value"/> from the record's
/// second element and <see cref="Ident"/> from its third.
/// </summary>
public class HearPerceptorState : Node
{
/// <summary>Identifier text taken from a symbol in the record.</summary>
public string Ident { get; set; }
/// <summary>Numeric value taken from the record.</summary>
public double Value { get; set; }
}
/// <summary>
/// Node built from an "HJ" record. Test.Main fills <see cref="Ident"/> from the record's
/// second element and <see cref="Value"/> from its third.
/// </summary>
public class HingeJointState : Node
{
/// <summary>Identifier text taken from a symbol in the record.</summary>
public string Ident { get; set; }
/// <summary>Numeric value taken from the record.</summary>
public double Value { get; set; }
}
/// <summary>
/// A triple of doubles, stored as Item1..Item3 of the underlying tuple.
/// NOTE(review): the name suggests polar coordinates, but the meaning of each component
/// is not shown here - confirm against the data source.
/// </summary>
public class Polar : Tuple<double, double, double>
{
/// <summary>Creates the triple from its three components, in order.</summary>
public Polar(double a, double b, double c) : base(a, b, c) { }
}
/// <summary>
/// Node built by Test.Main from any record that is neither "Hear" nor "HJ" (an "FRP"
/// record in the sample): an identifier plus the three numbers of a nested list.
/// </summary>
public class ForceResistancePerceptorState : Node
{
/// <summary>Identifier text taken from the record's second element.</summary>
public string Ident { get; set; }
/// <summary>The three numbers read from the nested list in the record's third element.</summary>
public Polar Polar { get; set; }
}
/// <summary>
/// Demo driver: parses a fixed S-expression sample and maps each record onto a typed node.
/// </summary>
public class Test
{
    /// <summary>
    /// Parses the sample, converts the three records and prints a banner, a blank line and
    /// four boolean checks (record count and the runtime type of each node).
    /// </summary>
    public static void Main()
    {
        var input = @"
(
(Hear 12.3 HelloWorld)
(HJ LAJ1 -0.42)
(FRP lf (pos 2.3 1.7 0.4))
)
";

        // Small conversion helpers shared by the visitors below.
        Func<object, object[]> asRecord = value => (object[])value;
        Func<object, Symbol> symbol = value => SExpressionSyntax.Symbol(value);
        Func<object, string> identifier = value => symbol(value).Id;

        // (Hear <number> <symbol>)
        Func<object[], Node> visitHear = item =>
        {
            var state = new HearPerceptorState();
            state.Value = (double)item[1];
            state.Ident = identifier(item[2]);
            return state;
        };

        // (HJ <symbol> <number>)
        Func<object[], Node> visitHJ = item =>
        {
            var state = new HingeJointState();
            state.Ident = identifier(item[1]);
            state.Value = (double)item[2];
            return state;
        };

        // (FRP <symbol> (<head> <number> <number> <number>))
        Func<object[], Node> visitFRP = item =>
        {
            var state = new ForceResistancePerceptorState();
            state.Ident = identifier(item[1]);
            var triple = asRecord(item[2]);
            state.Polar = new Polar((double)triple[1], (double)triple[2], (double)triple[3]);
            return state;
        };

        // Dispatch on the head symbol of each record; anything that is not
        // "Hear" or "HJ" is handled as an FRP record.
        Func<object[], Node[]> visitAll = all =>
            all.Select(item =>
            {
                var record = asRecord(item);
                switch (identifier(record[0]))
                {
                    case "Hear":
                        return visitHear(record);
                    case "HJ":
                        return visitHJ(record);
                    default:
                        return visitFRP(record);
                }
            }).ToArray();

        var syntax = new CustomSExpressionSyntax();
        var sexpr = syntax.Parse(input);
        var nodes = visitAll(asRecord(sexpr));

        Console.WriteLine("SO_3051254");
        Console.WriteLine();
        Console.WriteLine(nodes.Length == 3);
        Console.WriteLine(nodes[0] is HearPerceptorState);
        Console.WriteLine(nodes[1] is HingeJointState);
        Console.WriteLine(nodes[2] is ForceResistancePerceptorState);
    }
}
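public delegate object Acceptor(Token token, string match);
public class Symbol
{
public Symbol(string id) { Id = id ?? Guid.NewGuid().ToString("P"); }
public override string ToString() => Id;
public string Id { get; private set; }
}
public class Token : Symbol
{
internal Token(string id) : base(id) { }
public Token(string pattern, Acceptor acceptor) : base(pattern) { Regex = new Regex(string.Format("^({0})", !string.IsNullOrEmpty(Pattern = pattern) ? Pattern : ".*"), RegexOptions.Compiled); ValueOf = acceptor; }
public string Pattern { get; private set; }
public Regex Regex { get; private set; }
public Acceptor ValueOf { get; private set; }
}
public class SExpressionSyntax
{
private readonly Token Space = Token("\\s+", Echo);
private readonly Token Open = Token("\\(", Echo);
private readonly Token Close = Token("\\)", Echo);
private readonly Token Quote = Token("\\'", Echo);
private Token comment;
private static Exception Error(string message, params object[] arguments) => new Exception(string.Format(message, arguments));
private static object Echo(Token token, string match) => new Token(token.Id);
private static object Quoting(Token token, string match) => NewSymbol(token, match);
private Tuple<Token, string, object> Read(ref string input)
{
if (!string.IsNullOrEmpty(input))
{
var found = null as Match;
var sofar = input;
var tuple = Lexicon.FirstOrDefault(current => (found = current.Item2.Regex.Match(sofar)).Success && (found.Length > 0));
var token = tuple != null ? tuple.Item2 : null;
var match = token != null ? found.Value : null;
input = match != null ? input.Substring(match.Length) : input;
return token != null ? Tuple.Create(token, match, token.ValueOf(token, match)) : null;
}
return null;
}
private Tuple<Token, string, object> Next(ref string input)
{
Tuple<Token, string, object> read;
while (((read = Read(ref input)) != null) && ((read.Item1 ==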
/**
 * Reads one datum from the stream by fetching the first character and handing it,
 * together with the reader, to the two-argument overload.
 *
 * @param in the character source
 * @return whatever the two-argument {@code readDatum} returns for that first character
 */
Object readDatum(PushbackReader in) {
    return readDatum(in, in.read());
}
Object readDatum(PushbackReader in, int ch) {
if (ch == '(')) {
return readList(in, ch);
} else if (isNumber(ch)) {
return readNumber(in, ch);
} else if (isSymbolStart(ch)) {
return readSymbol(in, ch);
} else {
error(ch);
}
}
List readList(PushbackReader in, int lookAhead) {
if (ch != '(') {
error(ch);
}
List result = new List();
while (true) {
int ch = in.read();
if (ch == ')') {
break;
} else if (isWhiteSpace(ch)) {
skipWhiteSpace(in);
} else {
result.append(readDatum(in, ch);
}
}
return result;
}
/**
 * Reads a symbol whose first character {@code ch} has already been consumed.
 * Symbol characters are accumulated until whitespace or ')' is seen (which is pushed
 * back onto the reader) or the stream ends; any other character is passed to
 * {@code error}.
 *
 * @param in the character source
 * @param ch the already-read first character of the symbol
 * @return the symbol text
 */
String readSymbol(PushbackReader in, int ch) {
    StringBuilder name = new StringBuilder();
    name.append((char) ch);
    for (;;) {
        int next = in.read();
        if (isSymbol(next)) {
            name.append((char) next);
            continue;
        }
        if (isWhiteSpace(next) || next == ')') {
            // The delimiter belongs to the caller; hand it back.
            in.unread(next);
            break;
        }
        if (next == -1) {
            break;
        }
        error(next);
    }
    return name.toString();
}
/// <summary>
/// Callback attached to a <see cref="Token"/>. The lexer calls it with the token whose
/// pattern matched and the matched text, and uses the returned object as the token's value.
/// </summary>
/// <param name="token">The token definition whose regular expression matched.</param>
/// <param name="match">The text that was matched.</param>
/// <returns>The value to associate with the matched text (may be null).</returns>
public delegate object Acceptor(Token token, string match);
/// <summary>
/// A named atom identified by a string id.
/// </summary>
public class Symbol
{
    /// <summary>
    /// Creates a symbol with the given id. When <paramref name="id"/> is null, a freshly
    /// generated GUID (formatted with the "P" specifier) is used instead.
    /// </summary>
    /// <param name="id">The symbol's identifier, or null to generate one.</param>
    public Symbol(string id)
    {
        if (id == null)
        {
            id = Guid.NewGuid().ToString("P");
        }

        Id = id;
    }

    /// <summary>The identifier assigned at construction time.</summary>
    public string Id { get; private set; }

    /// <summary>Returns <see cref="Id"/>.</summary>
    public override string ToString()
    {
        return Id;
    }
}
/// <summary>
/// A lexical token definition: a <see cref="Symbol"/> whose id is its pattern, together with
/// a compiled regular expression anchored at the start of the input and an
/// <see cref="Acceptor"/> that turns matched text into a value.
/// </summary>
public class Token : Symbol
{
    /// <summary>Creates a pattern-less token carrying only an id.</summary>
    internal Token(string id) : base(id) { }

    /// <summary>
    /// Creates a token from a regular-expression pattern and a value callback.
    /// A null or empty pattern falls back to ".*".
    /// </summary>
    /// <param name="pattern">Regular expression source; wrapped as <c>^(pattern)</c>.</param>
    /// <param name="acceptor">Callback stored in <see cref="ValueOf"/>.</param>
    public Token(string pattern, Acceptor acceptor)
        : base(pattern)
    {
        // Pattern is recorded before the regex is built, so it is set even if the
        // Regex constructor rejects the expression.
        Pattern = pattern;

        var expression = string.IsNullOrEmpty(Pattern) ? ".*" : Pattern;
        Regex = new Regex("^(" + expression + ")", RegexOptions.Compiled);
        ValueOf = acceptor;
    }

    /// <summary>The pattern exactly as passed to the constructor (may be null or empty).</summary>
    public string Pattern { get; private set; }

    /// <summary>The compiled, start-anchored regular expression for this token.</summary>
    public Regex Regex { get; private set; }

    /// <summary>The callback that converts matched text into a value.</summary>
    public Acceptor ValueOf { get; private set; }
}
/// <summary>
/// Regex-driven lexer plus recursive-descent reader for S-expressions.
/// Lists are returned as <c>object[]</c>, quoted forms as a two-element array
/// (quote token value, quoted datum), and every other token as the value produced by
/// its <see cref="Acceptor"/>. Only whitespace, parentheses and the quote character are
/// known out of the box; further tokens are registered with <see cref="Include"/>.
/// </summary>
public class SExpressionSyntax
{
    // Structural tokens; the constructor registers them in the lexicon.
    private readonly Token Space = Token("\\s+", Echo);
    private readonly Token Open = Token("\\(", Echo);
    private readonly Token Close = Token("\\)", Echo);
    private readonly Token Quote = Token("\\'", Echo);

    // Cache for the Comment property.
    private Token comment;

    /// <summary>Builds an exception whose message is <paramref name="message"/> formatted with <paramref name="arguments"/>.</summary>
    private static Exception Error(string message, params object[] arguments) => new Exception(string.Format(message, arguments));

    /// <summary>Acceptor that yields a new pattern-less token carrying the matched token's id.</summary>
    private static object Echo(Token token, string match) => new Token(token.Id);

    /// <summary>Acceptor that turns the matched text into a symbol.</summary>
    private static object Quoting(Token token, string match) => NewSymbol(token, match);

    /// <summary>
    /// Reads one token from the front of <paramref name="input"/> and removes it from the string.
    /// </summary>
    /// <returns>
    /// The token, the matched text and the acceptor's value; null when the input is null or empty.
    /// </returns>
    /// <exception cref="Exception">The input is non-empty but no registered token matches its start.</exception>
    private Tuple<Token, string, object> Read(ref string input)
    {
        if (string.IsNullOrEmpty(input))
        {
            return null;
        }

        var found = null as Match;
        var sofar = input;

        // First lexicon entry with a non-empty match at the start of the input wins;
        // 'found' is left holding that entry's match.
        var tuple = Lexicon.FirstOrDefault(current => (found = current.Item2.Regex.Match(sofar)).Success && (found.Length > 0));
        if (tuple == null)
        {
            // Unlexable text used to be reported as end of input, which produced a misleading
            // "unexpected EOF" inside lists and silently dropped trailing garbage.
            var preview = input.Length > 16 ? input.Substring(0, 16) + "..." : input;
            throw Error("unexpected input near: {0}", preview);
        }

        var token = tuple.Item2;
        var match = found.Value;
        input = input.Substring(match.Length);
        return Tuple.Create(token, match, token.ValueOf(token, match));
    }

    /// <summary>
    /// Reads the next token that is neither whitespace nor a comment.
    /// </summary>
    /// <returns>The token triple, or null at end of input.</returns>
    private Tuple<Token, string, object> Next(ref string input)
    {
        Tuple<Token, string, object> read;
        do
        {
            read = Read(ref input);
        }
        while ((read != null) && ((read.Item1 == Comment) || (read.Item1 == Space)));
        return read;
    }

    /// <summary>
    /// Parses one datum whose first token, <paramref name="next"/>, has already been read.
    /// </summary>
    /// <param name="input">Remaining input; consumed as parsing proceeds.</param>
    /// <param name="next">The already-read first token of the datum.</param>
    /// <returns>An <c>object[]</c> for a list or quoted form, otherwise the token's value.</returns>
    /// <exception cref="Exception"><paramref name="next"/> is null, or a list is not closed before the input ends.</exception>
    public object Parse(ref string input, Tuple<Token, string, object> next)
    {
        if (next == null)
        {
            throw Error("unexpected EOF");
        }

        var token = next.Item1;
        if (token == Open)
        {
            var list = new List<object>();
            while (((next = Next(ref input)) != null) && (next.Item1 != Close))
            {
                list.Add(Parse(ref input, next));
            }
            if (next == null)
            {
                throw Error("unexpected EOF");
            }
            return list.ToArray();
        }

        if (token == Quote)
        {
            var quote = next.Item3;
            next = Next(ref input);
            return new[] { quote, Parse(ref input, next) };
        }

        return next.Item3;
    }

    /// <summary>
    /// Finds the registered token whose acceptor equals <paramref name="acceptor"/>.
    /// </summary>
    /// <returns>The token; null only when the comment token is requested and none is registered.</returns>
    /// <exception cref="Exception">No token uses the acceptor and it is not <see cref="Commenting"/>.</exception>
    protected Token TokenOf(Acceptor acceptor)
    {
        var found = Lexicon.FirstOrDefault(pair => pair.Item2.ValueOf == acceptor);
        var token = found != null ? found.Item2 : null;
        if ((token == null) && (acceptor != Commenting))
        {
            throw Error("missing required token definition: {0}", acceptor.Method.Name);
        }
        return token;
    }

    /// <summary>Registered tokens as (id, token) pairs, tried in registration order.</summary>
    protected IList<Tuple<string, Token>> Lexicon { get; private set; }

    /// <summary>The token registered with the <see cref="Commenting"/> acceptor, or null if there is none.</summary>
    protected Token Comment { get { return comment = comment ?? TokenOf(Commenting); } }

    /// <summary>Shorthand factory for a token definition.</summary>
    public static Token Token(string pattern, Acceptor acceptor) => new Token(pattern, acceptor);

    /// <summary>Acceptor that marks a token as a comment; such tokens are skipped while reading.</summary>
    public static object Commenting(Token token, string match) => Echo(token, match);

    /// <summary>Acceptor that creates a <see cref="Symbol"/> named after the matched text.</summary>
    public static object NewSymbol(Token token, string match) => new Symbol(match);

    /// <summary>Returns <paramref name="value"/> as a symbol, or null if it is not one.</summary>
    public static Symbol Symbol(object value) => value as Symbol;

    /// <summary>Returns the id of <paramref name="value"/> when it is a symbol, otherwise null.</summary>
    public static string Moniker(object value) => Symbol(value) != null ? Symbol(value).Id : null;

    /// <summary>
    /// Renders a parsed value as text: arrays as a parenthesised, space-separated list,
    /// strings in double quotes, booleans in lower case, null as "(null)".
    /// A fixed set of backslash escape sequences in the rendered text is expanded.
    /// </summary>
    public static string ToString(object value)
    {
        if (value is object[])
        {
            var items = (object[])value;
            if (items.Length == 0)
            {
                return "( )";
            }

            var builder = new StringBuilder("(");
            foreach (var item in items)
            {
                builder.AppendFormat(" {0}", ToString(item));
            }
            return builder.Append(" )").ToString();
        }

        if (value == null)
        {
            return "(null)";
        }

        string text;
        if (value is string)
        {
            text = string.Concat('"', (string)value, '"');
        }
        else if (value is bool)
        {
            text = value.ToString().ToLower();
        }
        else
        {
            text = value.ToString();
        }

        return text
            .Replace("\\\r\n", "\r\n")
            .Replace("\\\n", "\n")
            .Replace("\\t", "\t")
            .Replace("\\n", "\n")
            .Replace("\\r", "\r")
            .Replace("\\\"", "\"");
    }

    /// <summary>Creates a syntax that knows only whitespace, parentheses and the quote character.</summary>
    public SExpressionSyntax()
    {
        Lexicon = new List<Tuple<string, Token>>();
        Include(Space, Open, Close, Quote);
    }

    /// <summary>
    /// Appends token definitions to the lexicon. Earlier registrations take precedence when lexing.
    /// </summary>
    /// <returns>This instance, to allow chaining.</returns>
    public SExpressionSyntax Include(params Token[] tokens)
    {
        foreach (var token in tokens)
        {
            Lexicon.Add(new Tuple<string, Token>(token.Id, token));
        }
        return this;
    }

    /// <summary>
    /// Parses <paramref name="input"/> as exactly one datum.
    /// </summary>
    /// <exception cref="Exception">
    /// The input is empty, cannot be tokenised, ends inside a list, or has tokens after the first datum.
    /// </exception>
    public object Parse(string input)
    {
        var next = Next(ref input);
        var value = Parse(ref input, next);
        if ((next = Next(ref input)) != null)
        {
            // The {0} placeholder was missing, so the offending token never reached the message.
            throw Error("unexpected {0}", next.Item1);
        }
        return value;
    }
}
/// <summary>
/// S-expression syntax extended with "//" comments, the literals false/true/null,
/// decimal and integer numbers, double-quoted strings and identifiers.
/// </summary>
public class CustomSExpressionSyntax : SExpressionSyntax
{
    /// <summary>
    /// Registers the additional tokens. Registration order matters: the lexer takes the
    /// first token whose pattern matches, so decimals precede integers and the keyword
    /// literals precede identifiers.
    /// </summary>
    public CustomSExpressionSyntax()
        : base()
    {
        Include
        (
            // "//" comments
            Token("\\/\\/.*", SExpressionSyntax.Commenting),
            // Keyword literals. The negative lookahead keeps them from swallowing the
            // prefix of a longer identifier such as "nullable" or "true_value".
            Token("false(?![_0-9A-Za-z])", (token, match) => false),
            Token("true(?![_0-9A-Za-z])", (token, match) => true),
            Token("null(?![_0-9A-Za-z])", (token, match) => null),
            // Numbers always use '.' as the decimal separator, so parse with the invariant
            // culture instead of whatever the current thread's culture happens to be.
            Token("\\-?[0-9]+\\.[0-9]+", (token, match) => double.Parse(match, System.Globalization.CultureInfo.InvariantCulture)),
            Token("\\-?[0-9]+", (token, match) => int.Parse(match, System.Globalization.CultureInfo.InvariantCulture)),
            // String literals: the value is the text between the quotes, escapes left as written
            Token("\\\"(\\\\\\n|\\\\t|\\\\n|\\\\r|\\\\\\\"|[^\\\"])*\\\"", (token, match) => match.Substring(1, match.Length - 2)),
            // Identifiers
            Token("[_A-Za-z][_0-9A-Za-z]*", NewSymbol)
        );
    }
}
/// <summary>Empty common base class for the state objects built from parsed S-expressions.</summary>
public class Node { }
/// <summary>
/// Node built from a "Hear" record. Test.Main fills <see cref="Value"/> from the record's
/// second element and <see cref="Ident"/> from its third.
/// </summary>
public class HearPerceptorState : Node
{
/// <summary>Identifier text taken from a symbol in the record.</summary>
public string Ident { get; set; }
/// <summary>Numeric value taken from the record.</summary>
public double Value { get; set; }
}
/// <summary>
/// Node built from an "HJ" record. Test.Main fills <see cref="Ident"/> from the record's
/// second element and <see cref="Value"/> from its third.
/// </summary>
public class HingeJointState : Node
{
/// <summary>Identifier text taken from a symbol in the record.</summary>
public string Ident { get; set; }
/// <summary>Numeric value taken from the record.</summary>
public double Value { get; set; }
}
/// <summary>
/// A triple of doubles, stored as Item1..Item3 of the underlying tuple.
/// NOTE(review): the name suggests polar coordinates, but the meaning of each component
/// is not shown here - confirm against the data source.
/// </summary>
public class Polar : Tuple<double, double, double>
{
/// <summary>Creates the triple from its three components, in order.</summary>
public Polar(double a, double b, double c) : base(a, b, c) { }
}
/// <summary>
/// Node built by Test.Main from any record that is neither "Hear" nor "HJ" (an "FRP"
/// record in the sample): an identifier plus the three numbers of a nested list.
/// </summary>
public class ForceResistancePerceptorState : Node
{
/// <summary>Identifier text taken from the record's second element.</summary>
public string Ident { get; set; }
/// <summary>The three numbers read from the nested list in the record's third element.</summary>
public Polar Polar { get; set; }
}
/// <summary>
/// Demo driver: parses a fixed S-expression sample and maps each record onto a typed node.
/// </summary>
public class Test
{
    /// <summary>
    /// Parses the sample, converts the three records and prints a banner, a blank line and
    /// four boolean checks (record count and the runtime type of each node).
    /// </summary>
    public static void Main()
    {
        var input = @"
(
(Hear 12.3 HelloWorld)
(HJ LAJ1 -0.42)
(FRP lf (pos 2.3 1.7 0.4))
)
";

        // Small conversion helpers shared by the visitors below.
        Func<object, object[]> asRecord = value => (object[])value;
        Func<object, Symbol> symbol = value => SExpressionSyntax.Symbol(value);
        Func<object, string> identifier = value => symbol(value).Id;

        // (Hear <number> <symbol>)
        Func<object[], Node> visitHear = item =>
        {
            var state = new HearPerceptorState();
            state.Value = (double)item[1];
            state.Ident = identifier(item[2]);
            return state;
        };

        // (HJ <symbol> <number>)
        Func<object[], Node> visitHJ = item =>
        {
            var state = new HingeJointState();
            state.Ident = identifier(item[1]);
            state.Value = (double)item[2];
            return state;
        };

        // (FRP <symbol> (<head> <number> <number> <number>))
        Func<object[], Node> visitFRP = item =>
        {
            var state = new ForceResistancePerceptorState();
            state.Ident = identifier(item[1]);
            var triple = asRecord(item[2]);
            state.Polar = new Polar((double)triple[1], (double)triple[2], (double)triple[3]);
            return state;
        };

        // Dispatch on the head symbol of each record; anything that is not
        // "Hear" or "HJ" is handled as an FRP record.
        Func<object[], Node[]> visitAll = all =>
            all.Select(item =>
            {
                var record = asRecord(item);
                switch (identifier(record[0]))
                {
                    case "Hear":
                        return visitHear(record);
                    case "HJ":
                        return visitHJ(record);
                    default:
                        return visitFRP(record);
                }
            }).ToArray();

        var syntax = new CustomSExpressionSyntax();
        var sexpr = syntax.Parse(input);
        var nodes = visitAll(asRecord(sexpr));

        Console.WriteLine("SO_3051254");
        Console.WriteLine();
        Console.WriteLine(nodes.Length == 3);
        Console.WriteLine(nodes[0] is HearPerceptorState);
        Console.WriteLine(nodes[1] is HingeJointState);
        Console.WriteLine(nodes[2] is ForceResistancePerceptorState);
    }
}