Scala:复杂表达式解析器运行缓慢并抛出 OutOfMemoryError
我想为具有操作顺序的复杂表达式创建一个解析器。我有一些例子,但它工作非常缓慢,并抛出异常OutOfMemoryError。我怎样才能改进它Scala 斯卡拉。难以理解的表达式解析器。OutOfMemoryError,scala,parser-combinators,Scala,Parser Combinators,我想为具有操作顺序的复杂表达式创建一个解析器。我有一些例子,但它工作非常缓慢,并抛出异常OutOfMemoryError。我怎样才能改进它 def expr: Parser[Expression] = term5 def term5: Parser[Expression] = (term4 ~ "OR" ~ term4) ^^ { case lhs~o~rhs => BinaryOp("OR", lhs, rhs) } | term4 def term4: Parser[E
// Entry point of the grammar: an expression is the lowest-precedence level (OR).
def expr: Parser[Expression] = term5
// OR level. Both alternatives start with term4, so when no "OR" follows, the
// left operand that was just parsed is thrown away and term4 is parsed again.
// Only one operator is accepted per level (term4 OR term4), so chains such as
// `a OR b OR c` are not matched by this rule.
def term5: Parser[Expression] =
(term4 ~ "OR" ~ term4) ^^ { case lhs~o~rhs => BinaryOp("OR", lhs, rhs) } |
term4
// AND level; same shape as term5, one level down.
def term4: Parser[Expression] =
(term3 ~ "AND" ~ term3) ^^ { case lhs~a~rhs => BinaryOp("AND", lhs, rhs) } |
term3
// Equality level. The symbolic and the word spellings are both stored under the
// names "NE" / "EQ" in the resulting BinaryOp.
// Each alternative re-parses term2 from the same position before trying its operator.
def term3: Parser[Expression] =
(term2 ~ "<>" ~ term2) ^^ { case lhs~ne~rhs => BinaryOp("NE", lhs, rhs) } |
(term2 ~ "=" ~ term2) ^^ { case lhs~eq~rhs => BinaryOp("EQ", lhs, rhs) } |
(term2 ~ "NE" ~ term2) ^^ { case lhs~ne~rhs => BinaryOp("NE", lhs, rhs) } |
(term2 ~ "EQ" ~ term2) ^^ { case lhs~eq~rhs => BinaryOp("EQ", lhs, rhs) } |
term2
// Relational level: eight operator alternatives plus the bare term1 fallback,
// i.e. up to nine attempts that all begin by parsing term1.
// Symbolic spellings are stored under the names "LT" / "GT" / "LE" / "GE".
def term2: Parser[Expression] =
(term1 ~ "<" ~ term1) ^^ { case lhs~lt~rhs => BinaryOp("LT", lhs, rhs) } |
(term1 ~ ">" ~ term1) ^^ { case lhs~gt~rhs => BinaryOp("GT", lhs, rhs) } |
(term1 ~ "<=" ~ term1) ^^ { case lhs~le~rhs => BinaryOp("LE", lhs, rhs) } |
(term1 ~ ">=" ~ term1) ^^ { case lhs~ge~rhs => BinaryOp("GE", lhs, rhs) } |
(term1 ~ "LT" ~ term1) ^^ { case lhs~lt~rhs => BinaryOp("LT", lhs, rhs) } |
(term1 ~ "GT" ~ term1) ^^ { case lhs~gt~rhs => BinaryOp("GT", lhs, rhs) } |
(term1 ~ "LE" ~ term1) ^^ { case lhs~le~rhs => BinaryOp("LE", lhs, rhs) } |
(term1 ~ "GE" ~ term1) ^^ { case lhs~ge~rhs => BinaryOp("GE", lhs, rhs) } |
term1
// Additive level: "+", "-" and ":" (bound as `concat` in the pattern).
def term1: Parser[Expression] =
(term ~ "+" ~ term) ^^ { case lhs~plus~rhs => BinaryOp("+", lhs, rhs) } |
(term ~ "-" ~ term) ^^ { case lhs~minus~rhs => BinaryOp("-", lhs, rhs) } |
(term ~ ":" ~ term) ^^ { case lhs~concat~rhs => BinaryOp(":", lhs, rhs) } |
term
// Multiplicative level: "*", "/" and "MOD".
def term: Parser[Expression] =
(factor ~ "*" ~ factor) ^^ { case lhs~times~rhs => BinaryOp("*", lhs, rhs) } |
(factor ~ "/" ~ factor) ^^ { case lhs~div~rhs => BinaryOp("/", lhs, rhs) } |
(factor ~ "MOD" ~ factor) ^^ { case lhs~div~rhs => BinaryOp("MOD", lhs, rhs) } |
factor
// Atoms: parenthesised expression, unary sign, function call, literals and identifiers.
// `function` is defined elsewhere and is not shown in this snippet.
def factor: Parser[Expression] =
"(" ~> expr <~ ")" |
("+" | "-") ~ factor ^^ { case op~rhs => UnaryOp(op, rhs) } |
function |
numericLit ^^ { case x => Number(x/*.toFloat*/) } |
stringLit ^^ { case s => Literal(s) } |
ident ^^ { case id => Variable(id) }
def expr: Parser[Expression] = term5
def term5: Parser[Expression] =
(term4 ~ "OR" ~ term4) ^^ { case lhs~o~rhs => BinaryOp("OR", lhs, rhs) } |
term4
def term4: Parser[Expression] =
(term3 ~ "AND" ~ term3) ^^ { case lhs~a~rhs => BinaryOp("AND", lhs, rhs) } |
term3
def term3: Parser[Expression] =
(term2 ~ "<>" ~ term2) ^^ { case lhs~ne~rhs => BinaryOp("NE", lhs, rhs) } |
(term2 ~ "=" ~ term2) ^^ { case lhs~eq~rhs => BinaryOp("EQ", lhs, rhs) } |
(term2 ~ "NE" ~ term2) ^^ { case lhs~ne~rhs => BinaryOp("NE", lhs, rhs) } |
(term2 ~ "EQ" ~ term2) ^^ { case lhs~eq~rhs => BinaryOp("EQ", lhs, rhs) } |
term2
def term2: Parser[Expression] =
(term1 ~ "<" ~ term1) ^^ { case lhs~lt~rhs => BinaryOp("LT", lhs, rhs) } |
(term1 ~ ">" ~ term1) ^^ { case lhs~gt~rhs => BinaryOp("GT", lhs, rhs) } |
(term1 ~ "<=" ~ term1) ^^ { case lhs~le~rhs => BinaryOp("LE", lhs, rhs) } |
(term1 ~ ">=" ~ term1) ^^ { case lhs~ge~rhs => BinaryOp("GE", lhs, rhs) } |
(term1 ~ "LT" ~ term1) ^^ { case lhs~lt~rhs => BinaryOp("LT", lhs, rhs) } |
(term1 ~ "GT" ~ term1) ^^ { case lhs~gt~rhs => BinaryOp("GT", lhs, rhs) } |
(term1 ~ "LE" ~ term1) ^^ { case lhs~le~rhs => BinaryOp("LE", lhs, rhs) } |
(term1 ~ "GE" ~ term1) ^^ { case lhs~ge~rhs => BinaryOp("GE", lhs, rhs) } |
term1
def term1: Parser[Expression] =
(term ~ "+" ~ term) ^^ { case lhs~plus~rhs => BinaryOp("+", lhs, rhs) } |
(term ~ "-" ~ term) ^^ { case lhs~minus~rhs => BinaryOp("-", lhs, rhs) } |
(term ~ ":" ~ term) ^^ { case lhs~concat~rhs => BinaryOp(":", lhs, rhs) } |
term
def term: Parser[Expression] =
(factor ~ "*" ~ factor) ^^ { case lhs~times~rhs => BinaryOp("*", lhs, rhs) } |
(factor ~ "/" ~ factor) ^^ { case lhs~div~rhs => BinaryOp("/", lhs, rhs) } |
(factor ~ "MOD" ~ factor) ^^ { case lhs~div~rhs => BinaryOp("MOD", lhs, rhs) } |
factor
def factor: Parser[Expression] =
"(" ~> expr <~ ")" |
("+" | "-") ~ factor ^^ { case op~rhs => UnaryOp(op, rhs) } |
function |
numericLit ^^ { case x => Number(x/*.toFloat*/) } |
stringLit ^^ { case s => Literal(s) } |
ident ^^ { case id => Variable(id) }
基本上,它很慢,而且占用了太多内存,因为你的语法效率非常低
让我们考虑第二行:`B = A:(1 + 2)`。解析器会依次按下面各层规则去尝试解析这一行:
ident * factor + term < term1 <> term2 AND term3 OR term4
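1. term4 OR term4,然后是 term4
2. term3 AND term3,然后是 term3
3. term2 <> term2,然后是 =,然后是 NE,然后是 EQ,最后是 term2
4. term1 加上 8 个比较运算符(<、>、<=、>=、LT、GT、LE、GE),然后是 term1
5. term + term、term - term、term : term,然后是 term
6. factor * factor、factor / factor、factor MOD factor,然后是 factor
第一次尝试如下:
ident * factor + term < term1 <> term2 AND term3 OR term4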
现在转到 term1 的下一个备选分支:
ident * factor - term < term1 <> term2 AND term3 OR term4
ident / factor - term < term1 <> term2 AND term3 OR term4
ident MOD factor - term < term1 <> term2 AND term3 OR term4
ident - term < term1 <> term2 AND term3 OR term4
等等
所有这些都是因为每行中每个术语的第一个术语总是匹配的!要匹配一个简单的数字,它必须解析factor
2*2*5*9*4*4=2880次
但这还不是故事的一半!你看,因为termX重复了两次,它会在两边重复所有这些内容。例如,A:(1+2)
的第一个匹配是:
ident : term < term1 <> term2 AND term3 OR term4
where ident = A
and term = (1+2)
然后查看scala发行版的doc/scala-devel-docs/examples/parsing
目录,您会发现几个示例
下面是你的解析器的一个版本(不含 function),它会记录自己尝试过的所有内容:
/** AST produced by the expression parser. Sealed so that matches over it are checked for exhaustiveness. */
sealed trait Expression

/** A reference to a named variable. */
final case class Variable(id: String) extends Expression

/** A string literal. */
final case class Literal(s: String) extends Expression

/** A numeric literal, kept as the text that was read (no numeric conversion is done here). */
final case class Number(x: String) extends Expression

/** A prefix operator applied to a single operand. */
final case class UnaryOp(op: String, rhs: Expression) extends Expression

/** An infix operator applied to a left and a right operand. */
final case class BinaryOp(op: String, lhs: Expression, rhs: Expression) extends Expression
/** Logging variant of the question's grammar.
  *
  * Every alternative is wrapped in `log`, so running `parse` shows each attempt the
  * parser makes and therefore how much work the backtracking grammar does.
  * The `function` alternative of `factor` is left out.
  */
object TestParser extends scala.util.parsing.combinator.syntactical.StdTokenParsers {
  import scala.util.parsing.combinator.lexical.StdLexical

  type Tokens = StdLexical
  val lexical = new StdLexical

  // Alphabetic operators are scanned by StdLexical's identifier rule and only become
  // keyword tokens when they are listed in `reserved`; registered as delimiters they
  // would be lexed as plain identifiers and "OR", "AND", ... could never match.
  lexical.reserved ++= List("OR", "AND", "NE", "EQ", "LT", "GT", "LE", "GE", "MOD")
  // Every symbolic token the grammar mentions has to be a delimiter, including the
  // comparison symbols used by term3 and term2.
  lexical.delimiters ++= List("(", ")", "+", "-", "*", "/", ":", "=", "<>", "<", ">", "<=", ">=")

  /** Zero or more expressions in sequence. */
  def stmts: Parser[Any] = log(expr.*)("stmts")

  // NOTE(review): "\n" is neither reserved nor a delimiter, and the lexer presumably
  // skips newlines as whitespace, so this rule is unlikely to ever succeed. It is not
  // used by `parse`; kept for interface compatibility.
  def stmt: Parser[Expression] = log(expr <~ "\n")("stmt")

  /** Entry rule: the lowest-precedence level. */
  def expr: Parser[Expression] = log(term5)("expr")

  /** OR level. */
  def term5: Parser[Expression] = (
    log((term4 ~ "OR" ~ term4) ^^ { case lhs ~ _ ~ rhs => BinaryOp("OR", lhs, rhs) })("term5 OR")
      | log(term4)("term5 term4")
  )

  /** AND level. */
  def term4: Parser[Expression] = (
    log((term3 ~ "AND" ~ term3) ^^ { case lhs ~ _ ~ rhs => BinaryOp("AND", lhs, rhs) })("term4 AND")
      | log(term3)("term4 term3")
  )

  /** Equality level; symbolic and word spellings share the names "NE" / "EQ". */
  def term3: Parser[Expression] = (
    log((term2 ~ "<>" ~ term2) ^^ { case lhs ~ _ ~ rhs => BinaryOp("NE", lhs, rhs) })("term3 <>")
      | log((term2 ~ "=" ~ term2) ^^ { case lhs ~ _ ~ rhs => BinaryOp("EQ", lhs, rhs) })("term3 =")
      | log((term2 ~ "NE" ~ term2) ^^ { case lhs ~ _ ~ rhs => BinaryOp("NE", lhs, rhs) })("term3 NE")
      | log((term2 ~ "EQ" ~ term2) ^^ { case lhs ~ _ ~ rhs => BinaryOp("EQ", lhs, rhs) })("term3 EQ")
      | log(term2)("term3 term2")
  )

  /** Relational level; symbolic and word spellings share the names "LT" / "GT" / "LE" / "GE". */
  def term2: Parser[Expression] = (
    log((term1 ~ "<" ~ term1) ^^ { case lhs ~ _ ~ rhs => BinaryOp("LT", lhs, rhs) })("term2 <")
      | log((term1 ~ ">" ~ term1) ^^ { case lhs ~ _ ~ rhs => BinaryOp("GT", lhs, rhs) })("term2 >")
      | log((term1 ~ "<=" ~ term1) ^^ { case lhs ~ _ ~ rhs => BinaryOp("LE", lhs, rhs) })("term2 <=")
      | log((term1 ~ ">=" ~ term1) ^^ { case lhs ~ _ ~ rhs => BinaryOp("GE", lhs, rhs) })("term2 >=")
      | log((term1 ~ "LT" ~ term1) ^^ { case lhs ~ _ ~ rhs => BinaryOp("LT", lhs, rhs) })("term2 LT")
      | log((term1 ~ "GT" ~ term1) ^^ { case lhs ~ _ ~ rhs => BinaryOp("GT", lhs, rhs) })("term2 GT")
      | log((term1 ~ "LE" ~ term1) ^^ { case lhs ~ _ ~ rhs => BinaryOp("LE", lhs, rhs) })("term2 LE")
      | log((term1 ~ "GE" ~ term1) ^^ { case lhs ~ _ ~ rhs => BinaryOp("GE", lhs, rhs) })("term2 GE")
      | log(term1)("term2 term1")
  )

  /** Additive level: "+", "-" and ":". */
  def term1: Parser[Expression] = (
    log((term ~ "+" ~ term) ^^ { case lhs ~ _ ~ rhs => BinaryOp("+", lhs, rhs) })("term1 +")
      | log((term ~ "-" ~ term) ^^ { case lhs ~ _ ~ rhs => BinaryOp("-", lhs, rhs) })("term1 -")
      | log((term ~ ":" ~ term) ^^ { case lhs ~ _ ~ rhs => BinaryOp(":", lhs, rhs) })("term1 :")
      | log(term)("term1 term")
  )

  /** Multiplicative level: "*", "/" and "MOD". */
  def term: Parser[Expression] = (
    log((factor ~ "*" ~ factor) ^^ { case lhs ~ _ ~ rhs => BinaryOp("*", lhs, rhs) })("term *")
      | log((factor ~ "/" ~ factor) ^^ { case lhs ~ _ ~ rhs => BinaryOp("/", lhs, rhs) })("term /")
      | log((factor ~ "MOD" ~ factor) ^^ { case lhs ~ _ ~ rhs => BinaryOp("MOD", lhs, rhs) })("term MOD")
      | log(factor)("term factor")
  )

  /** Atoms: parenthesised expression, unary sign, literals and identifiers. */
  def factor: Parser[Expression] = (
    log("(" ~> expr <~ ")")("factor (expr)")
      | log(("+" | "-") ~ factor ^^ { case op ~ rhs => UnaryOp(op, rhs) })("factor +-")
      //| function |
      | log(numericLit ^^ { case x => Number(x/*.toFloat*/) })("factor numericLit")
      | log(stringLit ^^ { case s => Literal(s) })("factor stringLit")
      | log(ident ^^ { case id => Variable(id) })("factor ident")
  )

  /** Tokenises `s` and runs `stmts` over the resulting token stream.
    *
    * @param s the source text to parse
    * @return the parse result of `stmts`
    */
  def parse(s: String): ParseResult[Any] = stmts(new lexical.Scanner(s))
}
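sealed trait Expression
case class Variable(id: String) extends Expression
case class Literal(s: String) extends Expression
case class Number(x: String) extends Expression
case class UnaryOp(op: String, rhs: Expression) extends Expression
case class BinaryOp(op: String, lhs: Expression, rhs: Expression) extends Expression
object TestParser extends scala.util.parsing.combinator.syntactical.StdTokenParsers {
import scala.util.parsing.combinator.lexical.StdLexical
type Tokens = StdLexical
val lexical = new StdLexical
lexical.delimiters ++= List("(", ")", "+", "-", "*", "/", "=", "OR", "AND", "NE", "EQ", "LT", "GT", "LE", "GE", ":", "MOD")
def stmts: Parser[Any] = log(expr.*)("stmts")
def stmt: Parser[Expression] = log(expr <~ "\n")("stmt")
def expr: Parser[Expression] = log(term5)("expr")
def term5: Parser[Expression] = (
log((term4 ~ "OR" ~ term4) ^^ { case lhs~o~rhs => BinaryOp("OR", lhs, rhs) })("term5 OR")
| log(term4)("term5 term4")
)
def term4: Parser[Expression] = (
log((term3 ~ "AND" ~ term3) ^^ { case lhs~a~rhs => BinaryOp("AND", lhs, rhs) })("term4 AND")
| log(term3)("term4 term3")
)
def term3: Parser[Expression] = (
log((term2 ~ "<>" ~ term2) ^^ { case lhs~ne~rhs => BinaryOp("NE", lhs, rhs) })("term3 <>")
| log((term2 ~ "=" ~ term2) ^^ { case lhs~eq~rhs => BinaryOp("EQ", lhs, rhs) })("term3 =")
| log((term2 ~ "NE" ~ term2) ^^ { case lhs~ne~rhs => BinaryOp("NE", lhs, rhs) })("term3 NE")
| log((term2 ~ "EQ" ~ term2) ^^ { case lhs~eq~rhs => BinaryOp("EQ", lhs, rhs) })("term3 EQ")
| log(term2)("term3 term2")
)
def term2: Parser[Expression] = (
log((term1 ~ "<" ~ term1) ^^ { case lhs~lt~rhs => BinaryOp("LT", lhs, rhs) })("term2 <")
| log((term1 ~ ">" ~ term1) ^^ { case lhs~gt~rhs => BinaryOp("GT", lhs, rhs) })("term2 >")
| log((term1 ~ "<=" ~ term1) ^^ { case lhs~le~rhs => BinaryOp("LE", lhs, rhs) })("term2 <=")
| log((term1 ~ ">=" ~ term1) ^^ { case lhs~ge~rhs => BinaryOp("GE", lhs, rhs) })("term2 >=")
| log((term1 ~ "LT" ~ term1) ^^ { case lhs~lt~rhs => BinaryOp("LT", lhs, rhs) })("term2 LT")
| log((term1 ~ "GT" ~ term1) ^^ { case lhs~gt~rhs => BinaryOp("GT", lhs, rhs) })("term2 GT")
| log((term1 ~ "LE" ~ term1) ^^ { case lhs~le~rhs => BinaryOp("LE", lhs, rhs) })("term2 LE")
| log((term1 ~ "GE" ~ term1) ^^ { case lhs~ge~rhs => BinaryOp("GE", lhs, rhs) })("term2 GE")
| log(term1)("term2 term1")
)
def term1: Parser[Expression] = (
log((term ~ "+" ~ term) ^^ { case lhs~plus~rhs => BinaryOp("+", lhs, rhs) })("term1 +")
| log((term ~ "-" ~ term) ^^ { case lhs~minus~rhs => BinaryOp("-", lhs, rhs) })("term1 -")
| log(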
ident : term < term1 <> term2 AND term3 OR term4
where ident = A
and term = (1+2)
sbaz install scala-devel-docs
/** AST produced by the expression parser. Sealed so that matches over it are checked for exhaustiveness. */
sealed trait Expression

/** A reference to a named variable. */
final case class Variable(id: String) extends Expression

/** A string literal. */
final case class Literal(s: String) extends Expression

/** A numeric literal, kept as the text that was read (no numeric conversion is done here). */
final case class Number(x: String) extends Expression

/** A prefix operator applied to a single operand. */
final case class UnaryOp(op: String, rhs: Expression) extends Expression

/** An infix operator applied to a left and a right operand. */
final case class BinaryOp(op: String, lhs: Expression, rhs: Expression) extends Expression
/** Logging variant of the question's grammar.
  *
  * Every alternative is wrapped in `log`, so running `parse` shows each attempt the
  * parser makes and therefore how much work the backtracking grammar does.
  * The `function` alternative of `factor` is left out.
  */
object TestParser extends scala.util.parsing.combinator.syntactical.StdTokenParsers {
  import scala.util.parsing.combinator.lexical.StdLexical

  type Tokens = StdLexical
  val lexical = new StdLexical

  // Alphabetic operators are scanned by StdLexical's identifier rule and only become
  // keyword tokens when they are listed in `reserved`; registered as delimiters they
  // would be lexed as plain identifiers and "OR", "AND", ... could never match.
  lexical.reserved ++= List("OR", "AND", "NE", "EQ", "LT", "GT", "LE", "GE", "MOD")
  // Every symbolic token the grammar mentions has to be a delimiter, including the
  // comparison symbols used by term3 and term2.
  lexical.delimiters ++= List("(", ")", "+", "-", "*", "/", ":", "=", "<>", "<", ">", "<=", ">=")

  /** Zero or more expressions in sequence. */
  def stmts: Parser[Any] = log(expr.*)("stmts")

  // NOTE(review): "\n" is neither reserved nor a delimiter, and the lexer presumably
  // skips newlines as whitespace, so this rule is unlikely to ever succeed. It is not
  // used by `parse`; kept for interface compatibility.
  def stmt: Parser[Expression] = log(expr <~ "\n")("stmt")

  /** Entry rule: the lowest-precedence level. */
  def expr: Parser[Expression] = log(term5)("expr")

  /** OR level. */
  def term5: Parser[Expression] = (
    log((term4 ~ "OR" ~ term4) ^^ { case lhs ~ _ ~ rhs => BinaryOp("OR", lhs, rhs) })("term5 OR")
      | log(term4)("term5 term4")
  )

  /** AND level. */
  def term4: Parser[Expression] = (
    log((term3 ~ "AND" ~ term3) ^^ { case lhs ~ _ ~ rhs => BinaryOp("AND", lhs, rhs) })("term4 AND")
      | log(term3)("term4 term3")
  )

  /** Equality level; symbolic and word spellings share the names "NE" / "EQ". */
  def term3: Parser[Expression] = (
    log((term2 ~ "<>" ~ term2) ^^ { case lhs ~ _ ~ rhs => BinaryOp("NE", lhs, rhs) })("term3 <>")
      | log((term2 ~ "=" ~ term2) ^^ { case lhs ~ _ ~ rhs => BinaryOp("EQ", lhs, rhs) })("term3 =")
      | log((term2 ~ "NE" ~ term2) ^^ { case lhs ~ _ ~ rhs => BinaryOp("NE", lhs, rhs) })("term3 NE")
      | log((term2 ~ "EQ" ~ term2) ^^ { case lhs ~ _ ~ rhs => BinaryOp("EQ", lhs, rhs) })("term3 EQ")
      | log(term2)("term3 term2")
  )

  /** Relational level; symbolic and word spellings share the names "LT" / "GT" / "LE" / "GE". */
  def term2: Parser[Expression] = (
    log((term1 ~ "<" ~ term1) ^^ { case lhs ~ _ ~ rhs => BinaryOp("LT", lhs, rhs) })("term2 <")
      | log((term1 ~ ">" ~ term1) ^^ { case lhs ~ _ ~ rhs => BinaryOp("GT", lhs, rhs) })("term2 >")
      | log((term1 ~ "<=" ~ term1) ^^ { case lhs ~ _ ~ rhs => BinaryOp("LE", lhs, rhs) })("term2 <=")
      | log((term1 ~ ">=" ~ term1) ^^ { case lhs ~ _ ~ rhs => BinaryOp("GE", lhs, rhs) })("term2 >=")
      | log((term1 ~ "LT" ~ term1) ^^ { case lhs ~ _ ~ rhs => BinaryOp("LT", lhs, rhs) })("term2 LT")
      | log((term1 ~ "GT" ~ term1) ^^ { case lhs ~ _ ~ rhs => BinaryOp("GT", lhs, rhs) })("term2 GT")
      | log((term1 ~ "LE" ~ term1) ^^ { case lhs ~ _ ~ rhs => BinaryOp("LE", lhs, rhs) })("term2 LE")
      | log((term1 ~ "GE" ~ term1) ^^ { case lhs ~ _ ~ rhs => BinaryOp("GE", lhs, rhs) })("term2 GE")
      | log(term1)("term2 term1")
  )

  /** Additive level: "+", "-" and ":". */
  def term1: Parser[Expression] = (
    log((term ~ "+" ~ term) ^^ { case lhs ~ _ ~ rhs => BinaryOp("+", lhs, rhs) })("term1 +")
      | log((term ~ "-" ~ term) ^^ { case lhs ~ _ ~ rhs => BinaryOp("-", lhs, rhs) })("term1 -")
      | log((term ~ ":" ~ term) ^^ { case lhs ~ _ ~ rhs => BinaryOp(":", lhs, rhs) })("term1 :")
      | log(term)("term1 term")
  )

  /** Multiplicative level: "*", "/" and "MOD". */
  def term: Parser[Expression] = (
    log((factor ~ "*" ~ factor) ^^ { case lhs ~ _ ~ rhs => BinaryOp("*", lhs, rhs) })("term *")
      | log((factor ~ "/" ~ factor) ^^ { case lhs ~ _ ~ rhs => BinaryOp("/", lhs, rhs) })("term /")
      | log((factor ~ "MOD" ~ factor) ^^ { case lhs ~ _ ~ rhs => BinaryOp("MOD", lhs, rhs) })("term MOD")
      | log(factor)("term factor")
  )

  /** Atoms: parenthesised expression, unary sign, literals and identifiers. */
  def factor: Parser[Expression] = (
    log("(" ~> expr <~ ")")("factor (expr)")
      | log(("+" | "-") ~ factor ^^ { case op ~ rhs => UnaryOp(op, rhs) })("factor +-")
      //| function |
      | log(numericLit ^^ { case x => Number(x/*.toFloat*/) })("factor numericLit")
      | log(stringLit ^^ { case s => Literal(s) })("factor stringLit")
      | log(ident ^^ { case id => Variable(id) })("factor ident")
  )

  /** Tokenises `s` and runs `stmts` over the resulting token stream.
    *
    * @param s the source text to parse
    * @return the parse result of `stmts`
    */
  def parse(s: String): ParseResult[Any] = stmts(new lexical.Scanner(s))
}
/** Equality level with the four operator alternatives factored into one.
  *
  * The operator token is mapped back to the names the unfactored rule stored
  * ("<>" and "NE" -> "NE", "=" and "EQ" -> "EQ"), so the resulting AST is unchanged
  * by the refactoring.
  */
def term3: Parser[Expression] =
  log((term2 ~ ("<>" | "=" | "NE" | "EQ") ~ term2) ^^ { case lhs ~ op ~ rhs =>
    val name = op match {
      case "<>" | "NE" => "NE"
      case "=" | "EQ"  => "EQ"
      case other       => other // unreachable with the alternatives above; keeps the match total
    }
    BinaryOp(name, lhs, rhs)
  })("term3 <>,=,NE,EQ") |
  log(term2)("term3 term2")
/** Maps the symbolic comparison tokens to the names the original grammar stored in
  * BinaryOp; every other operator keeps its own spelling.
  */
private def canonicalName(op: String): String = op match {
  case "<>"  => "NE"
  case "="   => "EQ"
  case "<"   => "LT"
  case ">"   => "GT"
  case "<="  => "LE"
  case ">="  => "GE"
  case other => other
}

/** Turns an operator parser into the combining function `chainl1` expects. */
private def binary(op: Parser[String]): Parser[(Expression, Expression) => Expression] =
  op ^^ { name => (lhs: Expression, rhs: Expression) => BinaryOp(canonicalName(name), lhs, rhs) }

/** Entry rule: the lowest-precedence level. */
def expr: Parser[Expression] = term5

// Each level is `operand (op operand)*` folded to the left. The two-argument
// chainl1 is used on purpose: passing the level itself as the right-hand parser
// would let it swallow the rest of the chain and build a right-nested tree, so
// `1 - 2 - 3` would become `1 - (2 - 3)`.

/** OR level. */
def term5: Parser[Expression] =
  log(chainl1(term4, binary("OR")))("term5 OR")

/** AND level. */
def term4: Parser[Expression] =
  log(chainl1(term3, binary("AND")))("term4 AND")

/** Equality level. */
def term3: Parser[Expression] =
  log(chainl1(term2, binary("<>" | "=" | "NE" | "EQ")))("term3 <>,=,NE,EQ")

/** Relational level. */
def term2: Parser[Expression] =
  log(chainl1(term1, binary("<" | ">" | "<=" | ">=" | "LT" | "GT" | "LE" | "GE")))("term2 <,>,...")

/** Additive level. */
def term1: Parser[Expression] =
  log(chainl1(term, binary("+" | "-" | ":")))("term1 +,-,:")

/** Multiplicative level. */
def term: Parser[Expression] =
  log(chainl1(factor, binary("*" | "/" | "MOD")))("term *,/,MOD")

/** Atoms: parenthesised expression, unary sign, function call, literals and identifiers.
  * `function` is defined elsewhere and is not shown in this snippet.
  */
def factor: Parser[Expression] =
  log("(" ~> expr <~ ")")("factor ()") |
  log(("+" | "-") ~ factor ^^ { case op ~ rhs => UnaryOp(op, rhs) })("factor unary") |
  log(function)("factor function") |
  log(numericLit ^^ { case x => Number(x/*.toFloat*/) })("factor numLit") |
  log(stringLit ^^ { case s => Literal(s) })("factor strLit") |
  log(ident ^^ { case id => Variable(id) })("factor ident")