Parsing 解析基于缩进的类似于ML的语法,以及所有被认为是指令/表达式的东西
注:不久前,我已经问过了。这不是重复,但所要求的澄清不属于主题本身的范围。因此,我允许自己打开另一个位置,处理基于缩进的类似ML的语法分析,并将所有内容都视为指令/表达式 例如:Parsing 解析基于缩进的类似于ML的语法,以及所有被认为是指令/表达式的东西,parsing,f#,indentation,fparsec,Parsing,F#,Indentation,Fparsec,注:不久前,我已经问过了。这不是重复,但所要求的澄清不属于主题本身的范围。因此,我允许自己打开另一个位置,处理基于缩进的类似ML的语法分析,并将所有内容都视为指令/表达式 例如: “Hello”是一个表达式, let foo=2+1是使用表达式(2+1)的指令, printfoo是一条指令 简而言之,这是一个非常模块化和动态的语法和语义。比如F#,或者OCaml 为此,我使用F#和API(可在nuget上获得)FParsec。FParsec wiki提供了,所以我再次使用它。下面代码中使用的模块
“Hello”
是一个表达式,
let foo=2+1
是使用表达式(2+1
)的指令,
printfoo
是一条指令
简而言之,这是一个非常模块化和动态的语法和语义。比如F#,或者OCaml
为此,我使用F#和API(可在nuget上获得)FParsec。FParsec wiki提供了,所以我再次使用它。下面代码中使用的模块是无回溯的缩进语法分析器
要解析的示例代码使用基本缩进,而不是混合“文字”和“指令/表达式”:
一个简单的代码,没有上下文(但目前这并不重要)
我的实现允许以下代码:
let foo = a + b
let foo =
let a = 9
let b = 1
a + b
let foo = 7
let foo =
loop i 1 10
print i
比如说。(循环<代码>和打印<代码>仅用于测试…)
我已经有很长一周的时间了,但我无法解决的问题是,每次解析器中需要一条新的行指令时,缩进模块都会询问我。。。以下是一个屏幕截图:
这适用于上述所有示例。我真的不理解这个问题,因此不知道如何解决它
以下是针对该问题测试的代码,它符合最低和功能代码标准,但必须使用FParsec:
open FParsec
// This module come from 'https://github.com/stephan-tolksdorf/fparsec/wiki/Parsing-indentation-based-syntax-with-FParsec'
// I used the second module: 'IndentationParserWithoutBacktracking'
module IndentationParserWithoutBacktracking =
let tabStopDistance = 8
type LastParsedIndentation() =
[<DefaultValue>]
val mutable Value: int32
[<DefaultValue>]
val mutable EndIndex: int64
type UserState =
{Indentation: int
// We put LastParsedIndentation into the UserState so that we
// can conveniently use a separate instance for each stream.
// The members of the LastParsedIndentation instance will be mutated
// directly and hence won't be affected by any stream backtracking.
LastParsedIndentation: LastParsedIndentation}
with
static member Create() = {Indentation = -1
LastParsedIndentation = LastParsedIndentation(EndIndex = -1L)}
type CharStream = CharStream<UserState>
type Parser<'t> = Parser<'t, UserState>
// If this function is called at the same index in the stream
// where the function previously stopped, then the previously
// returned indentation will be returned again.
// This way we can avoid backtracking at the end of indented blocks.
let skipIndentation (stream: CharStream) =
let lastParsedIndentation = stream.UserState.LastParsedIndentation
if lastParsedIndentation.EndIndex = stream.Index then
lastParsedIndentation.Value
else
let mutable indentation = stream.SkipNewlineThenWhitespace(tabStopDistance, false)
lastParsedIndentation.EndIndex <- stream.Index
lastParsedIndentation.Value <- indentation
indentation
let indentedMany1 (p: Parser<'t>) label : Parser<'t list> =
fun stream ->
let oldIndentation = stream.UserState.Indentation
let indentation = skipIndentation stream
if indentation <= oldIndentation then
Reply(Error, expected (if indentation < 0 then "newline" else "indented " + label))
else
stream.UserState <- {stream.UserState with Indentation = indentation}
let results = ResizeArray()
let mutable stateTag = stream.StateTag
let mutable reply = p stream // parse the first element
let mutable newIndentation = 0
while reply.Status = Ok
&& (results.Add(reply.Result)
newIndentation <- skipIndentation stream
newIndentation = indentation)
do
stateTag <- stream.StateTag
reply <- p stream
if reply.Status = Ok
|| (stream.IsEndOfStream && results.Count > 0 && stream.StateTag = stateTag)
then
if newIndentation < indentation || stream.IsEndOfStream then
stream.UserState <- {stream.UserState with Indentation = oldIndentation}
Reply(List.ofSeq results)
else
Reply(Error, messageError "wrong indentation")
else // p failed
Reply(reply.Status, reply.Error)
open IndentationParserWithoutBacktracking
let isBlank = fun c -> c = ' ' || c = '\t'
let ws = spaces
let ws1 = skipMany1SatisfyL isBlank "whitespace"
let str s = pstring s .>> ws
let keyword str = pstring str >>? nextCharSatisfiesNot (fun c -> isLetter c || isDigit c) <?> str
// AST
type Identifier = Identifier of string
// A value is just a literal or a data name, called here "Variable"
type Value =
| Int of int | Float of float
| Bool of bool | String of string
| Char of char | Variable of Identifier
// All is an instruction, but there are some differences:
type Instr =
// Arithmetic
| Literal of Value | Infix of Instr * InfixOp * Instr
// Statements (instructions needing another instructions)
| Let of Identifier * Instr list
| Loop of Identifier * Instr * Instr * Instr list
// Other - the "print" function, from the link seen above
| Print of Identifier
and InfixOp =
// Arithmetic
| Sum | Sub | Mul | Div
// Logic
| And | Or | Equal | NotEqual | Greater | Smaller | GreaterEqual | SmallerEqual
// Literals
let numberFormat = NumberLiteralOptions.AllowMinusSign ||| NumberLiteralOptions.AllowFraction |||
NumberLiteralOptions.AllowHexadecimal ||| NumberLiteralOptions.AllowOctal |||
NumberLiteralOptions.AllowBinary
let literal_numeric =
numberLiteral numberFormat "number" |>> fun nl ->
if nl.IsInteger then Literal (Int(int nl.String))
else Literal (Float(float nl.String))
let literal_bool =
(choice [
(stringReturn "true" (Literal (Bool true)))
(stringReturn "false" (Literal (Bool false)))
]
.>> ws) <?> "boolean"
let literal_string =
(between (pstring "\"") (pstring "\"") (manyChars (satisfy (fun c -> c <> '"')))
|>> fun s -> Literal (String s)) <?> "string"
let literal_char =
(between (pstring "'") (pstring "'") (satisfy (fun c -> c <> '''))
|>> fun c -> Literal (Char c)) <?> "character"
let identifier =
(many1Satisfy2L isLetter (fun c -> isLetter c || isDigit c) "identifier"
|>> Identifier) <?> "identifier"
let betweenParentheses p =
(between (str "(") (str ")") p) <?> ""
let variable = identifier |>> fun id -> Literal (Variable id)
let literal = (attempt literal_numeric <|>
attempt literal_bool <|>
attempt literal_char <|>
attempt literal_string <|>
attempt variable)
// Instressions and statements
let pInstrs, pInstrimpl = createParserForwardedToRef()
// `ploop` is located here to force `pInstrs` to be of the type `Instr list`, `ploop` requesting an instression list.
let ploop =
pipe4
(keyword "loop" >>. ws1 >>. identifier)
(ws1 >>. literal)
(ws1 >>. literal)
(pInstrs)
(fun id min max stmts -> Loop(id, min, max, stmts))
// `singlepInstr` allows to use only one Instression, used just after.
let singlepInstr =
pInstrs |>> fun ex -> ex.Head
let term =
(ws >>. singlepInstr .>> ws) <|>
(betweenParentheses (ws >>. singlepInstr)) <|>
(ws >>. literal .>> ws) <|>
(betweenParentheses (ws >>. literal))
let infixOperator (p: OperatorPrecedenceParser<_, _, _>) op prec map =
p.AddOperator(InfixOperator(op, ws, prec, Associativity.Left, map))
let ops =
// Arithmetic
[ "+"; "-"; "*"; "/"; "%" ] @
// Logical
[ "&&"; "||"; "=="; "!="; ">"; "<"; ">="; "<=" ]
let opCorrespondance op =
match op with
// Arithmetic operators
| "+" -> Sum | "-" -> Sub
| "*" -> Mul | "/" -> Div
// Logical operators
| "&&" -> And | "||" -> Or
| "==" -> Equal | "!=" -> NotEqual
| ">" -> Greater | "<" -> Smaller
| ">=" -> GreaterEqual | "<=" -> SmallerEqual
| _ -> failwith ("Unknown operator: " + op)
let opParser = new OperatorPrecedenceParser<Instr, unit, UserState>()
for op in ops do
infixOperator opParser op 1 (fun x y -> Infix(x, opCorrespondance op, y))
opParser.TermParser <- term
// Statements
(*
- let:
let <identifier> = <instruction(s) / value>
- print:
print <identifier>
- loop:
loop <identifier> <literal> <literal> <indented statements>
*)
let plet =
pipe2
(keyword "let" >>. ws1 >>. identifier)
(ws >>. str "=" >>. ws >>. pInstrs)
(fun id exp -> Let(id, exp))
let print =
keyword "print" >>. ws1 >>. identifier
|>> Print
let instruction =
print <|> ploop <|> plet <|>
opParser.ExpressionParser <|>
literal
pInstrimpl := indentedMany1 instruction "instruction"
let document = pInstrs .>> spaces .>> eof
let test str =
match runParserOnString document (UserState.Create()) "" str with
| Success(result, _, _) -> printfn "%A" result
| Failure(errorMsg, _, _) -> printfn "%s" errorMsg
System.Console.Clear()
let code = test @"
let foo = a + b
"
打开FParsec
//此模块来自'https://github.com/stephan-tolksdorf/fparsec/wiki/Parsing-indentation-based-syntax-with-FParsec'
//我使用了第二个模块:“IndentationParserWithoutBacktracking”
无回溯的模块缩进分析器=
让tabStopDistance=8
类型LastParsedIndentation()=
[]
val可变值:int32
[]
val可变EndIndex:int64
类型UserState=
{缩进:int
//我们将LastParsedIndentation放入UserState,以便
//可以方便地为每个流使用单独的实例。
//LastParsedIndentation实例的成员将发生变异
//直接,因此不会受到任何流回溯的影响。
LastParsedIndentation:LastParsedIndentation}
具有
静态成员Create()={Indentation=-1
LastParsedIndentation=LastParsedIndentation(EndIndex=-1L)}
类型CharStream=CharStream
类型分析器
//如果在流中的同一索引处调用此函数
//如果功能先前停止,则先前
//返回的缩进将再次返回。
//这样,我们可以避免在缩进块的末尾进行回溯。
let skipIndentation(流:CharStream)=
让lastParsedIndentation=stream.UserState.lastParsedIndentation
如果lastParsedIndentation.EndIndex=stream.Index,则
lastParsedIndentation.Value
其他的
让可变缩进=stream.SkipNewlineThenWhitespace(tabStopDistance,false)
lastParsedIndentation.EndIndex
让oldIndentation=stream.UserState.Indentation
let indentation=skipIndentation流
如果缩进?下一步(乐趣c->小岛c | | isDigit c)街
//AST
类型标识符=字符串的标识符
//值只是一个文字或数据名,此处称为“变量”
类型值=
|Int of Int | Float of Float
|Bool of Bool |字符串的字符串
|字符的字符|标识符的变量
//所有这些都是一个指令,但有一些区别:
类型仪表=
//算术
|值的文字| Instr的中缀*InfixOp*Instr
//语句(需要其他指令的指令)
|Let of标识符*仪表列表
|标识符*指令*指令*指令*指令列表的循环
//其他-从上面的链接可以看到“打印”功能
|标识符的打印
和InfixOp=
//算术
|Sum | Sub | Mul | Div
//逻辑
|和|或|相等|不相等|较大|较小|较大|相等|较小|相等
//文字
设numberFormat=NumberInteraloptions.AllowMinusSign | | | | NumberInteraloptions.AllowFraction|||
numberteraloptions.AllowHexadecimal | | | numberteraloptions.AllowOctal|||
NumberTeraloptions.AllowBinary
让文字为数字=
数字侧数字格式“数字”|>>趣味nl->
如果是nl.IsInteger,则为Literal(Int(Int nl.String))
else文本(Float(Float nl.String))
设文字布尔=
(选择[
(stringReturn“true”(Literal(Bool-true)))
(stringReturn“false”(Literal(Bool false)))
]
.>>ws“布尔”
让文字_字符串=
(在(pstring“\”)(pstring“\”)(许多字符(满足(乐趣c->c’))之间)
|>>趣味s->Literal(字符串s))“字符串”
让文字字符=
(在(pstring“”)和(pstring“”)之间
|>>趣味c->Literal(Char c))“字符”
let标识符=
(许多令人满意的2L胰岛细胞(乐趣c->胰岛细胞c | | isDigit c)“标识符”
|>>标识符)“标识符”
让我们在这些p之间=
(在(str)("(""str")"p"之间"
让变量=标识符|>>乐趣id->文字(变量id)
让literal=(尝试literal\u numeric
尝试文字输入
尝试文本字符
尝试使用字符串
尝试变量)
//指示和声明
让pInstrs,pInstrimpl=createParserForwardedToRef()
//'ploop'位于此处,用于强制'pInstrs'为'Instr list'、'ploop'r类型
open FParsec
// This module come from 'https://github.com/stephan-tolksdorf/fparsec/wiki/Parsing-indentation-based-syntax-with-FParsec'
// I used the second module: 'IndentationParserWithoutBacktracking'
module IndentationParserWithoutBacktracking =
let tabStopDistance = 8
type LastParsedIndentation() =
[<DefaultValue>]
val mutable Value: int32
[<DefaultValue>]
val mutable EndIndex: int64
type UserState =
{Indentation: int
// We put LastParsedIndentation into the UserState so that we
// can conveniently use a separate instance for each stream.
// The members of the LastParsedIndentation instance will be mutated
// directly and hence won't be affected by any stream backtracking.
LastParsedIndentation: LastParsedIndentation}
with
static member Create() = {Indentation = -1
LastParsedIndentation = LastParsedIndentation(EndIndex = -1L)}
type CharStream = CharStream<UserState>
type Parser<'t> = Parser<'t, UserState>
// If this function is called at the same index in the stream
// where the function previously stopped, then the previously
// returned indentation will be returned again.
// This way we can avoid backtracking at the end of indented blocks.
let skipIndentation (stream: CharStream) =
let lastParsedIndentation = stream.UserState.LastParsedIndentation
if lastParsedIndentation.EndIndex = stream.Index then
lastParsedIndentation.Value
else
let mutable indentation = stream.SkipNewlineThenWhitespace(tabStopDistance, false)
lastParsedIndentation.EndIndex <- stream.Index
lastParsedIndentation.Value <- indentation
indentation
let indentedMany1 (p: Parser<'t>) label : Parser<'t list> =
fun stream ->
let oldIndentation = stream.UserState.Indentation
let indentation = skipIndentation stream
if indentation <= oldIndentation then
Reply(Error, expected (if indentation < 0 then "newline" else "indented " + label))
else
stream.UserState <- {stream.UserState with Indentation = indentation}
let results = ResizeArray()
let mutable stateTag = stream.StateTag
let mutable reply = p stream // parse the first element
let mutable newIndentation = 0
while reply.Status = Ok
&& (results.Add(reply.Result)
newIndentation <- skipIndentation stream
newIndentation = indentation)
do
stateTag <- stream.StateTag
reply <- p stream
if reply.Status = Ok
|| (stream.IsEndOfStream && results.Count > 0 && stream.StateTag = stateTag)
then
if newIndentation < indentation || stream.IsEndOfStream then
stream.UserState <- {stream.UserState with Indentation = oldIndentation}
Reply(List.ofSeq results)
else
Reply(Error, messageError "wrong indentation")
else // p failed
Reply(reply.Status, reply.Error)
open IndentationParserWithoutBacktracking
let isBlank = fun c -> c = ' ' || c = '\t'
let ws = spaces
let ws1 = skipMany1SatisfyL isBlank "whitespace"
let str s = pstring s .>> ws
let keyword str = pstring str >>? nextCharSatisfiesNot (fun c -> isLetter c || isDigit c) <?> str
// AST
type Identifier = Identifier of string
// A value is just a literal or a data name, called here "Variable"
type Value =
| Int of int | Float of float
| Bool of bool | String of string
| Char of char | Variable of Identifier
// All is an instruction, but there are some differences:
type Instr =
// Arithmetic
| Literal of Value | Infix of Instr * InfixOp * Instr
// Statements (instructions needing another instructions)
| Let of Identifier * Instr list
| Loop of Identifier * Instr * Instr * Instr list
// Other - the "print" function, from the link seen above
| Print of Identifier
and InfixOp =
// Arithmetic
| Sum | Sub | Mul | Div
// Logic
| And | Or | Equal | NotEqual | Greater | Smaller | GreaterEqual | SmallerEqual
// Literals
let numberFormat = NumberLiteralOptions.AllowMinusSign ||| NumberLiteralOptions.AllowFraction |||
NumberLiteralOptions.AllowHexadecimal ||| NumberLiteralOptions.AllowOctal |||
NumberLiteralOptions.AllowBinary
let literal_numeric =
numberLiteral numberFormat "number" |>> fun nl ->
if nl.IsInteger then Literal (Int(int nl.String))
else Literal (Float(float nl.String))
let literal_bool =
(choice [
(stringReturn "true" (Literal (Bool true)))
(stringReturn "false" (Literal (Bool false)))
]
.>> ws) <?> "boolean"
let literal_string =
(between (pstring "\"") (pstring "\"") (manyChars (satisfy (fun c -> c <> '"')))
|>> fun s -> Literal (String s)) <?> "string"
let literal_char =
(between (pstring "'") (pstring "'") (satisfy (fun c -> c <> '''))
|>> fun c -> Literal (Char c)) <?> "character"
let identifier =
(many1Satisfy2L isLetter (fun c -> isLetter c || isDigit c) "identifier"
|>> Identifier) <?> "identifier"
let betweenParentheses p =
(between (str "(") (str ")") p) <?> ""
let variable = identifier |>> fun id -> Literal (Variable id)
let literal = (attempt literal_numeric <|>
attempt literal_bool <|>
attempt literal_char <|>
attempt literal_string <|>
attempt variable)
// Instressions and statements
let pInstrs, pInstrimpl = createParserForwardedToRef()
// `ploop` is located here to force `pInstrs` to be of the type `Instr list`, `ploop` requesting an instression list.
let ploop =
pipe4
(keyword "loop" >>. ws1 >>. identifier)
(ws1 >>. literal)
(ws1 >>. literal)
(pInstrs)
(fun id min max stmts -> Loop(id, min, max, stmts))
// `singlepInstr` allows to use only one Instression, used just after.
let singlepInstr =
pInstrs |>> fun ex -> ex.Head
let term =
(ws >>. singlepInstr .>> ws) <|>
(betweenParentheses (ws >>. singlepInstr)) <|>
(ws >>. literal .>> ws) <|>
(betweenParentheses (ws >>. literal))
let infixOperator (p: OperatorPrecedenceParser<_, _, _>) op prec map =
p.AddOperator(InfixOperator(op, ws, prec, Associativity.Left, map))
let ops =
// Arithmetic
[ "+"; "-"; "*"; "/"; "%" ] @
// Logical
[ "&&"; "||"; "=="; "!="; ">"; "<"; ">="; "<=" ]
let opCorrespondance op =
match op with
// Arithmetic operators
| "+" -> Sum | "-" -> Sub
| "*" -> Mul | "/" -> Div
// Logical operators
| "&&" -> And | "||" -> Or
| "==" -> Equal | "!=" -> NotEqual
| ">" -> Greater | "<" -> Smaller
| ">=" -> GreaterEqual | "<=" -> SmallerEqual
| _ -> failwith ("Unknown operator: " + op)
let opParser = new OperatorPrecedenceParser<Instr, unit, UserState>()
for op in ops do
infixOperator opParser op 1 (fun x y -> Infix(x, opCorrespondance op, y))
opParser.TermParser <- term
// Statements
(*
- let:
let <identifier> = <instruction(s) / value>
- print:
print <identifier>
- loop:
loop <identifier> <literal> <literal> <indented statements>
*)
let plet =
pipe2
(keyword "let" >>. ws1 >>. identifier)
(ws >>. str "=" >>. ws >>. pInstrs)
(fun id exp -> Let(id, exp))
let print =
keyword "print" >>. ws1 >>. identifier
|>> Print
let instruction =
print <|> ploop <|> plet <|>
opParser.ExpressionParser <|>
literal
pInstrimpl := indentedMany1 instruction "instruction"
let document = pInstrs .>> spaces .>> eof
let test str =
match runParserOnString document (UserState.Create()) "" str with
| Success(result, _, _) -> printfn "%A" result
| Failure(errorMsg, _, _) -> printfn "%s" errorMsg
System.Console.Clear()
let code = test @"
let foo = a + b
"
let x = instruction
let b =
instruction
instruction
type Statement = Loop of Identifier * int * int * Statement list
| Print of Identifier
| Let of Identifier * Statement list
let ws = skipManySatisfy isBlank
let str s = pstring s .>> ws
let statement, statementRef = createParserForwardedToRef()
let indentedStatements = indentedMany1 statement "statement"
let plet = keyword "let" >>. pipe2 (ws1 >>. identifier)
(ws >>. str "=" >>. ws
>>. (indentedStatements
<|> (statement |>> fun s -> [s])))
(fun id exp -> Let(id, exp))
statementRef := print <|> loop <|> plet
let x = instruction
instruction
instruction