Parsing 解析基于缩进的类似于ML的语法，以及所有被认为是指令/表达式的东西_Parsing_F#_Indentation_Fparsec

Parsing 解析基于缩进的类似于ML的语法，以及所有被认为是指令/表达式的东西

parsing f#

Parsing 解析基于缩进的类似于ML的语法，以及所有被认为是指令/表达式的东西,parsing,f#,indentation,fparsec,Parsing,F#,Indentation,Fparsec,注：不久前，我已经问过了。这不是重复，但所要求的澄清不属于主题本身的范围。因此，我允许自己打开另一个位置，处理基于缩进的类似ML的语法分析，并将所有内容都视为指令/表达式例如： “Hello”是一个表达式， let foo=2+1是使用表达式（2+1）的指令， printfoo是一条指令简而言之，这是一个非常模块化和动态的语法和语义。比如F#，或者OCaml 为此，我使用F#和API（可在nuget上获得）FParsec。FParsec wiki提供了，所以我再次使用它。下面代码中使用的模块

注：不久前，我已经问过了。这不是重复，但所要求的澄清不属于主题本身的范围。因此，我允许自己打开另一个位置，处理基于缩进的类似ML的语法分析，并将所有内容都视为指令/表达式

例如：

“Hello”

是一个表达式，

let foo=2+1

是使用表达式（

2+1

）的指令，

printfoo

是一条指令

简而言之，这是一个非常模块化和动态的语法和语义。比如F#，或者OCaml

为此，我使用F#和API（可在nuget上获得）FParsec。FParsec wiki提供了，所以我再次使用它。下面代码中使用的模块是无回溯的缩进语法分析器

要解析的示例代码使用基本缩进，而不是混合“文字”和“指令/表达式”：

一个简单的代码，没有上下文（但目前这并不重要）

我的实现允许以下代码：

let foo = a + b

let foo =
    let a = 9
    let b = 1
    a + b

let foo = 7

let foo =
    loop i 1 10
        print i

比如说。（循环<代码>和打印<代码>仅用于测试…）

我已经有很长一周的时间了，但我无法解决的问题是，每次解析器中需要一条新的行指令时，缩进模块都会询问我。。。以下是一个屏幕截图：

这适用于上述所有示例。我真的不理解这个问题，因此不知道如何解决它

以下是针对该问题测试的代码，它符合最低和功能代码标准，但必须使用FParsec：

open FParsec

// This module come from 'https://github.com/stephan-tolksdorf/fparsec/wiki/Parsing-indentation-based-syntax-with-FParsec'
// I used the second module: 'IndentationParserWithoutBacktracking'

module IndentationParserWithoutBacktracking =

    let tabStopDistance = 8

    type LastParsedIndentation() =
        [<DefaultValue>]
        val mutable Value: int32
        [<DefaultValue>]
        val mutable EndIndex: int64

    type UserState = 
        {Indentation: int
         // We put LastParsedIndentation into the UserState so that we 
         // can conveniently use a separate instance for each stream.
         // The members of the LastParsedIndentation instance will be mutated
         // directly and hence won't be affected by any stream backtracking. 
         LastParsedIndentation: LastParsedIndentation}
        with
           static member Create() = {Indentation = -1
                                     LastParsedIndentation = LastParsedIndentation(EndIndex = -1L)}

    type CharStream = CharStream<UserState>
    type Parser<'t> = Parser<'t, UserState>

    // If this function is called at the same index in the stream
    // where the function previously stopped, then the previously
    // returned indentation will be returned again. 
    // This way we can avoid backtracking at the end of indented blocks.
    let skipIndentation (stream: CharStream) =    
        let lastParsedIndentation = stream.UserState.LastParsedIndentation
        if lastParsedIndentation.EndIndex = stream.Index then
            lastParsedIndentation.Value
        else
            let mutable indentation = stream.SkipNewlineThenWhitespace(tabStopDistance, false)
            lastParsedIndentation.EndIndex <- stream.Index
            lastParsedIndentation.Value <- indentation
            indentation

    let indentedMany1 (p: Parser<'t>) label : Parser<'t list> =
        fun stream ->
            let oldIndentation = stream.UserState.Indentation
            let indentation = skipIndentation stream
            if indentation <= oldIndentation then 
                Reply(Error, expected (if indentation < 0 then "newline" else "indented " + label))
            else
                stream.UserState <- {stream.UserState with Indentation = indentation}            
                let results = ResizeArray()
                let mutable stateTag = stream.StateTag
                let mutable reply = p stream // parse the first element
                let mutable newIndentation = 0
                while reply.Status = Ok 
                      && (results.Add(reply.Result)
                          newIndentation <- skipIndentation stream
                          newIndentation = indentation)
                   do
                     stateTag <- stream.StateTag
                     reply <- p stream
                if reply.Status = Ok 
                   || (stream.IsEndOfStream && results.Count > 0 && stream.StateTag = stateTag) 
                then
                    if newIndentation < indentation || stream.IsEndOfStream then
                        stream.UserState <- {stream.UserState with Indentation = oldIndentation}
                        Reply(List.ofSeq results)
                    else
                        Reply(Error, messageError "wrong indentation")
                else // p failed
                    Reply(reply.Status, reply.Error) 

open IndentationParserWithoutBacktracking

let isBlank = fun c -> c = ' ' || c = '\t'
let ws  = spaces
let ws1 = skipMany1SatisfyL isBlank "whitespace"
let str s = pstring s .>> ws

let keyword str = pstring str >>? nextCharSatisfiesNot (fun c -> isLetter c || isDigit c) <?> str

// AST

type Identifier = Identifier of string

// A value is just a literal or a data name, called here "Variable"
type Value =
    | Int of int   | Float of float
    | Bool of bool | String of string
    | Char of char | Variable of Identifier

// All is an instruction, but there are some differences:
type Instr =
    // Arithmetic
    | Literal of Value   | Infix of Instr * InfixOp * Instr
    // Statements (instructions needing another instructions)
    | Let of Identifier * Instr list
    | Loop of Identifier * Instr * Instr * Instr list
    // Other - the "print" function, from the link seen above
    | Print of Identifier
and InfixOp =
    // Arithmetic
    | Sum | Sub | Mul | Div
    // Logic
    | And | Or | Equal | NotEqual | Greater | Smaller | GreaterEqual | SmallerEqual

// Literals

let numberFormat = NumberLiteralOptions.AllowMinusSign   ||| NumberLiteralOptions.AllowFraction |||
                   NumberLiteralOptions.AllowHexadecimal ||| NumberLiteralOptions.AllowOctal    |||
                   NumberLiteralOptions.AllowBinary

let literal_numeric =
    numberLiteral numberFormat "number" |>> fun nl ->
        if nl.IsInteger then Literal (Int(int nl.String))
        else Literal (Float(float nl.String))

let literal_bool = 
    (choice [
        (stringReturn "true" (Literal (Bool true)))
        (stringReturn "false" (Literal (Bool false)))
    ]
    .>> ws) <?> "boolean"

let literal_string = 
    (between (pstring "\"") (pstring "\"") (manyChars (satisfy (fun c -> c <> '"')))
    |>> fun s -> Literal (String s)) <?> "string"

let literal_char = 
    (between (pstring "'") (pstring "'") (satisfy (fun c -> c <> '''))
    |>> fun c -> Literal (Char c)) <?> "character"

let identifier =
    (many1Satisfy2L isLetter (fun c -> isLetter c || isDigit c) "identifier"
    |>> Identifier) <?> "identifier"

let betweenParentheses p =
    (between (str "(") (str ")") p) <?> ""

let variable = identifier |>> fun id -> Literal (Variable id)

let literal = (attempt literal_numeric  <|>
               attempt literal_bool     <|>
               attempt literal_char     <|>
               attempt literal_string   <|>
               attempt variable)

// Instressions and statements

let pInstrs, pInstrimpl = createParserForwardedToRef()

// `ploop` is located here to force `pInstrs` to be of the type `Instr list`, `ploop` requesting an instression list.
let ploop =
    pipe4
        (keyword "loop" >>. ws1 >>. identifier)
        (ws1 >>. literal)
        (ws1 >>. literal)
        (pInstrs)
        (fun id min max stmts -> Loop(id, min, max, stmts))

// `singlepInstr` allows to use only one Instression, used just after.
let singlepInstr =
    pInstrs |>> fun ex -> ex.Head

let term =
    (ws >>. singlepInstr .>> ws) <|>
    (betweenParentheses (ws >>. singlepInstr)) <|>
    (ws >>. literal .>> ws) <|>
    (betweenParentheses (ws >>. literal))

let infixOperator (p: OperatorPrecedenceParser<_, _, _>) op prec map =
    p.AddOperator(InfixOperator(op, ws, prec, Associativity.Left, map))

let ops =
    // Arithmetic
    [ "+"; "-"; "*"; "/"; "%" ] @
    // Logical
    [ "&&"; "||"; "=="; "!="; ">"; "<"; ">="; "<=" ]

let opCorrespondance op =
    match op with
    // Arithmetic operators
    | "+"  -> Sum | "-"  -> Sub
    | "*"  -> Mul | "/"  -> Div
    // Logical operators
    | "&&" -> And           | "||" -> Or
    | "==" -> Equal         | "!=" -> NotEqual
    | ">"  -> Greater       | "<"  -> Smaller
    | ">=" -> GreaterEqual  | "<=" -> SmallerEqual
    | _ -> failwith ("Unknown operator: " + op)

let opParser = new OperatorPrecedenceParser<Instr, unit, UserState>()

for op in ops do
    infixOperator opParser op 1 (fun x y -> Infix(x, opCorrespondance op, y))

opParser.TermParser <- term

// Statements

(*
- let:

        let <identifier> = <instruction(s) / value>

- print:

        print <identifier>

- loop:

        loop <identifier> <literal> <literal> <indented statements>

*)

let plet =
    pipe2
        (keyword "let" >>. ws1 >>. identifier)
        (ws >>. str "=" >>. ws >>. pInstrs)
        (fun id exp -> Let(id, exp))

let print =
    keyword "print" >>. ws1 >>. identifier 
    |>> Print

let instruction =
    print <|> ploop <|> plet <|>

    opParser.ExpressionParser <|>
    literal

pInstrimpl := indentedMany1 instruction "instruction"

let document = pInstrs .>> spaces .>> eof

let test str =
    match runParserOnString document (UserState.Create()) "" str with
        | Success(result, _, _)   -> printfn "%A" result
        | Failure(errorMsg, _, _) -> printfn "%s" errorMsg

System.Console.Clear()

let code = test @"
let foo = a + b
"

打开FParsec
//此模块来自'https://github.com/stephan-tolksdorf/fparsec/wiki/Parsing-indentation-based-syntax-with-FParsec'
//我使用了第二个模块：“IndentationParserWithoutBacktracking”
无回溯的模块缩进分析器=
让tabStopDistance=8
类型LastParsedIndentation（）=
[]
val可变值：int32
[]
val可变EndIndex:int64
类型UserState=
{缩进：int
//我们将LastParsedIndentation放入UserState，以便
//可以方便地为每个流使用单独的实例。
//LastParsedIndentation实例的成员将发生变异
//直接，因此不会受到任何流回溯的影响。
LastParsedIndentation:LastParsedIndentation}
具有
静态成员Create（）={Indentation=-1
LastParsedIndentation=LastParsedIndentation（EndIndex=-1L）}
类型CharStream=CharStream
类型分析器
//如果在流中的同一索引处调用此函数
//如果功能先前停止，则先前
//返回的缩进将再次返回。
//这样，我们可以避免在缩进块的末尾进行回溯。
let skipIndentation（流：CharStream）=
让lastParsedIndentation=stream.UserState.lastParsedIndentation
如果lastParsedIndentation.EndIndex=stream.Index，则
lastParsedIndentation.Value
其他的
让可变缩进=stream.SkipNewlineThenWhitespace（tabStopDistance，false）
lastParsedIndentation.EndIndex
让oldIndentation=stream.UserState.Indentation
let indentation=skipIndentation流
如果缩进？下一步（乐趣c->小岛c | | isDigit c）街
//AST
类型标识符=字符串的标识符
//值只是一个文字或数据名，此处称为“变量”
类型值=
|Int of Int | Float of Float
|Bool of Bool |字符串的字符串
|字符的字符|标识符的变量
//所有这些都是一个指令，但有一些区别：
类型仪表=
//算术
|值的文字| Instr的中缀*InfixOp*Instr
//语句（需要其他指令的指令）
|Let of标识符*仪表列表
|标识符*指令*指令*指令*指令列表的循环
//其他-从上面的链接可以看到“打印”功能
|标识符的打印
和InfixOp=
//算术
|Sum | Sub | Mul | Div
//逻辑
|和|或|相等|不相等|较大|较小|较大|相等|较小|相等
//文字
设numberFormat=NumberInteraloptions.AllowMinusSign | | | | NumberInteraloptions.AllowFraction|||
numberteraloptions.AllowHexadecimal | | | numberteraloptions.AllowOctal|||
NumberTeraloptions.AllowBinary
让文字为数字=
数字侧数字格式“数字”|>>趣味nl->
如果是nl.IsInteger，则为Literal（Int（Int nl.String））
else文本（Float（Float nl.String））
设文字布尔=
（选择[
（stringReturn“true”（Literal（Bool-true）））
（stringReturn“false”（Literal（Bool false）））
]
.>>ws“布尔”
让文字_字符串=
（在（pstring“\”）（pstring“\”）（许多字符（满足（乐趣c->c’））之间）
|>>趣味s->Literal（字符串s））“字符串”
让文字字符=
（在（pstring“”）和（pstring“”）之间
|>>趣味c->Literal（Char c））“字符”
let标识符=
（许多令人满意的2L胰岛细胞（乐趣c->胰岛细胞c | | isDigit c）“标识符”
|>>标识符）“标识符”
让我们在这些p之间=
(在(str)("(""str")"p"之间"
让变量=标识符|>>乐趣id->文字（变量id）
让literal=（尝试literal\u numeric
尝试文字输入
尝试文本字符
尝试使用字符串
尝试变量）
//指示和声明
让pInstrs，pInstrimpl=createParserForwardedToRef（）
//'ploop'位于此处，用于强制'pInstrs'为'Instr list'、'ploop'r类型
open FParsec

// This module come from 'https://github.com/stephan-tolksdorf/fparsec/wiki/Parsing-indentation-based-syntax-with-FParsec'
// I used the second module: 'IndentationParserWithoutBacktracking'

module IndentationParserWithoutBacktracking =

    let tabStopDistance = 8

    type LastParsedIndentation() =
        [<DefaultValue>]
        val mutable Value: int32
        [<DefaultValue>]
        val mutable EndIndex: int64

    type UserState = 
        {Indentation: int
         // We put LastParsedIndentation into the UserState so that we 
         // can conveniently use a separate instance for each stream.
         // The members of the LastParsedIndentation instance will be mutated
         // directly and hence won't be affected by any stream backtracking. 
         LastParsedIndentation: LastParsedIndentation}
        with
           static member Create() = {Indentation = -1
                                     LastParsedIndentation = LastParsedIndentation(EndIndex = -1L)}

    type CharStream = CharStream<UserState>
    type Parser<'t> = Parser<'t, UserState>

    // If this function is called at the same index in the stream
    // where the function previously stopped, then the previously
    // returned indentation will be returned again. 
    // This way we can avoid backtracking at the end of indented blocks.
    let skipIndentation (stream: CharStream) =    
        let lastParsedIndentation = stream.UserState.LastParsedIndentation
        if lastParsedIndentation.EndIndex = stream.Index then
            lastParsedIndentation.Value
        else
            let mutable indentation = stream.SkipNewlineThenWhitespace(tabStopDistance, false)
            lastParsedIndentation.EndIndex <- stream.Index
            lastParsedIndentation.Value <- indentation
            indentation

    let indentedMany1 (p: Parser<'t>) label : Parser<'t list> =
        fun stream ->
            let oldIndentation = stream.UserState.Indentation
            let indentation = skipIndentation stream
            if indentation <= oldIndentation then 
                Reply(Error, expected (if indentation < 0 then "newline" else "indented " + label))
            else
                stream.UserState <- {stream.UserState with Indentation = indentation}            
                let results = ResizeArray()
                let mutable stateTag = stream.StateTag
                let mutable reply = p stream // parse the first element
                let mutable newIndentation = 0
                while reply.Status = Ok 
                      && (results.Add(reply.Result)
                          newIndentation <- skipIndentation stream
                          newIndentation = indentation)
                   do
                     stateTag <- stream.StateTag
                     reply <- p stream
                if reply.Status = Ok 
                   || (stream.IsEndOfStream && results.Count > 0 && stream.StateTag = stateTag) 
                then
                    if newIndentation < indentation || stream.IsEndOfStream then
                        stream.UserState <- {stream.UserState with Indentation = oldIndentation}
                        Reply(List.ofSeq results)
                    else
                        Reply(Error, messageError "wrong indentation")
                else // p failed
                    Reply(reply.Status, reply.Error) 

open IndentationParserWithoutBacktracking

let isBlank = fun c -> c = ' ' || c = '\t'
let ws  = spaces
let ws1 = skipMany1SatisfyL isBlank "whitespace"
let str s = pstring s .>> ws

let keyword str = pstring str >>? nextCharSatisfiesNot (fun c -> isLetter c || isDigit c) <?> str

// AST

type Identifier = Identifier of string

// A value is just a literal or a data name, called here "Variable"
type Value =
    | Int of int   | Float of float
    | Bool of bool | String of string
    | Char of char | Variable of Identifier

// All is an instruction, but there are some differences:
type Instr =
    // Arithmetic
    | Literal of Value   | Infix of Instr * InfixOp * Instr
    // Statements (instructions needing another instructions)
    | Let of Identifier * Instr list
    | Loop of Identifier * Instr * Instr * Instr list
    // Other - the "print" function, from the link seen above
    | Print of Identifier
and InfixOp =
    // Arithmetic
    | Sum | Sub | Mul | Div
    // Logic
    | And | Or | Equal | NotEqual | Greater | Smaller | GreaterEqual | SmallerEqual

// Literals

let numberFormat = NumberLiteralOptions.AllowMinusSign   ||| NumberLiteralOptions.AllowFraction |||
                   NumberLiteralOptions.AllowHexadecimal ||| NumberLiteralOptions.AllowOctal    |||
                   NumberLiteralOptions.AllowBinary

let literal_numeric =
    numberLiteral numberFormat "number" |>> fun nl ->
        if nl.IsInteger then Literal (Int(int nl.String))
        else Literal (Float(float nl.String))

let literal_bool = 
    (choice [
        (stringReturn "true" (Literal (Bool true)))
        (stringReturn "false" (Literal (Bool false)))
    ]
    .>> ws) <?> "boolean"

let literal_string = 
    (between (pstring "\"") (pstring "\"") (manyChars (satisfy (fun c -> c <> '"')))
    |>> fun s -> Literal (String s)) <?> "string"

let literal_char = 
    (between (pstring "'") (pstring "'") (satisfy (fun c -> c <> '''))
    |>> fun c -> Literal (Char c)) <?> "character"

let identifier =
    (many1Satisfy2L isLetter (fun c -> isLetter c || isDigit c) "identifier"
    |>> Identifier) <?> "identifier"

let betweenParentheses p =
    (between (str "(") (str ")") p) <?> ""

let variable = identifier |>> fun id -> Literal (Variable id)

let literal = (attempt literal_numeric  <|>
               attempt literal_bool     <|>
               attempt literal_char     <|>
               attempt literal_string   <|>
               attempt variable)

// Instressions and statements

let pInstrs, pInstrimpl = createParserForwardedToRef()

// `ploop` is located here to force `pInstrs` to be of the type `Instr list`, `ploop` requesting an instression list.
let ploop =
    pipe4
        (keyword "loop" >>. ws1 >>. identifier)
        (ws1 >>. literal)
        (ws1 >>. literal)
        (pInstrs)
        (fun id min max stmts -> Loop(id, min, max, stmts))

// `singlepInstr` allows to use only one Instression, used just after.
let singlepInstr =
    pInstrs |>> fun ex -> ex.Head

let term =
    (ws >>. singlepInstr .>> ws) <|>
    (betweenParentheses (ws >>. singlepInstr)) <|>
    (ws >>. literal .>> ws) <|>
    (betweenParentheses (ws >>. literal))

let infixOperator (p: OperatorPrecedenceParser<_, _, _>) op prec map =
    p.AddOperator(InfixOperator(op, ws, prec, Associativity.Left, map))

let ops =
    // Arithmetic
    [ "+"; "-"; "*"; "/"; "%" ] @
    // Logical
    [ "&&"; "||"; "=="; "!="; ">"; "<"; ">="; "<=" ]

let opCorrespondance op =
    match op with
    // Arithmetic operators
    | "+"  -> Sum | "-"  -> Sub
    | "*"  -> Mul | "/"  -> Div
    // Logical operators
    | "&&" -> And           | "||" -> Or
    | "==" -> Equal         | "!=" -> NotEqual
    | ">"  -> Greater       | "<"  -> Smaller
    | ">=" -> GreaterEqual  | "<=" -> SmallerEqual
    | _ -> failwith ("Unknown operator: " + op)

let opParser = new OperatorPrecedenceParser<Instr, unit, UserState>()

for op in ops do
    infixOperator opParser op 1 (fun x y -> Infix(x, opCorrespondance op, y))

opParser.TermParser <- term

// Statements

(*
- let:

        let <identifier> = <instruction(s) / value>

- print:

        print <identifier>

- loop:

        loop <identifier> <literal> <literal> <indented statements>

*)

let plet =
    pipe2
        (keyword "let" >>. ws1 >>. identifier)
        (ws >>. str "=" >>. ws >>. pInstrs)
        (fun id exp -> Let(id, exp))

let print =
    keyword "print" >>. ws1 >>. identifier 
    |>> Print

let instruction =
    print <|> ploop <|> plet <|>

    opParser.ExpressionParser <|>
    literal

pInstrimpl := indentedMany1 instruction "instruction"

let document = pInstrs .>> spaces .>> eof

let test str =
    match runParserOnString document (UserState.Create()) "" str with
        | Success(result, _, _)   -> printfn "%A" result
        | Failure(errorMsg, _, _) -> printfn "%s" errorMsg

System.Console.Clear()

let code = test @"
let foo = a + b
"

let x = instruction
let b =
  instruction
  instruction

type Statement = Loop of Identifier * int * int * Statement list
               | Print of Identifier
               | Let of Identifier * Statement list

let ws = skipManySatisfy isBlank
let str s = pstring s .>> ws

let statement, statementRef = createParserForwardedToRef()

let indentedStatements = indentedMany1 statement "statement"

let plet = keyword "let" >>. pipe2 (ws1 >>. identifier)
                                   (ws >>. str "=" >>. ws
                                    >>. (indentedStatements
                                         <|> (statement |>> fun s -> [s])))
                                   (fun id exp -> Let(id, exp))
statementRef := print <|> loop <|> plet

let x = instruction
        instruction
        instruction