Parsing 如何将FParsec用于基于缩进的语句(如python)?
因此,我为我的语言提供了一个基本的解析器 我想为它合并基于缩进的语法,就像在python中一样 然而,我正在努力了解如何将indentationParser与我的结合起来,它们与类型不匹配相冲突 在我的语言中,“do”之后的任何内容都是新的范围,需要标识:Parsing 如何将FParsec用于基于缩进的语句(如python)?,parsing,f#,indentation,fparsec,Parsing,F#,Indentation,Fparsec,因此,我为我的语言提供了一个基本的解析器 我想为它合并基于缩进的语法,就像在python中一样 然而,我正在努力了解如何将indentationParser与我的结合起来,它们与类型不匹配相冲突 在我的语言中,“do”之后的任何内容都是新的范围,需要标识: type ExprC = | BoolC of bool | DecC of decimal | MathC of MathOp * array<ExprC> | VarC of string | BlockC
type ExprC =
| BoolC of bool
| DecC of decimal
| MathC of MathOp * array<ExprC>
| VarC of string
| BlockC of BlockCodeC
| IfC of LogiOpC * BlockCodeC * BlockCodeC
| LoopC of ExprC * BlockCodeC
module Parser2
open System
open System.Collections.Generic
open FParsec
open TablaM
let tabStopDistance = 8 // must be a power of 2
module IndentationParser =
type LastParsedIndentation() =
[<DefaultValue>]
val mutable Value: int32
[<DefaultValue>]
val mutable EndIndex: int64
type UserState =
{Indentation: int
// We put LastParsedIndentation into the UserState so that we
// can conveniently use a separate instance for each stream.
// The members of the LastParsedIndentation instance will be mutated
// directly and hence won't be affected by any stream backtracking.
LastParsedIndentation: LastParsedIndentation}
with
static member Create() = {Indentation = -1
LastParsedIndentation = LastParsedIndentation(EndIndex = -1L)}
type CharStream = CharStream<UserState>
type Parser<'t> = Parser<'t, UserState>
// If this function is called at the same index in the stream
// where the function previously stopped, then the previously
// returned indentation will be returned again.
// This way we can avoid backtracking at the end of indented blocks.
let skipIndentation (stream: CharStream) =
let lastParsedIndentation = stream.UserState.LastParsedIndentation
if lastParsedIndentation.EndIndex = stream.Index then
lastParsedIndentation.Value
else
let mutable indentation = stream.SkipNewlineThenWhitespace(tabStopDistance, false)
while stream.Peek() = '#' do
stream.SkipRestOfLine(false) // skip comment
indentation <- stream.SkipNewlineThenWhitespace(tabStopDistance, false)
lastParsedIndentation.EndIndex <- stream.Index
lastParsedIndentation.Value <- indentation
indentation
let indentedMany1 (p: Parser<'t>) label : Parser<'t list> =
fun stream ->
let oldIndentation = stream.UserState.Indentation
let indentation = skipIndentation stream
if indentation <= oldIndentation then
Reply(Error, expected (if indentation < 0 then "newline" else "indented " + label))
else
stream.UserState <- {stream.UserState with Indentation = indentation}
let results = ResizeArray()
let mutable stateTag = stream.StateTag
let mutable reply = p stream // parse the first element
let mutable newIndentation = 0
while reply.Status = Ok
&& (results.Add(reply.Result)
newIndentation <- skipIndentation stream
newIndentation = indentation)
do
stateTag <- stream.StateTag
reply <- p stream
if reply.Status = Ok
|| (stream.IsEndOfStream && results.Count > 0 && stream.StateTag = stateTag)
then
if newIndentation < indentation || stream.IsEndOfStream then
stream.UserState <- {stream.UserState with Indentation = oldIndentation}
Reply(List.ofSeq results)
else
Reply(Error, messageError "wrong indentation")
else // p failed
Reply(reply.Status, reply.Error)
open IndentationParser
let reserved = ["for";"do"; "while";"if";"case";"type"]
let DecimalParser : Parser<_, unit> =
// note: doesn't parse a float exponent suffix
numberLiteral NumberLiteralOptions.AllowFraction "number"
|>> fun num ->
decimal num.String
let isBlank = fun c -> c = ' ' || c = '\t'
let ws = skipMany1SatisfyL isBlank "whitespace"
let str_ws s = pstring s .>> ws
let comment = pstring "#" >>. skipRestOfLine false
let wsBeforeEOL = skipManySatisfy isBlank >>. optional comment
let pidentifierraw =
let isIdentifierFirstChar c = isLetter c || c = '_'
let isIdentifierChar c = isLetter c || isDigit c || c = '_'
many1Satisfy2L isIdentifierFirstChar isIdentifierChar "identifier"
let pidentifier =
pidentifierraw
>>= fun s ->
if reserved |> List.exists ((=) s) then fail "keyword"
else preturn s
let keyword str = pstring str >>? nextCharSatisfiesNot (fun c -> isLetter c || isDigit c) <?> str
let pvar = pidentifier |>> VarC
let booleans = choice[stringReturn "true" true <|> stringReturn "false" false] .>> spaces |>> BoolC
let decimals = DecimalParser |>> DecC
let pliteral = decimals <|> booleans //<|> pstringliteral
let expression1 = spaces >>? choice[pliteral;pvar]
let between a b p = pstring a >>. p .>> pstring b
let expr, exprImpl = createParserForwardedToRef()
let parens = expr |> between "(" ")"
let lhExpression = choice[pliteral; parens; pvar]
do exprImpl := spaces >>. choice[attempt parens;
expression1]
(* Rules of associations *)
type Assoc = Associativity
let opp = new OperatorPrecedenceParser<ExprC,unit,unit>()
let parithmetic = opp.ExpressionParser
let terma = (lhExpression .>> spaces) <|> parithmetic .>> parens
opp.TermParser <- terma
let mathOps = [
"+", Add, 1;
"-", Sub, 1;
"*", Mul, 2;
"/", Div, 2
]
for str:String, op:MathOp, precedence:int in mathOps do
opp.AddOperator(InfixOperator(str, spaces, precedence, Assoc.Left, fun x y -> MathC(op, [| x; y |])))
let indentedStatements, indentedStatementsRef = createParserForwardedToRef()
let doBlock = keyword "do" >>. (pipe2 (ws .>> wsBeforeEOL)
indentedStatements
(fun a stmts ->
let lines = stmts |> List.toArray
BlockC(lines)))
let ifBlock = keyword "if" >>. doBlock
let forBlock = keyword "for" >>. doBlock
let funBlock = keyword "fun" >>. doBlock
let statement = ifBlock <|> forBlock <|> funBlock <|> lhExpression <|> parithmetic
do indentedStatementsRef := indentedMany1 statement "statement"
let document = indentedStatements .>> spaces .>> eof
let parse str =
runParserOnString document (UserState.Create()) "" str
type ExprC=
|布尔的布尔
|十进制十进制数
|MathOp*数组的MathC
|弦变量
|BlockCodeC的BlockC
|LogiOpC*BlockCodeC*BlockCodeC的IfC
|ExprC*BlockCodeC的LoopC
模块解析器2
开放系统
open System.Collections.Generic
开放式FParsec
开放式塔布拉姆
设tabStopDistance=8//必须是2的幂
模块标识解析器=
类型LastParsedIndentation()=
[]
val可变值:int32
[]
val可变EndIndex:int64
类型UserState=
{缩进:int
//我们将LastParsedIndentation放入UserState,以便
//可以方便地为每个流使用单独的实例。
//LastParsedIndentation实例的成员将发生变异
//直接,因此不会受到任何流回溯的影响。
LastParsedIndentation:LastParsedIndentation}
具有
静态成员Create()={Indentation=-1
LastParsedIndentation=LastParsedIndentation(EndIndex=-1L)}
类型CharStream=CharStream
类型分析器
//如果在流中的同一索引处调用此函数
//如果功能先前停止,则先前
//返回的缩进将再次返回。
//这样,我们可以避免在缩进块的末尾进行回溯。
let skipIndentation(流:CharStream)=
让lastParsedIndentation=stream.UserState.lastParsedIndentation
如果lastParsedIndentation.EndIndex=stream.Index,则
lastParsedIndentation.Value
其他的
让可变缩进=stream.SkipNewlineThenWhitespace(tabStopDistance,false)
而stream.Peek()
stream.SkipRestOfLine(false)//跳过注释
缩进我猜,这是因为无状态的分词解析器
,因此的类型是并行的和lhExpression
自动概括为解析器
。好吧,让我发疯的是为什么pipe2使它工作,但不适用于文本。另外,如果有可能以另一种适合FParsec常规方法的方式进行缩进…我想,这是因为无状态的DecimalParser
,因此pliteral
和lhExpression
的类型自动概括为解析器,让我抓狂的是为什么pipe2让它工作,而不是文字。此外,如果有可能以符合FParsec常规方法的其他方式进行压痕。。。
let statement = ifBlock <|> lhExpression <|> parithmetic
Error FS0001: Type mismatch. Expecting a Parser<ExprC list,unit> but given a Parser<'a list> The type 'unit' does not match the type 'UserState' (FS0001) (TablaM)