F# 解析函数的签名-箭头类型为-FParsec+的错误;压痕
我已经说过了,这不是重复的,而是对基于缩进的语法的改编 事实上,我希望能够分析接近ML家族语言的语法。我还介绍了Haskell中函数类型签名的语法,因此:F# 解析函数的签名-箭头类型为-FParsec+的错误;压痕,f#,indentation,fparsec,F#,Indentation,Fparsec,我已经说过了,这不是重复的,而是对基于缩进的语法的改编 事实上,我希望能够分析接近ML家族语言的语法。我还介绍了Haskell中函数类型签名的语法,因此: myFunction :: atype 我的解析器适用于所有类型的签名类型,但箭头类型“单独”时除外: 函数的创建也是如此(为了简单起见,我只希望有一个数字作为值): 如果没有压痕,所有这些情况都是先验的 我尝试了一个模块来解析缩进,而不是FParsec wiki,只是想尝试和评估一下,以下是问题的必要和充分的模块代码: module In
myFunction :: atype
我的解析器适用于所有类型的签名类型,但箭头类型“单独”时除外:
函数的创建也是如此(为了简单起见,我只希望有一个数字作为值):
如果没有压痕,所有这些情况都是先验的
我尝试了一个模块来解析缩进,而不是FParsec wiki,只是想尝试和评估一下,以下是问题的必要和充分的模块代码:
module IndentParser =
type Indentation =
| Fail
| Any
| Greater of Position
| Exact of Position
| AtLeast of Position
| StartIndent of Position
with
member this.Position = match this with
| Any | Fail -> None
| Greater p -> Some p
| Exact p -> Some p
| AtLeast p -> Some p
| StartIndent p -> Some p
type IndentState<'T> = { Indent : Indentation; UserState : 'T }
type CharStream<'T> = FParsec.CharStream<IndentState<'T>>
type IndentParser<'T, 'UserState> = Parser<'T, IndentState<'UserState>>
let indentState u = {Indent = Any; UserState = u}
let runParser p u s = runParserOnString p (indentState u) "" s
let runParserOnFile p u path = runParserOnFile p (indentState u) path System.Text.Encoding.UTF8
let getIndentation : IndentParser<_,_> =
fun stream -> match stream.UserState with
| {Indent = i} -> Reply i
let getUserState : IndentParser<_,_> =
fun stream -> match stream.UserState with
| {UserState = u} -> Reply u
let putIndentation newi : IndentParser<unit, _> =
fun stream ->
stream.UserState <- {stream.UserState with Indent = newi}
Reply(Unchecked.defaultof<unit>)
let failf fmt = fail << sprintf fmt
let acceptable i (pos : Position) =
match i with
| Any _ -> true
| Fail -> false
| Greater bp -> bp.Column < pos.Column
| Exact ep -> ep.Column = pos.Column
| AtLeast ap -> ap.Column <= pos.Column
| StartIndent _ -> true
let tokeniser p = parse {
let! pos = getPosition
let! i = getIndentation
if acceptable i pos then return! p
else return! failf "incorrect indentation at %A" pos
}
let indented<'a,'u> i (p : Parser<'a,_>) : IndentParser<_, 'u> = parse {
do! putIndentation i
do! spaces
return! tokeniser p
}
/// Allows to check if the position of the parser currently being analyzed (`p`)
/// is on the same line as the defined position (`pos`).
let exact<'a,'u> pos p: IndentParser<'a, 'u> = indented (Exact pos) p
/// Allows to check if the position of the parser currently being analyzed (`p`)
/// is further away than the defined position (`pos`).
let greater<'a,'u> pos p: IndentParser<'a, 'u> = indented (Greater pos) p
/// Allows to check if the position of the parser currently being analyzed (`p`)
/// is on the same OR line further than the defined position (`pos`).
let atLeast<'a,'u> pos p: IndentParser<'a, 'u> = indented (AtLeast pos) p
/// Simply check if the parser (`p`) exists, regardless of its position in the text to be analyzed.
let any<'a,'u> pos p: IndentParser<'a, 'u> = indented Any p
let newline<'u> : IndentParser<unit, 'u> = many (skipAnyOf " \t" <?> "whitespace") >>. newline |>> ignore
let rec blockOf p = parse {
do! spaces
let! pos = getPosition
let! x = exact pos p
let! xs = attempt (exact pos <| blockOf p) <|> preturn []
return x::xs
}
下面是新的错误消息:
和
目前,您的代码在
:
签名上失败,因为您实际上没有在任何地方使用签名
解析器。您已将表达式定义为尝试let'
,但我认为您的意思是编写尝试签名尝试let'
。这就是为什么您的测试在:
的第二个冒号上失败的原因,因为它与let'
的单个冒号匹配,然后不需要第二个冒号
此外,我认为将多个尝试
组合链接在一起,如尝试a尝试b尝试c
会在某些地方给您带来问题,您应该删除最后的尝试
,例如尝试a尝试b c
。如果您在所有可能的选择中使用trunt
,您将得到一个解析器,该解析器可以通过不解析任何内容而成功,这通常不是您想要的
更新:我想我已经找到了原因和解决方案
摘要:在opws
解析器中,将ws>.
行替换为ws>?
解释:在所有sebby
变体中(并且chainr1
是sebby
变体),FParsec期望分隔符解析器要么成功,要么在不使用输入的情况下失败。(如果分隔符在使用输入后失败,FParsec会认为整个sebby
-系列解析器整体失败。)但是opws
解析器将使用空格,如果找不到正确的运算符,则会失败。因此,当您的arrow\u type
解析器解析字符串a->a
后接一个换行符时,第一个a
后面的箭头正确匹配,然后它看到第二个a
,然后尝试查找另一个箭头。由于接下来至少有一个空格字符(),opws“->”
解析器在失败之前消耗了一些输入。(它失败,因为在此之后,空格是文件的结尾,而不是另一个->
标记)。这使得chainr1
组合器失败,因此arrow\u type
失败,您的a->a
解析器最终被解析为单个类型a
。(此时箭头是意外的)
通过在opws
的定义中使用>?
,可以确保如果解析器的第二部分失败,它将在匹配任何空格之前回溯到。这将确保分隔符解析器在不匹配输入和不推进字符流中的解析位置的情况下失败。因此,chainr1
解析器在解析a->a
后成功,您可以得到预期的结果。事实上,当我将代码转录成最小代码时,我忘记了使用签名
解析器(在我的真实代码中,我有600多行代码,我必须排序^^)。。。但问题仍然存在。我编辑了我的问题。谢谢你关于尝试的建议
否则,我确实经常使用它。我希望你能帮我编辑这篇文章。我想你已经解决了。在opws
中,将ws>.
替换为ws>.
?
,这样,如果您的操作员不匹配,opws
将在不消耗输入的情况下失败。这可能会解决解析器中的所有问题,而不仅仅是这个问题。请参阅我编辑的答案以了解全部细节。它工作得非常好:)FParsec肯定包含了许多优秀的功能。谢谢。事实上,我建议用ws>>替换。
?
,无论它出现在哪里:例如在typename
和tuple\u type
。几乎没有一种情况需要ws>>。一些有意义的解析器
;如果某个有意义的解析器
失败,您总是希望回溯到空格之前,这样任何
或选择
组合器都能够做正确的事情。这意味着ws>>?一些有意义的解析器总是你想要的。我会注意到的,谢谢。一个简单的问题是,>?
是否也值得插入,如果您希望在某个解析器之后插入一些内容?如果我们遵循您关于>>。
的建议,通常情况下,是的。经验法则是考虑如果某个组件失败,解析应该在哪里恢复。如果您使用>
而第二个组件失败,那么整个解析器在使用输入后将失败,这意味着您无法回溯以尝试其他方法。有时候这就是你想要的,这就是为什么我不能说你总是想使用>?
。但是,通常,如果第二个组件出现故障,您希望一直回溯到开始,这意味着在该场景中使用>?
。只需考虑在每个特定情况下,解析应该在哪里恢复。
foo: a = 0 // ok
foo: [a] = 0 // ok
foo: (a, a) = 0 // ok
foo: [a -> a] = 0 // ok
foo: (a -> a, a) = 0 // ok
foo: a -> a = 0 // error
module IndentParser =
type Indentation =
| Fail
| Any
| Greater of Position
| Exact of Position
| AtLeast of Position
| StartIndent of Position
with
member this.Position = match this with
| Any | Fail -> None
| Greater p -> Some p
| Exact p -> Some p
| AtLeast p -> Some p
| StartIndent p -> Some p
type IndentState<'T> = { Indent : Indentation; UserState : 'T }
type CharStream<'T> = FParsec.CharStream<IndentState<'T>>
type IndentParser<'T, 'UserState> = Parser<'T, IndentState<'UserState>>
let indentState u = {Indent = Any; UserState = u}
let runParser p u s = runParserOnString p (indentState u) "" s
let runParserOnFile p u path = runParserOnFile p (indentState u) path System.Text.Encoding.UTF8
let getIndentation : IndentParser<_,_> =
fun stream -> match stream.UserState with
| {Indent = i} -> Reply i
let getUserState : IndentParser<_,_> =
fun stream -> match stream.UserState with
| {UserState = u} -> Reply u
let putIndentation newi : IndentParser<unit, _> =
fun stream ->
stream.UserState <- {stream.UserState with Indent = newi}
Reply(Unchecked.defaultof<unit>)
let failf fmt = fail << sprintf fmt
let acceptable i (pos : Position) =
match i with
| Any _ -> true
| Fail -> false
| Greater bp -> bp.Column < pos.Column
| Exact ep -> ep.Column = pos.Column
| AtLeast ap -> ap.Column <= pos.Column
| StartIndent _ -> true
let tokeniser p = parse {
let! pos = getPosition
let! i = getIndentation
if acceptable i pos then return! p
else return! failf "incorrect indentation at %A" pos
}
let indented<'a,'u> i (p : Parser<'a,_>) : IndentParser<_, 'u> = parse {
do! putIndentation i
do! spaces
return! tokeniser p
}
/// Allows to check if the position of the parser currently being analyzed (`p`)
/// is on the same line as the defined position (`pos`).
let exact<'a,'u> pos p: IndentParser<'a, 'u> = indented (Exact pos) p
/// Allows to check if the position of the parser currently being analyzed (`p`)
/// is further away than the defined position (`pos`).
let greater<'a,'u> pos p: IndentParser<'a, 'u> = indented (Greater pos) p
/// Allows to check if the position of the parser currently being analyzed (`p`)
/// is on the same OR line further than the defined position (`pos`).
let atLeast<'a,'u> pos p: IndentParser<'a, 'u> = indented (AtLeast pos) p
/// Simply check if the parser (`p`) exists, regardless of its position in the text to be analyzed.
let any<'a,'u> pos p: IndentParser<'a, 'u> = indented Any p
let newline<'u> : IndentParser<unit, 'u> = many (skipAnyOf " \t" <?> "whitespace") >>. newline |>> ignore
let rec blockOf p = parse {
do! spaces
let! pos = getPosition
let! x = exact pos p
let! xs = attempt (exact pos <| blockOf p) <|> preturn []
return x::xs
}
module Parser =
open IndentParser
type Identifier = string
type Type =
| Typename of Identifier
| Tuple of Type list
| List of Type
| Arrow of Type * Type
| Infered
type Expression =
| Let of Identifier * Type * int
| Signature of Identifier * Type
type Program = Program of Expression list
// Utils -----------------------------------------------------------------
let private ws = spaces
/// All symbols granted for the "opws" parser
let private allowedSymbols =
['!'; '@'; '#'; '$'; '%'; '+'; '&'; '*'; '('; ')'; '-'; '+'; '='; '?'; '/'; '>'; '<'; '|']
/// Parse an operator and white spaces around it: `ws >>. p .>> ws`
let inline private opws str =
ws >>.
(tokeniser (pstring str >>?
(nextCharSatisfiesNot
(isAnyOf (allowedSymbols @ ['"'; '''])) <?> str))) .>> ws
let private identifier =
(many1Satisfy2L isLetter
(fun c -> isLetter c || isDigit c) "identifier")
// Types -----------------------------------------------------------------
let rec typename = parse {
let! name = ws >>. identifier
return Type.Typename name
}
and tuple_type = parse {
let! types = between (opws "(") (opws ")") (sepBy (ws >>. type') (opws ","))
return Type.Tuple types
}
and list_type = parse {
let! ty = between (opws "[") (opws "]") type'
return Type.List ty
}
and arrow_type =
chainr1 (typename <|> tuple_type <|> list_type) (opws "->" >>% fun t1 t2 -> Arrow(t1, t2))
and type' =
attempt arrow_type <|>
attempt typename <|>
attempt tuple_type <|>
attempt list_type
// Expressions -----------------------------------------------------------------
let rec private let' = parse {
let! pos = getPosition
let! id = exact pos identifier
do! greater pos (opws ":")
let! ty = greater pos type'
do! greater pos (opws "=")
let! value = greater pos pint32
return Expression.Let(id, ty, value)
}
and private signature = parse {
let! pos = getPosition
let! id = exact pos identifier
do! greater pos (opws "::")
let! ty = greater pos type'
return Expression.Signature(id, ty)
}
and private expression =
attempt let'
and private expressions = blockOf expression <?> "expressions"
let private document = ws >>. expressions .>> ws .>> eof |>> Program
let private testType = ws >>. type' .>> ws .>> eof
let rec parse code =
runParser document () code
|> printfn "%A"
open Parser
parse @"
foo :: a -> a
"
open FParsec
// module IndentParser
module Parser =
open IndentParser
type Identifier = string
type Type =
| Typename of Identifier
| Tuple of Type list
| List of Type
| Arrow of Type * Type
| Infered
type Expression =
| Let of Identifier * Type * int
| Signature of Identifier * Type
type Program = Program of Expression list
// Utils -----------------------------------------------------------------
let private ws = spaces
/// All symbols granted for the "opws" parser
let private allowedSymbols =
['!'; '@'; '#'; '$'; '%'; '+'; '&'; '*'; '('; ')'; '-'; '+'; '='; '?'; '/'; '>'; '<'; '|']
/// Parse an operator and white spaces around it: `ws >>. p .>> ws`
let inline private opws str =
ws >>.
(tokeniser (pstring str >>?
(nextCharSatisfiesNot
(isAnyOf (allowedSymbols @ ['"'; '''])) <?> str))) .>> ws
let private identifier =
(many1Satisfy2L isLetter
(fun c -> isLetter c || isDigit c) "identifier")
// Types -----------------------------------------------------------------
let rec typename = parse {
let! name = ws >>. identifier
return Type.Typename name
}
and tuple_type = parse {
let! types = between (opws "(") (opws ")") (sepBy (ws >>. type') (opws ","))
return Type.Tuple types
}
and list_type = parse {
let! ty = between (opws "[") (opws "]") type'
return Type.List ty
}
and arrow_type =
chainr1 (typename <|> tuple_type <|> list_type) (opws "->" >>% fun t1 t2 -> Arrow(t1, t2))
and type' =
attempt arrow_type <|>
typename <|>
tuple_type <|>
list_type
// Expressions -----------------------------------------------------------------
let rec private let' = parse {
let! pos = getPosition
let! id = exact pos identifier
do! greater pos (opws ":")
let! ty = greater pos type'
do! greater pos (opws "=")
let! value = greater pos pint32
return Expression.Let(id, ty, value)
}
and private signature = parse {
let! pos = getPosition
let! id = exact pos identifier
do! greater pos (opws "::")
let! ty = greater pos type'
return Expression.Signature(id, ty)
}
and private expression =
attempt let' <|>
signature
and private expressions = blockOf expression <?> "expressions"
let private document = ws >>. expressions .>> ws .>> eof |>> Program
let private testType = ws >>. type' .>> ws .>> eof
let rec parse code =
runParser document () code
|> printfn "%A"
open Parser
System.Console.Clear()
parse @"
foo :: a -> a
"