F#:使用 FParsec 解析数字
标签:f#, fparsec
我已经开始学习 FParsec。它提供了一种非常灵活的数字解析方式;我可以指定一组想要支持的数字格式:
/// A parsed numeric literal, tagged with the notation it was written in.
type Number =
    /// Integer literal written in plain decimal digits.
    | Numeral of int
    /// Literal with a fractional part.
    | Decimal of float
    /// Integer literal written in hexadecimal notation.
    | Hexadecimal of int
    /// Integer literal written in binary notation.
    | Binary of int
/// Literal forms accepted by `numberLiteral`: fractions, hexadecimal and binary.
let numberFormat =
    [ NumberLiteralOptions.AllowFraction
      NumberLiteralOptions.AllowHexadecimal
      NumberLiteralOptions.AllowBinary ]
    |> List.reduce (|||) // combine the individual flags into one option set
/// Parses a number literal in any format enabled by `numberFormat` and
/// classifies it as a `Number` case based on the flags FParsec reports.
let pnumber =
    // Converts the raw literal text with F#'s `int` / `float` conversions.
    let classify (literal: NumberLiteral) =
        let text = literal.String
        if literal.IsHexadecimal then Hexadecimal (int text)
        elif literal.IsBinary then Binary (int text)
        elif literal.IsInteger then Numeral (int text)
        else Decimal (float text)
    numberLiteral numberFormat "number" |>> classify
然而,我要解析的语言有点奇怪。数字可以是整数(非负 `int`)、小数(非负 `float`)、十六进制数(带前缀 `#x`)或二进制数(带前缀 `#b`):
现在我不得不解析两次:先在必要时把 `#` 替换为 `0`,然后才能使用 `pnumber`:
/// Parses a number written either as plain digits (optionally with a '.')
/// or with a '#' prefix ("#x..." / "#b..."), by rewriting the '#' to '0' and
/// re-parsing the resulting text with `pnumber`.
/// Raises (via `failwith`) when the inner `pnumber` run rejects the text.
let number: Parser<_, unit> =
    let isDotOrDigit c = isDigit c || c = '.'
    // First character must be a digit; later ones may be digits or dots.
    let numOrDec = many1Satisfy2 isDigit isDotOrDigit
    // Replace the leading '#' with '0' so that "#x1F" becomes "0x1F".
    let hexOrBin = skipChar '#' >>. manyChars (letter <|> digit) |>> sprintf "0%s"
    // The alternation must be parenthesised: `>>.` and `<|>` have the same
    // precedence and associate to the left, so without parentheses the
    // expression reads `(spaces >>. numOrDec) <|> hexOrBin`, and `hexOrBin`
    // is never tried once `spaces` has consumed leading whitespace.
    let str = spaces >>. (numOrDec <|> hexOrBin)
    str |>> fun s ->
        match run pnumber s with
        | Success(result, _, _) -> result
        | Failure(errorMsg, _, _) -> failwith errorMsg
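let number: Parser<_, unit> =
    let isDotOrDigit c = isDigit c || c = '.'
    let numOrDec = many1Satisfy2 isDigit isDotOrDigit
    let hexOrBin = skipChar '#' >>. manyChars (letter <|> digit) |>> sprintf "0%s"
    let str = spaces >>. numOrDec <|> hexOrBin
    str |>> fun s -> match run pnumber s with
                     | Success(result, _, _) -> result
                     | Failure(errorMsg, _, _) -> failwith errorMsg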
在这种情况下,有什么更好的解析方法?或者,我该如何改动 FParsec 的 `CharStream`,使条件解析更容易?

如果您想生成良好的错误消息并正确检查溢出,解析数字可能会变得非常混乱。

以下是该数字解析器的一个简单 FParsec 实现:
/// Parses an unprefixed literal: digits only yield `Numeral`, digits with a
/// fractional part yield `Decimal`.
let numeralOrDecimal : Parser<_, unit> =
    // note: doesn't parse a float exponent suffix
    let toNumber (literal: NumberLiteral) =
        // raises an exception on overflow
        match literal.IsInteger with
        | true -> Numeral(int literal.String)
        | false -> Decimal(float literal.String)
    numberLiteral NumberLiteralOptions.AllowFraction "number" |>> toNumber
/// Parses a "#x"-prefixed hexadecimal literal into `Hexadecimal`.
/// Raises `System.OverflowException` when the value does not fit into a
/// non-negative 32-bit int.
let hexNumber =
    pstring "#x" >>. many1SatisfyL isHex "hex digit"
    |>> fun hexStr ->
            // Convert.ToInt32 with base 16 throws for values wider than
            // 32 bits, but interprets 80000000..FFFFFFFF as two's complement
            // and silently returns a negative number. Reject that case
            // explicitly so every overflow surfaces as an exception.
            let value = System.Convert.ToInt32(hexStr, 16)
            if value < 0 then
                raise (System.OverflowException "hex number literal is too large for 32-bit int")
            Hexadecimal value
/// Parses a "#b"-prefixed binary literal into `Binary`.
/// Raises `System.OverflowException` when the value does not fit into a
/// non-negative 32-bit int.
let binaryNumber =
    pstring "#b" >>. many1SatisfyL (fun c -> c = '0' || c = '1') "binary digit"
    |>> fun binStr ->
            // Convert.ToInt32 with base 2 throws for values wider than
            // 32 bits, but a 32-digit literal whose top bit is set is read as
            // two's complement and comes back negative. Reject that case
            // explicitly so every overflow surfaces as an exception.
            let value = System.Convert.ToInt32(binStr, 2)
            if value < 0 then
                raise (System.OverflowException "binary number literal is too large for 32-bit int")
            Binary value
/// Parses any supported number literal by trying the unprefixed, "#x" and
/// "#b" parsers in that order; failures are reported under one label.
let number =
    let alternatives = [ numeralOrDecimal; hexNumber; binaryNumber ]
    choiceL alternatives "number literal"
我不确定您所说的“alter FParsec's CharStream 以简化条件解析”具体指什么,但下面的示例演示了如何编写一个只直接使用 `CharStream` 方法的低级实现:
// Short alias for the .NET number-style flags passed to the TryParse calls.
type NumberStyles = System.Globalization.NumberStyles
// Culture passed to the TryParse calls so that parsing does not depend on the
// current culture's number format.
let invariantCulture = System.Globalization.CultureInfo.InvariantCulture
/// Low-level number parser written directly against the CharStream methods.
/// Recognises "#x<hex digits>" (Hexadecimal), "#b<binary digits>" (Binary),
/// "<digits>.<digits>" (Decimal) and "<digits>" (Numeral).
/// Values that do not fit the target type produce an Error reply, with the
/// stream moved back to the start of the literal.
let number: Parser<Number, unit> =
    let expectedNumber = expected "number"
    let inline isBinary c = c = '0' || c = '1'
    // Maps a hex digit to its value: for '0'..'9' the low nibble is the value
    // and bit 6 is clear; for 'A'..'F' / 'a'..'f' the low nibble is 1..6 and
    // bit 6 is set, which adds the missing 9.
    let inline hex2int c = (int c &&& 15) + (int c >>> 6)*9
    let hexStringToInt (str: string) = // does no argument or overflow checking
        let mutable n = 0
        for c in str do
            n <- n*16 + hex2int c
        n
    let binStringToInt (str: string) = // does no argument or overflow checking
        let mutable n = 0
        for c in str do
            n <- n*2 + (int c - int '0')
        n
    // Index of the first character that is not '0' (str.Length if all are '0').
    let findIndexOfFirstNonNull (str: string) =
        let mutable i = 0
        while i < str.Length && str.[i] = '0' do
            i <- i + 1
        i
    let isHexFun = id isHex // tricks the compiler into caching the function object
    let isDigitFun = id isDigit
    let isBinaryFun = id isBinary
    fun stream ->
        let start = stream.IndexToken
        let cs = stream.Peek2()
        match cs.Char0, cs.Char1 with
        | '#', 'x' ->
            stream.Skip(2)
            let str = stream.ReadCharsOrNewlinesWhile(isHexFun, false)
            if str.Length <> 0 then
                // Ignore leading zeros when checking the magnitude: at most
                // 8 significant digits, and with exactly 8 the first one must
                // be <= '7' for the value to fit into a non-negative int32.
                let i = findIndexOfFirstNonNull str
                let length = str.Length - i
                if length < 8 || (length = 8 && str.[i] <= '7') then
                    Reply(Hexadecimal(hexStringToInt str))
                else
                    stream.Seek(start)
                    Reply(Error, messageError "hex number literal is too large for 32-bit int")
            else
                Reply(Error, expected "hex digit")
        | '#', 'b' ->
            stream.Skip(2)
            let str = stream.ReadCharsOrNewlinesWhile(isBinaryFun, false)
            if str.Length <> 0 then
                // At most 31 significant bits fit into a non-negative int32.
                let i = findIndexOfFirstNonNull str
                let length = str.Length - i
                if length < 32 then
                    Reply(Binary(binStringToInt str))
                else
                    stream.Seek(start)
                    Reply(Error, messageError "binary number literal is too large for 32-bit int")
            else
                Reply(Error, expected "binary digit")
        | c, _ ->
            if not (isDigit c) then Reply(Error, expectedNumber)
            else
                stream.SkipCharsOrNewlinesWhile(isDigitFun) |> ignore
                if stream.Skip('.') then
                    let n2 = stream.SkipCharsOrNewlinesWhile(isDigitFun)
                    if n2 <> 0 then
                        // we don't parse any exponent, as in the other example
                        let mutable result = 0.
                        let parsed =
                            System.Double.TryParse(stream.ReadFrom(start),
                                                   NumberStyles.AllowDecimalPoint,
                                                   invariantCulture,
                                                   &result)
                        // Newer .NET runtimes report an out-of-range value by
                        // returning true with an infinite result instead of
                        // returning false, so check for infinity as well.
                        if parsed && not (System.Double.IsInfinity result) then
                            Reply(Decimal(result))
                        else
                            stream.Seek(start)
                            Reply(Error, messageError "decimal literal is larger than System.Double.MaxValue")
                    else
                        Reply(Error, expected "digit")
                else
                    let decimalString = stream.ReadFrom(start)
                    let mutable result = 0
                    if System.Int32.TryParse(decimalString,
                                             NumberStyles.None,
                                             invariantCulture,
                                             &result)
                    then Reply(Numeral(result))
                    else
                        stream.Seek(start)
                        Reply(Error, messageError "decimal number literal is too large for 32-bit int")
type NumberStyles=System.Globalization.NumberStyles
让invariantCulture=System.Globalization.CultureInfo.invariantCulture
let编号:解析器=
让expectedNumber=预期的“数字”
让内联isBinary c=c='0'| | c='1'
让内联hex2int c=(int c&&15)+(int c>>>6)*9
让hextStringToInt(str:string)=//不执行参数或溢出检查
设可变n=0
对于strdo中的c
N
如果不是(isDigit c),则回复(错误,预期编号)
其他的
stream.skipcharsornewlinewhile(isDigitFun)|>忽略
如果stream.Skip('.'),则
设n2=stream.skipcharsornewlineswwhile(isDigitFun)
如果n2为0,则
//我们不解析任何指数,就像在另一个例子中一样
设可变结果=0。
如果System.Double.TryParse(stream.ReadFrom(start)),
NumberStyles.AllowDecimalPoint,
不变量文化,
&结果)
然后回答(十进制(结果))
其他的
stream.Seek(开始)
回复(错误,messageError“十进制文字大于System.Double.MaxValue”)
其他的
答复(错误,应为“数字”)
其他的
让decimalString=stream.ReadFrom(开始)
设可变结果=0
如果System.Int32.TryParse(stream.ReadFrom(start)),
数字样式。无,
不变量文化,
&结果)
然后回答(数字(结果))
其他的
stream.Seek(开始)
回复(错误,messageError“十进制数字文字对于32位整数来说太大”)
虽然此实现在不借助系统方法的情况下解析十六进制数和二进制数,但它最终仍把十进制数的解析委托给 `Int32.TryParse` 和 `Double.TryParse` 方法。
正如我所说:太乱了。+1,谢谢你的快速回复,斯蒂芬。我所说的“alter FParsec's CharStream…”,指的是对 `CharStream` 的低级操作。我会选择第一种方法,它简单易懂。顺便问一下,使用带标签的组合器的开销有多大?如果我在解析器中到处使用标签,代价会很高吗?

我刚刚添加了一条注释,说明如何在第一个版本中更优雅地处理溢出异常。关于标签:视情况而定。`choiceL` 实际上比 `choice` 更快,因为它不必收集各个分支的错误消息。一般来说,`<?>` 及类似组合器的开销在非平凡的应用中几乎测量不出来。如果您在 FParsec 解析器中发现了性能问题,总有办法让它更快……

感谢您提供的详细答案。在这种情况下,`skipString` 应该优先于 `pstring`,对吗?

没有性能差异,因为两个解析器都不需要做任何工作来创建结果值(在这两种情况下都是引用类型常量)。因此,这只是个人偏好的问题。
/// Wraps `p` so that an exception raised while it runs becomes a parser
/// failure: the stream is restored to the state captured before `p` ran and
/// a FatalError reply carrying the exception message is returned.
let mayThrow (p: Parser<'t,'u>) : Parser<'t,'u> =
    fun stream ->
        let checkpoint = stream.State
        try p stream
        with ex -> // catching all exceptions is somewhat dangerous
            stream.BacktrackTo(checkpoint)
            Reply(FatalError, messageError ex.Message)
// Usage sketch: wrap the combined number parser in `mayThrow` so exceptions
// raised by the conversions are reported as parser errors.
// NOTE(review): `[...]` is a placeholder, not valid F#; presumably it stands
// for the list of alternative number parsers — confirm before compiling.
let number = mayThrow (choiceL [...] "number literal")
// Short alias for the .NET number-style flags passed to the TryParse calls.
type NumberStyles = System.Globalization.NumberStyles
// Culture passed to the TryParse calls so that parsing does not depend on the
// current culture's number format.
let invariantCulture = System.Globalization.CultureInfo.InvariantCulture
/// Low-level number parser written directly against the CharStream methods.
/// Recognises "#x<hex digits>" (Hexadecimal), "#b<binary digits>" (Binary),
/// "<digits>.<digits>" (Decimal) and "<digits>" (Numeral).
/// Values that do not fit the target type produce an Error reply, with the
/// stream moved back to the start of the literal.
let number: Parser<Number, unit> =
    let expectedNumber = expected "number"
    let inline isBinary c = c = '0' || c = '1'
    // Maps a hex digit to its value: for '0'..'9' the low nibble is the value
    // and bit 6 is clear; for 'A'..'F' / 'a'..'f' the low nibble is 1..6 and
    // bit 6 is set, which adds the missing 9.
    let inline hex2int c = (int c &&& 15) + (int c >>> 6)*9
    let hexStringToInt (str: string) = // does no argument or overflow checking
        let mutable n = 0
        for c in str do
            n <- n*16 + hex2int c
        n
    let binStringToInt (str: string) = // does no argument or overflow checking
        let mutable n = 0
        for c in str do
            n <- n*2 + (int c - int '0')
        n
    // Index of the first character that is not '0' (str.Length if all are '0').
    let findIndexOfFirstNonNull (str: string) =
        let mutable i = 0
        while i < str.Length && str.[i] = '0' do
            i <- i + 1
        i
    let isHexFun = id isHex // tricks the compiler into caching the function object
    let isDigitFun = id isDigit
    let isBinaryFun = id isBinary
    fun stream ->
        let start = stream.IndexToken
        let cs = stream.Peek2()
        match cs.Char0, cs.Char1 with
        | '#', 'x' ->
            stream.Skip(2)
            let str = stream.ReadCharsOrNewlinesWhile(isHexFun, false)
            if str.Length <> 0 then
                // Ignore leading zeros when checking the magnitude: at most
                // 8 significant digits, and with exactly 8 the first one must
                // be <= '7' for the value to fit into a non-negative int32.
                let i = findIndexOfFirstNonNull str
                let length = str.Length - i
                if length < 8 || (length = 8 && str.[i] <= '7') then
                    Reply(Hexadecimal(hexStringToInt str))
                else
                    stream.Seek(start)
                    Reply(Error, messageError "hex number literal is too large for 32-bit int")
            else
                Reply(Error, expected "hex digit")
        | '#', 'b' ->
            stream.Skip(2)
            let str = stream.ReadCharsOrNewlinesWhile(isBinaryFun, false)
            if str.Length <> 0 then
                // At most 31 significant bits fit into a non-negative int32.
                let i = findIndexOfFirstNonNull str
                let length = str.Length - i
                if length < 32 then
                    Reply(Binary(binStringToInt str))
                else
                    stream.Seek(start)
                    Reply(Error, messageError "binary number literal is too large for 32-bit int")
            else
                Reply(Error, expected "binary digit")
        | c, _ ->
            if not (isDigit c) then Reply(Error, expectedNumber)
            else
                stream.SkipCharsOrNewlinesWhile(isDigitFun) |> ignore
                if stream.Skip('.') then
                    let n2 = stream.SkipCharsOrNewlinesWhile(isDigitFun)
                    if n2 <> 0 then
                        // we don't parse any exponent, as in the other example
                        let mutable result = 0.
                        let parsed =
                            System.Double.TryParse(stream.ReadFrom(start),
                                                   NumberStyles.AllowDecimalPoint,
                                                   invariantCulture,
                                                   &result)
                        // Newer .NET runtimes report an out-of-range value by
                        // returning true with an infinite result instead of
                        // returning false, so check for infinity as well.
                        if parsed && not (System.Double.IsInfinity result) then
                            Reply(Decimal(result))
                        else
                            stream.Seek(start)
                            Reply(Error, messageError "decimal literal is larger than System.Double.MaxValue")
                    else
                        Reply(Error, expected "digit")
                else
                    let decimalString = stream.ReadFrom(start)
                    let mutable result = 0
                    if System.Int32.TryParse(decimalString,
                                             NumberStyles.None,
                                             invariantCulture,
                                             &result)
                    then Reply(Numeral(result))
                    else
                        stream.Seek(start)
                        Reply(Error, messageError "decimal number literal is too large for 32-bit int")