Parsing Haskell读取变量名
我需要写一段代码来解析某种语言。我卡在了解析变量名这一步——变量名可以是任何长度至少为1个字符、以小写字母开头并且可以包含下划线“_”字符的内容。我认为我在以下代码方面有了一个良好的开端:Parsing Haskell读取变量名,parsing,haskell,Parsing,Haskell,我需要写一段代码来解析某种语言。我卡在了解析变量名这一步——变量名可以是任何长度至少为1个字符、以小写字母开头并且可以包含下划线“_”字符的内容。我认为我在以下代码方面有了一个良好的开端: identToken :: Parser String identToken = do c <- letter cs <- letdigs return (c:cs
-- Identifier-token parser as posted in the question. It is kept unchanged
-- here because the ambiguous output shown further down is produced by it.
identToken :: Parser String
identToken = do
c <- letter
cs <- letdigs
return (c:cs)
-- The first character is accepted by isLetter; the lowerCase predicate
-- defined below is not applied anywhere in this block.
where letter = satisfy isLetter
-- NOTE(review): each alternative consumes a run of only one character class,
-- and the alternatives are combined with +++, so several of them can
-- presumably succeed on the same input -- this looks like the source of the
-- multiple parse results reported below. Confirm against the ReadP docs.
letdigs = munch isLetter +++ munch isDigit +++ munch underscore
-- num is defined but not referenced in this block.
num = satisfy isDigit
underscore = \x -> x == '_'
lowerCase = \x -> x `elem` ['a'..'z'] -- how to add this function to current code?
-- | Parse one identifier token, discarding whitespace on both sides of it.
ident :: Parser Ident
ident = skipSpaces *> identToken <* skipSpaces
idents :: Parser Command
idents = do
skipSpaces; ids <- many1 ident
...
我得到的结果是:
Left (AmbiguousIdents [(["test"],""),(["t","est"],""),(["t","e","st"],""),
(["t","e","st"],""),(["t","est"],""),(["t","e","st"],""),(["t","e","st"],""),
(["t","e","s","t"],""),(["t","e","s","t"],""),(["t","e","s","t"],""),
(["t","e","s","t"],""),(["t","e","s","t"],""),(["t","e","s","t"],""),
(["t","e","s","t"],""),(["t","e","s","t"],""),(["t","e","s","t"],""),
(["t","e","s","t"],""),(["t","e","s","t"],""),(["t","e","s","t"],""),
(["t","e","s","t"],""),(["t","e","s","t"],""),(["t","e","s","t"],""),
(["t","e","s","t"],""),(["t","e","s","t"],""),(["t","e","s","t"],""),
(["t","e","s","t"],""),(["t","e","s","t"],""),(["t","e","s","t"],""),
(["t","e","s","t"],""),(["t","e","s","t"],""),(["t","e","s","t"],"")])
请注意,Parser 只是 ReadP a 的同义词。
我还想在解析器中编码变量名应该以小写字符开头
谢谢您的帮助。
部分问题在于您使用了 +++ 操作符。以下代码适用于我:
import Data.Char
import Text.ParserCombinators.ReadP
type Parser a = ReadP a
type Ident = String
-- | Parse one identifier token: an ASCII lowercase letter followed by a run
-- of letters, digits and underscores.
--
-- The tail is read with a single 'munch' over one combined predicate, so
-- the whole run is taken in one step rather than being split between
-- alternative parsers.
identToken :: Parser String
identToken = (:) <$> satisfy isAsciiLower <*> munch isIdentChar
  where
    -- Characters allowed after the leading lowercase letter.
    isIdentChar c = isLetter c || isDigit c || c == '_'
-- | Parse one identifier token, discarding whitespace on both sides of it.
ident :: Parser Ident
ident = skipSpaces *> identToken <* skipSpaces
-- | Run the identifier-list parser over a complete input string.
--
-- Returns 'Right' with the identifiers when there is exactly one parse,
-- @Left "Invalid parse"@ when there is none, and otherwise a 'Left' that
-- shows every result 'readP_to_S' produced.
test_parseIdents :: String -> Either String [Ident]
test_parseIdents p =
  case readP_to_S prog p of
    [(j, "")]   -> Right j
    []          -> Left "Invalid parse"
    multipleRes -> Left ("Ambiguous idents: " ++ show multipleRes)
  where
    -- Leading whitespace is skipped up front so that whitespace-only input
    -- is treated like the empty string (an empty identifier list) instead
    -- of being rejected; 'eof' requires the whole input to be consumed.
    prog :: Parser [Ident]
    prog = skipSpaces *> many ident <* eof
-- | Demo entry point: parse a sample identifier and print the result.
main :: IO ()
main = print (test_parseIdents "test_1349_zefz")
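import Data.Char
import Text.ParserCombinators.ReadP

type Parser a = ReadP a
type Ident = String

identToken :: Parser String
identToken = do c <- satisfy lowerCase
                cs <- letdigs
                return (c:cs)
  where lowerCase = \x -> x `elem` ['a'..'z']
        underscore = \x -> x == '_'
        letdigs = munch (\c -> isLetter c || isDigit c || underscore c)

ident :: Parser Ident
ident = do _ <- skipSpaces
           s <- identToken
           skipSpaces
           return s

test_parseIdents :: String -> Either String [Ident]
test_parseIdents p = case readP_to_S prog p of
    [(j, "")] -> Right j
    [] -> Left "Invalid parse"
    multipleRes -> Left ("Ambiguous idents: " ++ show multipleRes)
  where prog :: Parser [Ident]
        prog = do result <- many ident
                  eof
                  return result

main = print $ test_parseIdents "test_1349_zefz"
部分问题在于使用了 +++ 运算符。以下代码适用于我: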
import Data.Char
import Text.ParserCombinators.ReadP
type Parser a = ReadP a
type Ident = String
-- | Parse one identifier token: an ASCII lowercase letter followed by a run
-- of letters, digits and underscores.
--
-- The tail is read with a single 'munch' over one combined predicate, so
-- the whole run is taken in one step rather than being split between
-- alternative parsers.
identToken :: Parser String
identToken = (:) <$> satisfy isAsciiLower <*> munch isIdentChar
  where
    -- Characters allowed after the leading lowercase letter.
    isIdentChar c = isLetter c || isDigit c || c == '_'
-- | Parse one identifier token, discarding whitespace on both sides of it.
ident :: Parser Ident
ident = skipSpaces *> identToken <* skipSpaces
-- | Run the identifier-list parser over a complete input string.
--
-- Returns 'Right' with the identifiers when there is exactly one parse,
-- @Left "Invalid parse"@ when there is none, and otherwise a 'Left' that
-- shows every result 'readP_to_S' produced.
test_parseIdents :: String -> Either String [Ident]
test_parseIdents p =
  case readP_to_S prog p of
    [(j, "")]   -> Right j
    []          -> Left "Invalid parse"
    multipleRes -> Left ("Ambiguous idents: " ++ show multipleRes)
  where
    -- Leading whitespace is skipped up front so that whitespace-only input
    -- is treated like the empty string (an empty identifier list) instead
    -- of being rejected; 'eof' requires the whole input to be consumed.
    prog :: Parser [Ident]
    prog = skipSpaces *> many ident <* eof
-- | Demo entry point: parse a sample identifier and print the result.
main :: IO ()
main = print (test_parseIdents "test_1349_zefz")
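import Data.Char
import Text.ParserCombinators.ReadP

type Parser a = ReadP a
type Ident = String

identToken :: Parser String
identToken = do c <- satisfy lowerCase
                cs <- letdigs
                return (c:cs)
  where lowerCase = \x -> x `elem` ['a'..'z']
        underscore = \x -> x == '_'
        letdigs = munch (\c -> isLetter c || isDigit c || underscore c)

ident :: Parser Ident
ident = do _ <- skipSpaces
           s <- identToken
           skipSpaces
           return s

test_parseIdents :: String -> Either String [Ident]
test_parseIdents p = case readP_to_S prog p of
    [(j, "")] -> Right j
    [] -> Left "Invalid parse"
    multipleRes -> Left ("Ambiguous idents: " ++ show multipleRes)
  where prog :: Parser [Ident]
        prog = do result <- many ident
                  eof
                  return result

main = print $ test_parseIdents "test_1349_zefz"
部分问题在于使用了 +++ 运算符。以下代码适用于我: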
import Data.Char
import Text.ParserCombinators.ReadP
type Parser a = ReadP a
type Ident = String
-- | Parse one identifier token: an ASCII lowercase letter followed by a run
-- of letters, digits and underscores.
--
-- The tail is read with a single 'munch' over one combined predicate, so
-- the whole run is taken in one step rather than being split between
-- alternative parsers.
identToken :: Parser String
identToken = (:) <$> satisfy isAsciiLower <*> munch isIdentChar
  where
    -- Characters allowed after the leading lowercase letter.
    isIdentChar c = isLetter c || isDigit c || c == '_'
-- | Parse one identifier token, discarding whitespace on both sides of it.
ident :: Parser Ident
ident = skipSpaces *> identToken <* skipSpaces
-- | Run the identifier-list parser over a complete input string.
--
-- Returns 'Right' with the identifiers when there is exactly one parse,
-- @Left "Invalid parse"@ when there is none, and otherwise a 'Left' that
-- shows every result 'readP_to_S' produced.
test_parseIdents :: String -> Either String [Ident]
test_parseIdents p =
  case readP_to_S prog p of
    [(j, "")]   -> Right j
    []          -> Left "Invalid parse"
    multipleRes -> Left ("Ambiguous idents: " ++ show multipleRes)
  where
    -- Leading whitespace is skipped up front so that whitespace-only input
    -- is treated like the empty string (an empty identifier list) instead
    -- of being rejected; 'eof' requires the whole input to be consumed.
    prog :: Parser [Ident]
    prog = skipSpaces *> many ident <* eof
-- | Demo entry point: parse a sample identifier and print the result.
main :: IO ()
main = print (test_parseIdents "test_1349_zefz")
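import Data.Char
import Text.ParserCombinators.ReadP

type Parser a = ReadP a
type Ident = String

identToken :: Parser String
identToken = do c <- satisfy lowerCase
                cs <- letdigs
                return (c:cs)
  where lowerCase = \x -> x `elem` ['a'..'z']
        underscore = \x -> x == '_'
        letdigs = munch (\c -> isLetter c || isDigit c || underscore c)

ident :: Parser Ident
ident = do _ <- skipSpaces
           s <- identToken
           skipSpaces
           return s

test_parseIdents :: String -> Either String [Ident]
test_parseIdents p = case readP_to_S prog p of
    [(j, "")] -> Right j
    [] -> Left "Invalid parse"
    multipleRes -> Left ("Ambiguous idents: " ++ show multipleRes)
  where prog :: Parser [Ident]
        prog = do result <- many ident
                  eof
                  return result

main = print $ test_parseIdents "test_1349_zefz"
部分问题在于使用了 +++ 运算符。以下代码适用于我: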
import Data.Char
import Text.ParserCombinators.ReadP
type Parser a = ReadP a
type Ident = String
-- | Parse one identifier token: an ASCII lowercase letter followed by a run
-- of letters, digits and underscores.
--
-- The tail is read with a single 'munch' over one combined predicate, so
-- the whole run is taken in one step rather than being split between
-- alternative parsers.
identToken :: Parser String
identToken = (:) <$> satisfy isAsciiLower <*> munch isIdentChar
  where
    -- Characters allowed after the leading lowercase letter.
    isIdentChar c = isLetter c || isDigit c || c == '_'
-- | Parse one identifier token, discarding whitespace on both sides of it.
ident :: Parser Ident
ident = skipSpaces *> identToken <* skipSpaces
-- | Run the identifier-list parser over a complete input string.
--
-- Returns 'Right' with the identifiers when there is exactly one parse,
-- @Left "Invalid parse"@ when there is none, and otherwise a 'Left' that
-- shows every result 'readP_to_S' produced.
test_parseIdents :: String -> Either String [Ident]
test_parseIdents p =
  case readP_to_S prog p of
    [(j, "")]   -> Right j
    []          -> Left "Invalid parse"
    multipleRes -> Left ("Ambiguous idents: " ++ show multipleRes)
  where
    -- Leading whitespace is skipped up front so that whitespace-only input
    -- is treated like the empty string (an empty identifier list) instead
    -- of being rejected; 'eof' requires the whole input to be consumed.
    prog :: Parser [Ident]
    prog = skipSpaces *> many ident <* eof
-- | Demo entry point: parse a sample identifier and print the result.
main :: IO ()
main = print (test_parseIdents "test_1349_zefz")
导入数据.Char
导入Text.ParserCombinators.ReadP
类型解析器a=ReadP a
类型标识符=字符串
identoken::解析器字符串
identoken=do c x==''
letdigs=munch(\c->isLetter c | | isDigit c | |下划线c)
标识符::解析器标识符
ident=do uuuj
[]->左“无效解析”
多路复用器->左侧(“不明确标识:+++显示多路复用器)
其中prog::Parser[Ident]
prog=do结果