List 带状态的FSharp序列处理
我需要从带有文本行的长序列中筛选数据。 文本行形成如下记录:List 带状态的FSharp序列处理,list,f#,sequence,state,List,F#,Sequence,State,我需要从带有文本行的长序列中筛选数据。 文本行形成如下记录: { BEGINTYPE1 VAL1: xxx VAL2: yyy ENDTYPE1 // mix of record types including TYPE1 } 我需要在处理过程中保持状态: 查找记录类型,从而跳过其他文本 过滤相关值,直到找到记录结束 继续1 我只能用一个列表来做这件事,因为一个序列似乎 用一句话读到最后。 “似乎”你不能处理一个序列的一部分,然后在另一个表
{
BEGINTYPE1
VAL1: xxx
VAL2: yyy
ENDTYPE1
// mix of record types including TYPE1
}
我需要在处理过程中保持状态:(1)查找目标记录类型的起始行,从而跳过其他文本;(2)过滤相关的值,直到遇到记录的结束行;(3)回到第 1 步继续。目前我只能用列表来实现,代码如下:
/// Regular-expression patterns tried, in this order, against each line inside a record.
let patLst =
    [ "VAL1:"
      "VAL2:"
      // ..
    ]

/// Line that opens a record of interest.
let BeginRecord1 = "BEGINTYPE1"

/// Line that closes a record of interest.
let EndRecord1 = "ENDTYPE1"
/// Scans `lines` for records delimited by `BeginRecord1` / `EndRecord1`.
///
/// The result is a single string in which every collected entry is preceded
/// by "\n". The entries are: one leading empty entry, then, for each line
/// inside a record, the first pattern of `patLst` that matches it, then one
/// empty entry for every record that is closed by `EndRecord1`.
/// Lines outside records are skipped.
///
/// NOTE(review): the collected entry is the pattern text, not the matched
/// line - confirm that this is what callers expect.
let filter (lines:seq<string>) =
    // First pattern (in `patLst` order) that matches `text` as a regex, if any.
    let firstMatchingPattern text =
        patLst |> List.tryFind (fun pattern -> Regex.Match(text, pattern).Success)

    // Everything after the first occurrence of `marker`; [] when it is absent.
    let rec dropThrough marker remaining =
        match remaining with
        | [] -> []
        | current :: rest when current = marker -> rest
        | _ :: rest -> dropThrough marker rest

    // Collects matching patterns until `terminator` is met; returns the lines
    // left after the terminator together with the grown (reversed) collection.
    let rec collectUntil terminator collected remaining =
        match remaining with
        | [] -> ([], collected)
        | current :: rest when current = terminator -> (rest, "" :: collected)
        | current :: rest ->
            let collected' =
                match firstMatchingPattern current with
                | Some pattern -> pattern :: collected
                | None -> collected
            collectUntil terminator collected' rest

    // Alternates between looking for the next record start and consuming it.
    let rec scan collected remaining =
        match dropThrough BeginRecord1 remaining with
        | [] -> List.rev collected
        | body ->
            let (afterRecord, collected') = collectUntil EndRecord1 collected body
            scan collected' afterRecord

    lines
    |> Seq.toList
    |> scan [""]
    |> List.map (fun entry -> "\n" + entry)
    |> String.concat ""
让patLst=[
“VAL1:”;
“VAL2:”;
// ..
]
让BeginRecord1=“BEGINTYPE1”
让EndRecord1=“ENDTYPE1”
let过滤器(线路:序号)=
设llines=序号toList行
设匹配线inp=
让rec循环pat=
配上
|[]->无
|h::t->
设m=Regex.Match(inp,h)
与成功相匹配
|正确->某些(h)
|循环t
环缝
让rec findItem i l=
匹配
| [] -> []
|h::t->如果h=i,则t
否则我就找不到了
让我们找到一个l=
让rec循环a l=
匹配
|[]->([],a)
|当h=u->(t,“::a)时的h::t
|h::t->将匹配线h与
|一些(m)->循环(m::a)t
|无->循环a t
环路a l
让rec循环a l=
将findItem BeginRecord1 l与
|[]->列表。修订版a
|l2->let(l3,a)=findItemsUntil EndRecord1 a l2
环路a l3
llines |>loop[“”]|>List.fold(乐趣a x->a+“\n”+x)”
您可以用与处理列表几乎相同的方式来处理序列,只是不能使用列表那种方便的模式匹配语法,而要改用 Seq.head 和 Seq.tail 函数。使用这些内置函数,您的解决方案如下所示:
open System.Text.RegularExpressions
/// Regex patterns that `filter` tries, first to last, on every line of a record.
let patLst =
    [
        "VAL1:"
        "VAL2:"
        // .. further patterns go here
    ]

/// Marker line that starts a record.
let BeginRecord1 = "BEGINTYPE1"

/// Marker line that ends a record.
let EndRecord1 = "ENDTYPE1"
/// Scans `lines` for records delimited by `BeginRecord1` / `EndRecord1` and
/// returns the collected entries as one string, each entry preceded by "\n".
///
/// Entries are: one leading empty entry, then, for every line inside a
/// record, the first pattern of `patLst` that matches it (the pattern text,
/// not the line), then one empty entry for every record closed by
/// `EndRecord1`. Lines outside records are skipped; a record that is still
/// open when the input ends keeps the entries collected so far.
///
/// The sequence is consumed in a single `Seq.fold` pass. Walking it with
/// repeated `Seq.isEmpty` / `Seq.head` / `Seq.tail` calls gives the same
/// output, but every step re-enumerates an ever more deeply nested tail,
/// which is very slow on long inputs and re-reads the underlying source.
let filter (lines:seq<string>) =
    // First pattern (in `patLst` order) that matches the line as a regex, if any.
    let matchLine inp =
        patLst |> List.tryFind (fun pat -> Regex.IsMatch(inp, pat))

    // Fold state: (currently inside a record?, entries collected so far, newest first).
    let step (insideRecord, acc) line =
        if not insideRecord then
            // Outside a record every line is skipped; only the begin marker changes state.
            (line = BeginRecord1, acc)
        elif line = EndRecord1 then
            // Closing a record contributes one empty entry.
            (false, "" :: acc)
        else
            match matchLine line with
            | Some pat -> (true, pat :: acc)
            | None -> (true, acc)

    lines
    |> Seq.fold step (false, [""])
    |> snd
    |> List.rev
    // Prefix each entry with "\n" and join once instead of growing a string per entry.
    |> List.map (fun entry -> "\n" + entry)
    |> String.concat ""
/// Total active pattern that lets a sequence be matched like a list:
/// `HT (head, tail)` when the sequence has a first element, `Empty` otherwise.
/// The tail is produced with `Seq.tail` and is therefore itself a lazy sequence.
let (|HT|Empty|) s =
    match Seq.tryHead s with
    | None -> Empty
    | Some first -> HT (first, Seq.tail s)
这样,您的实现可以与基于列表的版本几乎保持一致:只需把列表模式换成上面的活动模式(HT / Empty),并把空列表 [] 换成 Seq.empty:
/// Sequence-based variant of the record filter, written with the `HT` / `Empty`
/// active pattern so that it reads like the list-based version.
///
/// Returns one string in which every collected entry is preceded by "\n":
/// a leading empty entry, the first matching pattern of `patLst` for each line
/// inside a `BeginRecord1` .. `EndRecord1` record, and an empty entry for each
/// closed record.
///
/// NOTE(review): every `HT` match calls `Seq.tryHead` and `Seq.tail` on the
/// current (nested) tail, so long inputs are likely re-enumerated many times -
/// confirm with a benchmark before using this on large files.
let filter (lines:seq<string>) =
    // First pattern that matches `text` as a regex, walking the patterns in order.
    let rec firstMatch text patterns =
        match patterns with
        | Empty -> None
        | HT (pattern, rest) ->
            if Regex.Match(text, pattern).Success then Some pattern
            else firstMatch text rest
    let matchLine inp = firstMatch inp patLst

    // Remainder of `source` after the first element equal to `marker`.
    let rec skipPast marker source =
        match source with
        | Empty -> Seq.empty
        | HT (current, rest) ->
            if current = marker then rest
            else skipPast marker rest

    // Collects matching patterns up to `terminator`; returns the rest of the
    // sequence and the grown (reversed) collection.
    let collectUntil terminator collected source =
        let rec go collected source =
            match source with
            | Empty -> (Seq.empty, collected)
            | HT (current, rest) ->
                if current = terminator then (rest, "" :: collected)
                else
                    match matchLine current with
                    | Some pattern -> go (pattern :: collected) rest
                    | None -> go collected rest
        go collected source

    // Alternates between finding the next record start and consuming the record.
    let rec scan collected source =
        match skipPast BeginRecord1 source with
        | Empty -> List.rev collected
        | body ->
            let (remainder, collected') = collectUntil EndRecord1 collected body
            scan collected' remainder

    lines
    |> scan [""]
    |> List.map (fun entry -> "\n" + entry)
    |> String.concat ""
let过滤器(行:seq)=
设匹配线inp=
让rec循环pat=
配上
|空->无
|HT(h,t)->
设m=Regex.Match(inp,h)
与成功相匹配
|正确->某些(h)
|循环t
环缝
让rec findItem i l=
匹配
|空->序列空
|HT(h,t)->如果h=i,则t
否则我就找不到了
让我们找到一个l=
让rec循环a l=
匹配
|空->(序列空,a)
|当h=u->(t,“::a)时的HT(h,t)
|HT(h,t)->将匹配线h与
|一些(m)->循环(m::a)t
|无->循环a t
环路a l
让rec循环a l=
将findItem BeginRecord1 l与
|空->列表.rev a
|l2->let(l3,a)=findItemsUntil EndRecord1 a l2
环路a l3
行数|>循环[“”]|>List.fold(乐趣a x->a+“\n”+x)”
目标
根据示例代码,这可能不完全是您想要的,但我认为只对序列做一次遍历、并把记录映射为具体类型,会是一个有趣的做法。
说明
此解决方案使用一个状态机,它处于 Start 或 Collecting 两种状态之一。在 Start 状态下,它期望读到一行 “BEGINTYPEx”;读到后进入 Collecting 状态,并把后续的属性收集到一个 Map 中。当 Collecting 状态遇到 “ENDTYPEx” 时,它用映射函数根据该 Map 创建一个实例,把它加入 Aggregate 列表,然后回到 Start 状态。
实现
先为记录定义一些类型,包括这些记录的可区分联合(discriminated union),以及折叠(fold)时使用的状态类型:
/// First record shape: two string-valued properties.
type Type1 = {
    val1:string
    val2:string
}
/// Second record shape; it has the same field names as Type1.
// NOTE(review): because both records share field names, an unannotated
// `{ val1 = ..; val2 = .. }` is inferred as the later type (Type2);
// annotate the expression when a Type1 is intended.
type Type2 = {
    val1:string
    val2:string
}
/// Any parsed record, tagged with its concrete type.
type Aggregate =
    | T1 of Type1
    | T2 of Type2
/// State threaded through the fold over the input lines.
type State =
    /// Not inside a record; carries the records completed so far.
    | Start of Aggregate list
    /// Inside a record; carries the completed records, the record label, the
    /// function that turns the collected properties into an Aggregate, and the
    /// properties collected so far.
    | Collecting of Aggregate list * string * (Map<string,string> -> Aggregate) * Map<string,string>
接下来,我们有一些活动模式可以轻松决定匹配:
/// Partial active pattern for a record-opening line.
/// Matches exactly "BEGINTYPE1" or "BEGINTYPE2" and yields the record label
/// together with the mapping function for that record type
/// (`mapType1` / `mapType2`, which are defined outside this snippet).
let (|Begin|_|) input =
    if input = "BEGINTYPE1" then Some ("TYPE1", mapType1)
    elif input = "BEGINTYPE2" then Some ("TYPE2", mapType2)
    else None
/// Partial active pattern for a "name: value" property line.
///
/// Yields `(name, value)`, both trimmed, when `input` is non-empty and
/// contains a colon; otherwise does not match.
/// The line is split at the FIRST colon only, so a value that itself contains
/// colons (for example "TIME: 12:30:00") is kept whole instead of being cut
/// off at the second colon.
let (|Prop|_|) (input: string) =
    if String.IsNullOrEmpty input then None
    else
        match input.IndexOf(':') with
        | -1 -> None
        | colonAt ->
            let pName = input.Substring(0, colonAt).Trim()
            // Everything after the first colon belongs to the value.
            let pValue = input.Substring(colonAt + 1).Trim()
            Some (pName, pValue)
/// Partial active pattern for a record-closing line.
///
/// Parameters: `l` - items completed so far, `label` - label of the open
/// record, `f` - function that builds an item from `m`, `m` - the collected
/// properties. On "ENDTYPE1" or "ENDTYPE2" it yields `l` with `f m` appended
/// at the end, paired with `label`; any other input does not match.
let (|End|_|) (l,label,f,m) input =
    match input with
    | "ENDTYPE1"
    | "ENDTYPE2" -> Some (l @ [f m], label)
    | _ -> None
真正负责从一个状态转移到下一个状态的折叠函数(folder):
/// Transition function of the state machine, meant to be used with `Seq.fold`.
///
/// In `Start`, the line must be a BEGINTYPEx marker, which switches to
/// `Collecting` with an empty property map. In `Collecting`, a property line
/// is added to the map and an ENDTYPEx marker finishes the record and returns
/// to `Start`. Any other line raises an exception via `failwithf`.
let folder state line =
    // Outside a record: only a record-opening line is acceptable.
    let openRecord completed =
        match line with
        | Begin (label, f) -> Collecting (completed, label, f, Map.empty<string,string>)
        | _ -> failwithf "Should start with a BEGINTYPEx, intead was %s" line

    // Inside a record: accumulate a property or close the record.
    let extendRecord (completed, label, f, props) =
        match line with
        | Prop (k,v) -> Collecting (completed, label, f, Map.add k v props)
        | End (completed, label, f, props) (finished, _) -> Start finished
        | _ -> failwithf "Expecting property or ENDTYPEx, instead was %s" line

    match state with
    | Start xs -> openRecord xs
    | Collecting (xs, label, f, m) -> extendRecord (xs, label, f, m)
最后,用法:
/// Sample input: one TYPE1 record followed by one TYPE2 record.
let lines =
    Seq.ofList
        [ "BEGINTYPE1"
          "VAL1: xxx"
          "VAL2: yyy"
          "ENDTYPE1"
          "BEGINTYPE2"
          "VAL1: xxx"
          "VAL2: yyy"
          "ENDTYPE2" ]
/// Folds `lines` through `folder`, starting from `Start []`, and prints every
/// record of the final state with `printfn "%A"`.
let extractTypes lines =
    let finalState = Seq.fold folder (Start []) lines
    for record in extractTypeList finalState do
        printfn "%A" record

// Run the extraction on the sample input; the result is unit.
extractTypes lines
一些有用的链接:
评论:
— 好的。您能格式化一下您的代码吗?
— 谢谢!我不知道序列可以这样使用。现在我会先对列表版本和序列版本做性能测试。
— 结果让我很惊讶:对某个大小的文件,使用序列处理耗时 18.000 毫秒,使用列表处理耗时 43 毫秒。两种情况下代码完全相同,都先执行 let flines = File.ReadAllLines …;列表版本只是多了一步 let lines = Seq.toList lines。
— @RobF 序列是惰性求值的,这可能是好处,也可能是代价,取决于具体用法。在您的例子里,听起来序列被遍历了很多次;如果序列是通过读取文件中的行构建的,这样做的开销会非常大。在这种情况下,几乎可以肯定,更好的做法是只从文件读取一次,并把各行保存在一个立即求值的列表中。
— 感谢您的回复。我得先研究一下您的解决方案,才能真正理解。
— Rob,如果有任何问题,尽管问。我把代码放进了一个 gist,方便您把它作为一个完整的 fsx 脚本获取(控制台版本在该 gist 的评论里)。
— @RobF 我编辑了答案,在给出实现之前补充了更多关于解决思路的说明。希望对您有帮助,祝编码愉快。
— 嗨,我理解了:在折叠函数中处理列表项,并把状态保存在折叠的累加器里。我从没想过可以这样做,很聪明的解决方案!
/// Returns the list of completed records carried by `state`.
/// For `Collecting`, only the already completed records are returned; the
/// label, mapping function and partially collected properties are ignored.
let extractTypeList state =
    match state with
    | Start completed
    | Collecting (completed, _, _, _) -> completed
/// Sample input lines: a TYPE1 record and then a TYPE2 record.
let lines =
    List.toSeq [
        "BEGINTYPE1"
        "VAL1: xxx"
        "VAL2: yyy"
        "ENDTYPE1"
        "BEGINTYPE2"
        "VAL1: xxx"
        "VAL2: yyy"
        "ENDTYPE2"
    ]
/// Runs the state machine over `lines` (via `Seq.fold folder`, starting at
/// `Start []`) and prints each extracted record with `printfn "%A"`.
let extractTypes lines =
    let records =
        lines
        |> Seq.fold folder (Start [])
        |> extractTypeList
    records |> List.iter (printfn "%A")

// Process the sample input defined above.
extractTypes lines