F#:如何将一个序列拆分为一系列序列
背景:我有一系列带时间戳的数据,序列中存在数据不连续的间隙。我想编写一个函数,把该序列拆分为"序列的序列",使每个子序列只包含连续的数据(即在间隙处拆分输入序列)。标签:f#、sequences。约束条件:
- 返回值必须是序列的序列,以确保元素只在需要时才生成(不能使用列表/数组/缓存)
- 解决方案不能是 O(n^2),这可能排除了 Seq.take / Seq.skip 模式(参见相关帖子)
- 如果解法符合函数式编程的惯用风格则更好(因为我想更熟练地掌握函数式编程),但这不是硬性要求
方法签名:
let groupContiguousDataPoints (timeBetweenContiguousDataPoints : TimeSpan) (dataPointsWithHoles : seq<DateTime * float>) : (seq<seq< DateTime * float >>)= ...
let groupContiguousDataPoints (timeBetweenContiguousDataPoints : TimeSpan) (dataPointsWithHoles : seq<DateTime * float>) : seq<seq<DateTime * float>> = ...
从表面上看,这个问题对我来说微不足道,但即使使用Seq.pairwise、IEnumerator、序列理解和yield语句,我也无法找到解决方案。我确信这是因为我仍然缺乏结合F#习语的经验,或者可能是因为有些语言结构我还没有接触过
// Test data: 1000 samples spaced one minute apart, with every 77th sample
// filtered out so that the input contains regularly spaced gaps.
let numbers = seq { 1.0 .. 1000.0 }
let baseTime = DateTime.Now
let contiguousTimeStamps = seq { for n in numbers -> baseTime.AddMinutes(n) }
let dataWithOccationalHoles =
    Seq.zip contiguousTimeStamps numbers
    |> Seq.filter (fun (_, num) -> num % 77.0 <> 0.0) // Has a gap in the data every 77 items
let timeBetweenContiguousValues = new TimeSpan(0, 1, 0)

dataWithOccationalHoles
|> groupContiguousDataPoints timeBetweenContiguousValues
|> Seq.iteri (fun i sequence ->
    // Enumerate each group exactly once. Asking for the length and then the
    // head would start two enumerations of the same group, which a lazy,
    // non-caching grouping cannot support.
    let points = Seq.toList sequence
    printfn "Group %d has %d data-points: Head: %f" i (List.length points) (snd (List.head points)))
// Test data: 1000 samples spaced one minute apart, with every 77th sample
// filtered out so that the input contains regularly spaced gaps.
let numbers = seq { 1.0 .. 1000.0 }
let baseTime = DateTime.Now
let contiguousTimeStamps = seq { for n in numbers -> baseTime.AddMinutes(n) }
let dataWithOccationalHoles =
    Seq.zip contiguousTimeStamps numbers
    |> Seq.filter (fun (_, num) -> num % 77.0 <> 0.0) // Has a gap in the data every 77 items
let timeBetweenContiguousValues = new TimeSpan(0, 1, 0)

dataWithOccationalHoles
|> groupContiguousDataPoints timeBetweenContiguousValues
|> Seq.iteri (fun i sequence ->
    // Enumerate each group exactly once. Asking for the length and then the
    // head would start two enumerations of the same group, which a lazy,
    // non-caching grouping cannot support.
    let points = Seq.toList sequence
    printfn "Group %d has %d data-points: Head: %f" i (List.length points) (snd (List.head points)))
您似乎想要一个具有签名的函数
('a -> bool) -> seq<'a> -> seq<seq<'a>>
即函数和序列,然后根据函数结果将输入序列分解为序列序列
将值缓存到实现IEnumerable的集合中可能是最简单的(虽然不是纯粹的,但避免了多次迭代输入。这将损失输入的大部分惰性):
/// Splits `input` into chunks, closing the current chunk right after every
/// element for which `pred` returns false.
///
/// Each chunk is buffered in its own `List<'a>` before it is yielded, so the
/// input is enumerated only once but a chunk is not available until its last
/// element has been read.
///
/// Returns a sequence of chunks; a trailing chunk is yielded only if it is
/// non-empty, so an empty input produces no chunks.
let groupBy (pred : 'a -> bool) (input : seq<'a>) =
    seq {
        let cache = ref (new System.Collections.Generic.List<'a>())
        for e in input do
            (!cache).Add(e)
            if not (pred e) then
                yield (!cache :> seq<'a>)
                // Start a fresh buffer; the one just yielded now belongs to the caller.
                cache := new System.Collections.Generic.List<'a>()
        if (!cache).Count > 0 then
            yield (!cache :> seq<'a>)
    }
另一种实现可以把缓存集合(作为 seq<'a>)传给该函数。
下面给出一个 Haskell 解法,因为我不太熟悉 F# 语法,但它应该很容易翻译:
-- | A point in time, expressed as a number of ticks.
type TimeStamp = Integer -- ticks
-- | The distance between two 'TimeStamp' values.
type TimeSpan = Integer -- difference between TimeStamps
-- | Takes a 'TimeSpan' and a list of time-stamped values and returns a list
-- of lists of time-stamped values. Only the type signature is given here.
groupContiguousDataPoints :: TimeSpan -> [(TimeStamp, a)] -> [[(TimeStamp, a)]]
在 Prelude 中有一个函数 groupBy :: (a -> a -> Bool) -> [a] -> [[a]]
:
group函数获取一个列表并返回一个列表列表,以便结果的串联等于参数。此外,结果中的每个子列表只包含相等的元素。例如
group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
这是groupBy的一个特例,它允许程序员提供自己的平等性测试
这不是我们想要的,因为它将列表中的每个元素与当前组的第一个元素进行比较,并且我们需要比较连续的元素。如果我们有这样一个函数groupBy1
,我们可以轻松地编写groupContiguousDataPoints
:
-- | Group time-stamped values with 'groupBy1': two neighbouring values stay in
-- the same group when the later time stamp minus the earlier one is at most
-- @maxTimeDiff@.
groupContiguousDataPoints maxTimeDiff = groupBy1 contiguous
  where
    contiguous (earlier, _) (later, _) = later - earlier <= maxTimeDiff
更新:看起来 F# 不允许直接对 seq 进行模式匹配,所以翻译起来并没有那么容易。不过,它展示了一种在需要时把 seq 转换为 LazyList 以便进行模式匹配的方法。
更新2:Haskell 的列表是惰性的、按需生成的,因此它们对应于 F# 的 LazyList(而不是 seq,因为生成的数据会被缓存;当然,如果不再持有对它的引用,它就会被垃圾回收)。
我把 Alexey 的 Haskell 代码翻译成了 F#,但它在 F# 中并不好看,而且仍然有一个元素求值得过早。
我希望有更好的办法,但我以后还要再试一次
let N = 20

/// Test input: the integers 1..N with the multiples of 4 and of 7 removed.
/// A trace line is printed whenever an element is produced, so the output
/// shows when elements are pulled.
let data = // produce some arbitrary data with holes
    seq {
        for x in 1..N do
            if x % 4 <> 0 && x % 7 <> 0 then
                printfn "producing %d" x
                yield x
    }

/// Splits `input` into groups such that `comp x y` holds for every pair of
/// neighbouring elements inside a group; a new group starts wherever `comp`
/// returns false.
///
/// An empty input yields no groups.
/// NOTE(review): LazyList is not defined in this file (presumably the
/// F# PowerPack type) — confirm.
let rec GroupBy comp (input : LazyList<'a>) : LazyList<LazyList<'a>> =
    LazyList.delayed (fun () ->
        match input with
        | LazyList.Nil -> LazyList.empty ()
        | LazyList.Cons (x, LazyList.Nil) ->
            LazyList.cons (LazyList.cons x (LazyList.empty ())) (LazyList.empty ())
        | LazyList.Cons (x, (LazyList.Cons (y, _) as xs)) ->
            // `xs` is non-empty, so `groups` always has a first group; the two
            // `let (LazyList.Cons ...)` patterns below therefore always match.
            let groups = GroupBy comp xs
            if comp x y then
                // x joins the first group of the remainder; both the tail of
                // that group and the remaining groups are computed on demand.
                LazyList.consf
                    (LazyList.consf x (fun () ->
                        let (LazyList.Cons (firstGroup, _)) = groups
                        firstGroup))
                    (fun () ->
                        let (LazyList.Cons (_, otherGroups)) = groups
                        otherGroups)
            else
                // x forms a group of its own in front of the remaining groups.
                LazyList.cons (LazyList.cons x (LazyList.empty ())) groups)

let result = data |> LazyList.of_seq |> GroupBy (fun x y -> y = x + 1)
printfn "Consuming..."
for group in result do
    printfn "about to do a group"
    for x in group do
        printfn " %d" x
let N = 20

/// Test input: the integers 1..N with the multiples of 4 and of 7 removed.
/// A trace line is printed whenever an element is produced, so the output
/// shows when elements are pulled.
let data = // produce some arbitrary data with holes
    seq {
        for x in 1..N do
            if x % 4 <> 0 && x % 7 <> 0 then
                printfn "producing %d" x
                yield x
    }

/// Splits `input` into groups such that `comp x y` holds for every pair of
/// neighbouring elements inside a group; a new group starts wherever `comp`
/// returns false.
///
/// An empty input yields no groups.
/// NOTE(review): LazyList is not defined in this file (presumably the
/// F# PowerPack type) — confirm.
let rec GroupBy comp (input : LazyList<'a>) : LazyList<LazyList<'a>> =
    LazyList.delayed (fun () ->
        match input with
        | LazyList.Nil -> LazyList.empty ()
        | LazyList.Cons (x, LazyList.Nil) ->
            LazyList.cons (LazyList.cons x (LazyList.empty ())) (LazyList.empty ())
        | LazyList.Cons (x, (LazyList.Cons (y, _) as xs)) ->
            // `xs` is non-empty, so `groups` always has a first group; the two
            // `let (LazyList.Cons ...)` patterns below therefore always match.
            let groups = GroupBy comp xs
            if comp x y then
                // x joins the first group of the remainder; both the tail of
                // that group and the remaining groups are computed on demand.
                LazyList.consf
                    (LazyList.consf x (fun () ->
                        let (LazyList.Cons (firstGroup, _)) = groups
                        firstGroup))
                    (fun () ->
                        let (LazyList.Cons (_, otherGroups)) = groups
                        otherGroups)
            else
                // x forms a group of its own in front of the remaining groups.
                LazyList.cons (LazyList.cons x (LazyList.empty ())) groups)

let result = data |> LazyList.of_seq |> GroupBy (fun x y -> y = x + 1)
printfn "Consuming..."
for group in result do
    printfn "about to do a group"
    for x in group do
        printfn " %d" x
下面是一些我认为您需要的代码。它不是惯用的 F#。
(这可能与 Brian 的答案相似,但我无法确定,因为我不熟悉 LazyList 的语义。)
但它与您的测试规范并不完全匹配:Seq.length 会枚举它的整个输入。您的"测试代码"先调用 Seq.length
,然后调用 Seq.hd
。这会生成两次枚举器,而且由于没有缓存,事情会变得一团糟。我不确定是否有任何干净的方法来实现这一点
-- | Split a list into groups of neighbouring elements. Unlike a grouping that
-- compares every element with the first element of its group, @comp@ is
-- applied to each element and its immediate successor; a new group starts
-- wherever @comp@ returns 'False'.
--
-- The concatenation of the result equals the input, and an empty input gives
-- an empty list of groups.
groupBy1 :: (a -> a -> Bool) -> [a] -> [[a]]
groupBy1 _ [] = []
groupBy1 _ [x] = [[x]]
groupBy1 comp (x : xs@(y : _))
  | comp x y = (x : firstGroup) : otherGroups
  | otherwise = [x] : groups
  where
    -- xs is non-empty here, so the recursive result always has a first group.
    groups@(firstGroup : otherGroups) = groupBy1 comp xs
let N = 20

/// Test input: the integers 1..N with the multiples of 4 and of 7 removed.
/// A trace line is printed whenever an element is produced, so the output
/// shows when elements are pulled.
let data = // produce some arbitrary data with holes
    seq {
        for x in 1..N do
            if x % 4 <> 0 && x % 7 <> 0 then
                printfn "producing %d" x
                yield x
    }

/// Splits `input` into groups such that `comp x y` holds for every pair of
/// neighbouring elements inside a group; a new group starts wherever `comp`
/// returns false.
///
/// An empty input yields no groups.
/// NOTE(review): LazyList is not defined in this file (presumably the
/// F# PowerPack type) — confirm.
let rec GroupBy comp (input : LazyList<'a>) : LazyList<LazyList<'a>> =
    LazyList.delayed (fun () ->
        match input with
        | LazyList.Nil -> LazyList.empty ()
        | LazyList.Cons (x, LazyList.Nil) ->
            LazyList.cons (LazyList.cons x (LazyList.empty ())) (LazyList.empty ())
        | LazyList.Cons (x, (LazyList.Cons (y, _) as xs)) ->
            // `xs` is non-empty, so `groups` always has a first group; the two
            // `let (LazyList.Cons ...)` patterns below therefore always match.
            let groups = GroupBy comp xs
            if comp x y then
                // x joins the first group of the remainder; both the tail of
                // that group and the remaining groups are computed on demand.
                LazyList.consf
                    (LazyList.consf x (fun () ->
                        let (LazyList.Cons (firstGroup, _)) = groups
                        firstGroup))
                    (fun () ->
                        let (LazyList.Cons (_, otherGroups)) = groups
                        otherGroups)
            else
                // x forms a group of its own in front of the remaining groups.
                LazyList.cons (LazyList.cons x (LazyList.empty ())) groups)

let result = data |> LazyList.of_seq |> GroupBy (fun x y -> y = x + 1)
printfn "Consuming..."
for group in result do
    printfn "about to do a group"
    for x in group do
        printfn " %d" x
/// Progress of the single pass that `split_up` makes over its input.
type State<'a> =
    | Unstarted          // nothing has been read from the input yet
    | InnerOkay of 'a    // a group is open; carries the last element handed out
    | NeedNewInner of 'a // carries the first element of the next group, not yet handed out
    | Finished           // the input is exhausted

/// Splits `input` into a sequence of sequences without buffering.
///
/// `f prev cur = true` means the neighbors should be kept together;
/// `f prev cur = false` means they should be split.
///
/// All inner sequences share one enumerator over `input`, so each inner
/// sequence must be enumerated to its end before the outer sequence is
/// advanced. Advancing the outer sequence while a group is still open raises
/// an exception (via `failwith`). An empty input yields no groups.
let split_up (f : 'a -> 'a -> bool) (input : seq<'a>) =
    // Wraps a stateful "next item, if any" function as a sequence.
    let iter next =
        Seq.unfold (fun () -> next () |> Option.map (fun x -> x, ())) ()

    seq {
        let state = ref Unstarted
        use ie = input.GetEnumerator()

        // Produces the next element of the current group, or None at its end.
        let innerMoveNext () =
            match !state with
            | Unstarted ->
                if ie.MoveNext() then
                    let cur = ie.Current
                    state := InnerOkay cur
                    Some cur
                else
                    state := Finished
                    None
            | InnerOkay last ->
                if ie.MoveNext() then
                    let cur = ie.Current
                    if f last cur then
                        state := InnerOkay cur
                        Some cur
                    else
                        // cur belongs to the next group: park it and end this one.
                        state := NeedNewInner cur
                        None
                else
                    state := Finished
                    None
            | NeedNewInner first ->
                state := InnerOkay first
                Some first
            | Finished -> None

        // Produces the next group, or None when the input is exhausted.
        let outerMoveNext () =
            match !state with
            | Unstarted ->
                // Read the first element up front so that an empty input
                // produces no groups rather than a single empty group.
                if ie.MoveNext() then
                    state := NeedNewInner ie.Current
                    Some (iter innerMoveNext)
                else
                    state := Finished
                    None
            | NeedNewInner _ -> Some (iter innerMoveNext)
            | InnerOkay _ -> failwith "Move to next inner seq when current is active: undefined behavior."
            | Finished -> None

        yield! iter outerMoveNext
    }
open System
/// Splits `holey` with `split_up`: two neighbouring points stay in the same
/// group when the later time stamp minus the earlier one is at most
/// `contigTime`.
let groupContigs (contigTime : TimeSpan) (holey : seq<DateTime * int>) =
    let isContiguous (earlier : DateTime, _) (later : DateTime, _) =
        later - earlier <= contigTime
    holey |> split_up isContiguous
// Test data: the integers 1..15 paired with time stamps one minute apart,
// with the multiples of 7 filtered out.
let numbers = seq { 1 .. 15 }

let contiguousTimeStamps =
    let baseTime = DateTime.Now
    numbers |> Seq.map (fun n -> baseTime.AddMinutes(float n))

let holeyData =
    numbers
    |> Seq.zip contiguousTimeStamps
    |> Seq.filter (fun (_, num) -> num % 7 <> 0)

let grouped_data = holeyData |> groupContigs (new TimeSpan(0, 1, 0))

// Print every group, consuming each one fully before moving to the next.
printfn "Consuming..."
grouped_data
|> Seq.iter (fun group ->
    printfn "about to do a group"
    group |> Seq.iter (fun x -> printfn " %A" x))
/// Splits `sq` into a sequence of sequences without buffering: neighbouring
/// elements `a`, `b` stay in the same group while `cmp a b` is true.
///
/// The enumerator over `sq` is created when the result is enumerated (not
/// when `groupBy` is called) and is disposed when that enumeration ends, so
/// the result can be enumerated more than once. All groups share that
/// enumerator, so each group must be enumerated to its end before the outer
/// sequence is advanced.
let groupBy cmp (sq : seq<_>) =
    seq {
        use en = sq.GetEnumerator()

        let rec partitions (first : option<_>) =
            seq {
                match first with
                | Some head ->
                    // Always overwritten while the group below is enumerated:
                    // the first element of the next group to output, if any.
                    let next = ref None

                    // Generates one group, setting `next` as it goes.
                    let rec iter item =
                        seq {
                            yield item
                            if en.MoveNext() then
                                let curr = en.Current
                                if cmp item curr then
                                    yield! iter curr
                                else
                                    // consumed one too many - pass it on as the start of the next group
                                    next := Some curr
                            else
                                next := None
                        }

                    yield iter head
                    // `!next` is read only after the caller has resumed the
                    // outer sequence, i.e. after the group above was consumed.
                    yield! partitions !next
                | None -> () // no more values: no more groups
            }

        let first = if en.MoveNext() then Some en.Current else None
        yield! partitions first
    }
/// Returns a function that splits time-stamped points with `groupBy`: two
/// neighbouring points stay in the same group when the later time stamp minus
/// the earlier one is at most `time`.
let groupContiguousDataPoints (time:TimeSpan) : (seq<DateTime*_> -> _) =
    let withinSpan (earlier : DateTime, _) (later : DateTime, _) =
        later - earlier <= time
    fun points -> points |> groupBy withinSpan
let N = 20

/// Test input: the integers 1..N with the multiples of 4 and of 7 removed.
/// A trace line is printed whenever an element is produced, so the output
/// shows when elements are pulled.
let data = // produce some arbitrary data with holes
    seq { 1 .. N }
    |> Seq.filter (fun x -> x % 4 <> 0 && x % 7 <> 0)
    |> Seq.map (fun x ->
        printfn "producing %d" x
        x)
/// Splits `input` into a sequence of sequences without buffering: neighbouring
/// elements `a`, `b` stay in the same group while `comp a b` is true.
///
/// All groups share one enumerator and the mutable cells below, so each group
/// has to be enumerated to its end before the outer sequence is advanced.
let GroupBy comp (input:seq<_>) =
    seq {
        let groupClosed = ref false
        let hasMore = ref true
        use e = input.GetEnumerator()

        // Advances the shared enumerator and records whether it succeeded.
        let advance () =
            hasMore := e.MoveNext()
            !hasMore

        // An empty input produces no groups; otherwise seed `pending` with
        // the first element.
        if e.MoveNext() then
            let pending = ref e.Current
            while !hasMore do
                yield seq {
                    while not (!groupClosed) do
                        if advance () then
                            let upcoming = e.Current
                            // Decide whether this group ends after `pending`
                            // before handing `pending` out.
                            groupClosed := not (comp !pending upcoming)
                            yield !pending
                            pending := upcoming
                        else
                            // end of input: hand out the final value
                            yield !pending
                            groupClosed := true
                }
                groupClosed := false
    }
// Group runs of consecutive integers, then print every group, consuming each
// one fully before moving on to the next.
let result = GroupBy (fun x y -> y = x + 1) data
printfn "Consuming..."
result
|> Seq.iter (fun group ->
    printfn "about to do a group"
    group |> Seq.iter (fun x -> printfn " %d" x))
// Splits `sq` into a sequence of sequences: neighbouring elements `a`, `b`
// stay in the same group while `cmp a b` is true. The result is wrapped in
// Seq.cache.
// NOTE(review): the enumerator `en` is created when groupBy is called and is
// never disposed in this block — confirm that this is acceptable.
let groupBy cmp (sq:seq<_>) =
  let en = sq.GetEnumerator()
  // Reads the next element from the shared enumerator, if there is one.
  let next() = if en.MoveNext() then Some en.Current else None
  (* this function returns a pair containing the first sequence and a lazy option indicating the first element in the next sequence (if any) *)
  let rec seqStartingWith start =
    match next() with
    | Some y when cmp start y ->
        // delay evaluation until forced - stores the rest of this sequence and the start of the next one as a pair
        let rest_next = lazy seqStartingWith y
        seq { yield start; yield! fst (Lazy.force rest_next) },
        lazy Lazy.force (snd (Lazy.force rest_next))
    // Here `next` is the option just read (it shadows the function above):
    // either None or the first element of the following group.
    | next -> seq { yield start }, lazy next
  // Emits one group per step; forcing `start` yields the first element of the
  // group, or None when there are no more groups.
  let rec iter start =
    seq {
      match (Lazy.force start) with
      | None -> ()
      | Some start ->
          let (first,next) = seqStartingWith start
          yield first
          yield! iter next
    }
  Seq.cache (iter (lazy next()))
// Tags every data point with the index of the contiguous run it belongs to,
// then groups the points by that index and strips the index again.
//
// The running state is (index of the current run, previous point if any).
// Scanning the points themselves keeps the first data point and needs no
// sentinel value that could collide with real data.
// NOTE(review): PSeq is not defined in this file; confirm that
// PSeq.groupBy returns the groups in run order.
dataWithOccationalHoles
|> Seq.scan
    (fun (groupIndex, previous) (time, elem) ->
        match previous with
        // A gap larger than the expected spacing starts a new run.
        | Some (prevTime : DateTime, _) when time - prevTime > timeBetweenContiguousValues ->
            groupIndex + 1, Some (time, elem)
        | _ -> groupIndex, Some (time, elem))
    (0, None)
// Drop the initial scan state, which carries no data point.
|> Seq.choose (fun (groupIndex, point) -> point |> Option.map (fun p -> groupIndex, p))
|> PSeq.groupBy fst
|> Seq.map (snd >> Seq.map snd)