在 F# 中不使用 List<List<string>> 获取列表的列表_F#

在 F# 中不使用 List<List<string>> 获取列表的列表

在 F# 中不使用 List<List<string>> 获取列表的列表,f#,F#,我在这里有一个函数： let ProcessFile (allLines: string list) = let list = new List<List<string>>() let rec SplitFile (input: string list) = if input.Length <> 0 then list.Add(new List<string>(input.TakeWhil

我在这里有一个函数：

/// Splits the lines of a file into groups separated by blank lines.
///
/// Each maximal run of non-empty strings becomes one `List<string>`; the
/// groups are returned in input order inside a mutable `List<List<string>>`.
/// Note that a leading blank line produces an empty first group, because the
/// first `TakeWhile` then matches nothing.
let ProcessFile (allLines: string list) =
    let groups = new List<List<string>>()

    // Peels one group off the front of `remaining`, then recurses on whatever
    // follows the run of blank lines that terminated it.
    let rec SplitFile (remaining: string list) =
        if not (List.isEmpty remaining) then
            let group = new List<string>(remaining.TakeWhile(fun line -> line <> ""))
            groups.Add(group)

            let afterGroup = remaining.SkipWhile(fun line -> line <> "")
            let afterBlanks = afterGroup.SkipWhile(fun line -> line = "")
            SplitFile (Seq.toList afterBlanks)

    SplitFile allLines
    groups

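let ProcessFile (allLines: string list) =
    let list = new List<List<string>>()
    let rec SplitFile (input: string list) =
        if input.Length <> 0 then
            list.Add(new List<string>(input.TakeWhile(fun x -> x <> "")))
            let nextGroup = input.SkipWhile(fun x -> x <> "").SkipWhile(fun x -> x = "")
            SplitFile (Seq.toList nextGroup)
    SplitFile allLines |> ignore
    list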

它接收以字符串列表形式给出的文件内容，把由空行分隔的每一组行作为一个单独的列表，最终返回一个列表的列表。

我的问题是，有没有更好的方法来实现这一点，让我得到类似 string list list 的结果，而不必使用 new List<List<string>>？因为这对我来说似乎不是特别整洁。

您的代码对我来说非常可读，但是递归地使用

TakeWhile

和

SkipWhile

效率相当低。下面是一个简单的函数式递归解决方案：

/// Splits `allLines` into groups of consecutive non-empty strings, using
/// empty strings as separators. Runs of empty strings never produce empty
/// groups. Groups and the lines inside them keep their input order.
let ProcessFile (allLines: string list) =
  // `groups` holds the finished groups, newest first; `current` holds the
  // lines of the group being built, newest first. Both are reversed when
  // they are emitted.
  let rec SplitFile input groups current =
    match input, current with
    // Blank line with nothing collected yet: just skip it.
    | "" :: rest, [] -> SplitFile rest groups []
    // Blank line closing a non-empty group: store the group and start afresh.
    | "" :: rest, _ -> SplitFile rest (List.rev current :: groups) []
    // Ordinary line: push it onto the group under construction.
    | line :: rest, _ -> SplitFile rest groups (line :: current)
    // End of input with no pending group.
    | [], [] -> List.rev groups
    // End of input with a pending group: store it before finishing.
    | [], _ -> List.rev (List.rev current :: groups)

  SplitFile allLines [] []

ProcessFile ["a"; "b"; ""; ""; "c"; ""; "d"]

就我个人而言，我喜欢单行（one-liner）式的写法：

// Sample input; the pipeline below works on any enumerable / seq.
let source = ["a"; "b"; ""; ""; "c"; ""; "d"]

source
// Tag every line with a group id that is bumped on each empty line.
// The seed (0, "") is emitted by Seq.scan too and is dropped by the filter.
|> Seq.scan (fun (groupId, _) line -> (if line = "" then groupId + 1 else groupId), line) (0, "")
// Discard the empty separator lines (and the seed).
|> Seq.filter (fun (_, line) -> line <> "")
// Collect lines that share a group id.
|> Seq.groupBy fst
// Keep only the lines of each group, dropping the ids.
|> Seq.map (snd >> Seq.map snd >> Seq.toList)
// Materialise the outer sequence as a list.
|> Seq.toList

let source = ["a"; "b"; ""; ""; "c"; ""; "d"]
source // 可以是任何可枚举对象或 seq
|> Seq.scan (fun (i, _) e -> if e = "" then (i + 1, e) else (i, e)) (0, "") // 添加“索引”
|> Seq.filter (fun (_, e) -> e <> "") // 删除空条目
|> Seq.groupBy fst // 按索引分组
|> Seq.map (fun (_, l) -> l |> Seq.map snd |> List.ofSeq) // 仅从每个组中提取列表（丢弃索引）
|> List.ofSeq // 转换回列表

这里最大的问题是

Seq.groupBy

会将整个列表读取到内存中，不过您本来也已经这样做了。有一些

groupBy

的实现，它们只会查看相邻的条目，这就足够了，并且可以让您以

Seq

的形式输入文件（例如，使用

File.ReadLines

而不是

File.ReadAllLines

）。

更惯用的解决方案可能是：

/// Lazily splits the string list `xs` into groups of consecutive non-empty
/// strings, using empty strings as separators. Each group is yielded as a
/// `Seq.take` view over the input list rather than a copy.
let processFile xs =
  // Counts the non-empty strings at the front of `remaining` (added to
  // `count`) and returns that total together with the list that follows the
  // terminating empty string, or [] when the input ends first.
  let rec nonEmpties count remaining =
    match remaining with
    | [] -> count, []
    | "" :: tail -> count, tail
    | _ :: tail -> nonEmpties (count + 1) tail
  // Skips separator lines, then yields one group and continues after it.
  let rec loop lines =
    seq {
      match lines with
      | [] -> ()
      | "" :: tail -> yield! loop tail
      | _ ->
          let groupSize, rest = nonEmpties 0 lines
          yield Seq.take groupSize lines
          yield! loop rest }
  loop xs

其中嵌套的

nonEmpties

函数计算给定列表开头有多少个连续的非空元素，并返回该计数以及最后一个非空元素之后的尾部列表；

loop

函数跳过空元素，并依次生成由非空元素组成的序列。

此解决方案的一些有趣特性：

/// Splits `allLines` into groups of consecutive non-empty strings, using
/// empty strings as separators. Runs of empty strings never produce empty
/// groups. Groups and the lines inside them keep their input order.
let ProcessFile (allLines: string list) =
  // `groups` holds the finished groups, newest first; `current` holds the
  // lines of the group being built, newest first. Both are reversed when
  // they are emitted.
  let rec SplitFile input groups current =
    match input, current with
    // Blank line with nothing collected yet: just skip it.
    | "" :: rest, [] -> SplitFile rest groups []
    // Blank line closing a non-empty group: store the group and start afresh.
    | "" :: rest, _ -> SplitFile rest (List.rev current :: groups) []
    // Ordinary line: push it onto the group under construction.
    | line :: rest, _ -> SplitFile rest groups (line :: current)
    // End of input with no pending group.
    | [], [] -> List.rev groups
    // End of input with a pending group: store it before finishing.
    | [], _ -> List.rev (List.rev current :: groups)

  SplitFile allLines [] []

ProcessFile ["a"; "b"; ""; ""; "c"; ""; "d"]

完全尾递归，因此它可以处理任意长的非空字符串序列和空字符串序列
通过引用回输入列表来避免复制

在用 1000 个序列、每个序列 1000 个字符串的输入进行测试时，该解决方案比 yamen 的快 8 倍，比 Tomas 的快 50%。

下面是一个更快的解决方案，首先将输入列表转换为数组，然后根据数组索引执行操作：

/// Lazily splits `xs` into groups of consecutive non-empty strings, using
/// empty strings as separators. The input is first copied into an array and
/// each group is yielded as a fresh sub-array.
let processFile xs =
  let lines = Array.ofSeq xs
  // Returns the first index at or after `i` that is either past the end of
  // the array or holds an empty string.
  let rec nonEmpties i =
    if i <> lines.Length && lines.[i] <> "" then nonEmpties (i + 1) else i
  // Walks the array from `start`, skipping separators and yielding groups.
  let rec loop start =
    seq {
      if start < lines.Length then
        match lines.[start] with
        | "" -> yield! loop (start + 1)
        | _ ->
            let stop = nonEmpties start
            yield Array.sub lines start (stop - start)
            yield! loop stop }
  loop 0

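let processFile xs =
  let xs = Array.ofSeq xs
  let rec nonEmpties i =
    if i=xs.Length || xs.[i]="" then i else
      nonEmpties (i+1)
  let rec loop i =
    seq { if i < xs.Length then
            if xs.[i] = "" then
              yield! loop (i+1)
            else
              let j = nonEmpties i
              yield Array.sub xs i (j - i)
              yield! loop j }
  loop 0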


在测试输入1000个字符串的1000个序列时，此解决方案比yamen的快34倍，比Tomas的快6倍。
直接使用普通的 List.fold 怎么样：
/// Splits `lines` into groups of consecutive non-empty strings, using empty
/// strings as separators.
///
/// Returns the groups in input order, each with its lines in input order.
/// Leading, trailing and repeated empty lines never produce empty groups.
let processFile lines =
    // Fold state: the groups seen so far, newest first. The head is the group
    // under construction (its lines newest first); an empty head means a
    // separator was just seen and the next line starts a new group. Groups in
    // the tail are already finished and in input order.
    let step acc line =
        match acc, line with
        // Empty line before any group has started: ignore it.
        | [], "" -> acc
        // First non-empty line: start the first group.
        | [], _ -> [ [ line ] ]
        // Repeated empty line: the placeholder is already in place.
        | [] :: _, "" -> acc
        // Empty line after a group: finish that group (restoring its order)
        // and push an empty placeholder for the next one.
        | current :: finished, "" -> [] :: List.rev current :: finished
        // Non-empty line: add it to the group under construction.
        | current :: finished, _ -> (line :: current) :: finished

    match List.fold step [] lines with
    | [] -> []
    // Input ended on a separator: drop the unused placeholder.
    | [] :: finished -> List.rev finished
    // Input ended inside a group: that group still needs its order restored.
    | current :: finished -> List.rev (List.rev current :: finished)

回答不错，但我建议使用模式匹配，而不是对列表做相等比较。也就是说，同时对 input, current 进行匹配，并用模式来检查 current 是否为空，而不是写 current <> []。