Arrays F#从字符串[]列表中删除重复项_Arrays_List_F#_Duplicates

Arrays F#从字符串[]列表中删除重复项

arrays list f#

Arrays F#从字符串[]列表中删除重复项,arrays,list,f#,duplicates,Arrays,List,F#,Duplicates,我有一个生成[]列表的程序，我正在尝试从列表中删除几乎重复的数组。列表的一个例子是 [ [| "Jackson"; "Stentzke"; "22"; "001" |]; [| "Jackson"; "Stentzke"; "22"; "002" |]; [| "Alec"; "Stentzke"; "18"; "0

我有一个生成[]列表的程序，我正在尝试从列表中删除几乎重复的数组。列表的一个例子是

[
   [|
      "Jackson";
      "Stentzke";
      "22";
      "001"
    |];
    [|
      "Jackson";
      "Stentzke";
      "22";
      "002"
    |];
    [|
      "Alec";
      "Stentzke";
      "18";
      "003"
    |]
]

基本上，我正在尝试编写一个函数，该函数将读取列表并删除几乎相同数据的所有示例。所以最终返回的[]列表应该如下所示

[
    [|
      "Alec";
      "Stentzke";
      "18";
      "003"
    |]
]

我已经尝试了很多函数，试图得到这个结果，或者一些可以使用的接近它的结果。我目前的尝试是

// Attempted near-duplicate removal (the version under discussion, which does not
// produce the desired result). For every array aL in the input, the whole input is
// traversed again and each aL2 is either kept or replaced by the placeholder [|""|].
let removeDuplicates (arrayList: string[]list) =
    let list = arrayList|> List.map(fun aL -> 
        let a = arrayList|> List.map(fun aL2 -> 
                try
                    // Compares elements 0, 2 and 3 of the two arrays; element 1 is never inspected.
                    match (aL.GetValue(0).Equals(aL2.GetValue(0))) && (aL.GetValue(2).Equals(aL2.GetValue(2))) && (aL.GetValue(3).Equals(aL2.GetValue(3))) with
                    // The arrays differ on at least one compared element: keep aL2 as is.
                    | false -> aL2
                    // All compared elements match (this includes aL compared with itself): emit the placeholder.
                    | _ -> [|""|]
                with
                // Any exception raised by the comparison also yields the placeholder.
                | ex -> [|""|]
            )
        a 
                                              )
    // Flatten the list of per-element lists into one list and drop repeated arrays.
    list |> List.concat |> List.distinct

但这段代码返回的只是输入 string[] 列表的反向版本

有人知道如何从列表中删除几乎重复的数组吗？

我相信您的代码和注释不太匹配。考虑到您的评论“第一、第二和第三个值是相同的”，我相信这可以让您走上正确的道路：

/// Keeps the first array seen for each combination of (element 0, element 1, element 2)
/// and drops every later array that repeats that combination; element 3 is ignored.
/// Returns a string[] list in the original order of first occurrences.
/// Indexing raises IndexOutOfRangeException for arrays with fewer than three elements.
let removeDuplicates (arrayList: string[] list) =
    // List.distinctBy keeps the result a list; Seq.distinctBy would return a lazy seq<string[]>.
    arrayList |> List.distinctBy (fun elem -> (elem.[0], elem.[1], elem.[2]))

对输入数据执行此操作的结果是两个元素的列表，其中包含：

[
 [|
  "Jackson";
  "Stentzke";
  "22";
  "001"
 |];
 [|
  "Alec";
  "Stentzke";
  "18";
  "003"
 |]
]

我相信您的代码和注释不太匹配。考虑到您的评论“第一、第二和第三个值是相同的”，我相信这可以让您走上正确的道路：

/// Keeps the first array seen for each combination of (element 0, element 1, element 2)
/// and drops every later array that repeats that combination; element 3 is ignored.
/// Returns a string[] list in the original order of first occurrences.
/// Indexing raises IndexOutOfRangeException for arrays with fewer than three elements.
let removeDuplicates (arrayList: string[] list) =
    // List.distinctBy keeps the result a list; Seq.distinctBy would return a lazy seq<string[]>.
    arrayList |> List.distinctBy (fun elem -> (elem.[0], elem.[1], elem.[2]))

对输入数据执行此操作的结果是两个元素的列表，其中包含：

[
 [|
  "Jackson";
  "Stentzke";
  "22";
  "001"
 |];
 [|
  "Alec";
  "Stentzke";
  "18";
  "003"
 |]
]

你应该根据你认为相同的字段创建字典/映射（Map），然后删除任何重复出现的项。这里有一个简单而机械的方法，假设

xs

是您在上面指定的列表：

/// One input row split into named fields, plus the grouping key built from the
/// first three fields.
type DataRec = { key:string
                 fname:string
                 lname:string
                 id1:string
                 id2:string}

// Build one record per input array. The key parts are joined with a NUL separator:
// plain concatenation would give [|"ab"; "c"; ...|] and [|"a"; "bc"; ...|] the same key.
// (Assumes the fields themselves contain no NUL character.)
let dataRecs =
    xs |> List.map (fun x ->
        { key = String.concat "\u0000" [ x.[0]; x.[1]; x.[2] ]
          fname = x.[0]
          lname = x.[1]
          id1 = x.[2]
          id2 = x.[3] })

// Group by key, keep only the groups that contain exactly one record (rows that
// have no near-duplicate), and turn each surviving record back into an array.
dataRecs |> Seq.groupBy (fun x -> x.key) 
         |> Seq.filter (fun (_, group) -> Seq.length group = 1)
         |> Seq.collect snd
         |> Seq.map (fun x -> [|x.fname;x.lname;x.id1;x.id2|])
         |> Seq.toList

输出：

val it : string [] list = [[|"Alec"; "Stentzke"; "18"; "003"|]]

它基本上从前三项创建一个键，按该键分组，过滤掉包含多于一个元素的分组，然后把剩下的记录映射回数组。xs 是您在上面指定的列表：

/// One input row split into named fields, plus the grouping key built from the
/// first three fields.
type DataRec = { key:string
                 fname:string
                 lname:string
                 id1:string
                 id2:string}

// Build one record per input array. The key parts are joined with a NUL separator:
// plain concatenation would give [|"ab"; "c"; ...|] and [|"a"; "bc"; ...|] the same key.
// (Assumes the fields themselves contain no NUL character.)
let dataRecs =
    xs |> List.map (fun x ->
        { key = String.concat "\u0000" [ x.[0]; x.[1]; x.[2] ]
          fname = x.[0]
          lname = x.[1]
          id1 = x.[2]
          id2 = x.[3] })

// Group by key, keep only the groups that contain exactly one record (rows that
// have no near-duplicate), and turn each surviving record back into an array.
dataRecs |> Seq.groupBy (fun x -> x.key) 
         |> Seq.filter (fun (_, group) -> Seq.length group = 1)
         |> Seq.collect snd
         |> Seq.map (fun x -> [|x.fname;x.lname;x.id1;x.id2|])
         |> Seq.toList

输出：

val it : string [] list = [[|"Alec"; "Stentzke"; "18"; "003"|]]

它基本上从前三项创建一个键，按该键分组，过滤掉包含多于一个元素的分组，然后把剩下的记录映射回数组

使用一些Linq：

/// Builds an equality comparer that treats two string arrays as equal when they
/// differ in at most `atMost` positions. Elements are compared pairwise with the
/// invariant-culture string comparer; when the arrays have different lengths,
/// every unmatched trailing element counts as one difference.
/// NOTE(review): this relation is not transitive, so grouping with it may depend
/// on the order of the input — confirm that is acceptable for the caller.
let comparer (atMost) = 
    { new System.Collections.Generic.IEqualityComparer<string[]> with
            member __.Equals(a, b) = 
                // Seq.zip stops at the shorter array, so account for the length gap explicitly.
                let lengthGap = abs (a.Length - b.Length)
                let mismatches =
                    Seq.zip a b
                    // Compare returns any signed int; abs |> min 1 collapses it to 0 (same) or 1 (different).
                    |> Seq.sumBy (fun (a',b') -> System.StringComparer.InvariantCulture.Compare(a', b') |> abs |> min 1)
                mismatches + lengthGap <= atMost
            // Constant hash: every pair of arrays lands in the same bucket, so
            // hash-based consumers always fall through to Equals.
            member __.GetHashCode(a) = 1
    }

// Group the rows that the comparer considers equal (at most one differing element),
// then yield the key of every group that has exactly one member.
System.Linq.Enumerable.GroupBy(data, id, comparer 1)
    |> Seq.filter (fun grouping -> Seq.length grouping = 1)
    |> Seq.map (fun grouping -> grouping.Key)

let comparer (atMost) =
    { new System.Collections.Generic.IEqualityComparer<string[]> with
            member __.Equals(a, b) =
                Seq.zip a b
                    |> Seq.sumBy (fun (a',b') -> System.StringComparer.InvariantCulture.Compare(a', b') |> abs |> min 1)
                    |> ((>=) atMost)
            member __.GetHashCode(a) = 1
    }
System.Linq.Enumerable.GroupBy(data, id, comparer 1)
    |> Seq.choose (fun g -> match Seq.length g with | 1 -> Some g.Key | _ -> None)

比较器允许两个数组之间最多存在 atMost 个差异。

使用一些Linq:

/// Builds an equality comparer that treats two string arrays as equal when they
/// differ in at most `atMost` positions. Elements are compared pairwise with the
/// invariant-culture string comparer; when the arrays have different lengths,
/// every unmatched trailing element counts as one difference.
/// NOTE(review): this relation is not transitive, so grouping with it may depend
/// on the order of the input — confirm that is acceptable for the caller.
let comparer (atMost) = 
    { new System.Collections.Generic.IEqualityComparer<string[]> with
            member __.Equals(a, b) = 
                // Seq.zip stops at the shorter array, so account for the length gap explicitly.
                let lengthGap = abs (a.Length - b.Length)
                let mismatches =
                    Seq.zip a b
                    // Compare returns any signed int; abs |> min 1 collapses it to 0 (same) or 1 (different).
                    |> Seq.sumBy (fun (a',b') -> System.StringComparer.InvariantCulture.Compare(a', b') |> abs |> min 1)
                mismatches + lengthGap <= atMost
            // Constant hash: every pair of arrays lands in the same bucket, so
            // hash-based consumers always fall through to Equals.
            member __.GetHashCode(a) = 1
    }

// Group the rows that the comparer considers equal (at most one differing element),
// then yield the key of every group that has exactly one member.
System.Linq.Enumerable.GroupBy(data, id, comparer 1)
    |> Seq.filter (fun grouping -> Seq.length grouping = 1)
    |> Seq.map (fun grouping -> grouping.Key)

let comparer (atMost) =
    { new System.Collections.Generic.IEqualityComparer<string[]> with
            member __.Equals(a, b) =
                Seq.zip a b
                    |> Seq.sumBy (fun (a',b') -> System.StringComparer.InvariantCulture.Compare(a', b') |> abs |> min 1)
                    |> ((>=) atMost)
            member __.GetHashCode(a) = 1
    }
System.Linq.Enumerable.GroupBy(data, id, comparer 1)
    |> Seq.choose (fun g -> match Seq.length g with | 1 -> Some g.Key | _ -> None)

比较器允许两个数组之间存在

atMost:int

差异数。

如果名字和姓氏都相同，您对“近似相同”的定义是什么？确切的意思是什么？在您的示例中，第一个参数和第四个参数相等，但没有一个数据符合该条件。请尝试使用

List.distinctBy

。目前文档中似乎还没有收录它，但下面的说明应该能让你了解大致思路：你编写一个“投影”函数，给定一个列表项，它只返回你想要考虑的那部分数据，用来判断该项对你的目的而言是否“足够独特”。顺便说一下，当您调用

List.map

后紧接着调用

List.concat

时，这通常表明您应该改用 List.collect。“近似相同”表示第一个、第二个和第三个值相同，但第四个值不同。您对“近似相同”的定义是什么？是指名字和姓氏相同吗？“几乎完全相同”的确切含义是什么？在您的示例中，您比较的是第一个和第四个参数是否相等，而没有一条数据符合该标准。请尝试使用

List.distinctBy

。目前文档中似乎缺少该函数，但以下说明应能给您一个大致思路：您编写一个“投影”函数，给定列表项，它只返回您想考虑的数据子集，以确定此项对于您的目的是否唯一。顺便说一下，当您调用 List.map 后面紧接着调用 List.concat 时，这通常表示您应该使用 List.collect。这意味着第一、第二和第三个值相同，但第四个值不同。那么是 type user = {fname:string; lname:string; id1:int; id2:int} 吗？我想是用 user 替换 key？您还可以创建一个

type user = {key:string; row:string[]}

，其中row项只包含整个数组。然后，您可以将

Seq.map

行分解出来，我将其返回到一个数组中。只需为我的程序配置它，它就可以工作了。干杯：）如此；类型user={fname:string；lname:string；id1:int；id2:int}？我想用user替换key？您还可以创建一个

type user = {key:string; row:string[]}

，其中row项只包含整个数组。然后，您可以将

Seq.map

行分解出来，我将其返回到一个数组中。只需为我的程序配置它，它就可以工作了。干杯：）