Warning: file_get_contents(/data/phpspider/zhask/data//catemap/9/solr/3.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
F# 为什么fsharp自动生成的gethashcode会生成太多冲突?_F# - Fatal编程技术网

F# 为什么fsharp自动生成的gethashcode会生成太多冲突?

F# 为什么fsharp自动生成的gethashcode会生成太多冲突?,f#,F#,在我们的fsharp代码中,自动生成的gethashcode实现显示出非常糟糕的性能和较大的冲突率。这是gethashcode generator的fsharp实现中的一个问题,还是仅仅是一个边缘情况 open System open System.Collections.Generic let check keys e name = let dict = new Dictionary<_,_>(Array.length keys, e)//, HashIdentity.

在我们的 F# 代码中,编译器自动生成的 GetHashCode 实现表现出非常糟糕的性能和很高的冲突率。这是 F# 的 GetHashCode 生成器实现中的一个问题,还是仅仅是一个边缘情况?

open System
open System.Collections.Generic

/// Inserts every element of `keys` into a Dictionary that uses the equality
/// comparer `e`, timing the insertions, and then counts how many keys produce a
/// hash code (via `e.GetHashCode`) that an earlier key already produced.
/// Prints one line: `name`, the elapsed seconds and the collision ratio
/// (colliding keys divided by total keys).
/// Dictionary.Add raises if `keys` contains two elements equal under `e`.
let check keys e name =
    // The comparer is passed explicitly; HashIdentity.Structural is one option.
    let table = new Dictionary<_,_>(Array.length keys, e)
    let timer = System.Diagnostics.Stopwatch.StartNew()
    for key in keys do
        table.Add(key, 1.02)
    timer.Stop()
    // A key counts as a collision when its hash code is already in the set.
    let seenHashes = new HashSet<int>()
    let mutable collisions = 0
    for key in keys do
        if not (seenHashes.Add(e.GetHashCode(key))) then
            collisions <- collisions + 1
    printfn "%s %f sec %f collisions" name timer.Elapsed.TotalSeconds (double(collisions) / double(keys.Length))

/// A two-field value type (struct) holding `fst` and `snd`.
/// No custom Equals/GetHashCode is defined here, so equality and hashing are
/// whatever the F# compiler generates for the struct.
[<Struct>]
type StructTuple<'T,'T2> =
    val fst: 'T
    val snd: 'T2
    /// Builds a pair from its two components.
    new(fst: 'T, snd: 'T2) = { fst = fst; snd = snd }

/// Endless sequence of randomly generated keys.
/// Each enumeration creates its own Random; every element draws the first
/// component from Random.Next(0, 3346862) and then the second from
/// Random.Next(0, 658). The sequence never terminates by itself, so callers
/// must bound it (e.g. with Seq.take).
let bad_keys =
    Seq.delay (fun () ->
        let rng = new Random()
        Seq.unfold
            (fun () ->
                // Draw order matters: first component, then second.
                let first = uint32 (rng.Next(0, 3346862))
                let second = uint16 (rng.Next(0, 658))
                Some (StructTuple(first, second), ()))
            ())

/// Deterministic sequence of keys: for each second component k in 0..658
/// (outer loop) it yields every first component j in 0..3346862 (inner loop),
/// both ranges inclusive.
let good_keys =
    seq { 0us .. 658us }
    |> Seq.collect (fun k ->
        seq { 0u .. 3346862u }
        |> Seq.map (fun j -> StructTuple(j, k)))

module CmpHelpers =
    /// Combines two hash codes as ((h1 <<< 5) + h1) ^^^ h2.
    /// The parentheses only make the grouping explicit: `+` binds tighter
    /// than `^^^` in F#, so the grouping is the same without them.
    let inline combine (h1:int) (h2:int) =
        let scaled = (h1 <<< 5) + h1
        scaled ^^^ h2

/// Hand-written equality comparer for StructTuple.
/// Both fields are compared and hashed through EqualityComparer<Object>.Default,
/// and the two field hashes are merged with CmpHelpers.combine.
type StructTupleComparer<'T,'T2>() =
    let fieldComparer = EqualityComparer<Object>.Default
    interface IEqualityComparer<StructTuple<'T,'T2>> with
        /// Two tuples are equal when both components are equal; the second
        /// component is only examined if the first ones match.
        member __.Equals (left, right) =
            if fieldComparer.Equals(left.fst, right.fst) then
                fieldComparer.Equals(left.snd, right.snd)
            else
                false
        /// Hash of the first component combined with hash of the second.
        member __.GetHashCode (key) =
            let firstHash = fieldComparer.GetHashCode(key.fst)
            let secondHash = fieldComparer.GetHashCode(key.snd)
            CmpHelpers.combine firstHash secondHash

/// Equality comparer for StructTuple written explicitly against the
/// LanguagePrimitives.HashCompare intrinsics.
/// NOTE(review): judging by its name, this is meant to spell out what the
/// compiler generates for the struct; confirm against the generated IL.
type AutoGeneratedStructTupleComparer<'T,'T2>() =
    let genericComparer = LanguagePrimitives.GenericEqualityComparer
    interface IEqualityComparer<StructTuple<'T,'T2>> with
        /// Compares `fst` first and only then `snd`.
        member __.Equals (a:StructTuple<'T,'T2>, b:StructTuple<'T,'T2>) =
            if LanguagePrimitives.HashCompare.GenericEqualityERIntrinsic<'T> a.fst b.fst then
                LanguagePrimitives.HashCompare.GenericEqualityERIntrinsic<'T2> a.snd b.snd
            else
                false
        /// Folds the hash of `snd` and then the hash of `fst` into an
        /// accumulator that starts at 0.
        member __.GetHashCode (x:StructTuple<'T,'T2>) =
            // One folding step: constant + (field hash + (acc <<< 6) + (acc >>> 2)).
            let mix (acc:int) (fieldHash:int) =
                -1640531527 + (fieldHash + ((acc <<< 6) + (acc >>> 2)))
            let sndHash = LanguagePrimitives.HashCompare.GenericHashWithComparerIntrinsic<'T2> genericComparer x.snd
            let afterSnd = mix 0 sndHash
            let fstHash = LanguagePrimitives.HashCompare.GenericHashWithComparerIntrinsic<'T> genericComparer x.fst
            mix afterSnd fstHash


/// Returns the distinct elements of `sq` as an array, using a HashSet with its
/// default equality comparer to drop duplicates.
let uniq (sq:seq<'a>) =
    let distinct = new HashSet<'a>(sq)
    let result : 'a[] = Array.zeroCreate distinct.Count
    distinct.CopyTo(result)
    result

[<EntryPoint>]
let main argv =
    let count = 15000000
    // Takes `count` elements from `source`, removes duplicates, prints the
    // label and then runs `check` with each of the three comparers.
    let benchmark (label:string) (source:seq<StructTuple<uint32,uint16>>) =
        let keys = source |> Seq.take count |> uniq
        printfn "%s" label
        check keys (new StructTupleComparer<_,_>()) "struct custom"
        check keys HashIdentity.Structural "struct auto"
        check keys (new AutoGeneratedStructTupleComparer<_,_>()) "struct auto explicit"

    benchmark "good keys" good_keys
    benchmark "bad keys" bad_keys

    // Wait for Enter before exiting.
    Console.ReadLine() |> ignore
    0 // return an integer exit code
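输出:

good keys

struct custom 1.506934 sec 0.000000 collisions

struct auto 4.832881 sec 0.776863 collisions

struct auto explicit 3.166931 sec 0.776863 collisions

bad keys

struct custom 3.631251 sec 0.061893 collisions

struct auto 10.340693 sec 0.777034 collisions

struct auto explicit 8.893612 sec 0.777034 collisions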

我对用于生成自动生成的Equals和GetHashCode的整体算法不是专家,但它似乎在这里生成了一些非最优的东西。我不知道这对于一个通用的自动生成实现来说是否正常,或者是否有可靠的自动生成接近最佳实现的实用方法

值得注意的是,如果只使用标准元组,则自动生成的哈希和比较将提供与自定义实现相同的冲突率和性能。使用最新的 F# 4.0 版本时,自动生成的实现甚至比自定义实现快得多。

我的测试数据:

// F# 3.1, struct tuples
good keys
  custom 0.951254 sec 0.000000 collisions
  auto 2.737166 sec 0.776863 collisions
bad keys
  custom 2.923103 sec 0.061869 collisions
  auto 7.706678 sec 0.777040 collisions

// F# 3.1, standard tuples
good keys
  custom 0.995701 sec 0.000000 collisions
  auto 0.965949 sec 0.000000 collisions
bad keys
  custom 3.091821 sec 0.061869 collisions
  auto 2.924721 sec 0.061869 collisions

// F# 4.0, standard tuples
good keys
  custom 1.018672 sec 0.000000 collisions
  auto 0.619066 sec 0.000000 collisions
bad keys
  custom 3.082988 sec 0.061869 collisions
  auto 1.829720 sec 0.061869 collisions

已在 F# 的问题跟踪器中提交了该问题,并已被确认为 bug。

不幸的是,我不能使用内置(built-in)元组,因为它是引用类型;我也不能在生产环境中使用 F# 4.0。问题不在于性能,而在于冲突。据我所知,F# 使用标准的 CLR 元组类,所以上一个示例说明不了什么,因为我的自定义实现只是标准元组实现的一个副本。主要问题是冲突数量的增长,它在我们的生产代码中导致了非常严重的性能问题。从“自动”实现切换到“自定义”实现后,服务器的加载时间从几个小时缩短到了几分钟。