F# 为什么fsharp自动生成的gethashcode会生成太多冲突?
在我们的 F# 代码中,自动生成的 GetHashCode 实现表现出非常糟糕的性能和很高的冲突率。这是 F# 的 GetHashCode 生成器实现中的问题,还是仅仅是一个边缘情况?
open System
open System.Collections.Generic
/// Inserts every key into a dictionary that uses comparer `e`, timing the
/// insertions, then prints `name`, the elapsed seconds, and the fraction of
/// keys whose hash code (as computed by `e`) was already produced by an
/// earlier key in the array.
let check keys e name =
    // The table is created with an initial capacity equal to the key count.
    let table = new Dictionary<_,_>(Array.length keys, e)
    let timer = System.Diagnostics.Stopwatch.StartNew()
    for key in keys do
        table.Add(key, 1.02)
    timer.Stop()
    // A key is counted as a collision when its hash could not be added to
    // the set, i.e. HashSet.Add returned false.
    let seenHashes = new HashSet<int>()
    let isRepeatedHash key = not (seenHashes.Add(e.GetHashCode(key)))
    let collisionCount = keys |> Array.filter isRepeatedHash |> Array.length
    let collisionRate = double(collisionCount) / double(keys.Length)
    printfn "%s %f sec %f collisions" name timer.Elapsed.TotalSeconds collisionRate
/// A two-component pair declared as a value type (struct).
/// No custom equality or hashing is defined on it here.
[<Struct>]
type StructTuple<'T,'T2> =
    /// First component.
    val fst : 'T
    /// Second component.
    val snd : 'T2
    /// Builds a pair from its two components.
    new(fst : 'T, snd : 'T2) = { fst = fst; snd = snd }
/// Endless sequence of randomly generated (uint32, uint16) keys.
/// Duplicates are possible; callers are expected to bound the sequence
/// (it never terminates on its own).
let bad_keys = seq {
    // The generator is created inside the sequence expression, so it is
    // constructed when the sequence is enumerated.
    let random = Random()
    while true do
        // The first component is drawn before the second.
        let first = uint32 (random.Next(0, 3346862))
        let second = uint16 (random.Next(0, 658))
        yield StructTuple(first, second)
}
/// Deterministic sequence of keys: for each second component 0us..658us
/// (outer loop), every first component 0u..3346862u (inner loop), in order.
let good_keys = seq {
    for second in 0us .. 658us do
        for first in 0u .. 3346862u ->
            StructTuple(first, second)
}
/// Helper for combining hash codes.
module CmpHelpers =
    /// Combines two hash codes as ((h1 <<< 5) + h1) ^^^ h2.
    /// The addition is evaluated before the xor, because `+` binds tighter
    /// than `^^^` in F#.
    let inline combine (h1:int) (h2:int) =
        let spread = (h1 <<< 5) + h1
        spread ^^^ h2
/// Hand-written equality comparer for StructTuple. Both components are
/// compared and hashed through EqualityComparer<Object>.Default, and the two
/// component hashes are merged with CmpHelpers.combine.
type StructTupleComparer<'T,'T2>() =
    let objectComparer = EqualityComparer<Object>.Default
    interface IEqualityComparer<StructTuple<'T,'T2>> with
        /// True when the first components are equal and the second components are equal.
        member self.Equals (a,b) =
            if objectComparer.Equals(a.fst, b.fst) then
                objectComparer.Equals(a.snd, b.snd)
            else
                false
        /// Hash of the first component combined with the hash of the second.
        member self.GetHashCode (x) =
            let firstHash = objectComparer.GetHashCode(x.fst)
            let secondHash = objectComparer.GetHashCode(x.snd)
            CmpHelpers.combine firstHash secondHash
/// Comparer that spells out equality and hashing for StructTuple using the
/// LanguagePrimitives.HashCompare intrinsics.
/// NOTE(review): presumably a hand-expanded copy of what the compiler
/// generates for the struct, written out for comparison - confirm.
type AutoGeneratedStructTupleComparer<'T,'T2>() =
    let genericComparer = LanguagePrimitives.GenericEqualityComparer
    interface IEqualityComparer<StructTuple<'T,'T2>> with
        /// True when both components are equal under GenericEqualityERIntrinsic.
        member self.Equals (a:StructTuple<'T,'T2>,b:StructTuple<'T,'T2>) =
            if LanguagePrimitives.HashCompare.GenericEqualityERIntrinsic<'T> a.fst b.fst then
                LanguagePrimitives.HashCompare.GenericEqualityERIntrinsic<'T2> a.snd b.snd
            else
                false
        /// Folds the hash of `snd` and then the hash of `fst` into an
        /// accumulator that starts at 0.
        member self.GetHashCode (x:StructTuple<'T,'T2>) =
            // -1640531527 is 0x9E3779B9 read as a signed 32-bit integer.
            let salt = -1640531527
            let mix (acc:int) = (acc <<< 6) + (acc >>> 2)
            let sndHash = LanguagePrimitives.HashCompare.GenericHashWithComparerIntrinsic<'T2> genericComparer x.snd
            // The accumulator is still 0 at this step, so `mix 0` contributes 0.
            let afterSnd = salt + (sndHash + mix 0)
            let fstHash = LanguagePrimitives.HashCompare.GenericHashWithComparerIntrinsic<'T> genericComparer x.fst
            salt + (fstHash + mix afterSnd)
/// Returns the distinct elements of `sq` as an array, de-duplicating by
/// loading the sequence into a HashSet built with its default comparer.
let uniq (sq:seq<'a>) =
    let distinctItems = new HashSet<'a>(sq)
    Seq.toArray distinctItems
/// Benchmark entry point: takes 15,000,000 items from each key source,
/// de-duplicates them, and runs `check` with the custom comparer, with
/// HashIdentity.Structural, and with the explicit comparer. Waits for a line
/// on standard input before exiting with code 0.
[<EntryPoint>]
let main argv =
    let count = 15000000
    // Runs the three comparer checks against one key source. The keys are
    // materialised before the title is printed.
    let runSuite (title:string) (source:seq<StructTuple<uint32,uint16>>) =
        let keys = source |> Seq.take count |> uniq
        printfn "%s" title
        check keys (new StructTupleComparer<_,_>()) "struct custom"
        check keys HashIdentity.Structural "struct auto"
        check keys (new AutoGeneratedStructTupleComparer<_,_>()) "struct auto explicit"
    runSuite "good keys" good_keys
    runSuite "bad keys" bad_keys
    // Keep the console window open until the user presses Enter.
    Console.ReadLine() |> ignore
    0 // return an integer exit code
输出
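good keys
struct custom 1.506934 sec 0.000000 collisions
struct auto 4.832881 sec 0.776863 collisions
struct auto explicit 3.166931 sec 0.776863 collisions
bad keys
struct custom 3.631251 sec 0.061893 collisions
struct auto 10.340693 sec 0.777034 collisions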
struct auto explicit 8.893612 sec 0.777034 collisions
我并不精通用于生成 Equals 和 GetHashCode 的整体算法,但它在这里生成的实现似乎并非最优。我不确定这对于通用的自动生成实现来说是否正常,也不确定是否存在可靠的、能自动生成接近最优实现的实用方法。
值得注意的是,如果只使用标准元组,自动生成的哈希和比较会得到与自定义实现相同的冲突率和性能。使用最新的 F# 4.0 版本时,自动生成的实现甚至比自定义实现快得多。
我的测量结果:
// F# 3.1, struct tuples
good keys
custom 0.951254 sec 0.000000 collisions
auto 2.737166 sec 0.776863 collisions
bad keys
custom 2.923103 sec 0.061869 collisions
auto 7.706678 sec 0.777040 collisions
// F# 3.1, standard tuples
good keys
custom 0.995701 sec 0.000000 collisions
auto 0.965949 sec 0.000000 collisions
bad keys
custom 3.091821 sec 0.061869 collisions
auto 2.924721 sec 0.061869 collisions
// F# 4.0, standard tuples
good keys
custom 1.018672 sec 0.000000 collisions
auto 0.619066 sec 0.000000 collisions
bad keys
custom 3.082988 sec 0.061869 collisions
auto 1.829720 sec 0.061869 collisions
已在 fsharp 问题跟踪器中提交了该问题,并已被确认为 bug。
不幸的是,我不能使用内置(built-in)元组,因为它是引用类型;我也不能在生产环境中使用 F# 4.0。问题不在于性能,而在于冲突。据我所知,F# 使用标准的 CLR 元组类,所以上一个示例并不能说明什么,因为我的自定义实现只是标准元组实现的一个副本。主要问题是冲突的增加,它在我们的生产代码中导致了非常严重的性能问题。从“自动”改为“自定义”后,服务器的加载时间从几个小时缩短到了几分钟。