Java 固定大小HashMap的最佳容量和负载系数是多少?
我试图找出一个特定情况下的最佳容量和负载系数。我想我已经了解了要点,但我还是要感谢比我更了解情况的人的确认 如果我知道我的HashMap将填充到包含(比如)100个对象,并且大部分时间都有100个对象,那么我猜最佳值是初始容量100和负载系数1?或者我需要容量101,或者还有其他问题吗 编辑:好的,我留出几个小时做了一些测试。结果如下:Java 固定大小HashMap的最佳容量和负载系数是多少?,java,hashmap,Java,Hashmap,我试图找出一个特定情况下的最佳容量和负载系数。我想我已经了解了要点,但我还是要感谢比我更了解情况的人的确认 如果我知道我的HashMap将填充到包含(比如)100个对象,并且大部分时间都有100个对象,那么我猜最佳值是初始容量100和负载系数1?或者我需要容量101,或者还有其他问题吗 编辑:好的,我留出几个小时做了一些测试。结果如下: 奇怪的是,容量、容量+1、容量+2、容量-1甚至容量-10都产生完全相同的结果。我预计至少1号和10号容量会产生更糟糕的结果 使用初始容量(而不是使用默认值1
- 奇怪的是,容量、容量+1、容量+2、容量-1甚至容量-10都产生完全相同的结果。我预计至少1号和10号容量会产生更糟糕的结果
- 使用初始容量(而不是使用默认值16)可以显著提高put()性能,最多可以提高30%
- 对于少量对象,使用负载因子1可以获得相同的性能,对于大量对象(>100000)可以获得更好的性能。然而,这并不能与物体的数量成比例地提高;我怀疑还有其他因素会影响结果
- get()的性能对于不同数量的对象/容量有点不同,但尽管它可能会因情况的不同而略有不同,但通常不受初始容量或负载因子的影响
import java.util.HashMap;
import java.util.Map;
public class HashMapTest {
// capacity - numbers high as 10000000 require -mx1536m -ms1536m JVM parameters
public static final int CAPACITY = 10000000;
public static final int ITERATIONS = 10000;
// set to false to print put performance, or to true to print get performance
boolean doIterations = false;
private Map<Integer, String> cache;
public void fillCache(int capacity) {
long t = System.currentTimeMillis();
for (int i = 0; i <= capacity; i++)
cache.put(i, "Value number " + i);
if (!doIterations) {
System.out.print(System.currentTimeMillis() - t);
System.out.print("\t");
}
}
public void iterate(int capacity) {
long t = System.currentTimeMillis();
for (int i = 0; i <= ITERATIONS; i++) {
long x = Math.round(Math.random() * capacity);
String result = cache.get((int) x);
}
if (doIterations) {
System.out.print(System.currentTimeMillis() - t);
System.out.print("\t");
}
}
public void test(float loadFactor, int divider) {
for (int i = 10000; i <= CAPACITY; i+= 10000) {
cache = new HashMap<Integer, String>(i, loadFactor);
fillCache(i / divider);
if (doIterations)
iterate(i / divider);
}
System.out.println();
}
public static void main(String[] args) {
HashMapTest test = new HashMapTest();
// fill to capacity
test.test(0.75f, 1);
test.test(1, 1);
test.test(1.25f, 1);
// fill to half capacity
test.test(0.75f, 2);
test.test(1, 2);
test.test(1.25f, 2);
}
}
import java.util.HashMap;
导入java.util.Map;
公共类HashMapTest{
//容量-高达10000000的数字需要-mx1536m-ms1536m JVM参数
公共静态最终整数容量=10000000;
公共静态最终整数迭代=10000;
//设置为false以打印put性能,或设置为true以打印get性能
布尔doIterations=false;
私有地图缓存;
公共空缓存(整数容量){
long t=System.currentTimeMillis();
对于(inti=0;i只需使用101
。我不确定是否需要它,但可能不值得费心去确定
…只需添加1
编辑:为我的答案提供一些理由
首先,我假设您的HashMap
不会超过100
;如果超过100,您应该保持负载因子不变。同样,如果您关心的是性能,请保持负载因子不变。如果您关心的是内存,您可以通过设置静态大小来节省一些内存。如果您大量填充,这可能值得一做内存中的内容;例如,正在存储许多映射,或者正在创建堆空间大小的映射
其次,我选择值101
,因为它提供了更好的可读性……如果我在查看您的代码后发现您已将初始容量设置为100
,并且正在加载100
元素,我将不得不通读Javadoc,以确保它在达到精度时不会调整大小y100
。当然,我在那里找不到答案,所以我必须查看源代码。这不值得…只需将其留在101
中,每个人都很高兴,没有人查看java.util.HashMap
的源代码
第三,将HashMap
设置为负载因子为1
时所期望的准确容量的说法是不正确的,即使它是粗体的
…如果你有<代码> N< /代码>桶,你随机地把<代码> N< /代码>条目放入<代码> N< /代码>桶中,YEP,你将在同一个桶中结束项目,当然……但这不是世界末日…实际上,它只是一对夫妇的比较而已。事实上,当你考虑ALT时,几乎没有什么不同。ernative正在将
n
项分配到n/0.75
存储桶中
不用相信我的话
快速测试代码:
static Random r = new Random();
public static void main(String[] args){
int[] tests = {100, 1000, 10000};
int runs = 5000;
float lf_sta = 1f;
float lf_dyn = 0.75f;
for(int t:tests){
System.err.println("=======Test Put "+t+"");
HashMap<Integer,Integer> map = new HashMap<Integer,Integer>();
long norm_put = testInserts(map, t, runs);
System.err.print("Norm put:"+norm_put+" ms. ");
int cap_sta = t;
map = new HashMap<Integer,Integer>(cap_sta, lf_sta);
long sta_put = testInserts(map, t, runs);
System.err.print("Static put:"+sta_put+" ms. ");
int cap_dyn = (int)Math.ceil((float)t/lf_dyn);
map = new HashMap<Integer,Integer>(cap_dyn, lf_dyn);
long dyn_put = testInserts(map, t, runs);
System.err.println("Dynamic put:"+dyn_put+" ms. ");
}
for(int t:tests){
System.err.println("=======Test Get (hits) "+t+"");
HashMap<Integer,Integer> map = new HashMap<Integer,Integer>();
fill(map, t);
long norm_get_hits = testGetHits(map, t, runs);
System.err.print("Norm get (hits):"+norm_get_hits+" ms. ");
int cap_sta = t;
map = new HashMap<Integer,Integer>(cap_sta, lf_sta);
fill(map, t);
long sta_get_hits = testGetHits(map, t, runs);
System.err.print("Static get (hits):"+sta_get_hits+" ms. ");
int cap_dyn = (int)Math.ceil((float)t/lf_dyn);
map = new HashMap<Integer,Integer>(cap_dyn, lf_dyn);
fill(map, t);
long dyn_get_hits = testGetHits(map, t, runs);
System.err.println("Dynamic get (hits):"+dyn_get_hits+" ms. ");
}
for(int t:tests){
System.err.println("=======Test Get (Rand) "+t+"");
HashMap<Integer,Integer> map = new HashMap<Integer,Integer>();
fill(map, t);
long norm_get_rand = testGetRand(map, t, runs);
System.err.print("Norm get (rand):"+norm_get_rand+" ms. ");
int cap_sta = t;
map = new HashMap<Integer,Integer>(cap_sta, lf_sta);
fill(map, t);
long sta_get_rand = testGetRand(map, t, runs);
System.err.print("Static get (rand):"+sta_get_rand+" ms. ");
int cap_dyn = (int)Math.ceil((float)t/lf_dyn);
map = new HashMap<Integer,Integer>(cap_dyn, lf_dyn);
fill(map, t);
long dyn_get_rand = testGetRand(map, t, runs);
System.err.println("Dynamic get (rand):"+dyn_get_rand+" ms. ");
}
}
public static long testInserts(HashMap<Integer,Integer> map, int test, int runs){
long b4 = System.currentTimeMillis();
for(int i=0; i<runs; i++){
fill(map, test);
map.clear();
}
return System.currentTimeMillis()-b4;
}
public static void fill(HashMap<Integer,Integer> map, int test){
for(int j=0; j<test; j++){
if(map.put(r.nextInt(), j)!=null){
j--;
}
}
}
public static long testGetHits(HashMap<Integer,Integer> map, int test, int runs){
long b4 = System.currentTimeMillis();
ArrayList<Integer> keys = new ArrayList<Integer>();
keys.addAll(map.keySet());
for(int i=0; i<runs; i++){
for(int j=0; j<test; j++){
keys.get(r.nextInt(keys.size()));
}
}
return System.currentTimeMillis()-b4;
}
public static long testGetRand(HashMap<Integer,Integer> map, int test, int runs){
long b4 = System.currentTimeMillis();
for(int i=0; i<runs; i++){
for(int j=0; j<test; j++){
map.get(r.nextInt());
}
}
return System.currentTimeMillis()-b4;
}
=======Test Put 100
Norm put:78 ms. Static put:78 ms. Dynamic put:62 ms.
=======Test Put 1000
Norm put:764 ms. Static put:763 ms. Dynamic put:748 ms.
=======Test Put 10000
Norm put:12921 ms. Static put:12889 ms. Dynamic put:12873 ms.
=======Test Get (hits) 100
Norm get (hits):47 ms. Static get (hits):31 ms. Dynamic get (hits):32 ms.
=======Test Get (hits) 1000
Norm get (hits):327 ms. Static get (hits):328 ms. Dynamic get (hits):343 ms.
=======Test Get (hits) 10000
Norm get (hits):3304 ms. Static get (hits):3366 ms. Dynamic get (hits):3413 ms.
=======Test Get (Rand) 100
Norm get (rand):63 ms. Static get (rand):46 ms. Dynamic get (rand):47 ms.
=======Test Get (Rand) 1000
Norm get (rand):483 ms. Static get (rand):499 ms. Dynamic get (rand):483 ms.
=======Test Get (Rand) 10000
Norm get (rand):5190 ms. Static get (rand):5362 ms. Dynamic get (rand):5236 ms.
关于:↑ — 关于这件事有很多建议→||← 不同设置之间的差异很大
关于我的原始答案(第一条水平线上方的那一位),这是故意装腔作势的,因为在大多数情况下,HashMap
JavaDoc中的
作为一般规则,默认加载因子(.75)在时间和空间成本之间提供了一个很好的折衷。较高的值会减少空间开销,但会增加查找成本(反映在HashMap类的大多数操作中,包括get和put)。在设置其输入时,应考虑地图中的预期条目数及其负载系数
package hashmaptest;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
public class HashMapTest {
private static final List<Result> results = new ArrayList<Result>();
public static void main(String[] args) throws IOException {
//First entry of each array is the sample collection size, subsequent entries
//are the hash limits
final int[][] sampleSizesAndHashLimits = new int[][] {
{100, 50, 90, 100},
{1000, 500, 900, 990, 1000},
{100000, 10000, 90000, 99000, 100000}
};
final double[] initialCapacityFactors = new double[] {0.5, 0.75, 1.0, 1.25, 1.5, 2.0};
final float[] loadFactors = new float[] {0.5f, 0.75f, 1.0f, 1.25f};
//Doing a warmup run to eliminate JIT influence
for(int[] sizeAndLimits : sampleSizesAndHashLimits) {
int size = sizeAndLimits[0];
for(int i = 1; i < sizeAndLimits.length; ++i) {
int limit = sizeAndLimits[i];
for(double initCapacityFactor : initialCapacityFactors) {
for(float loadFactor : loadFactors) {
runTest(limit, size, initCapacityFactor, loadFactor);
}
}
}
}
results.clear();
//Now for the real thing...
for(int[] sizeAndLimits : sampleSizesAndHashLimits) {
int size = sizeAndLimits[0];
for(int i = 1; i < sizeAndLimits.length; ++i) {
int limit = sizeAndLimits[i];
for(double initCapacityFactor : initialCapacityFactors) {
for(float loadFactor : loadFactors) {
runTest(limit, size, initCapacityFactor, loadFactor);
}
}
}
}
Collections.sort(results);
for(final Result result : results) {
result.printSummary();
}
// ResultVisualizer.visualizeResults(results);
}
private static void runTest(final int hashLimit, final int sampleSize,
final double initCapacityFactor, final float loadFactor) {
final int initialCapacity = (int)(sampleSize * initCapacityFactor);
System.out.println("Running test for a sample collection of size " + sampleSize
+ ", an initial capacity of " + initialCapacity + ", a load factor of "
+ loadFactor + " and keys with a hash code limited to " + hashLimit);
System.out.println("====================");
double hashOverload = (((double)sampleSize/hashLimit) - 1.0) * 100.0;
System.out.println("Hash code overload: " + hashOverload + "%");
//Generating our sample key collection.
final List<Key> keys = generateSamples(hashLimit, sampleSize);
//Generating our value collection
final List<Object> values = generateValues(sampleSize);
final HashMap<Key, Object> map = new HashMap<Key, Object>(initialCapacity, loadFactor);
final long startPut = System.nanoTime();
for(int i = 0; i < sampleSize; ++i) {
map.put(keys.get(i), values.get(i));
}
final long endPut = System.nanoTime();
final long putTime = endPut - startPut;
final long averagePutTime = putTime/(sampleSize/10);
System.out.println("Time to map all keys to their values: " + putTime + " ns");
System.out.println("Average put time per 10 entries: " + averagePutTime + " ns");
final long startGet = System.nanoTime();
for(int i = 0; i < sampleSize; ++i) {
map.get(keys.get(i));
}
final long endGet = System.nanoTime();
final long getTime = endGet - startGet;
final long averageGetTime = getTime/(sampleSize/10);
System.out.println("Time to get the value for every key: " + getTime + " ns");
System.out.println("Average get time per 10 entries: " + averageGetTime + " ns");
System.out.println("");
final Result result =
new Result(sampleSize, initialCapacity, loadFactor, hashOverload, averagePutTime, averageGetTime, hashLimit);
results.add(result);
//Haha, what kind of noob explicitly calls for garbage collection?
System.gc();
try {
Thread.sleep(200);
} catch(final InterruptedException e) {}
}
private static List<Key> generateSamples(final int hashLimit, final int sampleSize) {
final ArrayList<Key> result = new ArrayList<Key>(sampleSize);
for(int i = 0; i < sampleSize; ++i) {
result.add(new Key(i, hashLimit));
}
return result;
}
private static List<Object> generateValues(final int sampleSize) {
final ArrayList<Object> result = new ArrayList<Object>(sampleSize);
for(int i = 0; i < sampleSize; ++i) {
result.add(new Object());
}
return result;
}
private static class Key {
private final int hashCode;
private final int id;
Key(final int id, final int hashLimit) {
//Equals implies same hashCode if limit is the same
//Same hashCode doesn't necessarily implies equals
this.id = id;
this.hashCode = id % hashLimit;
}
@Override
public int hashCode() {
return hashCode;
}
@Override
public boolean equals(final Object o) {
return ((Key)o).id == this.id;
}
}
static class Result implements Comparable<Result> {
final int sampleSize;
final int initialCapacity;
final float loadFactor;
final double hashOverloadPercentage;
final long averagePutTime;
final long averageGetTime;
final int hashLimit;
Result(final int sampleSize, final int initialCapacity, final float loadFactor,
final double hashOverloadPercentage, final long averagePutTime,
final long averageGetTime, final int hashLimit) {
this.sampleSize = sampleSize;
this.initialCapacity = initialCapacity;
this.loadFactor = loadFactor;
this.hashOverloadPercentage = hashOverloadPercentage;
this.averagePutTime = averagePutTime;
this.averageGetTime = averageGetTime;
this.hashLimit = hashLimit;
}
@Override
public int compareTo(final Result o) {
final long putDiff = o.averagePutTime - this.averagePutTime;
final long getDiff = o.averageGetTime - this.averageGetTime;
return (int)(putDiff + getDiff);
}
void printSummary() {
System.out.println("" + averagePutTime + " ns per 10 puts, "
+ averageGetTime + " ns per 10 gets, for a load factor of "
+ loadFactor + ", initial capacity of " + initialCapacity
+ " for " + sampleSize + " mappings and " + hashOverloadPercentage
+ "% hash code overload.");
}
}
}
package hashmaptest;
import hashmaptest.HashMapTest.Result;
import java.awt.Color;
import java.awt.Graphics2D;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.imageio.ImageIO;
public class ResultVisualizer {
private static final Map<Integer, Map<Integer, Set<Result>>> sampleSizeToHashLimit =
new HashMap<Integer, Map<Integer, Set<Result>>>();
private static final DecimalFormat df = new DecimalFormat("0.00");
static void visualizeResults(final List<Result> results) throws IOException {
final File tempFolder = new File("C:\\temp");
final File baseFolder = makeFolder(tempFolder, "hashmap_tests");
long bestPutTime = -1L;
long worstPutTime = 0L;
long bestGetTime = -1L;
long worstGetTime = 0L;
for(final Result result : results) {
final Integer sampleSize = result.sampleSize;
final Integer hashLimit = result.hashLimit;
final long putTime = result.averagePutTime;
final long getTime = result.averageGetTime;
if(bestPutTime == -1L || putTime < bestPutTime)
bestPutTime = putTime;
if(bestGetTime <= -1.0f || getTime < bestGetTime)
bestGetTime = getTime;
if(putTime > worstPutTime)
worstPutTime = putTime;
if(getTime > worstGetTime)
worstGetTime = getTime;
Map<Integer, Set<Result>> hashLimitToResults =
sampleSizeToHashLimit.get(sampleSize);
if(hashLimitToResults == null) {
hashLimitToResults = new HashMap<Integer, Set<Result>>();
sampleSizeToHashLimit.put(sampleSize, hashLimitToResults);
}
Set<Result> resultSet = hashLimitToResults.get(hashLimit);
if(resultSet == null) {
resultSet = new HashSet<Result>();
hashLimitToResults.put(hashLimit, resultSet);
}
resultSet.add(result);
}
System.out.println("Best average put time: " + bestPutTime + " ns");
System.out.println("Best average get time: " + bestGetTime + " ns");
System.out.println("Worst average put time: " + worstPutTime + " ns");
System.out.println("Worst average get time: " + worstGetTime + " ns");
for(final Integer sampleSize : sampleSizeToHashLimit.keySet()) {
final File sizeFolder = makeFolder(baseFolder, "sample_size_" + sampleSize);
final Map<Integer, Set<Result>> hashLimitToResults =
sampleSizeToHashLimit.get(sampleSize);
for(final Integer hashLimit : hashLimitToResults.keySet()) {
final File limitFolder = makeFolder(sizeFolder, "hash_limit_" + hashLimit);
final Set<Result> resultSet = hashLimitToResults.get(hashLimit);
final Set<Float> loadFactorSet = new HashSet<Float>();
final Set<Integer> initialCapacitySet = new HashSet<Integer>();
for(final Result result : resultSet) {
loadFactorSet.add(result.loadFactor);
initialCapacitySet.add(result.initialCapacity);
}
final List<Float> loadFactors = new ArrayList<Float>(loadFactorSet);
final List<Integer> initialCapacities = new ArrayList<Integer>(initialCapacitySet);
Collections.sort(loadFactors);
Collections.sort(initialCapacities);
final BufferedImage putImage =
renderMap(resultSet, loadFactors, initialCapacities, worstPutTime, bestPutTime, false);
final BufferedImage getImage =
renderMap(resultSet, loadFactors, initialCapacities, worstGetTime, bestGetTime, true);
final String putFileName = "size_" + sampleSize + "_hlimit_" + hashLimit + "_puts.png";
final String getFileName = "size_" + sampleSize + "_hlimit_" + hashLimit + "_gets.png";
writeImage(putImage, limitFolder, putFileName);
writeImage(getImage, limitFolder, getFileName);
}
}
}
private static File makeFolder(final File parent, final String folder) throws IOException {
final File child = new File(parent, folder);
if(!child.exists())
child.mkdir();
return child;
}
private static BufferedImage renderMap(final Set<Result> results, final List<Float> loadFactors,
final List<Integer> initialCapacities, final float worst, final float best,
final boolean get) {
//[x][y] => x is mapped to initial capacity, y is mapped to load factor
final Color[][] map = new Color[initialCapacities.size()][loadFactors.size()];
for(final Result result : results) {
final int x = initialCapacities.indexOf(result.initialCapacity);
final int y = loadFactors.indexOf(result.loadFactor);
final float time = get ? result.averageGetTime : result.averagePutTime;
final float score = (time - best)/(worst - best);
final Color c = new Color(score, 1.0f - score, 0.0f);
map[x][y] = c;
}
final int imageWidth = initialCapacities.size() * 40 + 50;
final int imageHeight = loadFactors.size() * 40 + 50;
final BufferedImage image =
new BufferedImage(imageWidth, imageHeight, BufferedImage.TYPE_3BYTE_BGR);
final Graphics2D g = image.createGraphics();
g.setColor(Color.WHITE);
g.fillRect(0, 0, imageWidth, imageHeight);
for(int x = 0; x < map.length; ++x) {
for(int y = 0; y < map[x].length; ++y) {
g.setColor(map[x][y]);
g.fillRect(50 + x*40, imageHeight - 50 - (y+1)*40, 40, 40);
g.setColor(Color.BLACK);
g.drawLine(25, imageHeight - 50 - (y+1)*40, 50, imageHeight - 50 - (y+1)*40);
final Float loadFactor = loadFactors.get(y);
g.drawString(df.format(loadFactor), 10, imageHeight - 65 - (y)*40);
}
g.setColor(Color.BLACK);
g.drawLine(50 + (x+1)*40, imageHeight - 50, 50 + (x+1)*40, imageHeight - 15);
final int initialCapacity = initialCapacities.get(x);
g.drawString(((initialCapacity%1000 == 0) ? "" + (initialCapacity/1000) + "K" : "" + initialCapacity), 15 + (x+1)*40, imageHeight - 25);
}
g.drawLine(25, imageHeight - 50, imageWidth, imageHeight - 50);
g.drawLine(50, 0, 50, imageHeight - 25);
g.dispose();
return image;
}
private static void writeImage(final BufferedImage image, final File folder,
final String filename) throws IOException {
final File imageFile = new File(folder, filename);
ImageIO.write(image, "png", imageFile);
}
}
/**
* Constructs an empty <tt>HashMap</tt> with the specified initial
* capacity and load factor.
*
* @param initialCapacity the initial capacity
* @param loadFactor the load factor
* @throws IllegalArgumentException if the initial capacity is negative
* or the load factor is nonpositive
*/
public HashMap(int initialCapacity, float loadFactor) {
if (initialCapacity < 0)
throw new IllegalArgumentException("Illegal initial capacity: " +
initialCapacity);
if (initialCapacity > MAXIMUM_CAPACITY)
initialCapacity = MAXIMUM_CAPACITY;
if (loadFactor <= 0 || Float.isNaN(loadFactor))
throw new IllegalArgumentException("Illegal load factor: " +
loadFactor);
// Find a power of 2 >= initialCapacity
int capacity = 1;
while (capacity < initialCapacity)
capacity <<= 1;
this.loadFactor = loadFactor;
threshold = (int)(capacity * loadFactor);
table = new Entry[capacity];
init();
}
Maps.newHashMapWithExpectedSize(expectedSize)
capacity = expectedSize / 0.75F + 1.0F