Java 自定义Hashmap在插入几次后停止工作
我正在尝试为一个自定义 HashMap(链表数组)编写代码,它可以存储 5 亿个值(键是链表数组的下标),并且可以把索引保存到磁盘上。代码如下。(标签:java)这是我会采用的实现方式:
import java.io.File;
import java.io.IOException;
import java.nio.*;
import java.nio.channels.FileChannel;
import java.io.RandomAccessFile;
import java.util.Arrays;
/**
 * A file-backed multimap from {@code int} keys to {@code int} values.
 *
 * <p>The backing file is mapped as pairs of 16 MB buffers (one for key markers, one for values).
 * The low {@code topBits} bits of a key pick the buffer pair; the remaining high bits plus one
 * are stored as the slot marker, so a marker of {@code 0} always means "empty slot". Collisions
 * are resolved by open addressing with a fixed stride of three 4-byte slots, and the same key may
 * occupy several slots (one per value).
 *
 * <p>Keys for which {@code (key >> topBits) == -1} would produce the reserved marker {@code 0};
 * this is only guarded by an {@code assert}. Instances share a scratch array inside
 * {@link #get(int)}, so that method must not be called concurrently on one instance.
 */
class LongIntParallelHashMultimap implements AutoCloseable {
    /** log2 of the size in bytes of every mapped buffer. */
    public static final int BUFFER_BITS = 24;

    /** Smallest sizeBits that still yields one buffer pair (4 bytes per slot). */
    private static final int MIN_SIZE_BITS = BUFFER_BITS - 2;
    /** Largest sizeBits for which the buffer count still fits in a positive int. */
    private static final int MAX_SIZE_BITS = MIN_SIZE_BITS + 30;
    /** Probe stride in bytes: three int slots. Odd in slots, so it visits every slot of a buffer. */
    private static final int PROBE_STEP = 3 * 4;
    private static final int[] NO_VALUES = { };

    private final FileChannel fc;
    private final ByteBuffer[] keys, data;
    private final int topBits, topMask, offsetMask;
    /** Scratch space for {@link #get(int)}; caps the number of values that method can return. */
    private final int[] getValues = new int[1000];

    /**
     * Opens (creating if necessary) the backing file and maps all buffers.
     *
     * @param fileName path of the backing file; it is grown to {@code 8 << sizeBits} bytes
     * @param sizeBits log2 of the total number of slots; must be in [22, 52]
     * @param load     currently unused; every buffer is always pre-loaded
     * @throws IllegalArgumentException if {@code sizeBits} is out of range
     * @throws IOException              if the file cannot be opened or mapped
     */
    public LongIntParallelHashMultimap(String fileName, int sizeBits, boolean load) throws IOException {
        if (sizeBits < MIN_SIZE_BITS || sizeBits > MAX_SIZE_BITS) {
            throw new IllegalArgumentException("sizeBits must be between " + MIN_SIZE_BITS
                    + " and " + MAX_SIZE_BITS + " but was " + sizeBits);
        }
        long totalSize = 4L << sizeBits;
        int bufferCount = (int) (totalSize >> BUFFER_BITS);
        int bufferSize = 1 << BUFFER_BITS;

        fc = new RandomAccessFile(fileName, "rw").getChannel();
        keys = new ByteBuffer[bufferCount];
        data = new ByteBuffer[bufferCount];
        try {
            long offset = 0;
            for (int i = 0; i < bufferCount; i++) {
                MappedByteBuffer kmap = fc.map(FileChannel.MapMode.READ_WRITE, offset, bufferSize);
                kmap.load();
                keys[i] = kmap.order(ByteOrder.nativeOrder());
                MappedByteBuffer dmap = fc.map(FileChannel.MapMode.READ_WRITE, offset + bufferSize, bufferSize);
                dmap.load();
                data[i] = dmap.order(ByteOrder.nativeOrder());
                offset += 2L * bufferSize;
            }
        } catch (IOException | RuntimeException e) {
            // Do not leak the file handle when mapping fails half-way through.
            try {
                fc.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        topBits = sizeBits + 2 - BUFFER_BITS;
        topMask = (1 << topBits) - 1;
        offsetMask = bufferSize - 4;
    }

    /**
     * Adds one value for {@code key}. Existing values for the same key are kept.
     *
     * @throws IllegalStateException if the buffer selected by {@code key} has no free slot left
     */
    public void put(int key, int value) {
        int buffer = key & topMask;
        int key2 = (key >> topBits) + 1;
        assert key2 != 0;
        ByteBuffer keys2 = keys[buffer];
        ByteBuffer data2 = data[buffer];
        final int start = (key2 * 101) & offsetMask;
        int offset = start;
        while (keys2.getInt(offset) != 0) {
            offset = (offset + PROBE_STEP) & offsetMask;
            if (offset == start) {
                // The probe sequence covered every slot: without this check the loop never ends.
                throw new IllegalStateException("buffer " + buffer + " is full, cannot store key " + key);
            }
        }
        keys2.putInt(offset, key2);
        data2.putInt(offset, value);
    }

    /**
     * Copies the values stored for {@code key} into {@code values}, in probe order.
     *
     * @param values destination; at most {@code values.length} values are copied
     * @return the number of values written to {@code values}
     */
    public int get(int key, int[] values) {
        int buffer = key & topMask;
        int key2 = (key >> topBits) + 1;
        assert key2 != 0;
        ByteBuffer keys2 = keys[buffer];
        ByteBuffer data2 = data[buffer];
        final int start = (key2 * 101) & offsetMask;
        int offset = start;
        int count = 0;
        while (count < values.length) {
            int key3 = keys2.getInt(offset);
            if (key3 == 0) {
                return count;
            }
            if (key3 == key2) {
                values[count++] = data2.getInt(offset);
            }
            offset = (offset + PROBE_STEP) & offsetMask;
            if (offset == start) {
                // Completely full buffer: every slot has been inspected, stop instead of looping.
                return count;
            }
        }
        return count;
    }

    /**
     * Returns a fresh array with the values stored for {@code key} (at most 1000 of them), or an
     * empty array when there are none.
     */
    public int[] get(int key) {
        int len = get(key, getValues);
        return len == 0 ? NO_VALUES : Arrays.copyOf(getValues, len);
    }

    /**
     * Closes the backing file channel. Buffers that are already mapped stay valid until they are
     * garbage collected, as specified for {@code FileChannel.map}.
     */
    @Override
    public void close() throws IOException {
        fc.close();
    }

    /** Benchmark: fills a 2^30-slot map with 500 million entries and times put/get/GC. */
    public static void main(String... args) throws IOException {
        int keys = 500 * 1000 * 1000;
        new File("abc.bin").delete();
        long startTime = System.nanoTime();
        try (LongIntParallelHashMultimap lph = new LongIntParallelHashMultimap("abc.bin", 30, true)) {
            long time = System.nanoTime() - startTime;
            System.out.printf("Load time was %.3f sec%n", time / 1e9);
            timePut(keys, lph);
            timeGet(keys, lph);
            timeGet2(keys, lph);
        }
        startTime = System.nanoTime();
        System.gc();
        long time = System.nanoTime() - startTime;
        System.out.printf("Time to Full GC was %.3f sec%n", time / 1e9);
    }

    /** Times inserting {@code keys} consecutive keys, printing progress every 100 million. */
    private static void timePut(int keys, LongIntParallelHashMultimap lph) {
        long startTime = System.nanoTime();
        for (int i = 0; i < keys; i++) {
            lph.put(i, i + 100);
            if ((i + 1) % 100_000_000 == 0) {
                System.out.printf("%,d ", i + 1);
            }
        }
        long time = System.nanoTime() - startTime;
        System.out.printf("%nput time was %.3f sec%n", time / 1e9);
    }

    /** Times the allocation-free lookup {@link #get(int, int[])} over {@code keys} keys. */
    private static void timeGet(int keys, LongIntParallelHashMultimap lph) {
        long startTime = System.nanoTime();
        int[] values = new int[2];
        for (int i = 0; i < keys; i++) {
            lph.get(i, values);
            if ((i + 1) % 100_000_000 == 0) {
                System.out.printf("%,d ", i + 1);
            }
        }
        long time = System.nanoTime() - startTime;
        System.out.printf("%nget(key, values) time was %.3f sec%n", time / 1e9);
    }

    /** Times the allocating lookup {@link #get(int)} over {@code keys} keys. */
    private static void timeGet2(int keys, LongIntParallelHashMultimap lph) {
        long startTime = System.nanoTime();
        for (int i = 0; i < keys; i++) {
            lph.get(i);
            if ((i + 1) % 100_000_000 == 0) {
                System.out.printf("%,d ", i + 1);
            }
        }
        long time = System.nanoTime() - startTime;
        System.out.printf("%nget(key) time was %.3f sec%n", time / 1e9);
    }
}
注意:之所以发生了 GC,只是因为我调用了 System.gc(),目的是查看已用堆的大小。
我会使用 int[] 而不是由节点组成的链表。
import sun.nio.ch.DirectBuffer;
import java.io.IOException;
import java.util.*;
import java.nio.*;
import java.nio.channels.FileChannel;
import java.io.RandomAccessFile;
/**
 * An in-memory multimap from non-negative {@code int} keys to {@code int} values, stored as one
 * {@code int[]} per key, with a {@link #save()} method that dumps the content to {@code abc.bin}.
 *
 * <p>File layout written by {@link #save()} (all ints in native byte order):
 * <ol>
 *   <li>a presence bitmap of {@code ceil(capacity / 32)} ints, where key {@code 32 * w + j} is
 *       bit {@code 31 - j} (most significant bit first) of word {@code w};</li>
 *   <li>for every key that has values, in ascending key order: the value count followed by the
 *       values themselves.</li>
 * </ol>
 *
 * <p>Only public NIO APIs are used for writing, so the class does not depend on JDK-internal
 * buffer cleaners.
 */
class LongIntParallelHashMultimap {
    private static final int[] NO_INTS = {};
    /** Largest capacity requested by default; some VMs reject arrays of Integer.MAX_VALUE slots. */
    private static final int MAX_CAPACITY = Integer.MAX_VALUE - 8;
    private static final String SAVE_FILE = "abc.bin";
    private static final int WRITE_BUFFER_BYTES = 1 << 16;

    final int[][] data;

    /** Creates a map accepting keys in {@code [0, Integer.MAX_VALUE - 8)}; needs a very large heap. */
    public LongIntParallelHashMultimap() {
        this(MAX_CAPACITY);
    }

    /**
     * Creates a map accepting keys in {@code [0, capacity)}.
     *
     * @throws IllegalArgumentException if {@code capacity} is negative or above the supported maximum
     */
    public LongIntParallelHashMultimap(int capacity) {
        if (capacity < 0 || capacity > MAX_CAPACITY) {
            throw new IllegalArgumentException(
                    "capacity must be between 0 and " + MAX_CAPACITY + " but was " + capacity);
        }
        data = new int[capacity][];
    }

    /** Appends {@code value} to the values of {@code key}; the key's array is re-allocated each time. */
    public void put(int key, int value) {
        int[] ints = data[key];
        if (ints == null) {
            data[key] = new int[]{value};
        } else {
            int[] ints2 = Arrays.copyOf(ints, ints.length + 1);
            ints2[ints.length] = value;
            data[key] = ints2;
        }
    }

    /**
     * Returns the values stored for {@code key}, or an empty array if there are none. The returned
     * array is the internal one, not a copy.
     */
    public int[] get(int key) {
        int[] ints = data[key];
        return ints == null ? NO_INTS : ints;
    }

    /**
     * Writes the presence bitmap followed by all value lists to {@code abc.bin}, replacing any
     * previous content of that file.
     *
     * @throws IOException if the file cannot be opened or written
     */
    public void save() throws IOException {
        try (RandomAccessFile file = new RandomAccessFile(SAVE_FILE, "rw");
             FileChannel out = file.getChannel()) {
            // Drop stale content so a shorter dump is not followed by bytes of an older one.
            out.truncate(0);
            ByteBuffer buf = ByteBuffer.allocate(WRITE_BUFFER_BYTES).order(ByteOrder.nativeOrder());

            // long arithmetic: capacity + 31 may exceed Integer.MAX_VALUE.
            int words = (int) ((data.length + 31L) >>> 5);
            for (int w = 0; w < words; w++) {
                int base = w << 5;
                int limit = Math.min(32, data.length - base);
                int bits = 0;
                for (int j = 0; j < limit; j++) {
                    if (hasValues(base + j)) {
                        bits |= 1 << (31 - j);
                    }
                }
                writeInt(out, buf, bits);
            }

            for (int key = 0; key < data.length; key++) {
                if (!hasValues(key)) {
                    continue;
                }
                int[] values = data[key];
                writeInt(out, buf, values.length);
                for (int value : values) {
                    writeInt(out, buf, value);
                }
            }
            flush(out, buf);
        }
    }

    /** Tells whether {@code key} gets a bitmap bit and a value record in the dump. */
    private boolean hasValues(int key) {
        int[] ints = data[key];
        return ints != null && ints.length != 0;
    }

    /** Buffers one int, draining the buffer to the channel first when it is full. */
    private static void writeInt(FileChannel out, ByteBuffer buf, int value) throws IOException {
        if (buf.remaining() < 4) {
            flush(out, buf);
        }
        buf.putInt(value);
    }

    /** Writes everything buffered so far to the channel and resets the buffer. */
    private static void flush(FileChannel out, ByteBuffer buf) throws IOException {
        buf.flip();
        while (buf.hasRemaining()) {
            out.write(buf);
        }
        buf.clear();
    }

    /** Benchmark: inserts 50 million entries, saves them and reads them back. */
    public static void main(String... args) throws IOException {
        int keys = 50 * 1000 * 1000;
        long startTime = System.nanoTime();
        LongIntParallelHashMultimap lph = new LongIntParallelHashMultimap();
        long time = System.nanoTime() - startTime;
        System.out.printf("Create time %.3f sec%n", time / 1e9);
        startTime = System.nanoTime();
        for (int i = 0; i < keys; i++) {
            lph.put(i, i + 100);
            if (i % 10000000 == 0 && i != 0) {
                System.out.print(" " + i + " ");
            }
        }
        time = System.nanoTime() - startTime;
        System.out.printf("%nput time was %.3f sec%n", time / 1e9);
        startTime = System.nanoTime();
        lph.save();
        time = System.nanoTime() - startTime;
        System.out.printf(" time to save was %.3f sec%n", time / 1e9);
        startTime = System.nanoTime();
        for (int i = 0; i < keys; i++) {
            lph.get(i);
        }
        time = System.nanoTime() - startTime;
        System.out.printf("get time was %.3f sec%n", time / 1e9);
    }
}
导入sun.nio.ch.DirectBuffer;
导入java.io.IOException;
导入java.util.*;
导入java.nio.*;
导入java.nio.channels.FileChannel;
导入java.io.RandomAccessFile;
类LongIntParallelHashMultimap{
私有静态final int[]NO_int={};
最终int[][]数据;
公共LongIntParallelHashMultimap(){
数据=新整数[Integer.MAX_VALUE][];
}
公共void put(int键,int值){
int[]int=data[key];
if(ints==null){
数据[键]=新的int[]{value};
}否则{
int[]ints2=Arrays.copyOf(ints,ints.length+1);
ints2[ints.length]=值;
数据[键]=ints2;
}
}
公共int[]获取(int键){
int[]int=data[key];
返回整数==null?无整数:整数;
}
专用文件通道;
私人地图管理局;
public void save()引发IOException{
通道=新的随机访问文件(“abc.bin”、“rw”).getChannel();
mbb=channel.map(FileChannel.MapMode.READ_WRITE,0,1
评论:
“停止工作”是什么意思?您是否收到了某种错误消息?
您把 2.1 亿称为“少数”吗?
@ThomasJungblut,不,它只是卡住了。也就是说,前 2.1 亿个值在 3 秒钟内就插入完毕;但在那之后,我等了 2 分钟,代码仍然阻塞着。
如果您检查 l[key] != null,就不需要位集。与其使用节点链表,不如使用单个 int[],可能更高效。
代码真的很好。不过有一点:我认为如果用位集来标记哪些 int[] 已被填充,可以节省更多磁盘空间(也能减少磁盘 IO)。因为在 save 方法中,必须保存(值的数量 × 4 字节)+(4 字节 × Integer.MAX_VALUE)。你怎么看?
你可以先把根据数组得到的位集写出去。
非常感谢,这正是我要找的。但是,在加载时如何反向还原位集(我指的是其中的各个位)?
把这些 int 值读入一个数组,并用它来确定之后要读取哪些值。不需要保留位集。
我想问的是“如何”做到:for (int j = 0; j < 32; j++) { if (data[i + j] != null) bits |= 1; bits …(原文在此处被截断)
import sun.nio.ch.DirectBuffer;
import java.io.IOException;
import java.util.*;
import java.nio.*;
import java.nio.channels.FileChannel;
import java.io.RandomAccessFile;
/**
 * An in-memory multimap from non-negative {@code int} keys to {@code int} values, stored as one
 * {@code int[]} per key, with a {@link #save()} method that dumps the content to {@code abc.bin}.
 *
 * <p>File layout written by {@link #save()} (all ints in native byte order):
 * <ol>
 *   <li>a presence bitmap of {@code ceil(capacity / 32)} ints, where key {@code 32 * w + j} is
 *       bit {@code 31 - j} (most significant bit first) of word {@code w};</li>
 *   <li>for every key that has values, in ascending key order: the value count followed by the
 *       values themselves.</li>
 * </ol>
 *
 * <p>Only public NIO APIs are used for writing, so the class does not depend on JDK-internal
 * buffer cleaners.
 */
class LongIntParallelHashMultimap {
    private static final int[] NO_INTS = {};
    /** Largest capacity requested by default; some VMs reject arrays of Integer.MAX_VALUE slots. */
    private static final int MAX_CAPACITY = Integer.MAX_VALUE - 8;
    private static final String SAVE_FILE = "abc.bin";
    private static final int WRITE_BUFFER_BYTES = 1 << 16;

    final int[][] data;

    /** Creates a map accepting keys in {@code [0, Integer.MAX_VALUE - 8)}; needs a very large heap. */
    public LongIntParallelHashMultimap() {
        this(MAX_CAPACITY);
    }

    /**
     * Creates a map accepting keys in {@code [0, capacity)}.
     *
     * @throws IllegalArgumentException if {@code capacity} is negative or above the supported maximum
     */
    public LongIntParallelHashMultimap(int capacity) {
        if (capacity < 0 || capacity > MAX_CAPACITY) {
            throw new IllegalArgumentException(
                    "capacity must be between 0 and " + MAX_CAPACITY + " but was " + capacity);
        }
        data = new int[capacity][];
    }

    /** Appends {@code value} to the values of {@code key}; the key's array is re-allocated each time. */
    public void put(int key, int value) {
        int[] ints = data[key];
        if (ints == null) {
            data[key] = new int[]{value};
        } else {
            int[] ints2 = Arrays.copyOf(ints, ints.length + 1);
            ints2[ints.length] = value;
            data[key] = ints2;
        }
    }

    /**
     * Returns the values stored for {@code key}, or an empty array if there are none. The returned
     * array is the internal one, not a copy.
     */
    public int[] get(int key) {
        int[] ints = data[key];
        return ints == null ? NO_INTS : ints;
    }

    /**
     * Writes the presence bitmap followed by all value lists to {@code abc.bin}, replacing any
     * previous content of that file.
     *
     * @throws IOException if the file cannot be opened or written
     */
    public void save() throws IOException {
        try (RandomAccessFile file = new RandomAccessFile(SAVE_FILE, "rw");
             FileChannel out = file.getChannel()) {
            // Drop stale content so a shorter dump is not followed by bytes of an older one.
            out.truncate(0);
            ByteBuffer buf = ByteBuffer.allocate(WRITE_BUFFER_BYTES).order(ByteOrder.nativeOrder());

            // long arithmetic: capacity + 31 may exceed Integer.MAX_VALUE.
            int words = (int) ((data.length + 31L) >>> 5);
            for (int w = 0; w < words; w++) {
                int base = w << 5;
                int limit = Math.min(32, data.length - base);
                int bits = 0;
                for (int j = 0; j < limit; j++) {
                    if (hasValues(base + j)) {
                        bits |= 1 << (31 - j);
                    }
                }
                writeInt(out, buf, bits);
            }

            for (int key = 0; key < data.length; key++) {
                if (!hasValues(key)) {
                    continue;
                }
                int[] values = data[key];
                writeInt(out, buf, values.length);
                for (int value : values) {
                    writeInt(out, buf, value);
                }
            }
            flush(out, buf);
        }
    }

    /** Tells whether {@code key} gets a bitmap bit and a value record in the dump. */
    private boolean hasValues(int key) {
        int[] ints = data[key];
        return ints != null && ints.length != 0;
    }

    /** Buffers one int, draining the buffer to the channel first when it is full. */
    private static void writeInt(FileChannel out, ByteBuffer buf, int value) throws IOException {
        if (buf.remaining() < 4) {
            flush(out, buf);
        }
        buf.putInt(value);
    }

    /** Writes everything buffered so far to the channel and resets the buffer. */
    private static void flush(FileChannel out, ByteBuffer buf) throws IOException {
        buf.flip();
        while (buf.hasRemaining()) {
            out.write(buf);
        }
        buf.clear();
    }

    /** Benchmark: inserts 50 million entries, saves them and reads them back. */
    public static void main(String... args) throws IOException {
        int keys = 50 * 1000 * 1000;
        long startTime = System.nanoTime();
        LongIntParallelHashMultimap lph = new LongIntParallelHashMultimap();
        long time = System.nanoTime() - startTime;
        System.out.printf("Create time %.3f sec%n", time / 1e9);
        startTime = System.nanoTime();
        for (int i = 0; i < keys; i++) {
            lph.put(i, i + 100);
            if (i % 10000000 == 0 && i != 0) {
                System.out.print(" " + i + " ");
            }
        }
        time = System.nanoTime() - startTime;
        System.out.printf("%nput time was %.3f sec%n", time / 1e9);
        startTime = System.nanoTime();
        lph.save();
        time = System.nanoTime() - startTime;
        System.out.printf(" time to save was %.3f sec%n", time / 1e9);
        startTime = System.nanoTime();
        for (int i = 0; i < keys; i++) {
            lph.get(i);
        }
        time = System.nanoTime() - startTime;
        System.out.printf("get time was %.3f sec%n", time / 1e9);
    }
}