Java 自定义Hashmap在插入几次后停止工作

Java 自定义Hashmap在插入几次后停止工作,java,Java,我正在尝试为一个自定义hashmap(链表数组)编写一个代码,它可以存储5亿个值(键是链表数组号),并可以将索引保存到磁盘上。代码如下:这是我将如何实现它的 import java.io.File; import java.io.IOException; import java.nio.*; import java.nio.channels.FileChannel; import java.io.RandomAccessFile; import java.util.Arrays; class

我正在尝试为一个自定义hashmap(链表数组)编写一个代码,它可以存储5亿个值(键是链表数组号),并可以将索引保存到磁盘上。代码如下:

下面是我会采用的实现方式:

import java.io.File;
import java.io.IOException;
import java.nio.*;
import java.nio.channels.FileChannel;
import java.io.RandomAccessFile;
import java.util.Arrays;

/**
 * An {@code int -> int} multimap stored in memory-mapped regions of a single file.
 *
 * <p>The table is split into {@code 2^topBits} independent buffers. The low {@code topBits} bits
 * of a key select the buffer; the remaining high bits plus one form the stored key, so that a
 * stored key of {@code 0} can mark an empty slot. Every buffer consists of a key region and a
 * data region of {@code 2^BUFFER_BITS} bytes each, and a value is kept at the same byte offset
 * in the data region as its key in the key region. Collisions are resolved by open addressing
 * with a fixed probe step.
 *
 * <p>Entries cannot be removed. Instances are not safe for concurrent use: {@link #get(int)}
 * reuses a single scratch array and nothing is synchronized.
 */
class LongIntParallelHashMultimap implements AutoCloseable {
    /** log2 of the size in bytes of every mapped key or data region. */
    public static final int BUFFER_BITS = 24;

    /** Smallest accepted {@code sizeBits}; it yields exactly one key/data region pair. */
    private static final int MIN_SIZE_BITS = BUFFER_BITS - 2;

    /** Largest accepted {@code sizeBits}; it keeps the region count a positive {@code int}. */
    private static final int MAX_SIZE_BITS = MIN_SIZE_BITS + 30;

    /** Distance in bytes between two successive probes (three 4-byte slots). */
    private static final int PROBE_STEP = 3 * 4;

    private static final int[] NO_VALUES = { };

    private final FileChannel fc;
    private final ByteBuffer[] keys, data;
    private final int topBits, topMask, offsetMask;

    /** Number of 4-byte slots in one region; an upper bound for any probe sequence. */
    private final int slotsPerBuffer;

    /** Scratch array shared by every call to {@link #get(int)}. */
    private final int[] getValues = new int[1000];

    /**
     * Opens (creating it if necessary) the backing file and maps all key and data regions.
     *
     * @param fileName path of the backing file, opened in {@code "rw"} mode
     * @param sizeBits log2 of the total number of slots; must lie between
     *                 {@code BUFFER_BITS - 2} and {@code BUFFER_BITS + 28} inclusive
     * @param load     currently ignored; every region is always loaded after mapping
     * @throws IllegalArgumentException if {@code sizeBits} is outside the supported range
     * @throws IOException              if the file cannot be opened or mapped
     */
    public LongIntParallelHashMultimap(String fileName, int sizeBits, boolean load) throws IOException {
        // A smaller sizeBits would allocate zero regions and produce a negative shift count,
        // leaving a map whose first put() fails with an ArrayIndexOutOfBoundsException.
        if (sizeBits < MIN_SIZE_BITS || sizeBits > MAX_SIZE_BITS) {
            throw new IllegalArgumentException("sizeBits must be between " + MIN_SIZE_BITS
                    + " and " + MAX_SIZE_BITS + " but was " + sizeBits);
        }
        long totalSize = 4L << sizeBits;
        int bufferIndex = (int) (totalSize >> BUFFER_BITS);
        int bufferSize = 1 << BUFFER_BITS;
        keys = new ByteBuffer[bufferIndex];
        data = new ByteBuffer[bufferIndex];

        FileChannel channel = new RandomAccessFile(fileName, "rw").getChannel();
        try {
            long offset = 0;
            for (int i = 0; i < bufferIndex; i++) {
                // Key region first, immediately followed by the matching data region.
                MappedByteBuffer kmap = channel.map(FileChannel.MapMode.READ_WRITE, offset, bufferSize);
                kmap.load();
                keys[i] = kmap.order(ByteOrder.nativeOrder());
                MappedByteBuffer dmap =
                        channel.map(FileChannel.MapMode.READ_WRITE, offset + bufferSize, bufferSize);
                dmap.load();
                data[i] = dmap.order(ByteOrder.nativeOrder());
                offset += 2L * bufferSize;
            }
        } catch (IOException | RuntimeException | Error e) {
            // Do not leak the file handle when mapping fails part-way through.
            try {
                channel.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        fc = channel;
        topBits = sizeBits + 2 - BUFFER_BITS;
        topMask = (1 << topBits) - 1;
        // Clearing the two low bits keeps every offset aligned to a 4-byte slot.
        offsetMask = bufferSize - 4;
        slotsPerBuffer = bufferSize >> 2;
    }

    /**
     * Adds {@code value} to the values stored under {@code key}.
     *
     * <p>Keys whose arithmetically shifted high part equals {@code -1} would be stored as the
     * empty-slot marker {@code 0}; they are rejected by an assertion only.
     *
     * @throws IllegalStateException if the buffer selected by {@code key} has no empty slot left
     */
    public void put(int key, int value) {
        int buffer = key & topMask;
        int key2 = (key >> topBits) + 1;
        assert key2 != 0;
        ByteBuffer keys2 = keys[buffer];
        ByteBuffer data2 = data[buffer];
        int offset = (key2 * 101) & offsetMask;
        // The step of three slots is coprime with the power-of-two slot count, so
        // slotsPerBuffer probes visit every slot exactly once.
        for (int probes = 0; probes < slotsPerBuffer; probes++) {
            if (keys2.getInt(offset) == 0) {
                keys2.putInt(offset, key2);
                data2.putInt(offset, value);
                return;
            }
            offset = (offset + PROBE_STEP) & offsetMask;
        }
        throw new IllegalStateException("Buffer " + buffer + " is full, cannot store key " + key);
    }

    /**
     * Copies the values stored under {@code key} into {@code values}, in probe order.
     *
     * @param key    the key to look up
     * @param values destination array; at most {@code values.length} values are copied
     * @return the number of values copied, which equals {@code values.length} when the
     *         destination was filled before the probe sequence ended
     */
    public int get(int key, int[] values) {
        int buffer = key & topMask;
        int key2 = (key >> topBits) + 1;
        assert key2 != 0;
        ByteBuffer keys2 = keys[buffer];
        ByteBuffer data2 = data[buffer];
        int offset = (key2 * 101) & offsetMask;
        int count = 0;
        // Bounded like put(): a completely full buffer has no empty slot to stop the scan.
        for (int probes = 0; probes < slotsPerBuffer && count < values.length; probes++) {
            int key3 = keys2.getInt(offset);
            if (key3 == 0) {
                return count;
            }
            if (key3 == key2) {
                values[count++] = data2.getInt(offset);
            }
            offset = (offset + PROBE_STEP) & offsetMask;
        }
        return count;
    }

    /**
     * Returns a fresh array with the values stored under {@code key}.
     *
     * <p>At most {@code 1000} values (the size of the internal scratch array) are returned.
     *
     * @return the values in probe order, or an empty array if there are none
     */
    public int[] get(int key) {
        int len = get(key, getValues);
        return len == 0 ? NO_VALUES : Arrays.copyOf(getValues, len);
    }

    /**
     * Closes the backing file channel. Regions that are already mapped are not unmapped here.
     *
     * @throws IOException if closing the channel fails
     */
    @Override
    public void close() throws IOException {
        fc.close();
    }

    /** Benchmark: creates a fresh map in {@code abc.bin} and times puts, gets and a full GC. */
    public static void main(String... args) throws IOException {
        int keys = 500 * 1000 * 1000;

        new File("abc.bin").delete();
        long startTime = System.nanoTime();
        try (LongIntParallelHashMultimap lph = new LongIntParallelHashMultimap("abc.bin", 30, true)) {
            long time = System.nanoTime() - startTime;
            System.out.printf("Load time was %.3f sec%n", time / 1e9);

            timePut(keys, lph);

            timeGet(keys, lph);

            timeGet2(keys, lph);

            startTime = System.nanoTime();
            System.gc();
            time = System.nanoTime() - startTime;
            System.out.printf("Time to Full GC was %.3f sec%n", time / 1e9);
        }
    }

    /** Times {@code keys} sequential puts, printing progress every 100 million entries. */
    private static void timePut(int keys, LongIntParallelHashMultimap lph) {
        long startTime = System.nanoTime();
        for (int i = 0; i < keys; i++) {
            lph.put(i, i + 100);
            if ((i + 1) % 100_000_000 == 0) {
                System.out.printf("%,d ", i + 1);
            }
        }
        long time = System.nanoTime() - startTime;
        System.out.printf("%nput time was %.3f sec%n", time / 1e9);
    }

    /** Times {@code keys} lookups through the caller-supplied-array variant of get. */
    private static void timeGet(int keys, LongIntParallelHashMultimap lph) {
        long startTime = System.nanoTime();
        int[] values = new int[2];
        for (int i = 0; i < keys; i++) {
            lph.get(i, values);
            if ((i + 1) % 100_000_000 == 0) {
                System.out.printf("%,d ", i + 1);
            }
        }
        long time = System.nanoTime() - startTime;
        System.out.printf("%nget(key, values) time was %.3f sec%n", time / 1e9);
    }

    /** Times {@code keys} lookups through the array-returning variant of get. */
    private static void timeGet2(int keys, LongIntParallelHashMultimap lph) {
        long startTime = System.nanoTime();
        for (int i = 0; i < keys; i++) {
            lph.get(i);
            if ((i + 1) % 100_000_000 == 0) {
                System.out.printf("%,d ", i + 1);
            }
        }
        long time = System.nanoTime() - startTime;
        System.out.printf("%nget(key) time was %.3f sec%n", time / 1e9);
    }
}
注意:这里之所以发生 GC,只是因为我显式调用了 System.gc();另外请留意所用堆的大小。


我会使用 int[],而不是由节点组成的链表。

import sun.nio.ch.DirectBuffer;

import java.io.IOException;
import java.util.*;
import java.nio.*;
import java.nio.channels.FileChannel;
import java.io.RandomAccessFile;

/**
 * An {@code int -> int} multimap held entirely on the heap as one {@code int[]} of values per
 * key, with the key used directly as the index into {@link #data}.
 *
 * <p>{@link #save(String)} writes the map to a file in native byte order:
 * <ol>
 *   <li>one 32-bit presence word per group of 32 consecutive keys, where the most significant
 *       bit belongs to the first key of the group and a set bit means "has values";</li>
 *   <li>for every key that has values, in ascending key order, the number of values followed
 *       by the values themselves.</li>
 * </ol>
 * The file is written through fixed-size mappings, so it typically ends up padded to a
 * multiple of the chunk size.
 *
 * <p>Instances are not safe for concurrent use.
 */
class LongIntParallelHashMultimap {
    private static final int[] NO_INTS = {};

    /** Size in bytes of each mapped window used while saving. */
    private static final int CHUNK_SIZE = 1 << 24;

    /** File written by the no-argument {@link #save()}. */
    private static final String DEFAULT_FILE_NAME = "abc.bin";

    final int[][] data;

    // State of the save that is currently in progress; null/0 otherwise.
    private FileChannel channel;
    private MappedByteBuffer mbb;
    private long nextChunkOffset;

    /**
     * Creates a map accepting keys {@code 0 .. Integer.MAX_VALUE - 1}.
     *
     * <p>NOTE(review): some JVMs cap array lengths slightly below {@code Integer.MAX_VALUE};
     * confirm on the target VM or use {@link #LongIntParallelHashMultimap(int)}.
     */
    public LongIntParallelHashMultimap() {
        this(Integer.MAX_VALUE);
    }

    /**
     * Creates a map accepting keys {@code 0 .. capacity - 1}.
     *
     * @param capacity number of distinct keys the map can hold
     * @throws IllegalArgumentException if {@code capacity} is negative
     */
    public LongIntParallelHashMultimap(int capacity) {
        if (capacity < 0) {
            throw new IllegalArgumentException("capacity must not be negative: " + capacity);
        }
        data = new int[capacity][];
    }

    /**
     * Appends {@code value} to the values stored under {@code key}.
     *
     * <p>The per-key array is grown by exactly one element on every call.
     *
     * @throws ArrayIndexOutOfBoundsException if {@code key} is outside the map's key range
     */
    public void put(int key, int value) {
        int[] ints = data[key];
        if (ints == null) {
            data[key] = new int[]{value};
        } else {
            int[] ints2 = Arrays.copyOf(ints, ints.length + 1);
            ints2[ints.length] = value;
            data[key] = ints2;
        }
    }

    /**
     * Returns the values stored under {@code key}.
     *
     * @return the internal array itself (not a copy), or an empty array if there are none
     * @throws ArrayIndexOutOfBoundsException if {@code key} is outside the map's key range
     */
    public int[] get(int key) {
        int[] ints = data[key];
        return ints == null ? NO_INTS : ints;
    }

    /**
     * Saves the map to {@code abc.bin} in the working directory.
     *
     * @throws IOException if the file cannot be opened or mapped
     */
    public void save() throws IOException {
        save(DEFAULT_FILE_NAME);
    }

    /**
     * Saves the map to {@code fileName} in the format described on the class.
     *
     * <p>Existing content of the file is overwritten from offset 0; the file is not truncated.
     *
     * @param fileName path of the file to write, opened in {@code "rw"} mode
     * @throws IOException if the file cannot be opened or mapped
     */
    public void save(String fileName) throws IOException {
        try (RandomAccessFile file = new RandomAccessFile(fileName, "rw")) {
            channel = file.getChannel();
            nextChunkOffset = 0;
            mapNextChunk();
            writePresenceWords();
            writeValues();
        } finally {
            // Drop the references so the last mapping can be reclaimed.
            mbb = null;
            channel = null;
        }
    }

    /** Writes one presence word per group of 32 keys, covering every key of the map. */
    private void writePresenceWords() throws IOException {
        // Computed in long arithmetic: data.length + 31 overflows int for the default capacity.
        int words = (int) ((data.length + 31L) >>> 5);
        for (int w = 0; w < words; w++) {
            int base = w << 5;
            int bits = 0;
            for (int j = 0; j < 32; j++) {
                int index = base + j;
                // Shift first, then set: shifting after the OR would push the first key's
                // bit out of the word and leave bit 0 permanently clear.
                bits <<= 1;
                if (index < data.length && hasValues(index)) {
                    bits |= 1;
                }
            }
            getMbb().putInt(bits);
        }
    }

    /** Writes the value count and the values of every key that has values. */
    private void writeValues() throws IOException {
        for (int i = 0; i < data.length; i++) {
            if (!hasValues(i)) {
                continue;
            }
            int[] arr = data[i];
            getMbb().putInt(arr.length);
            for (int a : arr) {
                getMbb().putInt(a);
            }
        }
    }

    /** Tells whether {@code index} holds at least one value; used for both file sections. */
    private boolean hasValues(int index) {
        int[] ints = data[index];
        return ints != null && ints.length != 0;
    }

    /** Returns the current window, mapping the next one first if the current one is full. */
    private ByteBuffer getMbb() throws IOException {
        if (mbb.remaining() <= 0) {
            mapNextChunk();
        }
        return mbb;
    }

    /**
     * Maps the window that directly follows the previous one.
     *
     * <p>The offset is tracked explicitly instead of using the file size, which would be wrong
     * whenever the file already exists and is larger than what has been written so far.
     */
    private void mapNextChunk() throws IOException {
        mbb = channel.map(FileChannel.MapMode.READ_WRITE, nextChunkOffset, CHUNK_SIZE);
        mbb.order(ByteOrder.nativeOrder());
        nextChunkOffset += CHUNK_SIZE;
    }

    /** Benchmark: times creation, 50 million puts, a save and 50 million gets. */
    public static void main(String... args) throws IOException {
        int keys = 50 * 1000 * 1000;

        long startTime = System.nanoTime();
        LongIntParallelHashMultimap lph = new LongIntParallelHashMultimap();
        long time = System.nanoTime() - startTime;
        System.out.printf("Create time %.3f sec%n", time / 1e9);

        startTime = System.nanoTime();
        for (int i = 0; i < keys; i++) {
            lph.put(i, i + 100);
            if (i % 10000000 == 0 && i != 0) {
                System.out.print(" " + i + " ");
            }
        }
        time = System.nanoTime() - startTime;
        System.out.printf("%nput time was %.3f sec%n", time / 1e9);

        startTime = System.nanoTime();
        lph.save();
        time = System.nanoTime() - startTime;
        System.out.printf(" time to save was %.3f sec%n", time / 1e9);

        startTime = System.nanoTime();
        for (int i = 0; i < keys; i++) {
            lph.get(i);
        }
        time = System.nanoTime() - startTime;
        System.out.printf("get time was %.3f sec%n", time / 1e9);
    }
}
import sun.nio.ch.DirectBuffer;
import java.io.IOException;
import java.util.*;
import java.nio.*;
import java.nio.channels.FileChannel;
import java.io.RandomAccessFile;
class LongIntParallelHashMultimap {
private static final int[] NO_INTS = {};
final int[][] data;
public LongIntParallelHashMultimap() {
data = new int[Integer.MAX_VALUE][];
}
public void put(int key, int value) {
int[] ints = data[key];
if (ints == null) {
data[key] = new int[]{value};
} else {
int[] ints2 = Arrays.copyOf(ints, ints.length + 1);
ints2[ints.length] = value;
data[key] = ints2;
}
}
public int[] get(int key) {
int[] ints = data[key];
return ints == null ? NO_INTS : ints;
}
private FileChannel channel;
private MappedByteBuffer mbb;
public void save() throws IOException {
channel = new RandomAccessFile("abc.bin", "rw").getChannel();

mbb = channel.map(FileChannel.MapMode.READ_WRITE, 0, 1 << 24); ……(代码在此处被截断)
评论:
“停止工作”是什么意思?你是否收到了某种错误消息?
你把 2.1 亿称为“几次”?
@ThomasJungblut,不,程序只是卡住了。也就是说,前 2.1 亿个值在 3 秒内就插入完了,但在那之后我等了 2 分钟,程序仍然卡着不动。
如果你检查 l[key] != null,就不需要位集。另外,与其使用节点链表,不如使用单个 int[],可能更高效。
代码真的很好。不过我认为,如果用位集来标记哪些 int[] 已被填充,可以节省更多磁盘空间(也能减少磁盘 IO)。因为在保存方法中,必须保存(值的数量 × 4 字节)+(4 字节 × Integer.MAX_VALUE)。你怎么看?
你可以先根据数组写出一个位集。
非常感谢,这正是我要找的。但是在加载时,如何反向解析这个位集(我指的是其中的各个位)?
把这些 int 值读入数组,再用它来确定之后要读取哪些值。不需要保留位集。
我想问的是“如何”解析:for (int j = 0; j < 32; j++) { if (data[i + j] != null) bits |= 1; bits ……(原文在此处被截断)
import sun.nio.ch.DirectBuffer;

import java.io.IOException;
import java.util.*;
import java.nio.*;
import java.nio.channels.FileChannel;
import java.io.RandomAccessFile;

/**
 * An {@code int -> int} multimap held entirely on the heap as one {@code int[]} of values per
 * key, with the key used directly as the index into {@link #data}.
 *
 * <p>{@link #save(String)} writes the map to a file in native byte order:
 * <ol>
 *   <li>one 32-bit presence word per group of 32 consecutive keys, where the most significant
 *       bit belongs to the first key of the group and a set bit means "has values";</li>
 *   <li>for every key that has values, in ascending key order, the number of values followed
 *       by the values themselves.</li>
 * </ol>
 * The file is written through fixed-size mappings, so it typically ends up padded to a
 * multiple of the chunk size.
 *
 * <p>Instances are not safe for concurrent use.
 */
class LongIntParallelHashMultimap {
    private static final int[] NO_INTS = {};

    /** Size in bytes of each mapped window used while saving. */
    private static final int CHUNK_SIZE = 1 << 24;

    /** File written by the no-argument {@link #save()}. */
    private static final String DEFAULT_FILE_NAME = "abc.bin";

    final int[][] data;

    // State of the save that is currently in progress; null/0 otherwise.
    private FileChannel channel;
    private MappedByteBuffer mbb;
    private long nextChunkOffset;

    /**
     * Creates a map accepting keys {@code 0 .. Integer.MAX_VALUE - 1}.
     *
     * <p>NOTE(review): some JVMs cap array lengths slightly below {@code Integer.MAX_VALUE};
     * confirm on the target VM or use {@link #LongIntParallelHashMultimap(int)}.
     */
    public LongIntParallelHashMultimap() {
        this(Integer.MAX_VALUE);
    }

    /**
     * Creates a map accepting keys {@code 0 .. capacity - 1}.
     *
     * @param capacity number of distinct keys the map can hold
     * @throws IllegalArgumentException if {@code capacity} is negative
     */
    public LongIntParallelHashMultimap(int capacity) {
        if (capacity < 0) {
            throw new IllegalArgumentException("capacity must not be negative: " + capacity);
        }
        data = new int[capacity][];
    }

    /**
     * Appends {@code value} to the values stored under {@code key}.
     *
     * <p>The per-key array is grown by exactly one element on every call.
     *
     * @throws ArrayIndexOutOfBoundsException if {@code key} is outside the map's key range
     */
    public void put(int key, int value) {
        int[] ints = data[key];
        if (ints == null) {
            data[key] = new int[]{value};
        } else {
            int[] ints2 = Arrays.copyOf(ints, ints.length + 1);
            ints2[ints.length] = value;
            data[key] = ints2;
        }
    }

    /**
     * Returns the values stored under {@code key}.
     *
     * @return the internal array itself (not a copy), or an empty array if there are none
     * @throws ArrayIndexOutOfBoundsException if {@code key} is outside the map's key range
     */
    public int[] get(int key) {
        int[] ints = data[key];
        return ints == null ? NO_INTS : ints;
    }

    /**
     * Saves the map to {@code abc.bin} in the working directory.
     *
     * @throws IOException if the file cannot be opened or mapped
     */
    public void save() throws IOException {
        save(DEFAULT_FILE_NAME);
    }

    /**
     * Saves the map to {@code fileName} in the format described on the class.
     *
     * <p>Existing content of the file is overwritten from offset 0; the file is not truncated.
     *
     * @param fileName path of the file to write, opened in {@code "rw"} mode
     * @throws IOException if the file cannot be opened or mapped
     */
    public void save(String fileName) throws IOException {
        try (RandomAccessFile file = new RandomAccessFile(fileName, "rw")) {
            channel = file.getChannel();
            nextChunkOffset = 0;
            mapNextChunk();
            writePresenceWords();
            writeValues();
        } finally {
            // Drop the references so the last mapping can be reclaimed.
            mbb = null;
            channel = null;
        }
    }

    /** Writes one presence word per group of 32 keys, covering every key of the map. */
    private void writePresenceWords() throws IOException {
        // Computed in long arithmetic: data.length + 31 overflows int for the default capacity.
        int words = (int) ((data.length + 31L) >>> 5);
        for (int w = 0; w < words; w++) {
            int base = w << 5;
            int bits = 0;
            for (int j = 0; j < 32; j++) {
                int index = base + j;
                // Shift first, then set: shifting after the OR would push the first key's
                // bit out of the word and leave bit 0 permanently clear.
                bits <<= 1;
                if (index < data.length && hasValues(index)) {
                    bits |= 1;
                }
            }
            getMbb().putInt(bits);
        }
    }

    /** Writes the value count and the values of every key that has values. */
    private void writeValues() throws IOException {
        for (int i = 0; i < data.length; i++) {
            if (!hasValues(i)) {
                continue;
            }
            int[] arr = data[i];
            getMbb().putInt(arr.length);
            for (int a : arr) {
                getMbb().putInt(a);
            }
        }
    }

    /** Tells whether {@code index} holds at least one value; used for both file sections. */
    private boolean hasValues(int index) {
        int[] ints = data[index];
        return ints != null && ints.length != 0;
    }

    /** Returns the current window, mapping the next one first if the current one is full. */
    private ByteBuffer getMbb() throws IOException {
        if (mbb.remaining() <= 0) {
            mapNextChunk();
        }
        return mbb;
    }

    /**
     * Maps the window that directly follows the previous one.
     *
     * <p>The offset is tracked explicitly instead of using the file size, which would be wrong
     * whenever the file already exists and is larger than what has been written so far.
     */
    private void mapNextChunk() throws IOException {
        mbb = channel.map(FileChannel.MapMode.READ_WRITE, nextChunkOffset, CHUNK_SIZE);
        mbb.order(ByteOrder.nativeOrder());
        nextChunkOffset += CHUNK_SIZE;
    }

    /** Benchmark: times creation, 50 million puts, a save and 50 million gets. */
    public static void main(String... args) throws IOException {
        int keys = 50 * 1000 * 1000;

        long startTime = System.nanoTime();
        LongIntParallelHashMultimap lph = new LongIntParallelHashMultimap();
        long time = System.nanoTime() - startTime;
        System.out.printf("Create time %.3f sec%n", time / 1e9);

        startTime = System.nanoTime();
        for (int i = 0; i < keys; i++) {
            lph.put(i, i + 100);
            if (i % 10000000 == 0 && i != 0) {
                System.out.print(" " + i + " ");
            }
        }
        time = System.nanoTime() - startTime;
        System.out.printf("%nput time was %.3f sec%n", time / 1e9);

        startTime = System.nanoTime();
        lph.save();
        time = System.nanoTime() - startTime;
        System.out.printf(" time to save was %.3f sec%n", time / 1e9);

        startTime = System.nanoTime();
        for (int i = 0; i < keys; i++) {
            lph.get(i);
        }
        time = System.nanoTime() - startTime;
        System.out.printf("get time was %.3f sec%n", time / 1e9);
    }
}