Java 使用Neo4j图形数据库创建图形花费的时间太长_Java_Neo4j

Java 使用Neo4j图形数据库创建图形花费的时间太长

java neo4j

Java 使用Neo4j图形数据库创建图形花费的时间太长,java,neo4j,Java,Neo4j,我使用以下代码使用Neo4j图形数据库创建图形： import java.io.BufferedReader; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.util.HashMap; import java.util.Map; import org.neo4j.graphdb.RelationshipType; import o

我使用以下代码使用Neo4j图形数据库创建图形：

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;

import org.neo4j.graphdb.RelationshipType;
import org.neo4j.graphdb.index.IndexHits;
import org.neo4j.helpers.collection.MapUtil;
import org.neo4j.index.lucene.unsafe.batchinsert.LuceneBatchInserterIndexProvider;
import org.neo4j.unsafe.batchinsert.BatchInserter;
import org.neo4j.unsafe.batchinsert.BatchInserterIndex;
import org.neo4j.unsafe.batchinsert.BatchInserterIndexProvider;
import org.neo4j.unsafe.batchinsert.BatchInserters;


public class Neo4jMassiveInsertion implements Insertion {

    private BatchInserter inserter = null;
    private BatchInserterIndexProvider indexProvider = null;
    private BatchInserterIndex nodes = null;

    private static enum RelTypes implements RelationshipType {
        SIMILAR
    }

    public static void main(String args[]) {
        Neo4jMassiveInsertion test = new Neo4jMassiveInsertion();
        test.startup("data/neo4j");
        test.createGraph("data/enronEdges.txt");
        test.shutdown();
    }

    /**
     * Start neo4j database and configure for massive insertion
     * @param neo4jDBDir
     */
    public void startup(String neo4jDBDir) {
        System.out.println("The Neo4j database is now starting . . . .");
        Map<String, String> config = new HashMap<String, String>();
        inserter = BatchInserters.inserter(neo4jDBDir, config);
        indexProvider = new LuceneBatchInserterIndexProvider(inserter);
        nodes = indexProvider.nodeIndex("nodes", MapUtil.stringMap("type", "exact"));
    }

    public void shutdown() {
        System.out.println("The Neo4j database is now shuting down . . . .");
        if(inserter != null) {
            indexProvider.shutdown();
            inserter.shutdown();
            indexProvider = null;
            inserter = null;
        }
    }

    public void createGraph(String datasetDir) {
        System.out.println("Creating the Neo4j database . . . .");
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(datasetDir)));
            String line;
            int lineCounter = 1;
            Map<String, Object> properties;
            IndexHits<Long> cache;
            long srcNode, dstNode;
            while((line = reader.readLine()) != null) {
                if(lineCounter > 4) {
                    String[] parts = line.split("\t");
                    cache = nodes.get("nodeId", parts[0]);
                    if(cache.hasNext()) {
                        srcNode = cache.next();
                    }
                    else {
                        properties = MapUtil.map("nodeId", parts[0]);
                        srcNode = inserter.createNode(properties);
                        nodes.add(srcNode, properties);
                        nodes.flush();
                    }
                    cache = nodes.get("nodeId", parts[1]);
                    if(cache.hasNext()) {
                        dstNode = cache.next();
                    }
                    else {
                        properties = MapUtil.map("nodeId", parts[1]);
                        dstNode = inserter.createNode(properties);
                        nodes.add(dstNode, properties);
                        nodes.flush();
                    }
                    inserter.createRelationship(srcNode, dstNode, RelTypes.SIMILAR, null);
                }
                lineCounter++;
            }
            reader.close();
        } 
        catch (IOException e) {
            e.printStackTrace();
        }
    }
}

导入java.io.BufferedReader；
导入java.io.FileInputStream；
导入java.io.IOException；
导入java.io.InputStreamReader；
导入java.util.HashMap；
导入java.util.Map；
导入org.neo4j.graphdb.RelationshipType；
导入org.neo4j.graphdb.index.IndexHits；
导入org.neo4j.helpers.collection.MapUtil；
导入org.neo4j.index.lucene.unsafe.batchinsert.LuceneBatchInserterIndexProvider；
导入org.neo4j.unsafe.batchinsert.BatchInserter；
导入org.neo4j.unsafe.batchinsert.BatchInserterIndex；
导入org.neo4j.unsafe.batchinsert.batchInserteIndexProvider；
导入org.neo4j.unsafe.batchinsert.BatchInserters；
公共类Neo4jMassiveInsertion实现插入{
专用批插入器插入器=null；
私有BatchInserterIndexProvider indexProvider=null；
私有BatchInserterIndex节点=null；
私有静态枚举RelTypes实现RelationshipType{
相似的
}
公共静态void main（字符串参数[]）{
Neo4jMassiveInsertion test=新Neo4jMassiveInsertion（）；
测试、启动（“数据/neo4j”）；
test.createGraph（“data/enronEdges.txt”）；
test.shutdown（）；
}
/**
*启动neo4j数据库并配置大规模插入
*@param neo4jDBDir
*/
公共无效启动（字符串neo4jDBDir）{
System.out.println（“Neo4j数据库现在正在启动…”）；
Map config=newhashmap（）；
inserter=BatchInserters.inserter（neo4jDBDir，config）；
indexProvider=新LuceneBatchInserterIndexProvider（插入器）；
nodes=indexProvider.nodeIndex（“节点”，MapUtil.stringMap（“类型”，“精确”））；
}
公共空间关闭（）{
System.out.println（“Neo4j数据库现在正在关闭…”）；
如果（插入器！=null）{
indexProvider.shutdown（）；
inserter.shutdown（）；
indexProvider=null；
插入器=null；
}
}
public void createGraph（字符串datasetDir）{
System.out.println（“创建Neo4j数据库…”）；
试一试{
BufferedReader=new BufferedReader（new InputStreamReader（new FileInputStream（datasetDir））；
弦线；
int lineCounter=1；
地图属性；
索引其缓存；
长srcNode，dstNode；
而（（line=reader.readLine（））！=null）{
如果（线路计数器>4）{
String[]parts=line.split（“\t”）；
cache=nodes.get（“nodeId”，parts[0]）；
if（cache.hasNext（））{
srcNode=cache.next（）；
}
否则{
properties=MapUtil.map（“nodeId”，parts[0]）；
srcNode=inserter.createNode（属性）；
添加（srcNode，properties）；
nodes.flush（）；
}
cache=nodes.get（“nodeId”，parts[1]）；
if（cache.hasNext（））{
dstNode=cache.next（）；
}
否则{
properties=MapUtil.map（“nodeId”，parts[1]）；
dstNode=inserter.createNode（属性）；
添加（dstNode，属性）；
nodes.flush（）；
}
createRelationship（srcNode、dstNode、RelTypes.similor、null）；
}
lineCounter++；
}
reader.close（）；
} 
捕获（IOE异常）{
e、 printStackTrace（）；
}
}
}

与其他图形数据库技术（titan、orientdb）相比，它需要太多的时间。所以我可能做错了什么。有没有办法提高程序的效率

我使用neo4j 1.9.5，我的机器有2.3 Ghz的CPU（i5）、4GB的RAM和320GB的磁盘，我在Macintosh OSX Mavericks（10.9）上运行。此外，我的堆大小为2GB。

通常我可以在macbook上每秒导入大约1M个节点和200k个关系

刷新和搜索 请不要在每次插入时刷新和搜索，这会完全降低性能。 将节点id保存在从数据到节点id的HashMap中，并且仅在导入期间写入lucene

（如果您关心内存使用，也可以使用gnu trove之类的工具）

内存内存映射您还使用了太少的RAM（根据数据集大小，我通常使用4到60GB之间的堆），并且没有任何配置集

请检查类似的配置，根据您的数据量，我会提高这些数字

cache_type=none
use_memory_mapped_buffers=true
neostore.nodestore.db.mapped_memory=200M
neostore.relationshipstore.db.mapped_memory=1000M
neostore.propertystore.db.mapped_memory=250M
neostore.propertystore.db.strings.mapped_memory=250M

堆

并确保给它足够的堆。您可能还拥有一个可能不是最快的磁盘。尝试将堆增加到至少3GB。还要确保使用最新的JDK，1.7..\u b25存在内存分配问题（它只为

分配了一点点内存，如果我使用映射作为索引，那么使用Lucene索引有什么意义？你的建议类似于？我使用列表而不是HashMap，因为我只有索引，我认为没有必要只添加“nodeId”在每个entryNeo4j中，绝对不会在macbook pro上在1秒内导入1M节点和200k关系。我有相同的硬件，并使用类似的脚本，可以在20分钟左右加载5M节点和18M关系。