Java Neo4j插入需要更多时间_Java_Neo4j_Graph Databases_Embedded Database

Java Neo4j插入需要更多时间

java neo4j

Java Neo4j插入需要更多时间,java,neo4j,graph-databases,embedded-database,Java,Neo4j,Graph Databases,Embedded Database,我们有大约50000个节点和8000万（800万）条边我们正试图使用java将这些数据插入neo4j（嵌入式图形数据库）。但这需要很多时间（几个小时）我们想知道我们在插入方面是否出了问题。我们正在为节点使用自动索引。下面给出了完整的实现请让我知道发生了什么错误，并更改以下代码 public static void main(String[] args) { // TODO Auto-generated method stub nodeGraph obj =

我们有大约 50,000 个节点和 800 万条边。

我们正尝试使用 Java 将这些数据插入 Neo4j（嵌入式图数据库），但这需要很长时间（几个小时）。

我们想知道我们在插入方面是否出了问题。我们正在为节点使用自动索引。下面给出了完整的实现

请告诉我哪里出了问题，并指出以下代码应如何修改。

/**
 * Entry point: opens the embedded database, imports the node CSV, then the
 * edge CSV, and finally calls {@code shutDown()}.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args)
{
        nodeGraph obj = new nodeGraph();
        obj.createDB();
        try
        {
            System.out.println("Graph Database Initialised");
            obj.parseNodesCsv();
            System.out.println("Creating relationships in process....");
            obj.parseEdgesCsv();
        }
        finally
        {
            // Runs even when an import step throws, so the database opened by
            // createDB() is never left open by a failed import.
            obj.shutDown();
        }

}

/**
 * Opens the embedded database at {@code DB_PATH} and stores it in
 * {@code graphDb}.
 *
 * Node auto-indexing is configured for the "id" and "name" properties and
 * relationship auto-indexing for the "rel" property. The node auto-indexer
 * is then also told programmatically to index "id" and "name" and is
 * enabled. The relationship auto-indexer is only configured through the
 * builder settings and is not touched programmatically here.
 */
public void createDB() {

        graphDb = new GraphDatabaseFactory()
                .newEmbeddedDatabaseBuilder( DB_PATH )
                .setConfig( GraphDatabaseSettings.node_keys_indexable, "id,name" )
                .setConfig( GraphDatabaseSettings.relationship_keys_indexable, "rel" )
                .setConfig( GraphDatabaseSettings.node_auto_indexing, "true" )
                .setConfig( GraphDatabaseSettings.relationship_auto_indexing, "true" )
                .newGraphDatabase();
        registerShutdownHook(graphDb);

        // Register the indexed node properties, then switch the indexer on.
        AutoIndexer<Node> nodeIndexer = graphDb.index().getNodeAutoIndexer();
        nodeIndexer.startAutoIndexingProperty( "id" );
        nodeIndexer.startAutoIndexingProperty( "name" );
        nodeIndexer.setEnabled( true );
}

public void parseNodesCsv(){

        try 
        {
            CSVReader reader= new CSVReader(new FileReader("/home/sandy/Desktop/workspacesh/importToNeo4j/nodesNeo.csv"),'  ','"');
            String rows[]=null;
            while ((rows=reader.readNext())!=null) 
            {
                createNode(rows);
                System.out.println(rows[0]);

            }
            reader.close();
        } 


        catch (FileNotFoundException e) 
        {
            // TODO Auto-generated catch block
            System.err.println("Error: cannot find datasource.");
            e.printStackTrace();
        } 
        catch (IOException e) 
        {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } 
}

/**
 * Reads the edge CSV file (comma separated) and passes every row to
 * {@link #createRelationshipsUsingIndexes(String[])}.
 *
 * Rows are committed in batches: an outer transaction is kept open around
 * the loop and committed every {@code batchSize} rows. Neo4j joins a
 * transaction begun while another one is already open on the same thread to
 * that outer transaction, so the per-row transaction inside
 * {@code createRelationshipsUsingIndexes} no longer forces a commit for
 * every single relationship. If reading fails part-way, the rows of the
 * unfinished batch are rolled back; batches that were already committed
 * stay in the database.
 *
 * Progress is printed once per committed batch. File-not-found and I/O
 * errors are reported on stderr, as before.
 */
public void parseEdgesCsv(){

        // Rows per committed batch (and per progress line).
        final int batchSize = 50000;
        CSVReader reader = null;
        try
        {
            reader = new CSVReader(new FileReader("/home/sandy/Desktop/workspacesh/importToNeo4j/edgesNeo.csv"),',','"');
            long imported = 0;
            Transaction tx = graphDb.beginTx();
            try
            {
                String rows[]=null;
                while ((rows=reader.readNext())!=null)
                {
                    createRelationshipsUsingIndexes(rows);
                    imported++;
                    if (imported % batchSize == 0)
                    {
                        // Commit this batch and start the next one.
                        tx.success();
                        tx.finish();
                        System.out.println("Relationships imported: " + imported);
                        tx = graphDb.beginTx();
                    }
                }
                tx.success();
            }
            finally
            {
                // Commits the last partial batch, or rolls it back when the
                // loop above was left through an exception.
                tx.finish();
            }
            System.out.println("Relationships imported: " + imported);
        }
        catch (FileNotFoundException e)
        {
            System.err.println("Error: cannot find datasource.");
            e.printStackTrace();
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
        finally
        {
            // Close the reader on every path, including failures mid-file.
            if (reader != null)
            {
                try
                {
                    reader.close();
                }
                catch (IOException e)
                {
                    e.printStackTrace();
                }
            }
        }
}


/**
 * Creates one labelled node from a CSV row and stores it in
 * {@code firstNode}.
 *
 * The row is validated before a transaction is opened, so a malformed line
 * fails with a descriptive message instead of an
 * ArrayIndexOutOfBoundsException in the middle of a transaction. The node
 * id is no longer read back and printed for every node, which cost a
 * property lookup and a console write per inserted node.
 *
 * @param rows CSV columns: [0] value of the "id" property, [1] value of the
 *             "name" property, [2] label name
 * @throws IllegalArgumentException if {@code rows} is null or has fewer
 *         than three columns
 */
public void createNode(String[] rows){

         if (rows == null || rows.length < 3)
         {
             throw new IllegalArgumentException(
                     "Node row needs 3 columns (id, name, label) but has "
                     + (rows == null ? 0 : rows.length));
         }

         Transaction tx = graphDb.beginTx();
         try
            {
                firstNode = graphDb.createNode(DynamicLabel.label( rows[2] ));
                firstNode.setProperty("id",rows[0] );
                firstNode.setProperty("name",rows[1] );
                tx.success();
            }
            finally
            {
                // Without a prior success() this rolls the node back.
                tx.finish();
            }

}

/**
 * Creates one CO_OCCURRED relationship from a CSV row. Both end nodes are
 * looked up by their "id" property in the node auto-index and stored in
 * {@code firstNode} / {@code secondNode}; the new relationship is stored in
 * {@code relationship}.
 *
 * A malformed row or an id that is not in the index now fails with a
 * descriptive exception instead of an ArrayIndexOutOfBoundsException or a
 * NullPointerException without context.
 *
 * @param rows CSV columns: [0] id of the start node, [1] id of the end
 *             node, [2] value of "frequency", [3] value of
 *             "generatability_score" (both stored as given, i.e. as strings)
 * @throws IllegalArgumentException if {@code rows} is null or has fewer
 *         than four columns
 * @throws IllegalStateException if either node id is not found in the index
 */
public void createRelationshipsUsingIndexes(String rows[]){

        if (rows == null || rows.length < 4)
        {
            throw new IllegalArgumentException(
                    "Edge row needs 4 columns (from, to, frequency, generatability_score) but has "
                    + (rows == null ? 0 : rows.length));
        }

        Transaction tx = graphDb.beginTx();
        try
        {
            ReadableIndex<Node> autoNodeIndex = graphDb.index().getNodeAutoIndexer().getAutoIndex();
            firstNode=autoNodeIndex.get( "id", rows[0] ).getSingle();
            secondNode=autoNodeIndex.get( "id", rows[1] ).getSingle();

            // getSingle() yields null when nothing is indexed under the id.
            if (firstNode == null)
            {
                throw new IllegalStateException("No node indexed with id " + rows[0]);
            }
            if (secondNode == null)
            {
                throw new IllegalStateException("No node indexed with id " + rows[1]);
            }

            relationship = firstNode.createRelationshipTo( secondNode, RelTypes.CO_OCCURRED );
            relationship.setProperty( "frequency", rows[2] );
            relationship.setProperty( "generatability_score", rows[3] );
            tx.success();

        }
        finally
        {
              // Without a prior success() this rolls the relationship back.
              tx.finish();
        }


}

publicstaticvoidmain（字符串[]args）
{
//TODO自动生成的方法存根
nodeGraph obj=新的nodeGraph（）；
obj.createDB（）；
System.out.println（“图形数据库初始化”）；
obj.parseNodesCsv（）；
System.out.println（“创建过程中的关系…”）；
obj.parseedgessv（）；
obj.shutDown（）；
}
public void createDB（）{
graphDb=new GraphDatabaseFactory（）.newEmbeddedDatabaseBuilder（DB\U路径）。
setConfig（GraphDatabaseSettings.node_keys_indexable，“id，name”）。
setConfig（GraphDatabaseSettings.relationship_keys_indexable，“rel”）。
setConfig（GraphDatabaseSettings.node_auto_index，“true”）。
setConfig（GraphDatabaseSettings.relationship_auto_index，“true”）。
newGraphDatabase（）；
寄存器SHUTDownhook（graphDb）；
//获取节点自动索引器，将nodeProp1和nodeProp2设置为auto
//索引。
自动索引器nodeAutoIndexer=graphDb.index（）.getNodeAutoIndexer（）；
nodeAutoIndexer.startAutoIndexingProperty（“id”）；
nodeAutoIndexer.startAutoIndexingProperty（“名称”）；
//获取关系自动索引器
//AutoIndexer relAutoIndexer=graphDb.index（）.getRelationshipAutoIndexer（）；
//relAutoIndexer.startAutoIndexingProperty（“relProp1”）；
//到目前为止，没有启用任何自动索引器。请立即执行此操作
nodeAutoIndexer.setEnabled（true）；
//relAutoIndexer.setEnabled（真）；
}
public void parseNodesCsv（）{
尝试
{
CSVReader reader=新的CSVReader（新的文件阅读器（“/home/sandy/Desktop/workspacesh/importToNeo4j/nodesNeo.csv”），”）；
字符串行[]=null；
while（（rows=reader.readNext（））！=null）
{
创建节点（行）；
System.out.println（行[0]）；
}
reader.close（）；
} 
catch（filenotfounde异常）
{
//TODO自动生成的捕捉块
System.err.println（“错误：找不到数据源。”）；
e、 printStackTrace（）；
} 
捕获（IOE异常）
{
//TODO自动生成的捕捉块
e、 printStackTrace（）；
} 
}
public void parseEdgesCsv（）{
尝试
{
CSVReader reader=新的CSVReader（新的文件阅读器（“/home/sandy/Desktop/workspacesh/importToNeo4j/edgesNeo.csv”），“，”，“，”）；
字符串行[]=null；
while（（rows=reader.readNext（））！=null）
{
CreateRelationshipSusingIndex（行）；
}
reader.close（）；
}   
catch（filenotfounde异常）
{
//TODO自动生成的捕捉块
System.err.println（“错误：找不到数据源。”）；
e、 printStackTrace（）；
} 
捕获（IOE异常）
{
//TODO自动生成的捕捉块
e、 printStackTrace（）；
} 
}
public void createNode（字符串[]行）{
事务tx=graphDb.beginTx（）；
尝试
{   
firstNode=graphDb.createNode（DynamicLabel.label（行[2]）；
setProperty（“id”，行[0]）；
setProperty（“名称”，行[1]）；
System.out.println（firstNode.getProperty（“id”）；
成功（）；
} 
最后
{
tx.finish（）；
}
}
public void CreateRelationshipSusingIndex（字符串行[]）{
事务tx=graphDb.beginTx（）；
尝试
{
ReadableIndex autoNodeIndex=graphDb.index（）.GetNodeAutoIndex（）.getAutoIndex（）；
//node1和node2都具有自动索引属性，请获取它们
firstNode=autoNodeIndex.get（“id”，行[0]）.getSingle（）；
secondNode=autoNodeIndex.get（“id”，行[1]）.getSingle（）；
relationship=firstNode.createRelationshipTo（发生了第二个节点，RelTypes.CO_）；
关系.setProperty（“频率”，第[2]行）；
setProperty（“可生成性评分”，第[3]行）；
成功（）；
}
最后
{
tx.finish（）；
}
}

导入时使用的内存配置（堆）是什么？您运行的是什么操作系统（我假设是Linux）以及您使用的是什么Neo4j版本

我建议升级到Neo4j 2.0.3的最新稳定版本

您的导入存在一些问题：

你没有配置内存映射 I/O（MMIO）设置

不要使用旧索引

不要为每个节点单独开启一个事务，而是每 5 万个节点或每 5 万条关系使用一个事务

在插入期间，不要从索引中读取信息，请使用内存中的结构保存该信息（例如，地图）

不要为每个节点打印输出，而是为每个tx提交（每50k个元素）打印输出

在文件读取器周围使用BufferedReader可获得更好的CSV读取性能

使用快速初始导入将更有意义

我们使用的是 Ubuntu 12.04 和 Neo4j 2.0.1 社区版。我们会做出相应的修改。Neo4j 服务器的 MMIO 设置是什么？下面是相关设置：除非 CSVReader 在内部已经使用了缓冲流包装器，否则您也应该考虑添加它。如果导入方式正确，这种规模的数据只需几秒钟即可导入完成。嗨，Michael&Rickard，我跟着你