Warning: file_get_contents(/data/phpspider/zhask/data//catemap/9/java/400.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181

Warning: file_get_contents(/data/phpspider/zhask/data//catemap/3/apache-spark/5.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
如何使用java创建一个简单的spark graphframe?_Java_Apache Spark_Graphframes - Fatal编程技术网

如何使用java创建一个简单的spark graphframe?

如何使用java创建一个简单的spark graphframe?,java,apache-spark,graphframes,Java,Apache Spark,Graphframes,基本上,我是一名java开发人员&现在我有机会在Spark上工作&我学习了Spark api的基础知识,如SparkConfig、SparkContaxt、RDD、SQLContaxt、DataFrame、DataSet,然后我能够使用RDD、SQL执行一些简单的转换。。。。但是,当我尝试使用java开发一些示例graphframe应用程序时,我无法成功&我浏览了很多youtube教程、论坛和stackoverflow线程,但没有找到任何直接的建议或解决方案。实际上,当我尝试创建graphfra

基本上,我是一名java开发人员&现在我有机会在Spark上工作&我学习了Spark api的基础知识,如SparkConfig、SparkContaxt、RDD、SQLContaxt、DataFrame、DataSet,然后我能够使用RDD、SQL执行一些简单的转换。。。。但是,当我尝试使用java开发一些示例graphframe应用程序时,我无法成功&我浏览了很多youtube教程、论坛和stackoverflow线程,但没有找到任何直接的建议或解决方案。实际上,当我尝试创建graphframe类的对象时,我遇到了这个问题&我下载了receptive jar(graphframes-0.2.0-spark2.0-s_2.11.jar)但现在仍然面临问题,我想把我的分析放在我到达的地方,因为非常新的Spark我无法进一步移动,所以如果有人帮助我,这对所有人都很有帮助。提前感谢。例外是我面临构造函数GraphFrame(DataFrame,DataFrame)是未定义的

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.storage.StorageLevel;
import org.graphframes.GraphFrame;

import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.databind.JsonMappingException;

public class SparkJavaGraphFrameOne {

    /**
     * Builds a two-vertex, one-edge graph as a GraphFrame from in-memory rows
     * and shows its vertices and edges.
     *
     * Fixes versus the original:
     * - The edge columns are renamed from "fromId"/"toId" to "src"/"dst":
     *   GraphFrame requires edges to carry exactly those column names
     *   (and vertices an "id" column) to identify endpoints.
     * - The GraphFrame(DataFrame, DataFrame) constructor is not accessible
     *   from Java in this GraphFrames build (the reported compile error
     *   "constructor GraphFrame(DataFrame, DataFrame) is undefined");
     *   the static factory GraphFrame.apply(...) is the supported entry point.
     * - The JavaSparkContext is stopped before exit so the app shuts down cleanly.
     */
    public static void main(String[] args) throws JsonParseException, JsonMappingException, IOException {

        SparkConf conf = new SparkConf().setAppName("test").setMaster("local");

        JavaSparkContext sc = new JavaSparkContext(conf);
        SQLContext sqlContext = new SQLContext(sc);

        // In-memory vertex rows (id, name) and a single edge row (src, dst, name).
        JavaRDD<Row> verRow = sc.parallelize(Arrays.asList(RowFactory.create(1, "A"), RowFactory.create(2, "B")));
        JavaRDD<Row> edgRow = sc.parallelize(Arrays.asList(RowFactory.create(1, 2, "Edge")));

        // Vertex schema: GraphFrame requires an "id" column.
        List<StructField> verFields = new ArrayList<StructField>();
        verFields.add(DataTypes.createStructField("id", DataTypes.IntegerType, true));
        verFields.add(DataTypes.createStructField("name", DataTypes.StringType, true));

        // Edge schema: GraphFrame requires "src" and "dst" columns.
        List<StructField> edgFields = new ArrayList<StructField>();
        edgFields.add(DataTypes.createStructField("src", DataTypes.IntegerType, true));
        edgFields.add(DataTypes.createStructField("dst", DataTypes.IntegerType, true));
        edgFields.add(DataTypes.createStructField("name", DataTypes.StringType, true));

        StructType verSchema = DataTypes.createStructType(verFields);
        StructType edgSchema = DataTypes.createStructType(edgFields);

        DataFrame verDF = sqlContext.createDataFrame(verRow, verSchema);
        DataFrame edgDF = sqlContext.createDataFrame(edgRow, edgSchema);

        // Static factory instead of the (Java-inaccessible) constructor.
        GraphFrame g = GraphFrame.apply(verDF, edgDF);
        g.vertices().show();
        g.edges().show();
        g.persist(StorageLevel.MEMORY_AND_DISK());

        sc.stop();
    }

}
import java.io.IOException;
导入java.util.ArrayList;
导入java.util.array;
导入java.util.List;
导入org.apache.spark.SparkConf;
导入org.apache.spark.api.java.JavaRDD;
导入org.apache.spark.api.java.JavaSparkContext;
导入org.apache.spark.sql.DataFrame;
导入org.apache.spark.sql.Row;
导入org.apache.spark.sql.RowFactory;
导入org.apache.spark.sql.SQLContext;
导入org.apache.spark.sql.types.DataTypes;
导入org.apache.spark.sql.types.StructField;
导入org.apache.spark.sql.types.StructType;
导入org.apache.spark.storage.StorageLevel;
导入org.graphframes.GraphFrame;
导入com.fasterxml.jackson.core.JsonParseException;
导入com.fasterxml.jackson.databind.JsonMappingException;
公共类SparkJavaGraphFrameOne{
公共静态void main(字符串[]args)抛出JsonParseException、JsonMappingException、IOException{
SparkConf conf=new SparkConf().setAppName(“测试”).setMaster(“本地”);
JavaSparkContext sc=新的JavaSparkContext(conf);
SQLContext SQLContext=neworg.apache.spark.sql.SQLContext(sc);
JavaRDD verRow=sc.parallelize(Arrays.asList(RowFactory.create(1,“A”)、RowFactory.create(2,“B”));
JavaRDD edgRow=sc.parallelize(Arrays.asList(RowFactory.create(1,2,“Edge”));
List verFields=new ArrayList();
add(DataTypes.createStructField(“id”,DataTypes.IntegerType,true));
add(DataTypes.createStructField(“name”,DataTypes.StringType,true));
List EdgFields=new ArrayList();
add(DataTypes.createStructField(“fromId”,DataTypes.IntegerType,true));
add(DataTypes.createStructField(“toId”,DataTypes.IntegerType,true));
add(DataTypes.createStructField(“name”,DataTypes.StringType,true));
StructType verSchema=DataTypes.createStructType(verFields);
StructType edgSchema=DataTypes.createStructType(EdgFields);
DataFrame verDF=sqlContext.createDataFrame(verRow,verSchema);
DataFrame edgDF=sqlContext.createDataFrame(edgRow,edgSchema);
GraphFrame g=新的GraphFrame(verDF、edgDF);
g、 顶点().show();
g、 边().show();
g、 持久化(StorageLevel.MEMORY_和_DISK());
}
}

我已经用java编写了使用Spark 2.0.0和GraphFrame 0.2.0的示例程序。该程序基于中给出的示例程序。希望这对您有所帮助

pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.abaghel.examples.spark</groupId>
<artifactId>spark-graphframe</artifactId>
<version>1.0.0-SNAPSHOT</version>

<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>2.0.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-graphx_2.11</artifactId>
        <version>2.0.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.11</artifactId>
        <version>2.0.0</version>
    </dependency>
    <dependency>
        <groupId>graphframes</groupId>
        <artifactId>graphframes</artifactId>
        <version>0.2.0-spark2.0-s_2.11</version>
    </dependency>
</dependencies>

<repositories>
    <!-- list of other repositories -->
    <repository>
        <id>SparkPackagesRepo</id>
        <!-- dl.bintray.com was shut down; spark-packages moved to repos.spark-packages.org -->
        <url>https://repos.spark-packages.org</url>
    </repository>
 </repositories>
 <build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.1</version>
            <configuration>
                <source>1.8</source>
                <target>1.8</target>
            </configuration>
        </plugin>
    </plugins>
  </build>
</project>
Relation.java

package com.abaghel.examples.spark.graphframe;

import java.util.ArrayList;
import java.util.List;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.graphframes.GraphFrame;
import org.graphframes.lib.PageRank;
/**
 * Sample application shows how to create a GraphFrame, query it, and run the PageRank algorithm.
 * 
 * @author abaghel
 *
 */
public class SparkGraphFrameSample {

    /**
     * Builds a small social graph as a GraphFrame, queries it, and runs the
     * PageRank algorithm.
     *
     * Fixes versus the original:
     * - The warehouse dir "/file:C:/temp" is a malformed URI; use the
     *   well-formed "file:///C:/temp".
     * - Double-brace initialization is replaced with plain lists: the
     *   double-brace idiom creates an anonymous inner subclass that holds a
     *   hidden reference to the enclosing scope.
     * - The "follow" edge count was computed but never used; it is now printed
     *   so the action is not dead code.
     */
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("SparkGraphFrameSample")
                .config("spark.sql.warehouse.dir", "file:///C:/temp")
                .master("local[2]")
                .getOrCreate();

        // Vertex DataFrame with unique ID column "id".
        List<User> uList = new ArrayList<User>();
        uList.add(new User("a", "Alice", 34));
        uList.add(new User("b", "Bob", 36));
        uList.add(new User("c", "Charlie", 30));
        Dataset<Row> verDF = spark.createDataFrame(uList, User.class);

        // Edge DataFrame with "src" and "dst" columns.
        List<Relation> rList = new ArrayList<Relation>();
        rList.add(new Relation("a", "b", "friend"));
        rList.add(new Relation("b", "c", "follow"));
        rList.add(new Relation("c", "b", "follow"));
        Dataset<Row> edgDF = spark.createDataFrame(rList, Relation.class);

        // Create a GraphFrame from the vertex and edge DataFrames.
        GraphFrame gFrame = new GraphFrame(verDF, edgDF);
        // Get in-degree of each vertex.
        gFrame.inDegrees().show();
        // Count the number of "follow" connections in the graph.
        long followCount = gFrame.edges().filter("relationship = 'follow'").count();
        System.out.println("follow edges: " + followCount);
        // Run PageRank (reset probability 0.01, 5 iterations) and show results.
        PageRank pRank = gFrame.pageRank().resetProbability(0.01).maxIter(5);
        pRank.run().vertices().select("id", "pagerank").show();

        // Stop the session before exiting.
        spark.stop();
    }

}
package com.abaghel.examples.spark.graphframe;
/**
 * User class
 * 
 * @author abaghel
 *
 */
/**
 * JavaBean representing a graph vertex: a unique id, a display name, and an age.
 * The no-arg constructor and getter/setter pairs follow the bean convention
 * (presumably required by Spark's bean-based DataFrame encoding — confirm
 * against the caller).
 */
public class User {

    private String id;
    private String name;
    private int age;

    /** Bean-convention no-arg constructor. */
    public User() {
    }

    /**
     * @param id   unique vertex identifier
     * @param name display name
     * @param age  age in years
     */
    public User(String id, String name, int age) {
        this.id = id;
        this.name = name;
        this.age = age;
    }

    public String getId() {
        return id;
    }

    public String getName() {
        return name;
    }

    public int getAge() {
        return age;
    }

    public void setId(String id) {
        this.id = id;
    }

    public void setName(String name) {
        this.name = name;
    }

    public void setAge(int age) {
        this.age = age;
    }
}
package com.abaghel.examples.spark.graphframe;
/**
 * Relation class
 * 
 * @author abaghel
 *
 */
/**
 * JavaBean representing a directed graph edge: a source vertex id ("src"),
 * a destination vertex id ("dst"), and a relationship label.
 * The no-arg constructor and getter/setter pairs follow the bean convention
 * (presumably required by Spark's bean-based DataFrame encoding — confirm
 * against the caller).
 */
public class Relation {

    private String src;
    private String dst;
    private String relationship;

    /** Bean-convention no-arg constructor. */
    public Relation() {
    }

    /**
     * @param src          source vertex id
     * @param dst          destination vertex id
     * @param relationship edge label, e.g. "friend" or "follow"
     */
    public Relation(String src, String dst, String relationship) {
        this.src = src;
        this.dst = dst;
        this.relationship = relationship;
    }

    public String getSrc() {
        return src;
    }

    public String getDst() {
        return dst;
    }

    public String getRelationship() {
        return relationship;
    }

    public void setSrc(String src) {
        this.src = src;
    }

    public void setDst(String dst) {
        this.dst = dst;
    }

    public void setRelationship(String relationship) {
        this.relationship = relationship;
    }
}
控制台输出

16/08/27 22:34:45 INFO DAGScheduler: Job 10 finished: show at    SparkGraphFrameSample.java:56, took 0.938910 s
16/08/27 22:34:45 INFO CodeGenerator: Code generated in 6.599005 ms
+---+-------------------+
| id|           pagerank|
+---+-------------------+
|  a|               0.01|
|  b|0.08763274109799998|
|  c|     0.077926810699|
+---+-------------------+

我不知道你是否已经解决了你的问题。我刚刚看到你的问题。如果你遇到的是“Exception in thread "main" java.lang.NoClassDefFoundError: com/typesafe/scalalogging/slf4j/LazyLogging”这个异常，您需要在pom.xml中加入以下jar依赖

         <dependency>
            <groupId>com.typesafe.scala-logging</groupId>
             <artifactId>scala-logging-slf4j_2.10</artifactId>
            <version>2.1.2</version>
        </dependency>

com.typesafe.scala-logging
scala-logging-slf4j_2.10
2.1.2

我遇到了相同的问题,通过添加此jar,我能够解决该问题。

我能够在0.5.0-spark2.1-s_2.11中复制该问题(连续运行),并在0.4.0-spark2.1-s_2.11中正常工作。要修复GraphFrame构造函数问题,请尝试:


GraphFrame gf=GraphFrame.apply(verDF,edgeDF);

非常感谢abaghel。今天结束前,我将尝试此方法并让您知道。再次非常感谢您,我希望它真的能帮助我更进一步。您的新GraphFrame(verDF,edgeDF)构造器的原始版本将通过使用我在本文中提供的文件来解决。如果这个java版本的示例真的帮助了你,我期待着一个接受答案的投票。abaghel-肯定会投票,但我在下载这个依赖项时面临一些问题(jar 0.2.0-spark2.0-s_2.11)无法下载。任何java版本问题我使用的是1.7。?我使用的是java 1.8。我下载jar文件时没有遇到任何问题,因为jar文件的依赖项和存储库url是在pom.xml中定义的。我使用的是Eclipse Mars,我使用下载jar文件的maven install命令进行构建。我如何从out&have下载该jar给定构建路径。现在,当运行应用程序时,我得到了这个运行时异常,我已经在google上搜索过,他们建议需要在SBT文件中进行一些更改,但我不确定这些文件位于何处。异常是线程“main”中的异常java.lang.NoClassDefFoundError:com/typesafe/scalaLogg/slf4j/Lazyloggi Treena,谢谢你的建议。但实际上,在用8更新java版本后,java版本的这个问题得到了解决。我希望你能认识我是谁?