Apache spark GraphX的Java示例
在哪里可以找到GraphX的Scala示例在Java中的等价写法?(标签:apache-spark)例如,以下代码如何翻译成Java:
// Vertex RDD: every entry pairs a VertexId with a (String, String) attribute
val users: RDD[(VertexId, (String, String))] =
  sc.parallelize(
    Seq(
      (3L, ("rxin", "student")),
      (7L, ("jgonzal", "postdoc")),
      (5L, ("franklin", "prof")),
      (2L, ("istoica", "prof"))
    )
  )

// Edge RDD: each Edge links a source VertexId to a destination VertexId and carries a String label
val relationships: RDD[Edge[String]] =
  sc.parallelize(
    Seq(
      Edge(3L, 7L, "collab"),
      Edge(5L, 3L, "advisor"),
      Edge(2L, 5L, "colleague"),
      Edge(5L, 7L, "pi")
    )
  )

// Attribute given to any vertex that an edge refers to but that is absent from `users`
val defaultUser = ("John Doe", "Missing")

// Assemble the initial graph from the vertex RDD, the edge RDD and the fallback attribute
val graph: Graph[(String, String), String] =
  Graph(vertices = users, edges = relationships, defaultVertexAttr = defaultUser)
在 dev@spark.apache.org 邮件列表上得到了回复:目前还没有Java API。
根据:“目标版本/s:1.3.0”仍然没有适用于Java的API: “GraphX仅可从Scala API获得。”
来源:GraphX仅在Scala中可用。您可以看看GraphFrames,它面向的是DataFrame(支持Java、Python、Scala),而不是低级的RDD。它还有更多优势,因为它可以利用Catalyst查询优化器以及Project Tungsten优化。
您可以将GraphX转换为GraphFrames,反之亦然。GraphFrames的顶点ID可以是任意类型,而GraphX中只能是Long。GraphFrames的返回类型是DataFrame或GraphFrame,而GraphX中只有Graph[VD,ED]和RDD。虽然没有官方或非官方文档,但GraphX库是可以在Java中使用的。请自行检查并确认。您可以这样使用它:
/**
 * Builds a small GraphX graph from Java, runs Pregel over it and prints the
 * string payload ({@code getS()}) of every resulting vertex.
 *
 * GraphX exposes only a Scala API, so the Scala implicit parameters
 * (ClassTags) have to be passed explicitly as trailing arguments and Java
 * collections have to be converted to Scala sequences by hand.
 *
 * The vertex attribute type and the Pregel message type are both {@code Data};
 * every type argument and ClassTag below has to agree on that.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    SparkSession spark = SparkSession
            .builder()
            .appName("javaGraphx")
            .getOrCreate();
    try {
        // Vertices: (vertex id, attribute). The id is typed as Object because
        // Scala's VertexId (a Long alias) erases to Object in Java generics.
        List<Tuple2<Object, Data>> vectorList = Arrays.asList(
                new Tuple2<Object, Data>(1L, new Data("A", 1)),
                new Tuple2<Object, Data>(2L, new Data("B", 1)),
                new Tuple2<Object, Data>(3L, new Data("C", 1))
        );
        JavaRDD<Tuple2<Object, Data>> users = spark.sparkContext().parallelize(
                JavaConverters.asScalaIteratorConverter(vectorList.iterator()).asScala()
                        .toSeq(), 1, scala.reflect.ClassTag$.MODULE$.apply(Tuple2.class)
        ).toJavaRDD();

        // Edges: 1 -> 2 and 2 -> 3, each carrying the Integer attribute 1.
        List<Edge<Integer>> edgeList = Arrays.asList(
                new Edge<Integer>(1L, 2L, 1),
                new Edge<Integer>(2L, 3L, 1)
        );
        JavaRDD<Edge<Integer>> followers = spark.sparkContext().parallelize(
                JavaConverters.asScalaIteratorConverter(edgeList.iterator()).asScala()
                        .toSeq(), 1, scala.reflect.ClassTag$.MODULE$.apply(Edge.class)
        ).toJavaRDD();

        // construct graph
        Graph<Data, Integer> followerGraph =
                GraphImpl
                        .apply(
                                users.rdd(),
                                followers.rdd(),
                                // default attribute for vertices referenced only by edges
                                new Data("OVER", 0),
                                StorageLevel.MEMORY_AND_DISK(),
                                StorageLevel.MEMORY_AND_DISK(),
                                scala.reflect.ClassTag$.MODULE$.apply(Data.class),
                                scala.reflect.ClassTag$.MODULE$.apply(Integer.class)
                        );

        //If you wanna use pregel, you can use it like this.
        Graph<Data, Integer> dd = Pregel.apply(followerGraph,
                new Data("", 0),      // initial message delivered to every vertex
                5,                    // maximum number of iterations
                EdgeDirection.Out(),  // active direction
                new Vprog(),
                new SendMsg(),
                new MergemMsg(),
                // ClassTags for the vertex, edge and message types, in that order
                scala.reflect.ClassTag$.MODULE$.apply(Data.class),
                scala.reflect.ClassTag$.MODULE$.apply(Integer.class),
                scala.reflect.ClassTag$.MODULE$.apply(Data.class)
        );

        // Drop the vertex ids and print each vertex's string payload.
        JavaRDD<Data> r = dd
                .vertices()
                .toJavaRDD()
                .map(t -> t._2());
        r.foreach(
                o -> {
                    System.out.println(o.getS());
                }
        );
    } finally {
        // Release the Spark resources even if graph construction or Pregel fails.
        spark.stop();
    }
}
//If you wanna use pregel, you can use it like this.
/**
 * Vertex program passed to Pregel.apply: folds an incoming message into a
 * vertex's current value.
 *
 * The new value prepends the message's string to the vertex's string and adds
 * the two integer parts. The incoming vertex attribute is left untouched; a
 * fresh Data is returned instead of mutating and handing back {@code self}.
 */
static class Vprog extends AbstractFunction3< Object, Data, Data, Data> implements Serializable {
    /**
     * @param l      id of the vertex being updated
     * @param self   current attribute of the vertex (not modified)
     * @param sumOdd merged message received by the vertex
     * @return a new Data holding the combined string and integer
     */
    @Override
    public Data apply(Object l, Data self, Data sumOdd) {
        // State before the update.
        System.out.println(l + "---" + self.getS()+self.getI()+" ---> "+sumOdd.getS()+sumOdd.getI());
        // Build the result directly rather than writing into `self`, so the
        // attribute object owned by the previous graph is never altered.
        Data updated = new Data(sumOdd.getS() + self.getS(), self.getI() + sumOdd.getI());
        // State after the update.
        System.out.println(l + "---" + updated.getS()+updated.getI()+" ---> "+sumOdd.getS()+sumOdd.getI());
        return updated;
    }
}
/**
 * Message-sending function passed to Pregel.apply: decides, per edge triplet,
 * which messages go out.
 *
 * While the source vertex's integer is at most 8, a copy of the source
 * attribute is sent to the destination vertex; otherwise nothing is sent.
 */
static class SendMsg extends AbstractFunction1<EdgeTriplet<Data,Integer>, scala.collection.Iterator<Tuple2<Object, Data>>> implements Serializable {
    /**
     * @param t edge triplet (source attribute, destination attribute, edge attribute)
     * @return a Scala iterator with zero or one (destination id, message) pairs
     */
    @Override
    public scala.collection.Iterator<Tuple2<Object, Data>> apply(EdgeTriplet<Data,Integer> t) {
        Data src = t.srcAttr();
        Data dst = t.dstAttr();
        System.out.println(t.srcId()+" ---> "+t.dstId()+" with: "+src.getS()+src.getI()+" ---> "+dst.getS()+dst.getI());

        // Typed list (no raw ArrayList); stays empty when the threshold is exceeded.
        List<Tuple2<Object, Data>> outgoing = new ArrayList<>();
        if (src.getI() <= 8) {
            // Send a copy so the receiver never shares the source's attribute object.
            outgoing.add(new Tuple2<>( t.dstId(), new Data(src.getS(), src.getI())));
        }
        return JavaConverters.asScalaIteratorConverter(outgoing.iterator()).asScala();
    }
}
/**
 * Message combiner passed to Pregel.apply: merges two messages bound for the
 * same vertex into one.
 */
static class MergemMsg extends AbstractFunction2< Data, Data, Data> implements Serializable {
    /**
     * @param a first message
     * @param b second message
     * @return a new Data whose string is a's string followed by b's, and whose
     *         integer is the sum of both integers
     */
    @Override
    public Data apply(Data a, Data b) {
        // The leading "" forces string concatenation of the two payloads.
        final String joined = "" + a.getS() + b.getS();
        return new Data(joined, a.getI() + b.getI());
    }
}
public static void main(String[] args) {
SparkSession spark = SparkSession
.builder()
.appName("javaGraphx")
.getOrCreate();
List<Tuple2<Object, Data>> vectorList = Arrays.asList(
new Tuple2[]{
new Tuple2(1L, new Data("A", 1)),
new Tuple2(2L, new Data("B", 1)),
new Tuple2(3L, new Data("C", 1))}
);
JavaRDD<Tuple2<Object, Data>> users = spark.sparkContext().parallelize(
JavaConverters.asScalaIteratorConverter(vectorList.iterator()).asScala()
.toSeq(), 1, scala.reflect.ClassTag$.MODULE$.apply(Tuple2.class)
).toJavaRDD();
List<Edge<Integer>> edgeList = Arrays.asList(
new Edge[]{
new Edge(1L, 2L, 1),
new Edge(2L, 3L, 1)});
JavaRDD<Edge<Integer>> followers = spark.sparkContext().parallelize(
JavaConverters.asScalaIteratorConverter(edgeList.iterator()).asScala()
.toSeq(), 1, scala.reflect.ClassTag$.MODULE$.apply(Edge.class)
).toJavaRDD();
// construct graph
Graph<OddRange, Integer> followerGraph =
GraphImpl
.apply(
users.rdd(),
followers.rdd(),
new Data("OVER", 0),
StorageLevel.MEMORY_AND_DISK(),
StorageLevel.MEMORY_AND_DISK(),
scala.reflect.ClassTag$.MODULE$.apply(Data.class),
scala.reflect.ClassTag$.MODULE$.apply(Integer.class)
);
//If you wanna use pregel, you can use it like this.
Graph<OddRange, Integer> dd = Pregel.apply(followerGraph,
new Data("", 0),
5,
EdgeDirection.Out(),
new Vprog(),
new SendMsg(),
new MergemMsg(),
scala.reflect.ClassTag$.MODULE$.apply(OddRange.class),
scala.reflect.ClassTag$.MODULE$.apply(Integer.class),
scala.reflect.ClassTag$.MODULE$.apply(OddRange.class)
);
RDD<OddRange> r = dd
.vertices()
.toJavaRDD()
.map(t->t._2)
.rdd();
r.toJavaRDD().foreach(
o->{
System.out.println(o.getS());
}
);
}
//If you wanna use pregel, you can use it like this.
static class Vprog extends AbstractFunction3
在 dev@spark.apache.org 邮件列表上收到了回复:目前还没有Java API。请用它回答您自己的问题,并提供一个到讨论页面的链接,这样我们也可以保持关注……好的——已标记为"已解决/变通方案"。