Sorting: where can I find the Apache Spark TeraSort source code?
Tags: sorting, hadoop, apache-spark, benchmarking. I tried the code below from a Stack Overflow post, but it does not work:
import org.apache.spark.rdd._
import org.apache.spark._
import org.apache.spark.SparkContext._
object ScalaTeraSort {
/** Entry point for a simple TeraSort job.
 *
 *  Reads text records from the input path (args(0)), treats the first 10
 *  characters of each line as the sort key and the remainder as the payload,
 *  sorts globally by key, and writes the reassembled lines to the output
 *  path (args(1)). Exits with status 1 if both paths are not supplied.
 */
def main(args: Array[String]): Unit = {
  // Both the input and output HDFS paths are required.
  if (args.length < 2) {
    // Bug fix: the original used s"Usage: $ScalaTeraSort ...", which
    // interpolates the object's runtime toString (e.g. ScalaTeraSort$@1f2a...)
    // instead of the program name. Print the literal name instead.
    System.err.println("Usage: ScalaTeraSort <INPUT_HDFS> <OUTPUT_HDFS>")
    System.exit(1)
  }
  val sparkConf = new SparkConf().setAppName("ScalaTeraSort")
  val sc = new SparkContext(sparkConf)
  try {
    val file = sc.textFile(args(0))
    // TeraSort record layout: 10-character key prefix, rest is the value.
    val data = file
      .map(line => (line.substring(0, 10), line.substring(10)))
      .sortByKey()
      .map { case (k, v) => k + v } // re-join key and value for output
    data.saveAsTextFile(args(1))
  } finally {
    // Ensure the SparkContext is released even if the job fails.
    sc.stop()
  }
}