Trying to run Spark SQL over Spark Streaming


I'm trying to run SQL queries over streaming data in Spark. It looks straightforward, but when I try it I get the error Table Not Found: tablename. The table I registered cannot be found.

Using Spark SQL with batch data works fine, so I think it has something to do with how I call streamingcontext.start(). Any idea what the problem is? Here is the code:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.StreamingContext._
import org.apache.spark.sql.SQLContext

// Case class for the schema (not shown in the original post)
case class Persons(name: String, age: Int)

object Streaming {

  def main(args: Array[String]) {

    val sparkConf = new SparkConf().setMaster("local").setAppName("HdfsWordCount")
    val sc = new SparkContext(sparkConf)
    // Create the streaming context with a 2-second batch interval
    val ssc = new StreamingContext(sc, Seconds(2))

    val sqc = new SQLContext(sc)
    import sqc.createSchemaRDD

    // Create the FileInputDStream on the directory and register
    // each batch's parsed records as a table
    val lines = ssc.textFileStream("C:/Users/pravesh.jain/Desktop/people.txt")
    lines.foreachRDD(rdd => rdd.map(_.split(",")).map(p => Persons(p(0), p(1).trim.toInt)).registerAsTable("data"))
    // lines.foreachRDD(rdd => rdd.foreach(println))

    // The query is issued here, outside foreachRDD
    val teenagers = sqc.sql("SELECT name FROM data WHERE age >= 13 AND age <= 19")
    ssc.start()
    ssc.awaitTermination()
  }
}
I figured out the problem. You have to query the data within the foreachRDD function; otherwise the table is not recognized. Something like this works:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.StreamingContext._
import org.apache.spark.sql.SQLContext

// Same case class as above
case class Persons(name: String, age: Int)

object Mlist {

  def main(args: Array[String]) {

    val sparkConf = new SparkConf().setMaster("local").setAppName("HdfsWordCount")
    val sc = new SparkContext(sparkConf)
    // Create the streaming context with a 2-second batch interval
    val ssc = new StreamingContext(sc, Seconds(2))

    val lines = ssc.textFileStream("C:/Users/pravesh.jain/Desktop/people.txt")
    lines.foreachRDD(rdd => rdd.foreach(println))

    val sqc = new SQLContext(sc)
    import sqc.createSchemaRDD

    lines.foreachRDD(rdd => {
      // Register and query within the same batch, so the table
      // exists by the time the SQL statement is planned
      rdd.map(_.split(",")).map(p => Persons(p(0), p(1).trim.toInt)).registerAsTable("data")
      val teenagers = sqc.sql("SELECT name FROM data WHERE age >= 13 AND age <= 19")
      teenagers.foreach(println)
    })

    ssc.start()
    ssc.awaitTermination()
  }
}
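
The reason this works is that the registration and the query now run together on the driver for every micro-batch, instead of the query running once before the stream has produced any data. For reference, on Spark 1.3+, where SchemaRDD was replaced by DataFrame and registerAsTable by registerTempTable, the same per-batch pattern would look roughly like this (a sketch, untested; the StreamingSql object name and the directory path are placeholders):

// Sketch: Spark 1.3+ DataFrame variant of the answer above (untested assumption)
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.sql.SQLContext

case class Persons(name: String, age: Int)

object StreamingSql {

  def main(args: Array[String]) {

    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("StreamingSql")
    val sc = new SparkContext(sparkConf)
    val ssc = new StreamingContext(sc, Seconds(2))

    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._ // provides rdd.toDF()

    // textFileStream watches a directory for newly created files
    val lines = ssc.textFileStream("C:/Users/pravesh.jain/Desktop/people")
    lines.foreachRDD { rdd =>
      // Register and query inside the same batch, as in the answer above
      rdd.map(_.split(",")).map(p => Persons(p(0), p(1).trim.toInt))
        .toDF().registerTempTable("data")
      val teenagers = sqlContext.sql("SELECT name FROM data WHERE age >= 13 AND age <= 19")
      teenagers.collect().foreach(println)
    }

    ssc.start()
    ssc.awaitTermination()
  }
}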
Nice find! I wonder whether there is something similar for Structured Streaming as well... I've always had a hard time getting SQL to process it in a consistent/understandable way.
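
Regarding Structured Streaming: the closest analogue I know of is foreachBatch (available from Spark 2.4), which hands you each micro-batch as a plain, bounded DataFrame that you can register as a temp view and query with ordinary SQL. A minimal sketch, assuming the same comma-separated name,age input files; the object name and path are placeholders:

// Sketch (assumption): Structured Streaming analogue using foreachBatch (Spark 2.4+)
import org.apache.spark.sql.{DataFrame, SparkSession}

object StructuredStreamingSql {

  def main(args: Array[String]): Unit = {

    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("StructuredStreamingSql")
      .getOrCreate()
    import spark.implicits._

    // Each new file dropped into the directory becomes part of the stream
    val lines = spark.readStream.textFile("C:/Users/pravesh.jain/Desktop/people")
    val people = lines
      .map { line =>
        val p = line.split(",")
        (p(0), p(1).trim.toInt)
      }
      .toDF("name", "age")

    // foreachBatch exposes every micro-batch as a static DataFrame,
    // which can be registered as a temp view and queried with SQL
    val handler = (batch: DataFrame, batchId: Long) => {
      batch.createOrReplaceTempView("data")
      batch.sparkSession
        .sql("SELECT name FROM data WHERE age >= 13 AND age <= 19")
        .show()
    }

    val query = people.writeStream.foreachBatch(handler).start()
    query.awaitTermination()
  }
}

Because each batch inside foreachBatch is an ordinary bounded DataFrame, this sidesteps the restrictions that apply when you run SQL against a streaming view directly, which may be why it feels more predictable.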