Warning: file_get_contents(/data/phpspider/zhask/data//catemap/3/apache-spark/5.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Apache spark Spark SQL中出现错误,尽管它可以打印SQL并且可以在配置单元中运行_Apache Spark_Apache Spark Sql - Fatal编程技术网

Apache spark Spark SQL中出现错误,尽管它可以打印SQL并且可以在配置单元中运行

Apache spark Spark SQL中出现错误,尽管它可以打印SQL并且可以在配置单元中运行,apache-spark,apache-spark-sql,Apache Spark,Apache Spark Sql,我可以打印SQL,它可以在配置单元中运行,但Spark SQL中出现错误 Exception in thread "main" org.apache.spark.sql.catalyst.parser.ParseException: mismatched input 'from' expecting {<EOF>, 'WHERE', 'GROUP', 'ORDER', 'HAVING', 'LIMIT', 'LATERAL', 'WINDOW', 'UNION', 'E

我可以打印SQL,它可以在配置单元中运行,但Spark SQL中出现错误

  Exception in thread "main" org.apache.spark.sql.catalyst.parser.ParseException: 
    mismatched input 'from' expecting {<EOF>, 'WHERE', 'GROUP', 'ORDER', 'HAVING', 'LIMIT', 'LATERAL', 'WINDOW', 'UNION', 'EXCEPT', 'MINUS', 'INTERSECT', 'SORT', 'CLUSTER', 'DISTRIBUTE'}(line 3, pos 11)

== SQL ==

           INSERT OVERWRITE  table zytrain.archer_odm_cate_info partition(data_date='20190227')
select u.* from
-----------^^^
(SELECT
t.cat_level1_name,t.cat_level1_id,
       t.cat_level2_name,t.goods_type,
       COUNT(t.goods_id) goods_cnt,
       COUNT(CASE WHEN t.sales_volume > 0 THEN t.goods_id END) saled_goods_cnt,
       SUM(expo_pv) expo_pv,
       SUM(goods_click) goods_click,
       SUM(cart_click) cart_click,
       SUM(bill_cnt) bill_cnt,
       SUM(sales_volume) sales_volume,
       SUM(sales_amount) sales_amount,
       SUM(gross_profit) gross_profit
FROM (SELECT a.cat_level1_name,a.cat_level1_id,a.cat_level2_name,
             case when t1.goods_id is not null then 'IS_ODM' else 'BU_NEWIN' end goods_type,
             b.goods_id,
             SUM(b.expo_pv) expo_pv,
             SUM(b.goods_click) goods_click,
             SUM(b.cart_click) cart_click,
             SUM(b.cur_day_bill) bill_cnt,
             SUM(b.sales_volume) sales_volume,
             SUM(b.sales_amount) sales_amount,
             SUM(b.gross_profit) gross_profit
      FROM dim.dim_jc_goods a
        LEFT OUTER JOIN rpt.rpt_sum_goods_daily b
                     ON a.goods_id = b.goods_id
                    AND b.site_id = 600
                    AND b.data_date BETWEEN '20190212'
                    AND '20190227'
        LEFT OUTER JOIN
        (
        SELECT DISTINCT a3.goods_id
                         FROM JOLLY.WHO_product_pool a1,
                              jolly.who_esoloo_supplier a2,
                              dim.dim_jc_goods a3
                         WHERE a1.supp_code = a2.code
                         AND   a2.supplier_genre = 8
                         AND   from_unixtime(gmt_created,'yyyyMMdd') >= '20190212'
                         AND   from_unixtime(gmt_created,'yyyyMMdd') <= '20190227'
                         AND   substr(a3.first_on_sale_time,1,10) <= '2019-02-27'
                         AND   a1.goods_id > 0
                         AND   a1.goods_id = a3.goods_id
        ) t1 ON a.goods_id = t1.goods_id
      WHERE substr(a.first_on_sale_time,1,10) BETWEEN '2019-02-13' AND '2019-02-27'
      AND   substr(a.add_time,1,10) BETWEEN '2019-02-12' AND  '2019-02-27'
      AND   a.cat_level1_id IN (2,324,7574,7576)
      GROUP BY a.cat_level1_name,a.cat_level1_id,
               a.cat_level2_name,
               b.goods_id,case when t1.goods_id is not null then 'IS_ODM' else 'BU_NEWIN' end) t
GROUP BY t.cat_level1_name,t.cat_level1_id,
         t.cat_level2_name,t.goods_type order by t.cat_level1_id,t.goods_type
union all
SELECT
t.cat_level1_name,t.cat_level1_id,
       t.cat_level2_name,t.goods_type,
       COUNT(t.goods_id) goods_cnt,
       COUNT(CASE WHEN t.sales_volume > 0 THEN t.goods_id END) saled_goods_cnt,
       SUM(expo_pv) expo_pv,
       SUM(goods_click) goods_click,
       SUM(cart_click) cart_click,
       SUM(bill_cnt) bill_cnt,
       SUM(sales_volume) sales_volume,
       SUM(sales_amount) sales_amount,
       SUM(gross_profit) gross_profit
FROM (SELECT a.cat_level1_name,a.cat_level1_id,a.cat_level2_name,
             'BU_ALL' goods_type,
             b.goods_id,
             SUM(b.expo_pv) expo_pv,
             SUM(b.goods_click) goods_click,
             SUM(b.cart_click) cart_click,
             SUM(b.cur_day_bill) bill_cnt,
             SUM(b.sales_volume) sales_volume,
             SUM(b.sales_amount) sales_amount,
             SUM(b.gross_profit) gross_profit
      FROM dim.dim_jc_goods a
        LEFT OUTER JOIN rpt.rpt_sum_goods_daily b
                     ON a.goods_id = b.goods_id
                    AND b.site_id = 600
                    AND b.data_date BETWEEN '20190212'
                    AND '20190227'
        LEFT OUTER JOIN
        (
        SELECT DISTINCT a3.goods_id
                         FROM JOLLY.WHO_product_pool a1,
                              jolly.who_esoloo_supplier a2,
                              dim.dim_jc_goods a3
                         WHERE a1.supp_code = a2.code
                         AND   a2.supplier_genre = 8
                         AND   from_unixtime(gmt_created,'yyyyMMdd') >= '20190212'
                         AND   from_unixtime(gmt_created,'yyyyMMdd') <= '20190227'
                         AND   substr(a3.first_on_sale_time,1,10) <= '2019-02-27'
                         AND   a1.goods_id > 0
                         AND   a1.goods_id = a3.goods_id
        ) t1 ON a.goods_id = t1.goods_id
      WHERE
      t1.goods_id is null
      AND   a.cat_level1_id IN (2,324,7574,7576)
      GROUP BY a.cat_level1_name,a.cat_level1_id,
               a.cat_level2_name,
               b.goods_id) t
GROUP BY t.cat_level1_name,t.cat_level1_id,
         t.cat_level2_name,t.goods_type order by t.cat_level1_id,t.goods_type) u

我的spark版本是2.1.1。(回答)你需要这个分区吗?另外,动态分区列必须在SELECT语句的列中最后指定,并按照它们在partition子句中出现的顺序指定。
// Workaround for the Spark 2.1.x ParseException ("mismatched input 'from'"):
// the original single statement wrapped two UNION ALL'ed sub-selects (each
// carrying its own ORDER BY) in a derived table `u`, which Spark's parser
// rejected even though Hive accepts it. Here each half runs as its own query
// and the results are combined with the DataFrame union API instead.
// NOTE(review): `spark`, `v_date1` and `v_date2` are defined outside this
// snippet — presumably a SparkSession and the yyyyMMdd / yyyy-MM-dd date
// bounds; confirm against the enclosing job.

// Query 1: per-category metrics for new-in goods, split into IS_ODM vs
// BU_NEWIN depending on whether the goods appear in the ODM product pool.
val data1=spark.sql(
      s"""
         |SELECT
         |t.cat_level1_name,t.cat_level1_id,
         |       t.cat_level2_name,t.goods_type,
         |       COUNT(t.goods_id) goods_cnt,
         |       COUNT(CASE WHEN t.sales_volume > 0 THEN t.goods_id END) saled_goods_cnt,
         |       SUM(expo_pv) expo_pv,
         |       SUM(goods_click) goods_click,
         |       SUM(cart_click) cart_click,
         |       SUM(bill_cnt) bill_cnt,
         |       SUM(sales_volume) sales_volume,
         |       SUM(sales_amount) sales_amount,
         |       SUM(gross_profit) gross_profit
         |FROM (SELECT a.cat_level1_name,a.cat_level1_id,a.cat_level2_name,
         |             case when t1.goods_id is not null then 'IS_ODM' else 'BU_NEWIN' end goods_type,
         |             b.goods_id,
         |             SUM(b.expo_pv) expo_pv,
         |             SUM(b.goods_click) goods_click,
         |             SUM(b.cart_click) cart_click,
         |             SUM(b.cur_day_bill) bill_cnt,
         |             SUM(b.sales_volume) sales_volume,
         |             SUM(b.sales_amount) sales_amount,
         |             SUM(b.gross_profit) gross_profit
         |      FROM dim.dim_jc_goods a
         |        LEFT OUTER JOIN rpt.rpt_sum_goods_daily b
         |                     ON a.goods_id = b.goods_id
         |                    AND b.site_id = 600
         |                    AND b.data_date BETWEEN '20190212'
         |                    AND '$v_date1'
         |        LEFT OUTER JOIN
         |        (
         |        SELECT DISTINCT a3.goods_id
         |                         FROM JOLLY.WHO_product_pool a1,
         |                              jolly.who_esoloo_supplier a2,
         |                              dim.dim_jc_goods a3
         |                         WHERE a1.supp_code = a2.code
         |                         AND   a2.supplier_genre = 8
         |                         AND   from_unixtime(gmt_created,'yyyyMMdd') >= '20190212'
         |                         AND   from_unixtime(gmt_created,'yyyyMMdd') <= '$v_date1'
         |                         AND   substr(a3.first_on_sale_time,1,10) <= '$v_date2'
         |                         AND   a1.goods_id > 0
         |                         AND   a1.goods_id = a3.goods_id
         |        ) t1 ON a.goods_id = t1.goods_id
         |      WHERE substr(a.first_on_sale_time,1,10) BETWEEN '2019-02-13' AND '$v_date2'
         |      AND   substr(a.add_time,1,10) BETWEEN '2019-02-12' AND  '$v_date2'
         |      AND   a.cat_level1_id IN (2,324,7574,7576)
         |      GROUP BY a.cat_level1_name,a.cat_level1_id,
         |               a.cat_level2_name,
         |               b.goods_id,case when t1.goods_id is not null then 'IS_ODM' else 'BU_NEWIN' end) t
         |GROUP BY t.cat_level1_name,t.cat_level1_id,
         |         t.cat_level2_name,t.goods_type order by t.cat_level1_id,t.goods_type
       """.stripMargin).cache()
     // Query 2: same metric set for the BU_ALL bucket — goods NOT found in
     // the ODM pool (t1.goods_id is null), without the on-sale/add-time
     // date-window filters used above.
     val data_2=spark
      .sql(
        s"""
           |SELECT
           |t.cat_level1_name,t.cat_level1_id,
           |       t.cat_level2_name,t.goods_type,
           |       COUNT(t.goods_id) goods_cnt,
           |       COUNT(CASE WHEN t.sales_volume > 0 THEN t.goods_id END) saled_goods_cnt,
           |       SUM(expo_pv) expo_pv,
           |       SUM(goods_click) goods_click,
           |       SUM(cart_click) cart_click,
           |       SUM(bill_cnt) bill_cnt,
           |       SUM(sales_volume) sales_volume,
           |       SUM(sales_amount) sales_amount,
           |       SUM(gross_profit) gross_profit
           |FROM (SELECT a.cat_level1_name,a.cat_level1_id,a.cat_level2_name,
           |             'BU_ALL' goods_type,
           |             b.goods_id,
           |             SUM(b.expo_pv) expo_pv,
           |             SUM(b.goods_click) goods_click,
           |             SUM(b.cart_click) cart_click,
           |             SUM(b.cur_day_bill) bill_cnt,
           |             SUM(b.sales_volume) sales_volume,
           |             SUM(b.sales_amount) sales_amount,
           |             SUM(b.gross_profit) gross_profit
           |      FROM dim.dim_jc_goods a
           |        LEFT OUTER JOIN rpt.rpt_sum_goods_daily b
           |                     ON a.goods_id = b.goods_id
           |                    AND b.site_id = 600
           |                    AND b.data_date BETWEEN '20190212'
           |                    AND '$v_date1'
           |        LEFT OUTER JOIN
           |        (
           |        SELECT DISTINCT a3.goods_id
           |                         FROM JOLLY.WHO_product_pool a1,
           |                              jolly.who_esoloo_supplier a2,
           |                              dim.dim_jc_goods a3
           |                         WHERE a1.supp_code = a2.code
           |                         AND   a2.supplier_genre = 8
           |                         AND   from_unixtime(gmt_created,'yyyyMMdd') >= '20190212'
           |                         AND   from_unixtime(gmt_created,'yyyyMMdd') <= '$v_date1'
           |                         AND   substr(a3.first_on_sale_time,1,10) <= '$v_date2'
           |                         AND   a1.goods_id > 0
           |                         AND   a1.goods_id = a3.goods_id
           |        ) t1 ON a.goods_id = t1.goods_id
           |      WHERE
           |      t1.goods_id is null
           |      AND   a.cat_level1_id IN (2,324,7574,7576)
           |      GROUP BY a.cat_level1_name,a.cat_level1_id,
           |               a.cat_level2_name,
           |               b.goods_id) t
           |GROUP BY t.cat_level1_name,t.cat_level1_id,
           |         t.cat_level2_name,t.goods_type order by t.cat_level1_id,t.goods_type
         """.stripMargin).cache()
    // Combine the two halves. NOTE(review): union() does not preserve the
    // per-query ORDER BY, so the row order of data_3 is undefined; the sort
    // inside each query above is effectively wasted work.
    val data_3=data1.union(data_2)
    data_3.createOrReplaceTempView("data_3")
    // Write the combined result into the static partition
    // data_date='$v_date1' of the target table.
    spark.sql(
      s"""
         | INSERT OVERWRITE  table zytrain.archer_odm_cate_info partition(data_date='$v_date1')
         | select * from data_3
       """.stripMargin)
    println("end")