Scala: how to use a class like a variable


Is it possible to reference a different class on each iteration?

I have a large number of Hadoop Hive tables that I will be processing with Spark. Each table has an auto-generated class, and I would like to iterate over the tables rather than stick with the tedious, zero-reuse copy/paste/hand-edit-the-individual-table-class-names approach I started with:

import scala.collection.mutable.ArrayBuffer

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.parquet.avro.AvroReadSupport
import org.apache.parquet.hadoop.ParquetInputFormat

import myJavaProject.myTable0Class
import myJavaProject.myTable1Class

/* conf (a Hadoop Job) and sc (the SparkContext) are assumed to be defined elsewhere;
   the parquet imports assume the org.apache.parquet packages -- older releases used parquet.* */
object rawMaxValueSniffer extends Logging {
    /* tedious sequential:  it works, and sometimes a programmer's gotta do... */
    def tedious(args: Array[String]): Unit = {
        val tablePaths = List("path0_string_here","path1_string")
        var maxIds = ArrayBuffer[Long]()

        FileInputFormat.setInputPaths(conf, tablePaths(0))
        AvroReadSupport.setAvroReadSchema(conf.getConfiguration, myTable0Class.getClassSchema)
        ParquetInputFormat.setReadSupportClass(conf, classOf[AvroReadSupport[myTable0Class]])
        val records0 = sc.newAPIHadoopRDD(conf.getConfiguration,
            classOf[ParquetInputFormat[myTable0Class]],
            classOf[Void],
            classOf[myTable0Class]).map(x => x._2)
        maxIds += records0.map(_.getId).collect().max

        FileInputFormat.setInputPaths(conf, tablePaths(1))
        AvroReadSupport.setAvroReadSchema(conf.getConfiguration, myTable1Class.getClassSchema)
        ParquetInputFormat.setReadSupportClass(conf, classOf[AvroReadSupport[myTable1Class]])
        val records1 = sc.newAPIHadoopRDD(conf.getConfiguration,
            classOf[ParquetInputFormat[myTable1Class]],
            classOf[Void],
            classOf[myTable1Class]).map(x => x._2)
        maxIds += records1.map(_.getId).collect().max
    }

    /* class as variable, used in a loop.      I have seen the mountain... */
    def hopedFor(args: Array[String]): Unit = { 
        val tablePaths = List("path0_string_here","path1_string")
        var maxIds = ArrayBuffer[Long]()

        val tableClasses = List(classOf[myTable0Class],classOf[myTable1Class]) /* error free, but does not get me where I'm trying to go */
        var counter = 0
        tableClasses.foreach { tc =>
            FileInputFormat.setInputPaths(conf, tablePaths(counter))
            AvroReadSupport.setAvroReadSchema(conf.getConfiguration, tc.getClassSchema)  /* does not compile: Class[_] has no getClassSchema */
            ParquetInputFormat.setReadSupportClass(conf, classOf[AvroReadSupport[tc]])   /* does not compile: tc is a value, not a type */
            val records = sc.newAPIHadoopRDD(conf.getConfiguration,
                classOf[ParquetInputFormat[tc]],
                classOf[Void],
                classOf[tc]).map(x => x._2)
            maxIds += records.map(_.getId).collect().max     /* all the myTableXXX classes have getId() */
            counter += 1
        }
    }
}       

/* the classes being referenced... */
@org.apache.avro.specific.AvroGenerated
public class myTable0Class extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"rsivr_surveyquestiontypes\",\"namespace\":\"myJavaProject\",\"fields\":[{\"name\":\"id\",\"type\":\"int\"},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"scale_range\",\"type\":\"int\"}]}");
  public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; }
  @Deprecated public int id;

  yada.yada.yada0
}

@org.apache.avro.specific.AvroGenerated
public class myTable1Class extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"rsivr_surveyresultdetails\",\"namespace\":\"myJavaProject\",\"fields\":[{\"name\":\"id\",\"type\":\"int\"},{\"name\":\"survey_dts\",\"type\":\"string\"},{\"name\":\"survey_id\",\"type\":\"int\"},{\"name\":\"question\",\"type\":\"int\"},{\"name\":\"caller_id\",\"type\":\"string\"},{\"name\":\"rec_msg\",\"type\":\"string\"},{\"name\":\"note\",\"type\":\"string\"},{\"name\":\"lang\",\"type\":\"string\"},{\"name\":\"result\",\"type\":\"string\"}]}");
  public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; }
  @Deprecated public int id;

    yada.yada.yada1
}

Perhaps something like this:

def doStuff[T <: SpecificRecordBase : ClassTag](index: Int, schema: () => Schema, clazz: Class[T]) = {
    FileInputFormat.setInputPaths(conf, tablePaths(index))
    AvroReadSupport.setAvroReadSchema(conf.getConfiguration, schema())
    ParquetInputFormat.setReadSupportClass(conf, classOf[AvroReadSupport[T]])
    val records = sc.newAPIHadoopRDD(conf.getConfiguration,
        classOf[ParquetInputFormat[T]],
        classOf[Void],
        clazz).map(x => x._2)
    maxIds += records.map(_.getId).collect().max   /* getId isn't on SpecificRecordBase; see the comments below for the reflective fix */
}

Seq(
  (classOf[myTable0Class], myTable0Class.getClassSchema _),
  (classOf[myTable1Class], myTable1Class.getClassSchema _)
).zipWithIndex
.foreach { case ((clazz, schema), index) => doStuff(index, schema, clazz) }
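
The key point is that the hoped-for classOf[tc] can never work: classOf needs a type known at compile time, while tc is just a runtime value. Passing the Class[T] object as an ordinary parameter (plus a ClassTag so Spark can construct the RDD) moves that information into a value, which is what lets the per-table boilerplate collapse into a single parameterized method.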

You could use reflection to call getClassSchema (clazz.getMethod("getClassSchema").invoke(null).asInstanceOf[Schema]); then you wouldn't need to pass the schema as a parameter at all, clazz alone would be enough. But that's a bit of a cheat... I prefer the explicit approach.
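
For illustration, a minimal sketch of that reflective variant (schemaOf is a hypothetical helper name, not something from the answer above):

import org.apache.avro.Schema

/* Invokes the static getClassSchema() that Avro generates on every
   @AvroGenerated class; the receiver is null because the method is static. */
def schemaOf(clazz: Class[_]): Schema =
    clazz.getMethod("getClassSchema").invoke(null).asInstanceOf[Schema]

With that helper, the Seq would only need to carry the classes, and doStuff could call schemaOf(clazz) instead of taking a schema parameter.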

The map at the end of records gives two errors: "No ClassTag available for T" and "not enough arguments for method map: (implicit evidence$3: scala.reflect.ClassTag[T]) org.apache.spark.rdd.RDD[T]. Unspecified value parameter evidence$3."

OK, that should be fixed now (I added : ClassTag to the type parameter declaration); you will also need import scala.reflect.ClassTag.

To get SpecificRecordBase, Schema, and ClassTag working I needed three imports: import org.apache.avro.specific.SpecificRecordBase, import org.apache.avro.Schema, and import scala.reflect.ClassTag. One last change was needed: maxIds += records.map(z => clazz.getMethod("getId").invoke(z).asInstanceOf[Int]).collect().max
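
Pulling the comment-thread fixes together, a minimal sketch of the finished helper might look like this (conf, sc, tablePaths, and maxIds are assumed to be in scope exactly as in the question):

import org.apache.avro.Schema
import org.apache.avro.specific.SpecificRecordBase
import scala.reflect.ClassTag

def doStuff[T <: SpecificRecordBase : ClassTag](index: Int, schema: () => Schema, clazz: Class[T]): Unit = {
    FileInputFormat.setInputPaths(conf, tablePaths(index))
    AvroReadSupport.setAvroReadSchema(conf.getConfiguration, schema())
    ParquetInputFormat.setReadSupportClass(conf, classOf[AvroReadSupport[T]])
    val records = sc.newAPIHadoopRDD(conf.getConfiguration,
        classOf[ParquetInputFormat[T]],
        classOf[Void],
        clazz).map(_._2)
    /* T is only statically known to be a SpecificRecordBase, so getId is invoked reflectively */
    maxIds += records.map(z => clazz.getMethod("getId").invoke(z).asInstanceOf[Int]).collect().max
}

Seq(
    (classOf[myTable0Class], myTable0Class.getClassSchema _),
    (classOf[myTable1Class], myTable1Class.getClassSchema _)
).zipWithIndex
 .foreach { case ((clazz, schema), index) => doStuff(index, schema, clazz) }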