Scala:应为Try[Stream[（String）]，但为Try[String]_Scala_Apache Spark_Hdfs

Scala:应为Try[Stream[（String）]，但为Try[String]

scala apache-spark

Scala:应为Try[Stream[（String）]，但为Try[String],scala,apache-spark,hdfs,Scala,Apache Spark,Hdfs,我试图读取一个文本文件来比较两个文件。我已经编写了读取第一个文件的代码，我希望readFileStream函数为我提供字符串集合，但我只得到字符串。你能看出我哪里做错了吗 import java.io.{BufferedReader, FileInputStream, InputStreamReader} import java.net.URI import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FS

我试图读取一个文本文件来比较两个文件。我已经编写了读取第一个文件的代码，我希望readFileStream函数为我提供字符串集合，但我只得到字符串。你能看出我哪里做错了吗

import java.io.{BufferedReader, FileInputStream, InputStreamReader}
import java.net.URI
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FSDataInputStream, FileSystem, Path}
import scala.util.{Failure, Success, Try}

object TestCompareHDFSFiles {
  def main(args: Array[String]): Unit = {
    val hdfs = FileSystem.get(new Configuration())
    val path1 = new Path(args(0))
    val path2 = new Path(args(1))
    readHDFSFile(hdfs, path1, path2)
  }

//  Accept a parameter which implements a close method
    def using[A <: { def close(): Unit }, B](resource: A)(f: A => B): B =
    try {
      f(resource)
    } finally {
      resource.close()
    }

  def readHDFSFile(hdfs: FileSystem, path1: Path, path2: Path): Option[Stream[(String,String)]] = {
        Try(using(new BufferedReader(new InputStreamReader(hdfs.open(path1))))(readFileStream))
        } match {
            case Success(result) => {

            }
            case Failure(ex) => {
              println(s"Could not read file $path1, detail ${ex.getClass.getName}:${ex.getMessage}")
              None
            }
      }

  def readFileStream(br: BufferedReader)= {
      for {
          line <- Try(br.readLine())
          if (line != null )
      } yield line
  }

}

import java.io.{BufferedReader，FileInputStream，InputStreamReader}
导入java.net.URI
导入org.apache.hadoop.conf.Configuration
导入org.apache.hadoop.fs.{FSDataInputStream，文件系统，路径}
导入scala.util.{失败，成功，重试}
对象TestCompareHDFSFiles{
def main（参数：数组[字符串]）：单位={
val hdfs=FileSystem.get（新配置（））
val path1=新路径（args（0））
val path2=新路径（args（1））
readHDFSFile（hdfs，路径1，路径2）
}
//接受实现close方法的参数
使用[A B]的def:B=
试一试{
f（资源）
}最后{
resource.close（）
}
def readHDFSFile（hdfs:FileSystem，path1:Path，path2:Path）：选项[Stream[（String，String）]={
尝试（使用新的BufferedReader（新的InputStreamReader（hdfs.open（path1）））（readFileStream））
}匹配{
案例成功（结果）=>{
}
案例失败（ex）=>{
println（s“无法读取文件$path1，详细信息${ex.getClass.getName}:${ex.getMessage}”）
没有一个
}
}
def readFileStream（br:BufferedReader）={
为了{
在readFileStream
中，line
将永远不会是null
，因为它被包装在Try
中。谢谢。如果我没有使用readFileStream，并且想在Success（result）块内部迭代，不使用foreach而是使用for逐行获取，我使用for循环，但它似乎不起作用