使用 Scala Actor 代替 Java Future
问题:我需要编写一个应用程序来处理几百个文件,每个文件都需要几百兆字节和几秒钟的时间。我使用Scala参与者代替Java未来,scala,actor,Scala,Actor,问题:我需要编写一个应用程序来处理几百个文件,每个文件都需要几百兆字节和几秒钟的时间。我使用Future[Report]对象编写了它,这些对象是使用Executors.newFixedThreadPool()创建的,但由于ExecutorService.invokeAll()返回的列表[Future[Report]对象保留了每个进程使用的中间内存,因此内存不足。我通过计算报告值(每个报告只有几百行)后从处理器中的本地方法返回报告对象来解决问题,而不是在调用方法(从接口可调用)中进行计算 我想尝试
我使用 `Future[Report]` 对象编写了这个程序,这些对象通过 `Executors.newFixedThreadPool()` 创建;但由于 `ExecutorService.invokeAll()` 返回的 `List[Future[Report]]` 一直持有每个任务使用的中间内存,程序出现了内存不足的错误。我的解决办法是:不在 `call` 方法(来自 `Callable` 接口)中直接进行计算,而是在处理器的本地方法中计算出 `Report` 的值(每个 `Report` 只有几百行)之后再返回 `Report` 对象。
我想尝试改用 Scala Actor 来解决这个问题。我创建了一个类,它接收一个作业序列(作业、结果和处理函数的类型都是参数化的),并在数量可配置的 `Worker` 实例(`Actor` 的子类)中处理每个作业。代码如下。
问题:
- 我不确定我的处理方式是否完全正确。
- 我不喜欢使用 `CountDownLatch` 来延迟从调度程序(dispatcher)返回结果。
- 我更愿意编写一个更“函数式”的 dispatcher 版本,它不修改 `jobQueue` 列表或 `workers` 哈希表,也许可以借用 Clojure 的尾递归 `loop` 结构(我在其他 Scala 代码中使用过 `@tailrec def loop` 方法)。
package multi_worker
import scala.actors.Actor
import java.util.concurrent.CountDownLatch
/**
 * Companion of the MultiWorker class. Holds the values the worker actors
 * read when they build their memory-usage log line.
 */
object MultiWorker {
  // Bytes per megabyte (1024 * 1024), used to scale the runtime's byte counts
  private val megabyte: Int = 1 << 20
  // The JVM runtime handle queried for total and free memory
  private val runtime: Runtime = Runtime.getRuntime
}
/**
 * Processes a list of jobs on a fixed number of worker actors and collects
 * the results.
 *
 * Constructing an instance starts the logger and the dispatcher; the
 * dispatcher in turn starts the workers. Call `results` to wait for the
 * processing to finish and obtain the output.
 *
 * @param jobs       the jobs to process
 * @param actorCount the number of Worker actors to start
 * @param process    the function applied to each job to produce its result
 * @tparam A the job type
 * @tparam B the result type
 */
class MultiWorker[A, B](jobs: List[A],
                        actorCount: Int)(process: (A) => B) {
  import MultiWorker._

  sealed abstract class Message
  // Dispatcher -> Worker: Run this job and report results
  case class Process(job: A) extends Message
  // Worker -> Dispatcher: Result of processing
  case class ReportResult(id: Int, result: B) extends Message
  // Worker -> Dispatcher: I need work -- send me a job
  case class SendJob(id: Int) extends Message
  // Worker -> Dispatcher: I have stopped as requested
  case class Stopped(id: Int) extends Message
  // Dispatcher -> Worker/Logger: Stop working -- all jobs done.
  // Declared with an explicit (empty) parameter list so that the value sent,
  // StopWorking(), and the pattern matched, StopWorking(), are the same thing.
  case class StopWorking() extends Message

  /**
   * A simple logger that can be sent text messages that will be written to the
   * console. Used so that messages from the actors do not step on each other.
   * It exits when it receives StopWorking().
   */
  object Logger extends Actor {
    def act(): Unit = {
      loop {
        react {
          case text: String  => println(text)
          case StopWorking() => exit()
        }
      }
    }
  }
  Logger.start()

  /**
   * A worker actor that will process jobs and return results to the
   * dispatcher.
   *
   * @param id the key under which the dispatcher tracks this worker
   */
  class Worker(id: Int) extends Actor {
    def act(): Unit = {
      // Ask the dispatcher for an initial job
      dispatcher ! SendJob(id)
      loop {
        react {
          case Process(job) =>
            val startTime = System.nanoTime
            dispatcher ! ReportResult(id, process(job))
            val endTime = System.nanoTime
            val totalMemory = (runtime.totalMemory / megabyte)
            val usedMemory = totalMemory - (runtime.freeMemory / megabyte)
            val message = "Finished job " + job + " in " +
              ((endTime - startTime) / 1000000000.0) +
              " seconds using " + usedMemory +
              "MB out of total " + totalMemory + "MB"
            Logger ! message
            // The result is always sent before the next request, so the
            // dispatcher sees every ReportResult of this worker before the
            // request that may lead to it being stopped.
            dispatcher ! SendJob(id)
          case StopWorking() =>
            dispatcher ! Stopped(id)
            exit()
        }
      }
    }
  }

  // Released by the dispatcher once every worker has stopped
  val latch = new CountDownLatch(1)
  // Results accumulated by the dispatcher, newest first
  var res = List.empty[B]

  /**
   * The job dispatcher that sends jobs to the workers until the job queue
   * (jobs: List[A]) is empty. A worker that asks for work when the queue is
   * empty is told to stop; when the last worker has confirmed that it
   * stopped, the logger is stopped and the latch is released so that
   * `results` can return.
   */
  val dispatcher = new Actor {
    def act(): Unit = {
      var jobQueue = jobs
      var workers = (0 until actorCount).map(id => (id, new Worker(id))).toMap

      // Stops the logger, releases the caller waiting in `results` and ends
      // this actor.
      def finish(): Nothing = {
        Logger ! StopWorking()
        latch.countDown()
        exit()
      }

      // Without any worker nobody will ever send Stopped, so waiting for it
      // would block `results` forever.
      if (workers.isEmpty) finish()

      workers.values.foreach(_.start())
      loop {
        react {
          case ReportResult(_, result) =>
            res = result :: res
          case SendJob(id) =>
            jobQueue match {
              case job :: remaining =>
                workers(id) ! Process(job)
                jobQueue = remaining
              case Nil =>
                // Nothing left to hand out: ask this worker to stop
                workers(id) ! StopWorking()
            }
          case Stopped(id) =>
            workers = workers - id
            if (workers.isEmpty) finish()
        }
      }
    }
  }
  dispatcher.start()

  /**
   * Get the results of the processing -- wait for the dispatcher to finish
   * before returning.
   *
   * @return the results in reverse order of their arrival at the dispatcher
   */
  def results: List[B] = {
    latch.await()
    res
  }
}
在快速浏览之后,我将提出以下更新:
// Channel on which the dispatcher hands the finished result list to the
// caller; it takes the place of the countdown latch.
val resultsChannel: Channel[List[B]] = new Channel[List[B]]
/**
 * Dispatcher that keeps its state (job queue, workers, results) in the
 * arguments of `loop` instead of in mutable fields. It hands one job to each
 * worker that asks for work and publishes the result list on resultsChannel
 * once there is one result per job.
 */
val dispatcher = new Actor {
  // Starts one worker per id in 0 until actorCount and keeps the typed
  // reference, so the result of start() does not have to be cast back.
  private def startWorkers(): Map[Int, Worker] =
    (0 until actorCount).map { id =>
      val worker = new Worker(id)
      worker.start()
      (id, worker)
    }.toMap

  def act(): Unit = {
    // With no jobs there will never be a ReportResult, so publish the empty
    // result list right away instead of leaving the caller waiting.
    if (jobs.isEmpty) resultsChannel ! Nil
    // The queue starts out as the full job list
    loop(jobs, startWorkers(), Nil)
  }

  // Queue, workers and results are immutable values passed from one handled
  // message to the next. Not annotated @tailrec: the recursive calls are made
  // from inside the handler passed to react, not from loop's own tail position.
  def loop(jobQueue: List[A],
           workers: Map[Int, Worker],
           res: List[B]): Unit = react {
    case ReportResult(id, result) =>
      val results = result :: res
      if (results.size == jobs.size) { // when the processing is finished, sends results to the output channel
        resultsChannel ! results
      }
      loop(jobQueue, workers, results)
    case SendJob(id) =>
      jobQueue match {
        case job :: remaining =>
          workers(id) ! Process(job)
          loop(remaining, workers, res)
        case Nil =>
          // Keep handling messages: other workers may still be processing and
          // their ReportResult must not be lost.
          loop(jobQueue, workers, res)
      }
    case Stopped(id) =>
      loop(jobQueue, workers - id, res)
  }
}
dispatcher.start()
/**
 * Waits for the result list to arrive on resultsChannel and returns it.
 */
def results: List[B] =
  resultsChannel.receive {
    // Any message arriving on the channel is taken as the result list
    case collected => collected
  }
这是我最终得出的版本(感谢 Vasil Remeniuk)。带有 `// DEBUG` 注释的 `println` 语句用于显示进度,`main` 方法充当单元测试:
import scala.actors.Actor
import scala.actors.Channel
import scala.actors.Scheduler
import scala.annotation.tailrec
/**
 * Companion of the MultiWorker class: the constants used for the workers'
 * memory report and a small driver that runs five jobs on two workers.
 */
object MultiWorker {
  private val megabyte = 1024 * 1024
  private val runtime = Runtime.getRuntime

  /**
   * Runs the jobs 0 to 4 through a MultiWorker with two workers, prints the
   * collected results and shuts the actor scheduler down.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val jobs = List.range(0, 5)
    // Each job sleeps briefly, prints its value and returns it unchanged
    val slowIdentity: Int => Int = { value =>
      Thread.sleep(100)
      println(value)
      value
    }
    val multiWorker = new MultiWorker[Int, Int](jobs, 2, slowIdentity)
    println("multiWorker.results: " + multiWorker.results)
    Scheduler.shutdown
  }
}
/**
 * Processes a list of jobs on a fixed number of worker actors and collects
 * the results.
 *
 * Constructing an instance starts the logger and the dispatcher; the
 * dispatcher in turn starts the workers. Call `results` to wait for the
 * processing to finish and obtain the output.
 *
 * @param jobs       the jobs to process
 * @param actorCount the number of Worker actors to start
 * @param process    the function applied to each job to produce its result
 * @tparam A the job type
 * @tparam B the result type
 */
class MultiWorker[A, B](jobs: List[A],
                        actorCount: Int,
                        process: (A) => B) {
  import MultiWorker._

  sealed abstract class Message
  // Dispatcher -> Worker: Run this job and report results
  case class Process(job: A) extends Message
  // Worker -> Dispatcher: Result of processing
  case class ReportResult(id: Int, result: B) extends Message
  // Worker -> Dispatcher: I need work -- send me a job
  case class SendJob(id: Int) extends Message
  // Worker -> Dispatcher: I have stopped as requested
  case class Stopped(id: Int) extends Message
  // Dispatcher -> Worker/Logger: Stop working -- all jobs done
  case class StopWorking() extends Message

  /**
   * A simple logger that can be sent text messages that will be written to the
   * console. Used so that messages from the actors do not step on each other.
   * It exits when it receives StopWorking().
   */
  object Logger extends Actor {
    def act(): Unit = {
      loop {
        react {
          case text: String  => println(text)
          // Match a StopWorking() message; a bare `StopWorking` pattern would
          // only match the companion object, which is never sent.
          case StopWorking() => exit()
        }
      }
    }
  }
  Logger.start()

  /**
   * A worker actor that will process jobs and return results to the
   * dispatcher.
   *
   * @param id the key under which the dispatcher tracks this worker
   */
  case class Worker(id: Int) extends Actor {
    def act(): Unit = {
      // Ask the dispatcher for an initial job
      dispatcher ! SendJob(id)
      loop {
        react {
          case Process(job) =>
            println("Worker(" + id + "): " + Process(job)) // DEBUG
            val startTime = System.nanoTime
            dispatcher ! ReportResult(id, process(job))
            val endTime = System.nanoTime
            val totalMemory = (runtime.totalMemory / megabyte)
            val usedMemory = totalMemory - (runtime.freeMemory / megabyte)
            val message = "Finished job " + job + " in " +
              ((endTime - startTime) / 1000000000.0) +
              " seconds using " + usedMemory +
              "MB out of total " + totalMemory + "MB"
            Logger ! message
            dispatcher ! SendJob(id)
          case StopWorking() =>
            println("Worker(" + id + "): " + StopWorking()) // DEBUG
            dispatcher ! Stopped(id)
            exit()
        }
      }
    }
  }

  // Channel on which the dispatcher delivers the final result list
  val resultsChannel = new Channel[List[B]]

  /**
   * The job dispatcher that sends jobs to the workers until the job queue
   * (jobs: List[A]) is empty. It then tells the workers to stop working and,
   * once all of them have stopped, sends the List[B] results to
   * resultsChannel and stops the logger.
   */
  val dispatcher: Actor = new Actor {
    def act(): Unit = {
      // State is carried in the arguments from one handled message to the
      // next. Not annotated @tailrec: the recursive calls are made from inside
      // the handler passed to react, not from loop's own tail position.
      def loop(jobs: List[A],
               workers: Map[Int, Worker],
               acc: List[B]): Unit = {
        println("dispatcher: loop: jobs: " + jobs + ", workers: " + workers + ", acc: " + acc) // DEBUG
        if (!workers.isEmpty) { // Stop recursion when there are no more workers
          react {
            case ReportResult(id, result) =>
              println("dispatcher: " + ReportResult(id, result)) // DEBUG
              loop(jobs, workers, result :: acc)
            case SendJob(id) =>
              println("dispatcher: " + SendJob(id)) // DEBUG
              if (!jobs.isEmpty) {
                println("dispatcher: " + "Sending: " + Process(jobs.head) + " to " + id) // DEBUG
                workers(id) ! Process(jobs.head)
                loop(jobs.tail, workers, acc)
              } else {
                println("dispatcher: " + "Sending: " + StopWorking() + " to " + id) // DEBUG
                workers(id) ! StopWorking()
                loop(Nil, workers, acc)
              }
            case Stopped(id) =>
              println("dispatcher: " + Stopped(id)) // DEBUG
              loop(jobs, workers - id, acc)
          }
        } else {
          println("dispatcher: " + "jobs: " + jobs + ", workers: " + workers + ", acc: " + acc) // DEBUG
          resultsChannel ! acc
          // Every worker has stopped, so no further log lines will be sent
          Logger ! StopWorking()
        }
      }

      // Keep the typed reference to each worker instead of casting the result
      // of start() back to Worker.
      val startedWorkers = (0 until actorCount).map { id =>
        val worker = new Worker(id)
        worker.start()
        (id, worker)
      }.toMap
      loop(jobs, startedWorkers, Nil)
      exit()
    }
  }
  // Started only after the `dispatcher` field has been assigned: the workers
  // launched from act() read that field from other threads and must not be
  // able to see it before it is set.
  dispatcher.start()

  /**
   * Get the results of the processing -- wait for the dispatcher to finish
   * before returning.
   *
   * NOTE(review): the dispatcher sends a single message on resultsChannel, so
   * a second call presumably waits for a message that never arrives -- confirm
   * and call this once.
   *
   * @return the results in reverse order of their arrival at the dispatcher
   */
  def results: List[B] = {
    resultsChannel.receive {
      case results => results
    }
  }
}
可爱的代码。为了可读性,我会对它进行一些分解——提取方法等等,但是这个概念非常好。