多线程(Multithreading):在 Node.js 模块中使用 cluster(集群)

mass request


// Usage sketch from the question: load the module, call it to obtain a
// scraper object, then queue every URL together with a response callback.
// NOTE(review): `my_urls_to_visit` is not defined in this snippet, and the
// snippet is truncated here (the callback and loop are never closed).
var mr = require("mass-request"),
    scraper = mr();

for (var i = 0; i < my_urls_to_visit.length; i += 1) {
    scraper.add(my_urls_to_visit[i], function(resp) {
        // do something with response
我预期会看到 "hello from mass-request!" 被打印四次(实际也确实如此)。令我惊讶的是,"hello from test.js" 也被打印了四次。显然,我不明白





mass-request.js

var cluster = require('cluster');
var zmq = require('zmq');

// Singleton "mass request" object. It keeps a queue of { url, cb } items and,
// in the master process, hands URLs to forked child processes over zmq
// push/pull sockets; children send results back over a socket to the master.
// NOTE(review): this listing is incomplete -- most methods are missing their
// closing braces and some bodies appear to have been stripped.
module.exports = {
    // NOTE(review): declared as `_childId`, but the methods below read and
    // write `this.childId` (no underscore) -- confirm which name is intended.
    _childId : null,
    // pending work items, each { url, cb }
    _urls : [],
    // number of child processes to fork
    _threadCount : 1,
    // counter incremented by _threadReady
    _readyThreads : 0,
    // callback of the URL currently assigned to each child, keyed by child id
    _callbacks : {},
    zmqReceive : null, //the socket we receive on for this thread
    zmqMaster : null, //the socket to the master
    zmqChildren : {}, //an object storing the sockets for the children
    // Sets how many child processes _masterThread will fork.
    setThreads : function( threadCount ) {
        this._threadCount = threadCount;
    // Queues a URL together with the callback to invoke with its result.
    add : function( url , cb ) {
        this._urls.push( {url: url, cb : cb } );
    // Entry point: branches on whether this process is the cluster master.
    // NOTE(review): both branches are empty in this listing -- presumably they
    // call _masterThread() and _childThread() respectively; confirm.
    run : function() {

        if( cluster.isMaster ) {


        } else {



    // Master side: binds the pull socket for child messages, then creates one
    // push socket and one forked process per configured thread.
    _masterThread : function() {

        console.log( 'Master Process Starting Up' );

        this.zmqReceive = zmq.socket('pull').bindSync( 'ipc://master.ipc' );

        //bind handler for messages coming into this process using closure to allow us to access the massrequest object inside the callback
        ( function( massRequest ) {
            this.zmqReceive.on( 'message' , function( msg ) {

                msg = JSON.parse(msg);

                //was this an online notification?
                // NOTE(review): nothing is done with the notification in this
                // listing -- presumably _threadReady() was called here; confirm.
                if( msg && msg.status == 'Online' ) {
                    return; //we're done
                if( msg && msg.html ) {
                    //this was a response from a child, call the callback for it
                    massRequest._callbacks[ msg.sender ].call( massRequest , msg.html );
                    //send the child another URL
                    massRequest._sendUrlToChild( msg.sender );

            } );
        }).call( this , this );

        //fork one child process per configured thread (_threadCount) and set up the sending sockets for them
        for( var i=0; i < this._threadCount; ++i ) {
            //set up the sending socket
            this.zmqChildren[i] = zmq.socket('push').connect( 'ipc://child_' + i + '.ipc' );
            //fork the process and pass it an id
            cluster.fork( {
                // NOTE(review): the env object is empty in this listing, yet
                // _childThread reads process.env._childId -- presumably
                // `_childId : i` belongs here; confirm.
            } );

    // Takes the next queued item, remembers its callback under the child's id
    // and sends the URL to that child. Items are taken with pop(), i.e. from
    // the end of the queue, so the most recently added URL goes out first.
    _sendUrlToChild : function( child ) {
        //if there's no urls left, return (this would also be a good place to send a message to the child to exit gracefully)
        if( !this._urls.length ) return;
        //grab a url to process
        var item = this._urls.pop();
        //set the callback for the child
        this._callbacks[child] = item.cb;
        this.zmqChildren[child].send( JSON.stringify( { url:item.url } ) );
    // Sends one URL to each child (ids 0 .. _threadCount - 1).
    _processUrls : function() {
        for( var i=0; i < this._threadCount; ++i ) {
            this._sendUrlToChild( i );
    // Counts a child as ready; logs once the count reaches _threadCount.
    // NOTE(review): presumably _processUrls() is called after the log line;
    // that call is not visible in this listing.
    _threadReady : function() {
        if( ++this._readyThreads >= this._threadCount ) {
            //all threads are ready, send out urls to start the mayhem
            console.log( 'All threads online, starting URL processing' );
    // Child side: handles one URL and reports the result to the master.
    // The result is currently the fixed placeholder string 'HTML'.
    _childProcessUrl : function( url ) {
        console.log( 'Child Process ' + this.childId + ' Handling URL: ' + url );
        //do something here to scrape your content however you see fit
        var html = 'HTML';
        this.zmqMaster.send( JSON.stringify( { sender:this.childId, html:html } ) );
    // Child side: reads its id from the environment, binds its own pull
    // socket, connects a push socket to the master and announces itself.
    _childThread : function() {

        //get the child id that was passed from cluster
        this.childId = process.env._childId;

        console.log( 'Child Process ' + this.childId + ' Starting Up' );

        //bind the pull socket to receive messages to this process
        this.zmqReceive = zmq.socket('pull').bindSync( 'ipc://child_' + this.childId + '.ipc' );

        //bind the push socket to send to the master
        this.zmqMaster = zmq.socket('push').connect('ipc://master.ipc');

        //bind handler for messages coming into this process
        ( function( massRequest ) {
            this.zmqReceive.on( 'message' , function( msg ) {

                msg = JSON.parse(msg);

                // NOTE(review): inside this plain function callback `this` is
                // whatever the emitter binds, not necessarily the massRequest
                // object, so `this.childId` may be undefined here; `msg` is a
                // parsed value, so concatenating it prints "[object Object]"
                // for object messages. Consider massRequest.childId -- confirm.
                console.log( 'Child ' + this.childId + ': ' + msg );

                //handle the url
                if( msg && msg.url ) massRequest._childProcessUrl( msg.url );

            } );
        }).call( this , this );

        //let the master know we're done setting up
        this.zmqMaster.send( JSON.stringify({sender:this.childId,status:'Online'}) );

// Demo driver: load the mass-request module, ask for four worker threads and
// queue seven identical jobs, each with its own completion callback.
var mr = require( './mass-request.js' );
var JOB_COUNT = 7;

mr.setThreads( 4 );

for ( var n = 0; n < JOB_COUNT; n += 1 ) {
    // A fresh callback object is created for every queued job.
    mr.add( '' , function( resp ) {
        console.log( ' is done' );
    } );
}
// Second variant of the module, built on cluster only.
// NOTE(review): this fragment is incomplete -- several closing braces are
// missing and parts of statements appear to have been stripped.
var cluster = require("cluster"),
  path = require("path"),
  numCPUs = require("os").cpus().length;

// Runs at load time, outside the isMaster check, so every process that loads
// this file prints it.
console.log("hello from mass-request!");
if (cluster.isMaster) {
    // NOTE(review): presumably the remnant of a cluster.setupMaster({ ... })
    // call pointing the workers at worker.js -- confirm.
    exec: path.join(__dirname, 'worker.js')

  // fork one worker per CPU reported by os.cpus()
  for (var i = 0; i < numCPUs; i += 1) {
    var worker = cluster.fork();

  // NOTE(review): a top-level `return` suggests this code originally sat
  // inside a function whose header is not visible here.
  return {
    add: function (url, cb) {
} else {
  // NOTE(review): the operand between the two `+` is missing (presumably the
  // worker's pid, judging by the sample output); as written, the unary plus
  // applied to the string yields NaN.
  console.log("worker " + + " is born!");
console.log("worker " + + " is born!");
node demo.js








  • settings 对象
    • exec(字符串):工作进程文件的路径。(默认值=process.argv[1])
    • args(数组):传递给工作进程的字符串参数。(默认值=process.argv.slice(2))
    • silent(布尔值):是否将输出发送到父进程的 stdio。(默认值=false)





node index.js 
hello from mass-request!
worker 38821 is born!
worker 38820 is born!
worker 38822 is born!
worker 38819 is born!

在子进程的 JS 线程中运行请求会有什么帮助?HTTP 请求本来就在 JS 线程之外执行。请参阅相关的有趣内容。因此,让两个或多个进程分担大量 URL 调用的工作并不会加快处理速度?让一个线程发出所有调用、另一个线程处理响应如何?使用多个进程发起 URL 调用的唯一理由,是 JS 线程成为了瓶颈。考虑到 node.js 的异步特性,在涉及 IO 的场景中这种情况很少出现。因此,只有在进行加密等 CPU 密集型工作时,把处理分给子进程才有意义。Mozilla Persona 就是一个很好的例子。请注意,传统上 fork() 会创建整个进程的副本。它不会“重新运行”整个进程,而是复制内存,并让两个进程都从 fork() 返回处继续执行。@generalhenry 您提供的链接并不支持您最初的说法,即 HTTP 相关操作运行在独立线程而不是主线程上,您对此有更多的信息吗?谢谢!这是 cluster 的一个很好的例子。不过,这不是我真正的问题。问题在于如何在一个模块内部执行这类操作,而该模块会被其他脚本 require,这样父脚本就不必操心检查自己是否是主进程。请参阅原帖中的代码示例。干杯!@Ch