Why is asynchronous performance worse than blocking (synchronous) file I/O in Node.js?

Based on several Stack Overflow posts about synchronous vs. asynchronous I/O, asynchronous calls should carry only a small overhead, or even be faster than blocking synchronous calls for I/O operations:

Some of the places I looked:

I wrote a small benchmark that generates four files of 256MB to 1GB each, to test the performance of fs.readFile.
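The benchmark script itself is only linked below; a minimal sketch of the kind of comparison it makes (the ./bigfile paths are illustrative, and the file creation step - the dd output below - is omitted) looks roughly like this:

// Rough sketch of the comparison being benchmarked; the ./bigfile paths
// are illustrative and the files are assumed to already exist.
const fs = require('fs');
const { performance } = require('perf_hooks');

const files = [0, 1, 2, 3].map(i => `./bigfile/test-${i}.txt`);

function syncTest() {
    const start = performance.now();
    for (const f of files) fs.readFileSync(f);   // blocking reads, one after another
    console.log(`Sync version took ${performance.now() - start}ms`);
}

async function asyncTest() {
    const start = performance.now();
    await Promise.all(files.map(f => fs.promises.readFile(f)));  // overlapping async reads
    console.log(`Async version took ${performance.now() - start}ms`);
}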

Output:

> makeFiles();

512+0 records in
512+0 records out
536870912 bytes (537 MB, 512 MiB) copied, 4.28077 s, 125 MB/s
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB, 1.0 GiB) copied, 8.45918 s, 127 MB/s
256+0 records in
256+0 records out
268435456 bytes (268 MB, 256 MiB) copied, 1.96678 s, 136 MB/s
512+0 records in
512+0 records out
536870912 bytes (537 MB, 512 MiB) copied, 4.32488 s, 124 MB/s
undefined
> syncTest();
Sync version took 1055.9131410121918ms
undefined
> asyncTest();
Promise { <pending> }
> Async version took 6991.523499011993ms
So the async version appears to be roughly 7x slower than the sync version. How can this slowdown be explained? And when should the sync version be used?

Repl.it link:


System: Node 13.9.0 on Arch Linux 5.5.4-arch1-1

For a faster version, see the Version 2 edit below.

Version 1

FYI, in addition to all my comments above, the fastest I've been able to get an async version is this:

async function asyncTestStreamParallel(files) {
    const startTime = performance.now();
    let results = [];

    for (let filename of files) {
        results.push(new Promise((resolve, reject) => {
            const stream = fs.createReadStream(filename, {highWaterMark: 64 * 1024 * 10});
            const data = [];
            stream.on('data', chunk => {
                data.push(chunk);
            }).on('end', () => {
                resolve(Buffer.concat(data));
            }).on('error', reject);
        }));
    }
    await Promise.all(results);

    console.log(`Async stream parallel version took ${performance.now() - startTime}ms`);
}
Here are my results on Windows 10, node v12.13.1:

node --expose_gc temp
Sync version took 1175.2680000066757ms
Async version took 2315.0439999699593ms
Async stream version took 1600.0085990428925ms
Async stream parallel version took 1111.310200035572ms
Async serial version took 4387.053400993347ms
Note that I modified the scheme slightly to pass an array of filenames into each test, rather than regenerating the filenames each time, so that file creation is centralized.

The things that helped speed it up were:

- Using a larger highWaterMark (presumably the stream buffer size).
- Collecting the data chunks in an array and concatenating them once at the end, which massively reduces peak memory consumption and GC work.
- Allowing the different files in the loop to run in parallel with one another.

With these changes, it runs at about the same speed as the sync version - sometimes slightly slower, sometimes about the same.

I also put a 2-second delay between each test run and forced the garbage collector to run, to make sure GC activity didn't interfere with my results.

Here's my whole script; it should run on any platform. Note that it must be run with the --expose_gc command line argument, as in node --expose_gc temp.js:

// Run this with the --expose_gc command line option

const {performance} = require('perf_hooks');
const fs = require('fs');
const fsp = fs.promises;
const path = require('path');

const sizes = [512, 1024, 256, 512];   // file sizes in MB
const data = "0123456789\n";
const testDir = path.join(__dirname, "bigfile"); 

function makeFiles() {
    // make a bigger string to make fewer disk writes
    const bData = [];
    for (let i = 0; i < 1000; i++) {
        bData.push(data);
    }
    const biggerData = bData.join("");
    try {
        fs.mkdirSync(testDir);    // ignore errors if it already exists
    } catch(e) {
        // do nothing if it already exists
    }
    const files = [];

    for (let i = 0; i < sizes.length; i++) {
        let targetLen = sizes[i] * 1024 * 1024;
        let f;
        try {
            let fname = `${path.join(testDir, "test")}-${i}.txt`;
            f = fs.openSync(fname, 'w');
            files.push(fname);
            let len = 0;
            while (len < targetLen) {
                fs.writeSync(f, biggerData);
                len += biggerData.length;
            }
        } catch(e) {
            console.log(e);
            process.exit(1);
        } finally {
            if (f) fs.closeSync(f);
        }
    }
    return files;
}

function clearFiles(files) {
    for (let filename of files) {
        fs.unlinkSync(filename);
    }
    fs.rmdirSync(testDir);
}

// read a file synchronously in one single read
function readFileSync(filename) {
    let handle = fs.openSync(filename, "r");
    try {
        let stats = fs.fstatSync(handle);
        let buffer = Buffer.allocUnsafe(stats.size);
        let bytesRead = fs.readSync(handle, buffer, 0, stats.size, 0);
        if (bytesRead !== stats.size) {
            throw new Error("bytesRead not full file size")
        }
    } finally {
        fs.closeSync(handle);
    }

}

// read a file in one single read
async function readFile(filename) {
    let handle = await fsp.open(filename, "r");
    try {
        let stats = await handle.stat();
        let buffer = Buffer.allocUnsafe(stats.size);
        let {bytesRead} = await handle.read(buffer, 0, stats.size, 0);
        if (bytesRead !== stats.size) {
            throw new Error("bytesRead not full file size")
        }
    } finally {
        handle.close()
    }
}



function syncTest(files) {
    const startTime = performance.now();
    const results = [];

    for (let filename of files) {
        results.push(fs.readFileSync(filename));
    }
    console.log(`Sync version took ${performance.now() - startTime}ms`);
}

function syncTestSingleRead(files) {
    const startTime = performance.now();
    const results = [];

    for (let filename of files) {
        readFileSync(filename);
    }
    console.log(`Sync single read version took ${performance.now() - startTime}ms`);
}

async function asyncTest(files) {
    const startTime = performance.now();
    const results = [];

    for (let filename of files) {
        results.push(fs.promises.readFile(filename));
    }
    await Promise.all(results);

    console.log(`Async version took ${performance.now() - startTime}ms`);
}

async function asyncTestStream(files) {
    const startTime = performance.now();

    for (let filename of files) {
        await new Promise((resolve, reject) => {
            let stream = fs.createReadStream(filename, {highWaterMark: 64 * 1024 * 10});
            let data = [];
            stream.on('data', chunk => {
                data.push(chunk);
            }).on('close', () => {
                resolve(Buffer.concat(data));
            }).on('error', reject);
        });
    }

    console.log(`Async stream version took ${performance.now() - startTime}ms`);
}

async function asyncTestStreamParallel(files) {
    const startTime = performance.now();
    let results = [];

    for (let filename of files) {
        results.push(new Promise((resolve, reject) => {
            const stream = fs.createReadStream(filename, {highWaterMark: 64 * 1024 * 100});
            const data = [];
            stream.on('data', chunk => {
                data.push(chunk);
            }).on('end', () => {
                resolve(Buffer.concat(data));
            }).on('error', reject);
        }));
    }
    await Promise.all(results);

    console.log(`Async stream parallel version took ${performance.now() - startTime}ms`);
}

async function asyncTestSingleReadSerial(files) {
    const startTime = performance.now();
    let buffer;
    for (let filename of files) {
        let handle = await fsp.open(filename, "r");
        try {
            let stats = await handle.stat();
            if (!buffer || buffer.length < stats.size) {
                buffer = Buffer.allocUnsafe(stats.size);
            }
            let {bytesRead} = await handle.read(buffer, 0, stats.size, 0);
            if (bytesRead !== stats.size) {
                throw new Error("bytesRead not full file size")
            }
        } finally {
            handle.close()
        }
    }
    console.log(`Async single read serial version took ${performance.now() - startTime}ms`);
}

async function asyncTestSingleReadParallel(files) {
    const startTime = performance.now();

    await Promise.all(files.map(readFile));

    console.log(`Async single read parallel version took ${performance.now() - startTime}ms`);
}

async function asyncTestSerial(files) {
    const startTime = performance.now();
    const results = [];

    for (let filename of files) {
        results.push(await fs.promises.readFile(filename));
    }

    console.log(`Async serial version took ${performance.now() - startTime}ms`);
}

function delay(t) {
    return new Promise(resolve => {
        global.gc();
        setTimeout(resolve, t);
    });
}

// delay between each test to let any system stuff calm down
async function run() {
    const files = makeFiles();
    try {
        await delay(2000);
        syncTest(files);

        await delay(2000);
        syncTestSingleRead(files);

        await delay(2000);
        await asyncTest(files)

        await delay(2000);
        await asyncTestSerial(files);

        await delay(2000);
        await asyncTestStream(files);

        await delay(2000);
        await asyncTestStreamParallel(files);

        await delay(2000);
        await asyncTestSingleReadSerial(files);

        await delay(2000);
        await asyncTestSingleReadParallel(files);
    } catch(e) {
        console.log(e);
    } finally {
        clearFiles(files);
    }
}

run();

Comments:

Synchronous file I/O blocks the event loop, so node.js can't do anything else while a synchronous file operation is being processed. That wrecks the scalability and performance of a server. It's not about the performance difference of a single operation; it's about letting node.js do other things while the file I/O is in progress, rather than blocking the whole event loop.

So is there any general use case for synchronous I/O other than the initialization/startup process?

Not in a server, since it wrecks the server's scalability. But node.js has many uses that aren't servers. For example, I have build scripts and disk maintenance scripts that are single-user and are less complicated to write and debug using synchronous file I/O.

FYI, I ported your script to Windows and got 1502.4960010051727ms for the sync version and 2460.8494987198ms for the async version. Sync is still faster, but only by about 63%, not 700%. Not sure what's going on in your runs. FYI, I'm running node v12.13.1 on Windows 10.
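To illustrate the event-loop point, here is a minimal sketch (not from the original post; the file path is illustrative and assumes one of the large test files generated above still exists). A repeating timer keeps ticking during the async read, but cannot fire at all during the sync read:

// Minimal sketch of event-loop blocking; './bigfile/test-1.txt' is an
// illustrative path assuming one of the large test files from above exists.
const fs = require('fs');

const file = './bigfile/test-1.txt';

let ticks = 0;
const timer = setInterval(() => { ticks++; }, 50);  // fires only while the event loop is free

setTimeout(async () => {
    ticks = 0;
    fs.readFileSync(file);                              // blocks the event loop
    console.log(`ticks during sync read: ${ticks}`);    // 0 - no timer could fire

    ticks = 0;
    await fs.promises.readFile(file);                   // I/O happens off the main thread
    console.log(`ticks during async read: ${ticks}`);   // > 0 if the read takes longer than 50ms

    clearInterval(timer);
}, 100);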