Node.js: How do I properly insert the 1.5GiB Geonames file into PostgreSQL using Node?


I have downloaded the Geonames database dump file and I'm trying to put everything into a PostgreSQL table, but no matter what I try I keep running into multiple errors.

The last modification I made gives me this:

Error: Connection terminated by user
    at Client.end (/media/DarkHawk/srv/Databases/PremadeDB/Geonames/node_modules/pg/lib/client.js:402:36)
    at Pool._remove (/media/DarkHawk/srv/Databases/PremadeDB/Geonames/node_modules/pg-pool/index.js:135:12)
    at Timeout.setTimeout (/media/DarkHawk/srv/Databases/PremadeDB/Geonames/node_modules/pg-pool/index.js:38:12)
    at ontimeout (timers.js:498:11)
    at tryOnTimeout (timers.js:323:5)
    at Timer.listOnTimeout (timers.js:290:5)
Line added  6052 0.05135667935111022%
(node:31819) UnhandledPromiseRejectionWarning: Error: This socket is closed
    at Socket._writeGeneric (net.js:729:18)
    at Socket._write (net.js:783:8)
    at doWrite (_stream_writable.js:397:12)
    at writeOrBuffer (_stream_writable.js:383:5)
    at Socket.Writable.write (_stream_writable.js:290:11)
    at Socket.write (net.js:707:40)
    at Connection.end (/media/DarkHawk/srv/Databases/PremadeDB/Geonames/node_modules/pg/lib/connection.js:318:22)
    at global.Promise (/media/DarkHawk/srv/Databases/PremadeDB/Geonames/node_modules/pg/lib/client.js:410:23)
    at new Promise (<anonymous>)
    at Client.end (/media/DarkHawk/srv/Databases/PremadeDB/Geonames/node_modules/pg/lib/client.js:409:12)
(node:31819) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). (rejection id: 1)
(node:31819) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.
(node:31819) MaxListenersExceededWarning: Possible EventEmitter memory leak detected. 11 end listeners added. Use emitter.setMaxListeners() to increase limit
I have tried ReadFile, ReadFileSync and the readline extension, as well as moving or removing the done() function or simply shuffling it around.

I usually use PHP to insert massive files, so I have no idea what I'm doing wrong here.

The MaxListenersExceededWarning makes no sense to me, since it seems to me that I close everything I open. What am I doing wrong?


Thanks!

As mentioned in the comments - when processing async code you need to use the map operation instead of mapSync and call the callback once the item has been inserted.

If you use this there's no need to call pause or resume (that is handled by event-stream); you only need to resume the last stream created. There is also the question of when done should be called - namely: after all the operations have completed.
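To illustrate the difference, here's a minimal sketch (not taken from your code - insertLine is just a hypothetical stand-in for whatever runs the actual INSERT):

var es = require('event-stream');

// With mapSync the insert is fired and the stream moves on immediately -
// nothing ever waits for Postgres, so queries pile up and the connection
// can get closed while thousands of them are still pending:
es.mapSync(function (line) {
    insertLine(line); // the result is never waited for
    return line;
});

// With map the stream only advances once the callback is invoked,
// i.e. after the insert has actually finished (or failed):
es.map(function (line, cb) {
    insertLine(line, function (err, result) {
        cb(err, result);
    });
});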

Your code should look like this:

var pg = require("pg");
var fs = require('fs');
var es = require('event-stream');

const pool = new pg.Pool({
  user: 'smurf',
  host: 'localhost',
  database: 'mydb',
  password: 'smurf',
  port: 5432,
})

var filename = 'allCountries.txt';

var lineNr = 0;
var max = 11784251; // Number of lines in the file - a dirty way to report % of lines inserted

// Connect to Postgresql
pool.connect((err, client, done) => {
  if (err) throw err

  // Stream file line by line
  var s = fs.createReadStream(filename)
    .pipe(es.split())
    .pipe(es.map(function(e, cb) {

        lineNr += 1;

        // Each line needs to be properly formatted
        e = e.split("\t"); //TAB split

        // The following fields need formatting
        e[0] = parseInt(e[0]);
        e[4] = parseFloat(e[4]);
        e[5] = parseFloat(e[5]);
        e[14] = parseInt(e[14]);

        e[15] = e[15] == '' ? 0 : e[15];

        e[16] = parseInt(e[16]);

        // Insert into db
        pool.query('INSERT INTO geonames.rawdata (geonameid, name, asciiname, alternatenames, latitude, longitude, fclass, fcode, country, cc2, admin1, admin2, admin3, admin4, population, elevation, gtopo30, timezone, moddate) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19);', e, function(err, result) {
            cb(err, result); // call the callback
            console.log("Line added ", lineNr, (lineNr / max * 100) + "%") // Monitor progress
        });

    })
    .resume()
    .on('error', function(err) {
        done();
        console.log('Error while reading file.', err);
    })
    .on('end', function() {
        done();
        console.log('Read entire file.')
    })
    );
}) // END pool.connect
The above version of the code will not make any use of the pool you're creating - it will operate on one item at a time.

If you're using a newer Node (8.4+) you can use my own framework, scramjet, which allows writing even simpler code with ES6 async functions:

const {Pool} = require("pg");
const {StringStream} = require("scramjet");
const fs = require("fs");

const pool = new Pool(options); // same connection options as in the first example
const filename = 'allCountries.txt';
const max = 11784251;
let lineNr = 0;
const INSERT_ENTRY = 'INSERT INTO geonames.rawdata (geonameid, name, asciiname, alternatenames, latitude, longitude, fclass, fcode, country, cc2, admin1, admin2, admin3, admin4, population, elevation, gtopo30, timezone, moddate) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19);';

StringStream
    .from(fs.createReadStream(filename))
    .lines()
    .parse(line => {
        // Each line needs to be properly formatted
        const entry = line.split("\t"); //TAB split

        // The following fields need formatting
        entry[0] = parseInt(entry[0]);
        entry[4] = parseFloat(entry[4]);
        entry[5] = parseFloat(entry[5]);
        entry[14] = parseInt(entry[14]);
        entry[15] = entry[15] == '' ? 0 : entry[15];
        entry[16] = parseInt(entry[16]);

        return entry;
    })
    .setOptions({maxParallel: 32})
    .each(async entry => {
        const client = await pool.connect();
        try {
            await client.query(INSERT_ENTRY, entry)
        } catch(err) {
            console.log('Error while adding line...', err);
            // some more logic could be here?
        } finally {
            client.release();
        }
    })
    .each(() => !(lineNr++ % 1000) && console.log("Line added ", lineNr, (lineNr / max * 100) + "%"))
    .run()
    .then(
        () => console.log('Read entire file.'), 
        e => console.log('Error while handling file.', e)
    );

The code above will attempt to run 32 parallel inserts using the pool (requesting a client for every entry - the pool will reuse clients and create new ones up to the limit it is configured with). This does not mean it will necessarily be 32 times faster, since there are some external limiting factors, but you should see a drastic increase in speed.
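As a side note, pg's Pool keeps at most 10 clients by default, so if you want all 32 parallel inserts to actually run concurrently you may want to size the pool to match. A minimal sketch, reusing the connection details from the first example and assuming 32 is the concurrency you're after:

const {Pool} = require("pg");

// Pool sized to match maxParallel, so concurrent inserts are not
// queued behind the default limit of 10 clients.
const pool = new Pool({
  user: 'smurf',
  host: 'localhost',
  database: 'mydb',
  password: 'smurf',
  port: 5432,
  max: 32,
});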

Have you tried changing the default max listeners? I think there is a limit, but I've never run into this issue myself.

I don't understand why I reach the max listeners when I'm closing everything, it confuses me. Changing the max listeners with require('events').EventEmitter.defaultMaxListeners = 40; (from this:) doesn't get me much further in the process (~6500 lines).

That's because you're injecting a massive number of entries into the pg database using mapSync. It creates the inserts but doesn't wait until they complete. Try using es.map instead and call the callback after the data is inserted. You can also use my scramjet together with pg and ES6 async/await code - that should be even simpler; a similar example is shown there.

I forgot to tell you that you were right, obviously :) Thanks a lot for your help.

Hmm... I could answer this question then...