Node.js Docker和alpine linux中的Puppeteer失败,Node错误:导航失败,因为浏览器已断开连接
我仍然有这个错误。我看到2018年的帖子,这似乎仍然是一个问题。Node.js Docker和alpine linux中的Puppeteer失败,Node错误:导航失败,因为浏览器已断开连接,node.js,docker,puppeteer,alpine,Node.js,Docker,Puppeteer,Alpine,我仍然有这个错误。我看到2018年的帖子,这似乎仍然是一个问题 错误:导航失败,因为浏览器已断开连接 不知道我做错了什么 import { Readability } from '@mozilla/readability'; const puppeteer = require('puppeteer-core'); const jsdom = require('jsdom'); const { JSDOM } = jsdom; let browser; class Reader { asy
错误:导航失败,因为浏览器已断开连接
不知道我做错了什么
// Module dependencies.
// NOTE(review): this file mixes an ES `import` with CommonJS `require` calls;
// presumably it is transpiled or bundled before running — confirm the build setup.
import { Readability } from '@mozilla/readability';
const puppeteer = require('puppeteer-core');
const jsdom = require('jsdom');
const { JSDOM } = jsdom;
// Module-level browser handle: assigned by Reader#getLink and read by spa().
// It is a single shared binding, so every getLink() call overwrites it.
let browser;
/**
 * Loads a web page in headless Chromium and extracts its readable content.
 */
class Reader {
  /**
   * Launches a browser, renders `link`, and returns the parsed article data.
   *
   * @param {string} link - Absolute URL of the page to fetch.
   * @returns {Promise<object|undefined>} A copy of the object returned by
   *   `txt()` with `textContent` renamed to `text` and `url`, `htm` and `host`
   *   added; `undefined` when `spa()` produced no HTML.
   * @throws Whatever `puppeteer.launch`, `txt()` or `new URL(link)` throw.
   */
  async getLink(link) {
    // Keep a call-local handle to the browser this call launched. Closing the
    // module-level `browser` instead would, when two calls overlap, close the
    // other call's browser (and leak this one), because both calls assign the
    // same shared variable.
    const session = await puppeteer.launch({
      headless: true,
      executablePath: '/usr/bin/chromium-browser',
      args: [
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--single-process',
        '--no-zygote',
      ],
    });

    // spa() reads the module-level `browser`, so publish the handle right
    // before calling it (no await in between).
    browser = session;

    try {
      const htm = await spa(link);
      if (!htm) {
        return undefined;
      }

      const text = txt(htm, link);
      const data = Object.assign({}, text);
      const parts = new URL(link);
      data.url = link;
      data.htm = htm;
      data.host = parts.host;
      data.text = data.textContent;
      delete data.textContent;

      console.log(`data fetched: ${link}`);
      return data;
    } finally {
      // Always release the Chromium process, including when parsing throws.
      await session.close();
    }
  }
}
/**
 * Resolves `raw` against `base` and returns the absolute URL string.
 * Returns `raw` unchanged when it cannot be parsed as a URL.
 */
function resolveAgainst(raw, base) {
  try {
    return new URL(raw, base).href;
  } catch (err) {
    return raw;
  }
}

/**
 * Parses rendered HTML, rewrites image sources and link targets to absolute
 * URLs relative to `link`, and runs Readability over the document.
 *
 * @param {string} htm - HTML markup of the page.
 * @param {string} link - Absolute URL the markup was loaded from.
 * @returns {object|null} Whatever `Readability#parse` returns.
 * @throws {TypeError} When `link` is not a valid absolute URL.
 */
function txt(htm, link) {
  // Validate the base once up front so an invalid `link` still fails loudly.
  const base = new URL(link);
  const doc = new JSDOM(htm);
  const page = doc.window.document;

  // Resolve with URL semantics rather than string concatenation so that
  // document-relative paths ("pic.png"), protocol-relative URLs
  // ("//cdn/x.png") and non-http schemes ("data:", "mailto:") come out right.
  // Elements with a missing/empty attribute are left untouched.
  page.querySelectorAll('img').forEach((el) => {
    const raw = el.getAttribute('src');
    if (raw) {
      el.src = resolveAgainst(raw, base);
    }
  });

  page.querySelectorAll('a[href]').forEach((el) => {
    const raw = el.getAttribute('href');
    if (raw) {
      el.href = resolveAgainst(raw, base);
    }
  });

  const reader = new Readability(page);
  return reader.parse();
}
/**
 * Renders `url` in a new tab of the module-level `browser` and returns the
 * resulting page markup.
 *
 * Failures are logged via `console.error` together with the URL and are not
 * rethrown; in that case the promise resolves to `undefined`.
 *
 * @param {string} url - Address to navigate to.
 * @returns {Promise<string|undefined>} Serialized page HTML, or `undefined`
 *   when opening the tab, navigating, or reading the content failed.
 */
async function spa(url) {
  try {
    const tab = await browser.newPage();
    await tab.setViewport({ width: 800, height: 600 });
    // Wait for the network to go (mostly) quiet before reading the markup.
    await tab.goto(url, { waitUntil: 'networkidle2' });
    return await tab.content();
  } catch (failure) {
    console.error(failure, url);
    return undefined;
  }
}
// The Reader class is this module's default export.
export default Reader;
这是我的Dockerfile:
# Image for the Node.js service that drives the distro-provided Chromium.
FROM node:14-alpine

# Point tooling at the Alpine Chromium binary and skip Puppeteer's own download.
ENV CHROME_BIN="/usr/bin/chromium-browser" \
    PUPPETEER_SKIP_CHROMIUM_DOWNLOAD="true"

RUN mkdir -p /usr/src/app

# --no-cache fetches a fresh package index and keeps it out of the image layer.
# nss, freetype, harfbuzz, ca-certificates and ttf-freefont are the runtime
# libraries/fonts the Puppeteer troubleshooting guide lists for Chromium on Alpine.
# NOTE(review): python/python-dev/py-pip are the Python 2 package names and may
# not exist on newer Alpine releases — confirm against the base image version.
RUN apk add --no-cache \
    vim python python-dev py-pip build-base curl htop ack \
    chromium nss freetype harfbuzz ca-certificates ttf-freefont

WORKDIR /usr/src/app

# Copy the manifest first so the dependency layer is cached across source changes.
COPY package.json /usr/src/app/
RUN npm install --production

COPY . /usr/src/app

EXPOSE 8700
WORKDIR /usr/src/app
CMD [ "npm", "start" ]