Javascript 抓取谷歌新闻

Javascript 抓取谷歌新闻,javascript,arrays,object,cheerio,Javascript,Arrays,Object,Cheerio,我正试图抓取谷歌新闻,但我使用的代码有问题: var express = require('express'); var request = require('request'); var cheerio = require('cheerio'); var path = require('path'); var fs =require('fs'); var app = express(); var port = 8080; // Define the requests url var

我正试图抓取谷歌新闻,但我使用的代码有问题:

 var express = require('express');
var request = require('request');
var cheerio = require('cheerio');
var path = require('path');
var fs =require('fs');
var app = express();
var port = 8080;


 // Define the requests url

 var url = "https://news.google.com/news";

// Download the page at `url`, collect the href of every headline link into
// `news`, then log the result.
request(url,function(err,res,body){

var news=[];
 var $page = cheerio.load(body);

// Each matched <a> contributes its href string to `news`.
// Note: .each() returns the cheerio selection it was called on, so `$url`
// ends up holding the raw <a> element nodes, not the href strings.
var $url=$page('table[class="esc-layout-table"]').find('tbody > tr > td > div > h2 > a').each(function (index, element) {
  news.push($page(element).attr('href'));
});
// NOTE: this reassignment throws away the array of hrefs built above and
// replaces it with an object wrapping the raw selection, which is why the
// logged output is a dump of element nodes rather than a list of links.
news={
//desc:$desc,
    url:$url,
//    img:$img,
};

// Fixed: the original line was missing the comma between the two arguments,
// which is a syntax error that prevents the script from being parsed.
console.log ('success ....', news);

});
我想获取文章的链接、标题以及缩略图,并把它们存储到 Firebase 数据库中。但不幸的是,剩下的部分我不知道该怎么做:我无法得到期望的结果,控制台中输出的却是:

    success .... { '0':
   { type: 'tag',
     name: 'a',
     attribs:
      { target: '_blank',
        class: 'article usg-AFQjCNFxodYTzKo8-hM57511iQgBdfn8xA did-3230940966728164415',
        href: 'https://www.washingtonpost.com/news/post-nation/wp/2016/12/15/jurors-begin-deliberating-in-charleston-church-shooting-trial/',
        url: 'https://www.washingtonpost.com/news/post-nation/wp/2016/12/15/jurors-begin-deliberating-in-charleston-church-shooting-trial/',
        id: 'MAA4AEgAUABgAWoCdXM',
        ssid: 'h' },
     children: [ [Object] ],
     next: null,
     prev: null,
     parent:
      { type: 'tag',
        name: 'h2',
        attribs: [Object],
        children: [Object],
        next: null,
        prev: null,
        parent: [Object] } },
  '1':
   { type: 'tag',
     name: 'a',
     attribs:
      { target: '_blank',
        class: 'article usg-AFQjCNEQY4otecPJJevDyoBp3K-IQnes2w did-141563424311867977',
        href: 'http://www.businessinsider.com/facebook-will-fact-check-label-fake-news-in-news-feed-2016-12',
        url: 'http://www.businessinsider.com/facebook-will-fact-check-label-fake-news-in-news-feed-2016-12',
        id: 'MAA4AEgBUABgAWoCdXM',
        ssid: 'h' },
     children: [ [Object] ],
     next: null,
     prev: null,
     parent:
      { type: 'tag',
        name: 'h2',
        attribs: [Object],
        children: [Object],
        next: null,
        prev: null,
        parent: [Object] } },
  '2':
   { type: 'tag',
     name: 'a',
     attribs:
      { target: '_blank',
        class: 'article usg-AFQjCNHyGG4zl4RW-AoIILTssJX_TKCybg did--2293954291931624250',
        href: 'http://www.bbc.com/news/world-middle-east-38329461',
        url: 'http://www.bbc.com/news/world-middle-east-38329461',
        id: 'MAA4AEgCUABgAWoCdXM',
        ssid: 'h' },
     children: [ [Object] ],
     next: null,
     prev: null,
     parent:
      { type: 'tag',
        name: 'h2',
        attribs: [Object],
        children: [Object],
        next: null,
        prev: null,
        parent: [Object] } },
  '3':
注意:这只是结果的一小部分;如果您运行代码,就会清楚地看到我的意思。
谢谢大家的建议。

我不确定这是你想要的,但是试试这个,我得到了你想要的所有url

 var express = require('express');
var request = require('request');
var cheerio = require('cheerio');
var path = require('path');
var fs =require('fs');
var app = express();
var port = 8080;


 // Define the requests url

 var url = "https://news.google.com/news";

// Download the page at `url` and log one { url, title, image } record for
// every story table found in the returned markup.
request(url, function (err, res, body) {
  // On a transport failure `body` is not usable HTML, so report the error
  // instead of handing it to cheerio.
  if (err) {
    console.error('request failed:', err);
    return;
  }
  // A non-200 reply is unlikely to contain the expected story markup.
  if (res.statusCode !== 200) {
    console.error('unexpected HTTP status:', res.statusCode);
    return;
  }

  const $ = cheerio.load(body);
  const news = [];

  // Selector paths, evaluated relative to each story table.
  const titleLinkPath = 'tbody > tr > .esc-layout-article-cell > .esc-lead-article-title-wrapper > .esc-lead-article-title > a';
  const thumbnailPath = 'tbody > tr > .esc-layout-thumbnail-cell > .esc-thumbnail-wrapper > .esc-thumbnail-state > .esc-thumbnail > a > .esc-thumbnail-image-wrapper > img';

  // A regular function (not an arrow) is needed here because the callback
  // reads the current element through `this`.
  $('table[class="esc-layout-table"]').each(function () {
    const $story = $(this);
    // Look the title link up once; both the href and the title text hang off it.
    const $titleLink = $story.find(titleLinkPath);

    news.push({
      // Spelled out explicitly so the outer `url` (the request target) is
      // no longer shadowed by a local of the same name.
      url: $titleLink.attr('href'),
      title: $titleLink.children('span').text(),
      image: $story.find(thumbnailPath).attr('src'),
    });
  });

  console.log ('success ....', news);
});
结果

success .... [ { url: 'http://www.cbc.ca/news/canada/saskatchewan/prisoner-killed-sask-penitentiary-1.3898175',
    title: '1 dead, at least 8 injured in Saskatchewan prison riot',
    image: '//t2.gstatic.com/images?q=tbn:ANd9GcRKhvczSgL4g3dO8EHkruAEB5AoqkI-PvbB8LzlHBZTPGJYh4bEooNKApDXqTzboNrLpqv3H7MG' },
  { url: 'http://www.cbc.ca/news/world/aleppo-convoy-evacuation-1.3895602',
    title: '3000 people evacuated from eastern Aleppo so far as fragile ceasefire holds',
    image: '//t2.gstatic.com/images?q=tbn:ANd9GcQ86WfdQJVFE4GrQvu_CPrjx3sqhqut0gjBRv6opfVA4JLIqsWeBDjRWURFGd7h_XN_0D0DnoQ5' },
  { url: 'http://www.cp24.com/news/five-family-members-dead-following-first-nation-fire-1.3204960',
    title: 'Five family members dead following First Nation fire',
    image: '//t1.gstatic.com/images?q=tbn:ANd9GcS6DkmtVrLs4wzVLDfNZvfOm9Js6rXvSg8ttjdoofJwWUZkM2wSjvLA-HpVZdTJN7pG-1FubXI' },
  { url: 'http://www.ctvnews.ca/business/b-c-offers-five-year-interest-free-down-payment-loans-to-first-time-buyers-1.3205119',
    title: 'BC offers five-year, interest-free down-payment loans to first-time buyers',
    image: '//t3.gstatic.com/images?q=tbn:ANd9GcR2gUEW4E0gtt5Sj-jFIJP0iC1JIIZ3qi5RbpbwD7otN7B5nKf8qXT-Q1Aaxcs5Z7FVn-LhNXU5' },

希望这能帮助你:)

不确定这是你想要的,但试试这个,我得到了你想要的所有url

 var express = require('express');
var request = require('request');
var cheerio = require('cheerio');
var path = require('path');
var fs =require('fs');
var app = express();
var port = 8080;


 // Define the requests url

 var url = "https://news.google.com/news";

// Download the page at `url` and log one { url, title, image } record for
// every story table found in the returned markup.
request(url, function (err, res, body) {
  // On a transport failure `body` is not usable HTML, so report the error
  // instead of handing it to cheerio.
  if (err) {
    console.error('request failed:', err);
    return;
  }
  // A non-200 reply is unlikely to contain the expected story markup.
  if (res.statusCode !== 200) {
    console.error('unexpected HTTP status:', res.statusCode);
    return;
  }

  const $ = cheerio.load(body);
  const news = [];

  // Selector paths, evaluated relative to each story table.
  const titleLinkPath = 'tbody > tr > .esc-layout-article-cell > .esc-lead-article-title-wrapper > .esc-lead-article-title > a';
  const thumbnailPath = 'tbody > tr > .esc-layout-thumbnail-cell > .esc-thumbnail-wrapper > .esc-thumbnail-state > .esc-thumbnail > a > .esc-thumbnail-image-wrapper > img';

  // A regular function (not an arrow) is needed here because the callback
  // reads the current element through `this`.
  $('table[class="esc-layout-table"]').each(function () {
    const $story = $(this);
    // Look the title link up once; both the href and the title text hang off it.
    const $titleLink = $story.find(titleLinkPath);

    news.push({
      // Spelled out explicitly so the outer `url` (the request target) is
      // no longer shadowed by a local of the same name.
      url: $titleLink.attr('href'),
      title: $titleLink.children('span').text(),
      image: $story.find(thumbnailPath).attr('src'),
    });
  });

  console.log ('success ....', news);
});
结果

success .... [ { url: 'http://www.cbc.ca/news/canada/saskatchewan/prisoner-killed-sask-penitentiary-1.3898175',
    title: '1 dead, at least 8 injured in Saskatchewan prison riot',
    image: '//t2.gstatic.com/images?q=tbn:ANd9GcRKhvczSgL4g3dO8EHkruAEB5AoqkI-PvbB8LzlHBZTPGJYh4bEooNKApDXqTzboNrLpqv3H7MG' },
  { url: 'http://www.cbc.ca/news/world/aleppo-convoy-evacuation-1.3895602',
    title: '3000 people evacuated from eastern Aleppo so far as fragile ceasefire holds',
    image: '//t2.gstatic.com/images?q=tbn:ANd9GcQ86WfdQJVFE4GrQvu_CPrjx3sqhqut0gjBRv6opfVA4JLIqsWeBDjRWURFGd7h_XN_0D0DnoQ5' },
  { url: 'http://www.cp24.com/news/five-family-members-dead-following-first-nation-fire-1.3204960',
    title: 'Five family members dead following First Nation fire',
    image: '//t1.gstatic.com/images?q=tbn:ANd9GcS6DkmtVrLs4wzVLDfNZvfOm9Js6rXvSg8ttjdoofJwWUZkM2wSjvLA-HpVZdTJN7pG-1FubXI' },
  { url: 'http://www.ctvnews.ca/business/b-c-offers-five-year-interest-free-down-payment-loans-to-first-time-buyers-1.3205119',
    title: 'BC offers five-year, interest-free down-payment loans to first-time buyers',
    image: '//t3.gstatic.com/images?q=tbn:ANd9GcR2gUEW4E0gtt5Sj-jFIJP0iC1JIIZ3qi5RbpbwD7otN7B5nKf8qXT-Q1Aaxcs5Z7FVn-LhNXU5' },

希望能对您有所帮助:)

非常感谢,我真的很感激您的帮助。您知道有哪些关于 cheerio 的学习资料吗?我怎样才能更深入地了解 cheerio?您给出的答案很好,但我希望以后能自己找到答案。

没问题,伙计。看看这个视频 ;) 它会对你有帮助 ;)