Algorithm nodejs中开放图的刮削算法

Algorithm nodejs中开放图的刮削算法,algorithm,node.js,logic,metadata,facebook-opengraph,Algorithm,Node.js,Logic,Metadata,Facebook Opengraph,我正在尝试使用nodejs(使用cheerio)从URL获取开放图元数据, 使用下面的代码 我需要填写以下内容:var result={} for (var ogCounter = 0; ogCounter < metalist.length; ogCounter++) { if (!utils.isEmpty(metalist[ogCounter].attribs.property) && !utils.isEmpty(metalist[ogCounter].

我正在尝试使用 Node.js(借助 cheerio)从 URL 获取 Open Graph 元数据,使用的代码如下。

我需要填充下面这个对象:
var result={}

  // Single pass over the <meta> nodes: every tag whose property starts with
  // "og" and whose content is non-empty is stored in `result`, keyed by the
  // second ':'-separated segment of the property name.
  for (var metaIndex = 0; metaIndex < metalist.length; metaIndex++) {
    var attribs = metalist[metaIndex].attribs;

    // Skip tags that lack either attribute.
    if (utils.isEmpty(attribs.property) || utils.isEmpty(attribs.content)) {
        continue;
    }
    // Skip properties that do not begin with "og".
    if (attribs.property.indexOf('og') != 0) {
        continue;
    }

    var ogname = attribs.property.split(':');
    var property = ogname[1];
    var content = attribs.content;

    if (utils.isEmpty(result[property])) {
        // First value seen for this key: store it as-is.
        result[property] = content;
    } else if (result[property].push) {
        // The stored value has a push method (already a list): append.
        result[property].push(content);
    } else {
        // Second value for this key: promote the stored value to a list.
        result[property] = [result[property], content];
    }
  }
这段代码目前得到的结果是:

type: "video",
image: "http://i3.ytimg.com/vi/fWNaR-rxAic/mqdefault.jpg",
video: [
"http://www.youtube.com/v/fWNaR-rxAic?version=3&amp;autohide=1",
"application/x-shockwave-flash",
"1920",
"1080"
]

但我想要的是:

type: "video",
image: "http://i3.ytimg.com/vi/fWNaR-rxAic/mqdefault.jpg",
video: {
"http://www.youtube.com/v/fWNaR-rxAic?version=3&amp;autohide=1",
{ 
type:"application/x-shockwave-flash",
width:"1920",
height:"1080"
}
}
我尝试了下面这个 if 判断,但它不起作用:

 // Attempted merge step for one meta tag: store the first value directly,
 // append to a value that has a push method, otherwise wrap the stored value
 // and the new content in a sub-object.
 var existing = result[property];

 if (utils.isEmpty(existing)) {
     result[property] = content;
 } else if (existing.push) {
     existing.push(content);
 } else {
     // NOTE(review): `name` is not declared in this snippet — presumably the
     // split property array was intended; confirm against the enclosing loop.
     var subresult = {};
     subresult[name[2]] = content;
     subresult[property] = existing;

     result[property] = subresult;
 }

我不想把所有的 meta 标签循环两次,而且我对 JavaScript 和 Node.js 的函数不太熟悉……有什么建议吗?谢谢

这很棘手,问题在于您希望 og:video 呈现的方式。我认为那样的结构是做不到的。最简单的方法是给它分配一个标识符,例如 name,并把它和 og:video:width 等属性放在同一个对象里。
示例结果

{
  "type": "video.other",
  "url": "http://philippeharewood.com/facebook/video.html",
  "title": "Simple Plan",
  "video": {
    "name": "http://www.youtube.com/v/Y4MnpzG5Sqc?version=3&amp;autohide=1",
    "type": "application/x-shockwave-flash",
    "width": "398",
    "height": "224",
    "release_date": "2012-05-29T21:30"
  }
}
实现方法如下:

var cheerio = require('cheerio');
var request = require('request');

var url = 'http://philippeharewood.com/facebook/video.html';
// Null-prototype object: the keys come from a remote page, so names such as
// "__proto__" or "constructor" must be stored as plain data and never reach
// Object.prototype.
var result = Object.create(null);

/**
 * Stores one Open Graph tag in `target`.
 *
 * "og:<key>" is stored as target[key] = content. "og:<key>:<sub>" is stored
 * as target[key][sub] = content; if target[key] already held a plain string
 * (the bare "og:<key>" value) that string is kept under the "name" field.
 * A bare "og:<key>" arriving after its sub-properties is likewise stored
 * under "name", so the result does not depend on tag order.
 *
 * @param {Object} target   object being filled
 * @param {string} property full property attribute, e.g. "og:video:width"
 * @param {string} content  value of the tag's content attribute
 */
function addOgProperty(target, property, content) {
  var og = property.split(':');
  var key = og[1];
  var existing = target[key];
  var set;

  if (!key) {
    return; // "og:" with nothing after the prefix carries no usable name
  }

  if (og.length > 2) {
    if (existing !== null && typeof existing === 'object') {
      set = existing;
    } else {
      set = Object.create(null);
      if (typeof existing === 'string') {
        set.name = existing;
      }
      target[key] = set;
    }
    set[og[2]] = content;
  } else if (existing !== null && typeof existing === 'object') {
    // Sub-properties were seen first; keep them and add the bare value.
    existing.name = content;
  } else {
    target[key] = content;
  }
}

request(url, function (error, response, body) {
  if (error) {
    // Without a body there is nothing to parse.
    console.error(error);
    return;
  }

  var $ = cheerio.load(body);
  var meta = $('meta');

  // Index loop: visits only the matched elements, not the other keys of the
  // selection object.
  for (var i = 0; i < meta.length; i++) {
    var attribs = meta[i].attribs;

    if (attribs && attribs.property && attribs.property.indexOf('og:') === 0) {
      addOgProperty(result, attribs.property, attribs.content);
    }
  }

  console.log(JSON.stringify(result, undefined, 2));
});

这是我的答案。@phwd 已经完整地回答了这个问题,但我认为最好给出一个更通用的解决方案,把所有 meta 标签解析到任意的 n 层嵌套。

var cheerio = require('cheerio'),
  request = require('request'),
  url = 'http://philippeharewood.com/facebook/video.html',
  // Null-prototype object: keys come from a remote page, so names such as
  // "__proto__" must be stored as plain data.
  result = Object.create(null),
  // Returns the named attribute of a parsed tag, or "" when it is missing.
  attr = function( tag, prop ){ return tag.attribs && tag.attribs[prop] || ""; };

// True when `value` is a nested branch (an object) rather than a leaf string.
function isBranch( value ) {
  return value !== null && typeof value === "object";
}

/**
 * Inserts one meta tag into `tree`, nesting by the ':'-separated segments of
 * its property name to any depth.
 *
 * A leaf that later needs children is turned into a branch and its old
 * string value is kept under "name". A leaf value that arrives when the
 * branch already exists is also stored under "name".
 *
 * @param {Object} tree     object being filled
 * @param {string} property property attribute, e.g. "og:video:width"
 * @param {string} content  content attribute of the same tag
 */
function insertMeta( tree, property, content ) {
  var tokens = property.split(":"),
    parent = tree;

  for ( var j = 0; j < tokens.length; j++ ){
    var token = tokens[j],
      current = parent[token];

    if ( j + 1 === tokens.length ) { // leaf node

      // expected leaf is already a branch, so store the value as its name
      if ( isBranch(current) ) current.name = content;
      // leaf should take the value given
      else parent[token] = content;

    } else { // branch node

      // if no such branch exists, make one
      if ( !isBranch(current) ) {
        var branch = Object.create(null);
        // if the branch is already a (non-empty) leaf, move value to name
        if ( typeof current === "string" && current ) branch.name = current;
        parent[token] = branch;
        current = branch;
      }
      parent = current;
    }
  }
}

request( url, function( err, res, body ) {

  if ( err ) {
    // Without a body there is nothing to parse.
    console.error(err);
    return;
  }

  var metas = cheerio.load(body)('meta');

  // Index loop: visits only the matched elements of the selection.
  for ( var i = 0; i < metas.length; i++ ) {
    var meta = metas[i],
      property = attr(meta,'property');

    if ( property ) insertMeta( result, property, attr(meta,'content') );
  }

  console.log(JSON.stringify( result.og, undefined, 2));

});
var cheerio=require('cheerio'),
请求=要求(‘请求’),
url='1〕http://philippeharewood.com/facebook/video.html',
结果={},
attr=function(tag,prop){return tag.attribs&&tag.attribs[prop]| |“”}
请求(url、函数(err、res、body){
var metas=cheerio.load(主体)(“meta”)
变量键=对象键(元)
键。forEach(功能(i){
var meta=metas[i],
property=attr(meta,'property'),
零件=属性。拆分(“:”);
如果(财产){
var og=property.split(“:”),
父母=结果;
对于(var j=0;j
很乐意提供帮助,但如果您发布能够实际运行的代码,会更快得到回复。是的,否则任何想帮忙的人都必须自己重建程序的其余部分。没错,问题中要求的数据结构在 JS 中是不可能存在的,因为对象中的每个属性都必须有一个名称。可行的做法是把外层的花括号替换为方括号,从而把 video 创建为包含两个条目的数组:第一个条目是包含 URL 的字符串值,第二个条目是包含“元数据”的对象。但我看不出这有什么实际的好处——该数据结构中的其他内容都是作为对象属性访问的,唯独在这里要改用基于索引的访问,会相当混乱。