Node.js 中 Open Graph 元数据的抓取算法
我正在尝试使用 Node.js(借助 cheerio)从 URL 获取 Open Graph 元数据,并用下面的代码填充 result 对象。(标签:algorithm、node.js、logic、metadata、facebook-opengraph)
// Collects Open Graph <meta> tags from `metalist` into a flat `result` map.
// Only the second segment of the property name is used as the key
// (e.g. both "og:video" and "og:video:width" are stored under "video"),
// so repeated keys are accumulated into an array in document order.
// `metalist` and `utils` are expected to be defined by the surrounding program.
var result = {};
for (var ogCounter = 0; ogCounter < metalist.length; ogCounter++) {
  var attribs = metalist[ogCounter].attribs;

  // Skip tags that lack either a property name or a content value.
  if (utils.isEmpty(attribs.property) || utils.isEmpty(attribs.content)) {
    continue;
  }
  // Skip tags whose property does not start with "og".
  if (attribs.property.indexOf('og') !== 0) {
    continue;
  }

  var ogname = attribs.property.split(':');
  var property = ogname[1];
  var content = attribs.content;

  if (utils.isEmpty(result[property])) {
    // First value seen for this key: store it as-is.
    result[property] = content;
  } else if (Array.isArray(result[property])) {
    // Key already holds several values: append.
    result[property].push(content);
  } else {
    // Second value for this key: promote the single value to an array.
    result[property] = [result[property], content];
  }
}
目前得到的是下面第一段结果,但我想要的是其后的第二段结果:
type: "video",
image: "http://i3.ytimg.com/vi/fWNaR-rxAic/mqdefault.jpg",
video: [
"http://www.youtube.com/v/fWNaR-rxAic?version=3&autohide=1",
"application/x-shockwave-flash",
"1920",
"1080"
]
type: "video",
image: "http://i3.ytimg.com/vi/fWNaR-rxAic/mqdefault.jpg",
video: {
"http://www.youtube.com/v/fWNaR-rxAic?version=3&autohide=1",
{
type:"application/x-shockwave-flash",
width:"1920",
height:"1080"
}
}
我尝试了下面这个 if 分支,但它不起作用:
// Asker's attempted replacement for the merge step of the collection loop.
// It relies on `result`, `property`, `content` and `utils` from the surrounding code.
if (utils.isEmpty(result[property])) {
// Nothing stored under this key yet (per utils.isEmpty): store the value as-is.
result[property] = content;
} else {
if (result[property].push) {
// The stored value exposes a push method, so it is treated as an array: append.
result[property].push(content);
} else {
// Otherwise wrap the previously stored value and the new one in an object.
var subresult={};
// NOTE(review): `name` is not declared anywhere in the visible code; the earlier
// snippet calls the split property array `ogname`, so `ogname[2]` was presumably
// intended — confirm.
subresult[name[2]]=content;
// The previous value is re-stored under the same key name inside the new object.
subresult[property]=result[property] ;
result[property] = subresult;
}
}
我不想把所有的 meta 标签循环两次,而且我对 JavaScript 和 Node.js 的函数不太熟悉……有什么建议吗?谢谢。
这很棘手,原因在于您希望 og:video 呈现的方式,我认为无法做到那样。最简单的方法是给它分配一个标识符,例如 name,并使其与 og:video:width 等属性处于同一层级。
示例结果:
{
"type": "video.other",
"url": "http://philippeharewood.com/facebook/video.html",
"title": "Simple Plan",
"video": {
"name": "http://www.youtube.com/v/Y4MnpzG5Sqc?version=3&autohide=1",
"type": "application/x-shockwave-flash",
"width": "398",
"height": "224",
"release_date": "2012-05-29T21:30"
}
}
实现方法如下:
var cheerio = require('cheerio');
var request = require('request');

var url = 'http://philippeharewood.com/facebook/video.html';
var result = {};

/**
 * Fetches `url` and folds its Open Graph <meta> tags into `result`.
 *
 * Two-segment properties ("og:title") are stored as plain strings.
 * Three-segment properties ("og:video:width") are grouped into an object
 * under the second segment; if that key already held a plain string
 * (e.g. the "og:video" URL), the string is kept under the "name" key.
 * The final object is printed as pretty-printed JSON.
 */
request(url, function (error, response, body) {
  // Without this guard a failed request would pass `undefined` to cheerio.
  if (error) {
    console.error('Request for ' + url + ' failed:', error);
    return;
  }

  var $ = cheerio.load(body);

  // Use cheerio's own iterator so that only matched elements are visited,
  // not the wrapper object's bookkeeping properties.
  $('meta').each(function (index, element) {
    var attribs = element.attribs;
    if (!attribs || !attribs.property) {
      return;
    }

    var og = attribs.property.split(':');
    // Require the exact "og" namespace and at least one sub-key.
    if (og[0] !== 'og' || og.length < 2) {
      return;
    }

    var group = og[1];
    var content = attribs.content;

    if (og.length === 2) {
      result[group] = content;
      return;
    }

    var existing = result[group];
    var set;
    if (!existing) {
      set = {};
    } else if (typeof existing === 'string') {
      // A plain value was stored first: keep it under "name".
      set = { name: existing };
    } else {
      set = existing;
    }
    set[og[2]] = content;
    result[group] = set;
  });

  console.log(JSON.stringify(result, undefined, 2));
});
这是我的答案。@phwd 已经完整地回答了这个问题,但我认为最好给出一个更通用的解决方案,把所有 meta 标记解析到任意(n)层级:
var cheerio = require('cheerio'),
    request = require('request'),
    url = 'http://philippeharewood.com/facebook/video.html',
    result = {},
    // Returns the named attribute of a tag, or "" when it is missing.
    attr = function (tag, prop) { return tag.attribs && tag.attribs[prop] || ""; };

/**
 * Fetches `url` and parses every <meta property="a:b:c"> tag into a tree of
 * arbitrary depth rooted at `result` (result.a.b.c = content).
 *
 * When the same path is used both as a value and as a parent of deeper keys
 * (e.g. "og:video" and "og:video:width"), the node becomes an object and the
 * plain value is kept under its "name" key, regardless of which tag comes
 * first in the document. Only the "og" subtree is printed.
 */
request(url, function (err, res, body) {
  // Without this guard a failed request would pass `undefined` to cheerio.
  if (err) {
    console.error('Request for ' + url + ' failed:', err);
    return;
  }

  // Use cheerio's own iterator so that only matched elements are visited.
  cheerio.load(body)('meta').each(function (i, meta) {
    var property = attr(meta, 'property');
    if (!property) {
      return;
    }

    var og = property.split(':'),
        content = attr(meta, 'content'),
        parent = result;

    for (var j = 0; j < og.length; j++) {
      var token = og[j],
          current = parent[token],
          isLeaf = j + 1 === og.length;

      if (isLeaf) {
        if (current instanceof Object) {
          // The leaf path is already a branch: keep the value under "name".
          if (content) current["name"] = content;
        } else {
          parent[token] = content;
        }
      } else {
        if (!(current instanceof Object)) {
          // The branch path currently holds a plain value (or nothing):
          // replace it with an object and move any old value to "name".
          var displaced = typeof current === "string" ? current : undefined;
          current = {};
          parent[token] = current;
          if (displaced) current["name"] = displaced;
        }
        parent = current;
      }
    }
  });

  console.log(JSON.stringify(result.og, undefined, 2));
});
var cheerio=require('cheerio'),
请求=要求(‘请求’),
url='1〕http://philippeharewood.com/facebook/video.html',
结果={},
attr=function(tag,prop){return tag.attribs&&tag.attribs[prop]| |“”}
请求(url、函数(err、res、body){
var metas=cheerio.load(主体)(“meta”)
变量键=对象键(元)
键。forEach(功能(i){
var meta=metas[i],
property=attr(meta,'property'),
零件=属性。拆分(“:”);
如果(财产){
var og=property.split(“:”),
父母=结果;
对于(var j=0;j
很乐意提供帮助,但如果您发布的是能够实际运行的代码,会更快得到回应;否则任何想帮忙的人都必须自己重建程序的其余部分。
没错,JS 中不可能存在问题里要求的那种数据结构,因为对象中的每个属性都必须有名称。可行的做法是把外层的花括号换成方括号,从而把 video 创建为包含两个条目的数组:第一个条目是包含 URL 的字符串,第二个条目是包含“元数据”的对象。但我看不出这有什么实际好处——该数据结构中的其他内容都是作为对象属性访问的,唯独在这里必须改用基于索引的访问,会相当混乱。