从chrome扩展获取当前页面的源HTML
我有一个 Chrome 扩展。我需要获取当前页面的 HTML 源代码进行分析。我在这里找到了各种各样的解决方案,包括后台页面和内容脚本,但没有一个对我有帮助。以下是我到目前为止的情况:从chrome扩展获取当前页面的源HTML,html,google-chrome-extension,Html,Google Chrome Extension,我有一个 Chrome 扩展。我需要获取当前页面的 HTML 源代码进行分析。我在这里找到了各种各样的解决方案,包括后台页面和内容脚本,但没有一个对我有帮助。以下是我到目前为止的情况: manifest.json { "name": "Extension", "version": "1.0", "description": "Extension", "browse
manifest.json
{
"name": "Extension",
"version": "1.0",
"description": "Extension",
"browser_action": {
"default_icon": "bmarkred.ico",
"popup": "Test.html"
},
"content_scripts": [
{
"matches": ["http://*/*"],
"js": ["content.js"]
}
],
"background": {
"page": "backgroundPage.html"
},
"permissions": [
"cookies",
"tabs",
"http://*/*",
"https://*/*"
]
}
background.html
<html>
<head>
<script type="text/javascript">
// Ask the selected tab for its page source via a "getSource" request and show
// whatever the callback receives in an alert. An exception thrown while
// issuing the calls is alerted as well.
// NOTE(review): nothing defers this code, so it presumably runs as soon as the
// page loads rather than when the popup opens - confirm that a listener
// exists in the tab at that point.
try {
chrome.tabs.getSelected(null, function (tab) {
// Forward the request to that tab; `source` is whatever the tab replies with.
chrome.tabs.sendRequest(tab.id, {action: "getSource"}, function(source) {
alert(source);
});
});
}
catch (ex) {
alert(ex);
}
</script>
</head>
</html>
alert 弹出的内容始终是 undefined。即使我在 content.js 文件中将回调函数改为:
callback('hello');
结果还是一样。我做错了什么?也许我走错了方向。我真正需要的是:当用户打开扩展弹出窗口时(只有在那时),我需要当前页面的 HTML,以便对其进行分析。将脚本注入到您想要获取源代码的页面中,并让它通过消息把源代码发回弹出窗口 manifest.json
{
"name": "Get pages source",
"version": "1.0",
"manifest_version": 2,
"description": "Get pages source from a popup",
"browser_action": {
"default_icon": "icon.png",
"default_popup": "popup.html"
},
"permissions": ["tabs", "<all_urls>"]
}
<!DOCTYPE html>
<html style=''>
<head>
<script src='popup.js'></script>
</head>
<body style="width:400px;">
<div id='message'>Injecting Script....</div>
</body>
</html>
/**
 * Popup script: injects getPagesSource.js into the current tab and shows the
 * page source that the injected script sends back in the #message element.
 */

// Receives the "getSource" message sent by the injected script.
chrome.runtime.onMessage.addListener(function(request, sender) {
  if (request.action === "getSource") {
    // Look the element up here: the `message` variable in onWindowLoad is
    // local to that function, so this handler must not rely on it.
    const output = document.querySelector('#message');
    if (output) {
      output.innerText = request.source;
    }
  }
});

/**
 * Injects getPagesSource.js once the popup window has loaded.
 * Injection failures are reported in the #message element.
 */
function onWindowLoad() {
  const message = document.querySelector('#message');
  chrome.tabs.executeScript(null, {
    file: "getPagesSource.js"
  }, function() {
    // If you try and inject into an extensions page or the webstore/NTP you'll get an error
    if (chrome.runtime.lastError) {
      message.innerText = 'There was an error injecting script : \n' + chrome.runtime.lastError.message;
    }
  });
}

window.onload = onWindowLoad;
// @author Rob W <http://stackoverflow.com/users/938089/rob-w>
// Demo: var serialized_html = DOMtoString(document);
/**
 * Serializes the direct children of a node into a single markup string.
 *
 * Walks the child list from firstChild through nextSibling and appends a
 * textual form for element, text, CDATA, comment and doctype nodes. Any other
 * node type contributes nothing.
 *
 * @param {Node} document_root - Node whose children are serialized.
 * @returns {string} The concatenated markup ('' when there are no children).
 */
function DOMtoString(document_root) {
  let serialized = '';
  for (let child = document_root.firstChild; child; child = child.nextSibling) {
    const kind = child.nodeType;
    if (kind === Node.ELEMENT_NODE) {
      serialized += child.outerHTML;
    } else if (kind === Node.TEXT_NODE) {
      serialized += child.nodeValue;
    } else if (kind === Node.CDATA_SECTION_NODE) {
      serialized += '<![CDATA[' + child.nodeValue + ']]>';
    } else if (kind === Node.COMMENT_NODE) {
      serialized += '<!--' + child.nodeValue + '-->';
    } else if (kind === Node.DOCUMENT_TYPE_NODE) {
      // (X)HTML documents are identified by public identifiers; a doctype
      // with only a system identifier gets the SYSTEM keyword instead.
      let identifiers = '';
      if (child.publicId) {
        identifiers += ' PUBLIC "' + child.publicId + '"';
      } else if (child.systemId) {
        identifiers += ' SYSTEM';
      }
      if (child.systemId) {
        identifiers += ' "' + child.systemId + '"';
      }
      serialized += "<!DOCTYPE " + child.name + identifiers + '>\n';
    }
  }
  return serialized;
}
// Serialize the current document with DOMtoString and send the result to the
// extension as a "getSource" message.
chrome.runtime.sendMessage({
action: "getSource",
source: DOMtoString(document)
});
getPagesSource.js
{
"name": "Get pages source",
"version": "1.0",
"manifest_version": 2,
"description": "Get pages source from a popup",
"browser_action": {
"default_icon": "icon.png",
"default_popup": "popup.html"
},
"permissions": ["tabs", "<all_urls>"]
}
<!DOCTYPE html>
<html style=''>
<head>
<script src='popup.js'></script>
</head>
<body style="width:400px;">
<div id='message'>Injecting Script....</div>
</body>
</html>
/**
 * Popup script: injects getPagesSource.js into the current tab and shows the
 * page source that the injected script sends back in the #message element.
 */

// Receives the "getSource" message sent by the injected script.
chrome.runtime.onMessage.addListener(function(request, sender) {
  if (request.action === "getSource") {
    // Look the element up here: the `message` variable in onWindowLoad is
    // local to that function, so this handler must not rely on it.
    const output = document.querySelector('#message');
    if (output) {
      output.innerText = request.source;
    }
  }
});

/**
 * Injects getPagesSource.js once the popup window has loaded.
 * Injection failures are reported in the #message element.
 */
function onWindowLoad() {
  const message = document.querySelector('#message');
  chrome.tabs.executeScript(null, {
    file: "getPagesSource.js"
  }, function() {
    // If you try and inject into an extensions page or the webstore/NTP you'll get an error
    if (chrome.runtime.lastError) {
      message.innerText = 'There was an error injecting script : \n' + chrome.runtime.lastError.message;
    }
  });
}

window.onload = onWindowLoad;
// @author Rob W <http://stackoverflow.com/users/938089/rob-w>
// Demo: var serialized_html = DOMtoString(document);
/**
 * Serializes the direct children of a node into a single markup string.
 *
 * Walks the child list from firstChild through nextSibling and appends a
 * textual form for element, text, CDATA, comment and doctype nodes. Any other
 * node type contributes nothing.
 *
 * @param {Node} document_root - Node whose children are serialized.
 * @returns {string} The concatenated markup ('' when there are no children).
 */
function DOMtoString(document_root) {
  let serialized = '';
  for (let child = document_root.firstChild; child; child = child.nextSibling) {
    const kind = child.nodeType;
    if (kind === Node.ELEMENT_NODE) {
      serialized += child.outerHTML;
    } else if (kind === Node.TEXT_NODE) {
      serialized += child.nodeValue;
    } else if (kind === Node.CDATA_SECTION_NODE) {
      serialized += '<![CDATA[' + child.nodeValue + ']]>';
    } else if (kind === Node.COMMENT_NODE) {
      serialized += '<!--' + child.nodeValue + '-->';
    } else if (kind === Node.DOCUMENT_TYPE_NODE) {
      // (X)HTML documents are identified by public identifiers; a doctype
      // with only a system identifier gets the SYSTEM keyword instead.
      let identifiers = '';
      if (child.publicId) {
        identifiers += ' PUBLIC "' + child.publicId + '"';
      } else if (child.systemId) {
        identifiers += ' SYSTEM';
      }
      if (child.systemId) {
        identifiers += ' "' + child.systemId + '"';
      }
      serialized += "<!DOCTYPE " + child.name + identifiers + '>\n';
    }
  }
  return serialized;
}
// Serialize the current document with DOMtoString and send the result to the
// extension as a "getSource" message.
chrome.runtime.sendMessage({
action: "getSource",
source: DOMtoString(document)
});
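// @author Rob W <http://stackoverflow.com/users/938089/rob-w>
// Demo: var serialized_html = DOMtoString(document);
function DOMtoString(document_root) {
var html = '',
node = document_root.firstChild;
while (node) {
switch (node.nodeType) {
case Node.ELEMENT_NODE:
html += node.outerHTML;
break;
case Node.TEXT_NODE:
html += node.nodeValue;
break;
case Node.CDATA_SECTION_NODE:
html += '<![CDATA[' + node.nodeValue + ']]>';
break;
case Node.COMMENT_NODE:
html += '<!--' + node.nodeValue + '-->';
break;
case Node.DOCUMENT_TYPE_NODE:
// (X)HTML documents are identified by public identifiers
html += "<!DOCTYPE " + node.name + (node.publicId ? ' PUBLIC "' + node.publicId + '"' : '') + (!node.publicId && node.systemId ? ' SYSTEM' : '') + (node.systemId ? ' "' + node.systemId + '"' : '') + '>\n';
break;
}
node = node.nextSibling;
}
return html;
}
这是我的解决方案: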
// Handles the "getSource" message posted by the injected snippet: stores the
// page source on `this.pageSource` and alerts the page title found in it.
chrome.runtime.onMessage.addListener(function(request, sender) {
  if (request.action === "getSource") {
    this.pageSource = request.source;
    // match() returns null when the source has no non-empty <title> element;
    // guard so a title-less page does not raise a TypeError here.
    var titleMatch = this.pageSource.match(/<title[^>]*>([^<]+)<\/title>/);
    if (titleMatch) {
      alert(titleMatch[1]);
    }
  }
});
// Find the active tab of the current window and inject a snippet that sends
// the page's outerHTML back as a "getSource" message.
chrome.tabs.query({ active: true, currentWindow: true }, tabs => {
  // Nothing to inject into when the query yields no tab.
  if (!tabs || tabs.length === 0) {
    return;
  }
  chrome.tabs.executeScript(
    tabs[0].id,
    { code: 'var s = document.documentElement.outerHTML; chrome.runtime.sendMessage({action: "getSource", source: s});' },
    () => {
      // Injection is refused on some pages (extension pages, the web store,
      // the new tab page); read lastError so the failure is reported.
      if (chrome.runtime.lastError) {
        console.error('There was an error injecting script : ' + chrome.runtime.lastError.message);
      }
    }
  );
});
chrome.runtime.onMessage.addListener(function(request, sender) {
if (request.action == "getSource") {
this.pageSource = request.source;
var title = this.pageSource.match(/<title[^>]*>([^<]+)<\/title>/)[1];
alert(title)
}
});
chrome.tabs.query({ active: true, currentWindow: true }, tabs => {
chrome.tabs.executeScript(
tabs[0].id,
{ code: 'var s = document.documentElement.outerHTML; chrome.runtime.sendMessage({action: "getSource", source: s});' }
);
});
问题在于后台页面中的代码会立即运行(在注入内容脚本之前)。以前有人问过一个非常类似(甚至重复)的问题,请查看那里的答案。谢谢你的回答,Rob。我复制了你所附链接中的代码段,但仍然不起作用。问题是,我的扩展是一个弹出窗口,只有当用户打开我的扩展时,我才需要获取 HTML。例如,如果当前选项卡是 facebook.com,那么只有当我打开扩展时,我才会把 HTML 源代码取到我的 js 文件中(而不是内容脚本或后台页面)。请用当前代码更新您的问题,代码中必须包含指出问题所在的注释。@Gil Tankus 很抱歉,我的第一个回答没有(再次)充分留意评论,结果只是重复了 Rob W 所说的内容。新的回答应该有你想要的东西。谢谢,你的回答真的很有帮助。我的问题是 onMessage 是异步的:在我的弹出窗口中还有各种其他逻辑,它们都依赖于源 HTML。我如何把源代码保存到全局变量中,然后再继续执行页面的 onload 函数?我认为你做不到。你要么把它放在回调代码中,要么放在一个函数里,然后在回调中调用它……要是 JS 有 goto
命令就好了,是吧?;) 为什么不直接用 document.documentElement.outerHTML 之类的东西,而要用 DOMtoString 函数?@djfm,那在几乎所有情况下都可以。据我所知,只是 Rob W 的函数更完整……例如它会返回 doctype,而您的解决方案不返回 doctype,只获取 html 部分。您以前尝试过这种方法(并将应用提交到 Webstore)吗?这几乎像是一种不用内容脚本却达到内容脚本效果的取巧做法。