Vbscript 使用VBS保存网页上的可见文本 Dim url: url = "http://some.url" 'set your page url here' With WScript.CreateObject("InternetExplorer.Application", "IE_") .Visible = False .Navigate url Do WScript.Sleep 100 Loop While .ReadyState

Vbscript 使用VBS保存网页上的可见文本 Dim url:url=”http://some.url“'在此处设置页面url' 使用WScript.CreateObject(“InternetExplorer.Application”、“IE”) .Visible=False .浏览网址 做 WScript.Sleep 100 在.ReadyState,vbscript,Vbscript,我发现了这一点,但是一个合适的代码会很有帮助。您可以尝试类似的方法 您可以在HTML和文本模式下保存此页面;) Sub-HttpGet 出错时继续下一步 “必须使用MSXML2,因为Microsoft.XMLHTTP在重复获取页面后导致拒绝访问错误,请考虑这一点 'Set File=WScript.CreateObject(“MSXML2.ServerXMLHTTP.4.0”) Set File=WScript.CreateObject(“Microsoft.XMLHTTP”) 文件。打开“获取

我找到了上面这种做法,但如果能给出一段完整、可用的代码会更有帮助。

您可以尝试类似下面的方法:

这样可以同时以 HTML 和纯文本两种格式保存该页面 ;)

Sub HttpGet
On Error Resume Next
'   Have to use MSXML2 as Microsoft.XMLHTTP caused Access Denied errors after the page had been repeatedly gotten, go figure that one
'   Set File = WScript.CreateObject("MSXML2.ServerXMLHTTP.4.0")
    Set File = WScript.CreateObject("Microsoft.XMLHTTP")
    File.Open "GET", Arg(1), False
    File.setRequestHeader "User-Agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 1.1.4322; .NET CLR 3.5.30729; .NET CLR 3.0.30618; .NET4.0C; .NET4.0E; BCD2000; BCD2000)"
    File.Send
    txt=File.ResponseText
    'Putting in line endings
    Outp.write txt
    If err.number <> 0 then
        Outp.writeline ""
        Outp.writeline "Error getting file"
        Outp.writeline "=================="
        Outp.writeline ""
        Outp.writeline "Error " & err.number & "(0x" & hex(err.number) & ") " & err.description
        Outp.writeline "Source " & err.source
        Outp.writeline ""
        Outp.writeline "HTTP Error " & File.Status & " " & File.StatusText
        Outp.writeline  File.getAllResponseHeaders
        Outp.writeline LCase(Arg(1))
    End If
End Sub
'=============================================
Sub RemoveHTMLTags
    Set ie = CreateObject("InternetExplorer.Application")
    ie.Visible = 0
    ie.Silent = 1
    ie.Navigate2 "file://" & FilterPath & "Filter.html"
    Do
        wscript.sleep 50
    Loop Until ie.document.readystate = "complete"
    ie.document.body.innerhtml = Inp.readall
    Outp.write ie.document.body.innertext
'   ie.quit
End Sub

下面的代码可能正是您所需要的:
' Saves the visible text of a web page to "output.txt" (UTF-8).
' A hidden Internet Explorer instance renders the page; the rendered
' Document.Body.innerText is then written out through ADODB.Stream.
Dim url: url = "http://some.url" 'set your page url here'
With WScript.CreateObject("InternetExplorer.Application", "IE_")
    .Visible = False
    .Navigate url
    ' Keep polling while EITHER indicator says the page is not ready.
    ' (Using "And" here would leave the loop as soon as one of the two
    ' conditions cleared, i.e. possibly before the document is loaded.)
    Do
        WScript.Sleep 100
    Loop While .ReadyState < 4 Or .Busy   ' 4 = READYSTATE_COMPLETE
    Dim data: data = .Document.Body.innerText
    ' Nested With: the dotted members below refer to the ADODB.Stream.
    With CreateObject("ADODB.Stream")
        .Open
        .Type     = 2 'adTypeText'
        .Position = 0
        .Charset  = "utf-8"
        .WriteText data
        .SaveToFile "output.txt", 2   ' 2 = adSaveCreateOverWrite
        .Close
    End With
    ' Back in the outer With: close the hidden browser instance.
    .Quit
End With
' Prompts for a URL, loads it in a hidden Internet Explorer instance and
' saves both the HTML source and the visible text of the page to two
' Unicode text files, which are then opened in Notepad via Ouvrir.
Const TriStateTrue = -1 ' For Unicode support (files are opened as Unicode)
URL = InputBox("Entrez l'URL pour y extraire son Code Source HTML "&vbcr&vbcr&_
"Exemple ""http://www.google.fr""","Extraction du Code Source © Hackoo © 2013","http://stackoverflow.com/questions/29597909/saving-visible-text-on-web-page-using-vbs")
' Empty string means the user cancelled or cleared the input box.
If URL = "" Then WScript.Quit
Titre = "Extraction du Code Source de " & URL
Set ie = CreateObject("InternetExplorer.Application")
Set objFSO = CreateObject("Scripting.FileSystemObject")
ie.Navigate URL
ie.Visible = False
' Wait for the page to finish loading. Sleep between polls instead of
' spinning the CPU, and also check ReadyState so the loop cannot fall
' through before the navigation has actually started.
Do
    WScript.Sleep 100
Loop While ie.Busy Or ie.ReadyState <> 4   ' 4 = READYSTATE_COMPLETE
DataHTML = ie.document.documentElement.innerHTML
DataTxt = ie.document.documentElement.innerText
strFileHTML = "CodeSourceHTML.txt"
strFileTxt = "CodeSourceTxt.txt"
' 2 = ForWriting, True = create the file if it does not exist.
Set objHTMLFile = objFSO.OpenTextFile(strFileHTML, 2, True, TriStateTrue)
objHTMLFile.WriteLine DataHTML
objHTMLFile.Close
Set objTxtFile = objFSO.OpenTextFile(strFileTxt, 2, True, TriStateTrue)
objTxtFile.WriteLine DataTxt
objTxtFile.Close
ie.Quit
Set ie = Nothing
' Show both result files to the user.
Ouvrir(strFileHTML)
Ouvrir(strFileTxt)
WScript.Quit
'*************************************************
' Opens a file in Notepad and returns immediately (does not wait for
' Notepad to exit).
'   File - path of the file to open. The argument itself is not modified.
' No return value is assigned.
Function Ouvrir(File)
    Dim cheminCite
    cheminCite = File
    ' Quote the path so that a name containing spaces reaches Notepad as
    ' a single argument. Leave it alone when it is empty or when the
    ' caller already supplied the quotes.
    If Len(cheminCite) > 0 And Left(cheminCite, 1) <> """" Then
        cheminCite = """" & cheminCite & """"
    End If
    Set ws = CreateObject("wscript.shell")
    ' 1 = normal window, False = do not wait for the program to finish.
    ws.Run "Notepad.exe " & cheminCite, 1, False
End Function
'*************************************************
' Downloads the URL given in Arg(1) with a synchronous HTTP GET and writes
' the response text to Outp. If any step fails, an error report (error
' number, description, source, HTTP status and response headers) is
' written to Outp instead.
' Relies on Arg and Outp being provided by the surrounding script.
Sub HttpGet
    ' Errors are handled inline via Err; this statement also resets Err.
    On Error Resume Next
'   Have to use MSXML2 as Microsoft.XMLHTTP caused Access Denied errors after the page had been repeatedly gotten, go figure that one
'   Set File = WScript.CreateObject("MSXML2.ServerXMLHTTP.4.0")
    Set File = WScript.CreateObject("Microsoft.XMLHTTP")

    ' Each step runs only while no error has occurred. Without these
    ' guards the later calls would fail as well and overwrite Err, so the
    ' report below would show a follow-on error instead of the root cause.
    If Err.Number = 0 Then File.Open "GET", Arg(1), False
    If Err.Number = 0 Then File.setRequestHeader "User-Agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 1.1.4322; .NET CLR 3.5.30729; .NET CLR 3.0.30618; .NET4.0C; .NET4.0E; BCD2000; BCD2000)"
    If Err.Number = 0 Then File.Send
    If Err.Number = 0 Then
        txt = File.ResponseText
        Outp.write txt
    End If

    If Err.Number <> 0 Then
        ' The diagnostic calls below can raise errors themselves (for
        ' example File.Status when no request was sent), which would
        ' clobber Err; capture the original error details first.
        errNum = Err.Number
        errDesc = Err.Description
        errSrc = Err.Source
        Outp.writeline ""
        Outp.writeline "Error getting file"
        Outp.writeline "=================="
        Outp.writeline ""
        Outp.writeline "Error " & errNum & "(0x" & Hex(errNum) & ") " & errDesc
        Outp.writeline "Source " & errSrc
        Outp.writeline ""
        Outp.writeline "HTTP Error " & File.Status & " " & File.StatusText
        Outp.writeline File.getAllResponseHeaders
        Outp.writeline LCase(Arg(1))
    End If
End Sub

'=============================================

' Converts HTML read from Inp into plain text and writes it to Outp.
' A hidden Internet Explorer instance loads the local page
' FilterPath & "Filter.html"; the input HTML is then assigned to the
' body's innerHTML and the body's innerText is written out.
' Relies on Inp, Outp and FilterPath being provided by the surrounding script.
Sub RemoveHTMLTags
    Set ie = CreateObject("InternetExplorer.Application")
    ie.Visible = 0
    ie.Silent = 1
    ie.Navigate2 "file://" & FilterPath & "Filter.html"

    ' First wait at browser level: the document object may not be
    ' available yet right after Navigate2, so do not dereference it
    ' until the browser reports the navigation as complete.
    Do
        WScript.Sleep 50
    Loop While ie.Busy Or ie.ReadyState <> 4   ' 4 = READYSTATE_COMPLETE

    ' Then apply the original document-level check.
    Do Until ie.document.readystate = "complete"
        WScript.Sleep 50
    Loop

    ie.document.body.innerhtml = Inp.readall
    Outp.write ie.document.body.innertext

    ' Close the hidden browser; otherwise every call leaves an invisible
    ' Internet Explorer process running.
    ie.Quit
End Sub