Regex 从网页中读取源代码并从中提取一些数据

Regex 从网页中读取源代码并从中提取一些数据,regex,vbscript,Regex,Vbscript,我想从网页中读取源代码并从中提取一些数据。 在我的示例中,我使用了一个正则表达式来提取数据,但没有得到 任何数据,可能是由于unicode或模式不匹配? 当我用RegExBuddy测试这个模式时,它是匹配的,但是在vbscript中没有? 也许,我遗漏了代码中的某些内容,或者我必须以另一种方式重新编写 以下是我的尝试: Option Explicit Dim URL,fso,ws,LogFile,sSrcUrl,oHTTP,bGetAsAsync,Data Set fso = CreateObj

我想从网页中读取源代码并从中提取一些数据。在我的示例中,我使用了一个正则表达式来提取数据,但没有得到任何数据,可能是由于 Unicode 或模式不匹配?当我用 RegexBuddy 测试这个模式时,它是匹配的,但在 VBScript 中却不匹配。也许是我遗漏了代码中的某些内容,或者我必须换一种方式重写它。

以下是我的尝试:

Option Explicit
' Downloads the giveaway page, stores the raw response next to this script
' (same path, extension replaced by "txt") and echoes the result of Extract().
Dim URL, fso, ws, LogFile, sSrcUrl, oHTTP, bGetAsAsync, Data

Set fso = CreateObject("Scripting.FileSystemObject")
Set ws = CreateObject("Wscript.Shell")

' Keep the script path up to and including its last ".", then append the new extension.
LogFile = Left(WScript.ScriptFullName, InStrRev(WScript.ScriptFullName, ".")) & "txt"
If fso.FileExists(LogFile) Then fso.DeleteFile LogFile

sSrcUrl = "https://fr.giveawayoftheday.com/"
bGetAsAsync = False   ' third argument of open: False requests a synchronous call
Set oHTTP = CreateObject("MSXML2.ServerXMLHTTP.6.0")
oHTTP.open "GET", sSrcUrl, bGetAsAsync
oHTTP.send

' Anything other than HTTP 200 is reported and ends the script.
If oHTTP.status <> 200 Then
    WScript.Echo "unexpected status = " & oHTTP.status & vbCrLf & oHTTP.statusText
    WScript.Quit
End If

Data = oHTTP.responseText
WriteLog Data, LogFile
WScript.Echo Extract(Data)
'****************************************************************
Function Extract(Data)
    ' Purpose : return the <div class="giveaway_wrap cf"> block contained in Data.
    ' Param   : Data - the page source as a string.
    ' Returns : the text of the last match, or "" when the pattern does not match.
    '
    ' The block is recognised as the opening tag, followed by exactly 17 further
    ' lines, followed by "</div>".
    Dim oRE, oMatches

    Extract = ""   ' explicit String result even when nothing matches

    Set oRE = New RegExp
    oRE.IgnoreCase = True
    oRE.Global = True
    oRE.MultiLine = True
    ' "\r?\n" accepts both CRLF and bare LF line breaks. The previous "\r.*\n"
    ' demanded a carriage return on every line, so a page delivered with LF-only
    ' line endings could never match.
    oRE.Pattern = "<div class=""giveaway_wrap cf"">(\r?\n.*){17}</div>"

    Set oMatches = oRE.Execute(Data)
    ' Execute always hands back a Matches collection object, so IsEmpty() cannot
    ' tell "no match" apart; Count is the reliable test.
    If oMatches.Count > 0 Then
        ' Same result as looping and overwriting: the last match wins.
        Extract = oMatches(oMatches.Count - 1).Value
    End If
End Function
'*****************************************************************
Sub WriteLog(strText,LogFile)
    ' Writes strText plus a line break to LogFile, replacing any previous content.
    '   strText - text to write
    '   LogFile - path of the file to (re)create
    Const ForWriting = 2          ' OpenTextFile iomode: overwrite
    Const CreateIfMissing = True  ' OpenTextFile create flag
    Const TristateTrue = -1       ' OpenTextFile format: open as Unicode
    Dim stream
    Set stream = CreateObject("Scripting.FileSystemObject") _
        .OpenTextFile(LogFile, ForWriting, CreateIfMissing, TristateTrue)
    stream.WriteLine strText
    stream.Close
End Sub
'*****************************************************************
选项显式
Dim URL、fso、ws、日志文件、sSrcUrl、oHTTP、BGETASASASSYNC、数据
设置fso=CreateObject(“Scripting.FileSystemObject”)
设置ws=CreateObject(“Wscript.Shell”)
LogFile=Left(Wscript.ScriptFullName,InstrRev(Wscript.ScriptFullName,“.”)和“txt”
如果存在fso.files(日志文件),则
fso.DeleteFile日志文件
如果结束
sSrcUrl=”https://fr.giveawayoftheday.com/"
设置oHTTP=CreateObject(“MSXML2.ServerXMLHTTP.6.0”)
bGetAsAsync=False
oHTTP.open“GET”,sSrcUrl,bGetAsAsAsync
发送
如果oHTTP状态为200,则
WScript.Echo“意外状态=“&oHTTP.status&vbCrLf&oHTTP.statusText
WScript.Quit
如果结束
数据=oHTTP.responseText
写日志数据,日志文件
wscript.echo提取(数据)
'****************************************************************
函数提取(数据)
暗矿石、奥马奇、火柴、线
set oRE=New RegExp
oRE.IgnoreCase=True
.Global=True
oRE.MultiLine=True
oRE.Pattern=“(\r.*\n.*{17}”
设置oMatches=oRE.Execute(数据)
如果不是空的(oMatches),那么
奥马奇的每一场比赛
Line=Match.Value
提取=行
下一个
如果结束
端函数
'*****************************************************************
子写入日志(strText,日志文件)
暗fs,ts
写入常数=2
设置fs=CreateObject(“Scripting.FileSystemObject”)
设置ts=fs.OpenTextFile(LogFile,ForWriting,True,-1)
ts.WriteLine strText
关闭
端接头
'*****************************************************************
因此,我预期的结果是:

<div class="giveaway_wrap cf">
                <div class="giveaway_img">
                    <img src="https://giveawayoftheday.com/wp-content/uploads/2017/10/82810932353ab590bf475ea3980f3038.png" alt="Excel Url Validator 1.0 Giveaway" />
                    <div class="giveaway_label">
                        <a href="https://fr.giveawayoftheday.com/excel-url-validator-1-0/" class="label_link"></a>
                        <div class="old_price">$40.00</div>
                        <div class="free">
                            <span class="big">GRATUIT</span> aujourd’hui
                        </div>
                    </div>
                </div>
                <div class="over">
                    <div class="giveaway_title">
                        <a href="https://fr.giveawayoftheday.com/excel-url-validator-1-0/">Excel Url Validator 1.0</a>
                        <div class="giveaway_date">16 octobre 2017</div>
                    </div>
                    <div class="giveaway_descr">Excel Url Validator trouve des liens rompus dans les feuilles de calcul Excel.</div>
                </div>

$40.00
GRATUIT aujourd’hui(今日免费)
2017年10月16日
Excel Url Validator 可查找 Excel 电子表格中的失效链接。

我得到了这样的解决方案:

Option Explicit
' Downloads the giveaway page, extracts the "giveaway_wrap cf" block with
' Extract(), saves that block next to this script (same path, extension
' replaced by "htm") and echoes it.
Dim URL,fso,ws,LogFile,sSrcUrl,oHTTP,bGetAsAsync,Data
Dim Result
Set fso = CreateObject("Scripting.FileSystemObject")
Set ws = CreateObject("Wscript.Shell")

' Keep the script path up to and including its last ".", then append the new extension.
LogFile = Left(Wscript.ScriptFullName,InstrRev(Wscript.ScriptFullName, ".")) & "htm"
If fso.FileExists(LogFile) Then
    fso.DeleteFile LogFile
End If

sSrcUrl = "https://fr.giveawayoftheday.com/"
Set oHTTP = CreateObject("MSXML2.ServerXMLHTTP.6.0")
bGetAsAsync = False   ' third argument of open: False requests a synchronous call
oHTTP.open "GET", sSrcUrl, bGetAsAsync
oHTTP.send
If oHTTP.status <> 200 Then
    WScript.Echo "unexpected status = " & oHTTP.status & vbCrLf & oHTTP.statusText
    WScript.Quit
End If
Data = oHTTP.responseText

' Run the regular expression over the page once and reuse the result for
' both the file and the message instead of scanning the page twice.
Result = Extract(Data)
WriteLog Result,LogFile
WScript.Echo Result
'****************************************************************
Function Extract(Data)
    ' Purpose : return the <div class="giveaway_wrap cf"> block contained in Data.
    ' Param   : Data - the page source as a string.
    ' Returns : the text of the last match, or "" when the pattern does not match.
    '
    ' The pattern starts at the opening tag and consumes any character
    ' ([\s\S] also covers line breaks) as long as the text
    ' "giveaway_counter first"> does not begin at the current position; the
    ' match then ends at the last "</div>" reachable before that marker.
    Dim oRE, oMatches

    Extract = ""   ' explicit String result even when nothing matches

    Set oRE = New RegExp
    oRE.IgnoreCase = True
    oRE.Global = True
    oRE.MultiLine = True
    oRE.Pattern = "<div class=""giveaway_wrap cf"">(?:(?!""giveaway_counter first"">)[\s\S])*</div>"

    Set oMatches = oRE.Execute(Data)
    ' Execute always hands back a Matches collection object, so IsEmpty() cannot
    ' tell "no match" apart; Count is the reliable test.
    If oMatches.Count > 0 Then
        ' Same result as looping and overwriting: the last match wins.
        Extract = oMatches(oMatches.Count - 1).Value
    End If
End Function
'*****************************************************************
Sub WriteLog(strText,LogFile)
    ' Writes strText plus a line break to LogFile, replacing any previous content.
    '   strText - text to write
    '   LogFile - path of the file to (re)create
    Const ForWriting = 2          ' OpenTextFile iomode: overwrite
    Const CreateIfMissing = True  ' OpenTextFile create flag
    Const TristateTrue = -1       ' OpenTextFile format: open as Unicode
    Dim stream
    Set stream = CreateObject("Scripting.FileSystemObject") _
        .OpenTextFile(LogFile, ForWriting, CreateIfMissing, TristateTrue)
    stream.WriteLine strText
    stream.Close
End Sub
'*****************************************************************
选项显式
Dim URL、fso、ws、日志文件、sSrcUrl、oHTTP、BGETASASASSYNC、数据
设置fso=CreateObject(“Scripting.FileSystemObject”)
设置ws=CreateObject(“Wscript.Shell”)
LogFile=Left(Wscript.ScriptFullName,InstrRev(Wscript.ScriptFullName,“.”)和“htm”
如果存在fso.files(日志文件),则
fso.DeleteFile日志文件
如果结束
sSrcUrl=”https://fr.giveawayoftheday.com/"
设置oHTTP=CreateObject(“MSXML2.ServerXMLHTTP.6.0”)
bGetAsAsync=False
oHTTP.open“GET”,sSrcUrl,bGetAsAsAsync
发送
如果oHTTP状态为200,则
WScript.Echo“意外状态=“&oHTTP.status&vbCrLf&oHTTP.statusText
WScript.Quit
如果结束
数据=oHTTP.responseText
写日志提取(数据),日志文件
wscript.echo提取(数据)
'****************************************************************
函数提取(数据)
暗矿石、奥马奇、火柴、线
set oRE=New RegExp
oRE.IgnoreCase=True
.Global=True
oRE.MultiLine=True
oRE.Pattern=“(?:(?!”“首先是赠品\计数器”“>)[\s\s])*”
设置oMatches=oRE.Execute(数据)
如果不是空的(oMatches),那么
奥马奇的每一场比赛
Line=Match.Value
提取=行
下一个
如果结束
端函数
'*****************************************************************
子写入日志(strText,日志文件)
暗fs,ts
写入常数=2
设置fs=CreateObject(“Scripting.FileSystemObject”)
设置ts=fs.OpenTextFile(LogFile,ForWriting,True,-1)
ts.WriteLine strText
关闭
端接头
'*****************************************************************
于2017年10月29日编辑

更新代码以弹出显示当天赠品的HTA文件

Option Explicit
' Builds an HTA file next to this script (same path, extension replaced by
' "hta") that shows the giveaway block extracted from the site, then opens it.
' The French site is used when Lang() returns True, the main site otherwise.
Dim URL,fso,ws,LogFile,sSrcUrl,oHTTP,bGetAsAsync,HTA,Data
Set fso = CreateObject("Scripting.FileSystemObject")
Set ws = CreateObject("Wscript.Shell")

' Keep the script path up to and including its last ".", then append the new extension.
LogFile = Left(Wscript.ScriptFullName,InstrRev(Wscript.ScriptFullName, ".")) & "hta"
If fso.FileExists(LogFile) Then
    fso.DeleteFile LogFile
End If

If IsInternetConnected = True Then
    If Lang = True Then
        sSrcUrl = "https://fr.giveawayoftheday.com/"
    Else
        sSrcUrl = "https://www.giveawayoftheday.com/"
    End If
End If

Set oHTTP = CreateObject("MSXML2.ServerXMLHTTP.6.0")
bGetAsAsync = False   ' third argument of open: False requests a synchronous call
oHTTP.open "GET", sSrcUrl, bGetAsAsync
oHTTP.send
If oHTTP.status <> 200 Then
    WScript.Echo "unexpected status = " & oHTTP.status & vbCrLf & oHTTP.statusText
    WScript.Quit
End If
Data = oHTTP.responseText

' Fixed head of the generated HTA: window attributes, a Window_OnLoad handler
' that resizes the window to 450x380 and moves it to the bottom-right corner
' of the available screen area, and the site's stylesheet.
HTA = "<html>" & vbCrLf & _
"<title>Giveaway of the day by Hackoo</title>" & vbCrLf & _
"<head>" & vbCrLf & _
"<HTA:APPLICATION" & vbCrLf & _
  "APPLICATIONNAME=""GiveAway of the Day""" & vbCrLf & _
  "Icon=DxDiag.exe" & vbCrLf & _
  "BORDER=""thin""" & vbCrLf & _
  "MAXIMIZEBUTTON=""no""" & vbCrLf & _
  "MINIMIZEBUTTON=""no""" & vbCrLf & _
  "SCROLL=""no""" & vbCrLf & _
  "SINGLEINSTANCE=""yes""" & vbCrLf & _
  "CONTEXTMENU=""no""" & vbCrLf & _
  "SELECTION=""no""/>" & vbCrLf & _
"<SCRIPT language=""VBScript"">" & vbCrLf & _
"Sub Window_OnLoad" & vbCrLf & _
    "window.resizeTo 450,380" & vbCrLf & _
    "WindowLeft = (window.screen.availWidth - 450)" & vbCrLf & _
    "WindowTop  = (window.screen.availHeight - 380)" & vbCrLf & _
    "window.moveTo WindowLeft, WindowTop" & vbCrLf & _
"end sub" & vbCrLf & _
"</script>" & vbCrLf & _
"</head>" & vbCrLf & _
"<center>" & vbCrLf & _
"<meta http-equiv=""Content-Type"" content=""text/html; charset=UTF-8"" />" & vbCrLf & _
"<meta http-equiv=""X-UA-Compatible"" content=""IE=edge"">" & vbCrLf & _
"<link rel=""stylesheet"" href=""https://www.giveawayoftheday.com/css/main.css"" />"

' WriteLog appends, so the three calls produce head + extracted block + closing tag.
WriteLog HTA,LogFile
WriteLog Extract(Data),LogFile
WriteLog "</html>",LogFile

' Run parses its argument as a command line, so the path is wrapped in double
' quotes; without them a script folder containing spaces cannot be opened.
ws.Run Chr(34) & LogFile & Chr(34)
'****************************************************************
Function Extract(Data)
    ' Purpose : return the <div class="giveaway_wrap cf"> block contained in Data.
    ' Param   : Data - the page source as a string.
    ' Returns : the text of the last match, or "" when the pattern does not match.
    '
    ' The pattern starts at the opening tag and consumes any character
    ' ([\s\S] also covers line breaks) as long as the text
    ' "giveaway_counter first"> does not begin at the current position; the
    ' match then ends at the last "</div>" reachable before that marker.
    Dim oRE, oMatches

    Extract = ""   ' explicit String result even when nothing matches

    Set oRE = New RegExp
    oRE.IgnoreCase = True
    oRE.Global = True
    oRE.MultiLine = True
    oRE.Pattern = "<div class=""giveaway_wrap cf"">(?:(?!""giveaway_counter first"">)[\s\S])*</div>"

    Set oMatches = oRE.Execute(Data)
    ' Execute always hands back a Matches collection object, so IsEmpty() cannot
    ' tell "no match" apart; Count is the reliable test.
    If oMatches.Count > 0 Then
        ' Same result as looping and overwriting: the last match wins.
        Extract = oMatches(oMatches.Count - 1).Value
    End If
End Function
'*****************************************************************
Sub WriteLog(strText,LogFile)
    ' Appends strText plus a line break to LogFile, creating the file if needed.
    '   strText - text to append
    '   LogFile - path of the target file
    Const ForAppending = 8        ' OpenTextFile iomode: write at end of file
    Const CreateIfMissing = True  ' OpenTextFile create flag
    Const TristateTrue = -1       ' OpenTextFile format: open as Unicode
    Dim stream
    Set stream = CreateObject("Scripting.FileSystemObject") _
        .OpenTextFile(LogFile, ForAppending, CreateIfMissing, TristateTrue)
    stream.WriteLine strText
    stream.Close
End Sub
'*****************************************************************
Function Lang()
    ' Returns True when Win32_OperatingSystem reports OSLanguage 1036 on the
    ' local machine, False in every other case.
    Dim host, wmiService, osItem, languageId

    host = "."   ' "." addresses the local computer in the WMI moniker
    Set wmiService = GetObject("winmgmts:" & "{impersonationLevel=impersonate}!\\" & host & "\root\cimv2")

    ' If the query yields several instances, the last one decides.
    For Each osItem In wmiService.ExecQuery("Select * from Win32_OperatingSystem")
        languageId = osItem.OSLanguage
    Next

    Lang = False
    If languageId = 1036 Then Lang = True
End Function
'*****************************************************************
Function IsInternetConnected()
    ' Pings smtp.gmail.com through Win32_PingStatus until a reply with status
    ' code 0 arrives, then returns True. After each unsuccessful attempt a
    ' warning box is shown and the script pauses 10 seconds before retrying,
    ' so the function only returns once a ping has succeeded.
    Dim targetHost, pingQuery, reply

    targetHost = "smtp.gmail.com"
    pingQuery = "select * from Win32_PingStatus where address = '" & targetHost & "'"
    IsInternetConnected = False

    Do
        For Each reply In GetObject("winmgmts:{impersonationLevel=impersonate}!\\").ExecQuery(pingQuery)
            If reply.Statuscode = 0 Then
                IsInternetConnected = True
                Exit Function
            End If
        Next
        MsgBox "Check your internet connection !",vbExclamation,"Check your internet connection !"
        Pause 10   ' wait 10 seconds before the next attempt
    Loop
End Function
'******************************************************************
 Sub Pause(NSeconds)
    ' Suspends the script for NSeconds seconds; WScript.Sleep expects milliseconds.
    Dim delayMs
    delayMs = NSeconds * 1000
    WScript.Sleep delayMs
 End Sub
'******************************************************************
选项显式
Dim URL、fso、ws、日志文件、sSrcUrl、oHTTP、BGETASASASSYNC、HTA、数据
设置fso=CreateObject(“Scripting.FileSystemObject”)
设置ws=CreateObject(“Wscript.Shell”)
LogFile=Left(Wscript.ScriptFullName,InstrRev(Wscript.ScriptFullName,“.”)和“hta”
如果存在fso.files(日志文件),则
fso.DeleteFile日志文件
如果结束
如果IsInternetConnected=True,则
如果Lang=True,则
sSrcUrl=”https://fr.giveawayoftheday.com/"
其他的
sSrcUrl=”https://www.giveawayoftheday.com/"
如果结束
如果结束
设置oHTTP=CreateObject(“MSXML2.ServerXMLHTTP.6.0”)
bGetAsAsync=False
oHTTP.open“GET”,sSrcUrl,bGetAsAsAsync
发送
如果oHTTP状态为200,则
WScript.Echo“意外状态=“&oHTTP.status&vbCrLf&oHTTP.statusText
WScript.Quit
如果结束
数据=oHTTP.responseText
HTA=”“&vbCrLf&_
“Hackoo每日赠品”&vbCrLf&_
“”&vbCrLf&_
“”&vbCrLf&_
“”&vbCrLf&_
“子窗口加载”&vbCrLf&_
“window.resizeTo 450380”和vbCrLf&_
“WindowLeft=(window.screen.availWidth-450)”&vbCrLf&\u
“WindowTop=(window.screen.availHeight-380)”&vbCrLf&_
“window.moveTo WindowLeft,WindowTop”&vbCrLf&_
“末端接头”和vbCrLf&_
“”&vbCrLf&_
“”&vbCrLf&_
“”&vbCrLf&_
“”&vbCrLf&_
“”&vbCrLf&_
""
写日志HTA,日志文件
写日志提取(数据),日志文件
写入日志“”,日志文件
ws.run日志文件
'****************************************************************
函数提取(数据)
暗矿石、奥马奇、火柴、线
set oRE=New RegExp
oRE.IgnoreCase=True
.Global=True
oRE.MultiLine=True
oRE.Pattern=“(?:(?!”“首先是赠品\计数器”“>)[\s\s])*”
设置oMatches=oRE.Execute(数据)
如果不是空的(oMatches),那么
奥马奇的每一场比赛
行=匹配