2018年8月23日星期四

vba爬虫


Public Function postHttpTextByXMLHTTP(httpurl As String, strPostData As String, strCookie As String, strrefurl As String, strHost As String, origin As String, Optional ShowMsg As Boolean, Optional strMode$) As String
    Dim httpxml As Object, HttpTxt1$, strZM$
    Set httpxml = CreateObject("Microsoft.XMLHTTP")  '("MSXML2.SERVERXMLHTTP.5.0")   '("WinHttp.WinHttpRequest.5.1")    '
    With httpxml
        If ShowMsg Then
            Application.StatusBar = "Open post:" & httpurl
            DoEvents
        End If
        '.SetTimeouts 120000, 120000, 120000, 120000 '设置操作超时时间
        If strMode <> "" Then
            .Open strMode, httpurl, False
        Else
            .Open "post", httpurl, False
        End If
        .setRequestHeader "Accept", "*/*"
        .setRequestHeader "Accept-Encoding", "gzip, deflate"            '
        .setRequestHeader "Accept-Language", "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2"      '
        .setRequestHeader "Cache-Control", "no-cache"
        .setRequestHeader "Connection", "keep-alive"                '
        .setRequestHeader "Content-Type", "application/json"
        .setRequestHeader "Referer", strrefurl
        .setRequestHeader "Pragma", "no-cache"
        .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:59.0) Gecko/20100101 Firefox/59.0"          '
        .setRequestHeader "X-Requested-With", "XMLHttpRequest"                  '
        If strCookie <> "" Then .setRequestHeader "Cookie", strCookie
        If strHost <> "" Then .setRequestHeader "Host", strHost
        If origin <> "" Then
            If InStr(1, origin, "http") > 0 Then
                .setRequestHeader "Origin", origin
            Else
                .setRequestHeader "Origin", "http://" & origin
            End If
        End If
        If strPostData <> "" Then
            .setRequestHeader "Content-Length", Len(strPostData)
            .Send (strPostData)
        Else
            .Send
        End If
        getCookieFromHttpXml httpxml, strCookie, strZM
        If ShowMsg Then
            Application.StatusBar = False
            DoEvents
        End If
        On Error GoTo TT:
       
        HttpTxt1 = .responseText
        postHttpTextByXMLHTTP = HttpTxt1
       
        On Error GoTo 0
        Exit Function
TT:
        HttpTxt1 = BytesToBstr(.responseBody, "utf-8") ' "GB2312")
        postHttpTextByXMLHTTP = HttpTxt1
        On Error GoTo 0
    End With
End Function

Public Sub getCookieFromHttpXml(ByRef httpxml As Object, ByRef strCookie As String, ByRef strZM$)
    Dim strHeaders$, strCookie1$, strKey$, strValue$, Ar, i%, j%, strZM1$
    With httpxml
        strHeaders = .getAllResponseHeaders
        If InStr(1, strHeaders, "charset=") > 0 Then
            strZM1 = Split(strHeaders, "charset=")(1)
            strZM = GetStrWithPtn(strZM1, "\S+")
        End If
       
        Ar = Split(strHeaders, "Set-Cookie:")
        j = UBound(Ar)
        If j > 0 Then
            Dim Dict1 As Object
            Set Dict1 = CreateObject("Scripting.Dictionary")
            If strCookie <> "" Then
                Dim Ar2
                Ar2 = Split(strCookie, ";")
                For i = LBound(Ar2) To UBound(Ar2)
                    strValue = Ar2(i)
                    strKey = Split(strValue, "=")(0)
                    Dict1(strKey) = strValue
                Next
            End If
            For i = 1 To j
                strCookie1 = Ar(i)
                strCookie1 = Trim(Split(strCookie1, ";")(0))
                strKey = Split(strCookie1, "=")(0)
                Dict1(strKey) = strCookie1
            Next
            strCookie = Join(Dict1.Items(), ";")
        End If
    End With
End Sub