2017-02-12 26 views
-1

我正嘗試從網站抽取數據:阿根廷超市網刮

https://www.disco.com.ar/Comprar/Home.aspx#_atCategory=false&_atGrilla=true&_id=21063

通過宏觀

的Excel 2013,如實時價格,產品名稱和形象。

我已經嘗試了excel網絡查詢,但它不起作用。

有沒有辦法做到這一點?

+1

Excel中的QueryTable不是那麼靈活,因爲你需要。考慮IE自動化,或使用正則表達式或拆分XHR響應解析。 – omegastripes

回答

0

有一個例子顯示瞭如何使用XHR和JSON解析從網站檢索數據,它由幾個步驟組成。

  1. 檢索數據。

我使用Chrome開發人員工具網絡標籤查看了一些XHR。 我發現最相關的數據是從https://www.disco.com.ar/Comprar/HomeService.aspx/ObtenerLimiteDeProductos

POST XHR https://www.disco.com.ar/Comprar/HomeService.aspx/ObtenerLimiteDeProductos

的POST XHR由POST XHR返回我沒有餅乾頭不起作用JSON字符串。因此,我必須添加額外的HEAD XHR來首先檢索ASP.NET_SessionId cookie,服務器版本XMLHTTP用於控制cookie。唯一的響應報頭返回Cookie是由GET XHR https://www.disco.com.ar/Login/PreHome.aspx

GET XHR https://www.disco.com.ar/Login/PreHome.aspx

,因爲它包含所述第二有效載荷JSON裹着的 d屬性
  • 檢索JSON字符串應當兩次解析第一個JSON。
  • 將解析的JSON對象轉換爲2d數組中呈現的表格形式。
  • 將數組輸出到工作表。您可以通過直接訪問陣列來執行進一步的處理。
  • 對於網頁如下所示:

    webpage

    爲我的輸出如下所示:

    output

    把下面的代碼進VBA項目標準模塊:

    Option Explicit 
    
    Sub GetData() 
    
        Dim sCookie As String 
        Dim sPayLoad As String 
        Dim sCont As String 
        Dim vJSON As Variant 
        Dim sState As String 
        Dim y As Long 
        Dim sSection As Variant 
        Dim aData() 
        Dim aHeader() 
    
        ' Get cookie from the site 
        With CreateObject("MSXML2.ServerXMLHTTP") 
         .Open "HEAD", "https://www.disco.com.ar/Login/PreHome.aspx", False 
         .Send 
         sCookie = .getAllResponseHeaders 
        End With 
        sCookie = Split(sCookie, "Set-Cookie: ", 2)(1) 
        sCookie = Split(sCookie, ";", 2)(0) 
        ' Retrieve JSON data 
        sPayLoad = "{IdMenu:""21063"",textoBusqueda:"""", producto:"""", marca:"""", " & _ 
         "pager:"""", ordenamiento:0, precioDesde:"""", precioHasta:""""}" 
        With CreateObject("MSXML2.ServerXMLHTTP") 
         .Open "POST", "https://www.disco.com.ar/Comprar/HomeService.aspx/ObtenerArticulosPorDescripcionMarcaFamiliaLevex", False 
         .SetRequestHeader "Accept", "application/json, text/javascript, */*; q=0.01" 
         .SetRequestHeader "Content-Type", "application/json; charset=utf-8" 
         .SetRequestHeader "Content-Length", Len(sPayLoad) 
         .SetRequestHeader "Cookie", sCookie 
         .Send CStr(sPayLoad) 
         sCont = .responseText 
        End With 
        ' Parse JSON response 
        JSON.Parse sCont, vJSON, sState 
        sCont = vJSON.Item("d") 
        JSON.Parse sCont, vJSON, sState 
        ' Output tables 
        Sheets(1).Cells.Delete 
        y = 1 
        For Each sSection In Array("Tipo", "Marca", "Precio", "ResultadosBusquedaLevex", "ArticulosSugereridos") 
         JSON.ToArray vJSON.Item(sSection), aData, aHeader 
         With Sheets(1) 
          .Cells(y, 1).Value = sSection 
          OutputArray .Cells(y + 1, 1), aHeader 
          Output2DArray .Cells(y + 2, 1), aData 
          .Cells.Columns.AutoFit 
         End With 
         y = y + UBound(aData, 1) + 4 
        Next 
    
    End Sub 
    
    Sub OutputArray(oDstRng As Range, aCells As Variant) 
    
        With oDstRng 
         .Parent.Select 
         With .Resize(_ 
           1, _ 
           UBound(aCells) - LBound(aCells) + 1) 
          .NumberFormat = "@" 
          .Value = aCells 
         End With 
        End With 
    
    End Sub 
    
    Sub Output2DArray(oDstRng As Range, aCells As Variant) 
    
        With oDstRng 
         .Parent.Select 
         With .Resize(_ 
           UBound(aCells, 1) - LBound(aCells, 1) + 1, _ 
           UBound(aCells, 2) - LBound(aCells, 2) + 1) 
          .NumberFormat = "@" 
          .Value = aCells 
         End With 
        End With 
    
    End Sub 
    

    再創建一個標準模塊,將其命名爲JSON,把下面的代碼進去,這個代碼提供JSON處理功能:

    Option Explicit 
    
    Private sBuffer As String 
    Private oTokens As Object 
    Private oRegEx As Object 
    Private bMatch As Boolean 
    Private oChunks As Object 
    Private oHeader As Object 
    Private aData() As Variant 
    Private i As Long 
    
    Sub Parse(ByVal sSample As String, vJSON As Variant, sState As String) 
    
        ' Backus–Naur form JSON parser implementation based on RegEx 
        ' Input: 
        ' sSample - source JSON string 
        ' Output: 
        ' vJson - created object or array to be returned as result 
        ' sState - string Object|Array|Error depending on processing 
    
        sBuffer = sSample 
        Set oTokens = CreateObject("Scripting.Dictionary") 
        Set oRegEx = CreateObject("VBScript.RegExp") 
        With oRegEx ' Patterns based on specification http://www.json.org/ 
         .Global = True 
         .MultiLine = True 
         .IgnoreCase = True ' Unspecified True, False, Null accepted 
         .Pattern = "(?:'[^']*'|""(?:\\""|[^""])*"")(?=\s*[,\:\]\}])" ' Double-quoted string, unspecified quoted string 
         Tokenize "s" 
         .Pattern = "[+-]?(?:\d+\.\d*|\.\d+|\d+)(?:e[+-]?\d+)?(?=\s*[,\]\}])" ' Number, E notation number 
         Tokenize "d" 
         .Pattern = "\b(?:true|false|null)(?=\s*[,\]\}])" ' Constants true, false, null 
         Tokenize "c" 
         .Pattern = "\b[A-Za-z_]\w*(?=\s*\:)" ' Unspecified non-double-quoted property name accepted 
         Tokenize "n" 
         .Pattern = "\s+" 
         sBuffer = .Replace(sBuffer, "") ' Remove unnecessary spaces 
         .MultiLine = False 
         Do 
          bMatch = False 
          .Pattern = "<\d+(?:[sn])>\:<\d+[codas]>" ' Object property structure 
          Tokenize "p" 
          .Pattern = "\{(?:<\d+p>(?:,<\d+p>)*)?\}" ' Object structure 
          Tokenize "o" 
          .Pattern = "\[(?:<\d+[codas]>(?:,<\d+[codas]>)*)?\]" ' Array structure 
          Tokenize "a" 
         Loop While bMatch 
         .Pattern = "^<\d+[oa]>$" ' Top level object structure, unspecified array accepted 
         If .Test(sBuffer) And oTokens.Exists(sBuffer) Then 
          Retrieve sBuffer, vJSON 
          sState = IIf(IsObject(vJSON), "Object", "Array") 
         Else 
          vJSON = Null 
          sState = "Error" 
         End If 
        End With 
        Set oTokens = Nothing 
        Set oRegEx = Nothing 
    
    End Sub 
    
    Private Sub Tokenize(sType) 
    
        Dim aContent() As String 
        Dim lCopyIndex As Long 
        Dim i As Long 
        Dim sKey As String 
    
        With oRegEx.Execute(sBuffer) 
         If .Count = 0 Then Exit Sub 
         ReDim aContent(0 To .Count - 1) 
         lCopyIndex = 1 
         For i = 0 To .Count - 1 
          With .Item(i) 
           sKey = "<" & oTokens.Count & sType & ">" 
           oTokens(sKey) = .Value 
           aContent(i) = Mid(sBuffer, lCopyIndex, .FirstIndex - lCopyIndex + 1) & sKey 
           lCopyIndex = .FirstIndex + .Length + 1 
          End With 
         Next 
        End With 
        sBuffer = Join(aContent, "") & Mid(sBuffer, lCopyIndex, Len(sBuffer) - lCopyIndex + 1) 
        bMatch = True 
    
    End Sub 
    
    Private Sub Retrieve(sTokenKey, vTransfer) 
    
        Dim sTokenValue As String 
        Dim sName As String 
        Dim vValue As Variant 
        Dim aTokens() As String 
        Dim i As Long 
    
        sTokenValue = oTokens(sTokenKey) 
        With oRegEx 
         .Global = True 
         Select Case Left(Right(sTokenKey, 2), 1) 
          Case "o" 
           Set vTransfer = CreateObject("Scripting.Dictionary") 
           aTokens = Split(sTokenValue, "<") 
           For i = 1 To UBound(aTokens) 
            Retrieve "<" & Split(aTokens(i), ">", 2)(0) & ">", vTransfer 
           Next 
          Case "p" 
           aTokens = Split(sTokenValue, "<", 4) 
           Retrieve "<" & Split(aTokens(1), ">", 2)(0) & ">", sName 
           Retrieve "<" & Split(aTokens(2), ">", 2)(0) & ">", vValue 
           If IsObject(vValue) Then 
            Set vTransfer(sName) = vValue 
           Else 
            vTransfer(sName) = vValue 
           End If 
          Case "a" 
           aTokens = Split(sTokenValue, "<") 
           If UBound(aTokens) = 0 Then 
            vTransfer = Array() 
           Else 
            ReDim vTransfer(0 To UBound(aTokens) - 1) 
            For i = 1 To UBound(aTokens) 
             Retrieve "<" & Split(aTokens(i), ">", 2)(0) & ">", vValue 
             If IsObject(vValue) Then 
              Set vTransfer(i - 1) = vValue 
             Else 
              vTransfer(i - 1) = vValue 
             End If 
            Next 
           End If 
          Case "n" 
           vTransfer = sTokenValue 
          Case "s" 
           vTransfer = Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(_ 
            Mid(sTokenValue, 2, Len(sTokenValue) - 2), _ 
            "\""", """"), _ 
            "\\", "\"), _ 
            "\/", "/"), _ 
            "\b", Chr(8)), _ 
            "\f", Chr(12)), _ 
            "\n", vbLf), _ 
            "\r", vbCr), _ 
            "\t", vbTab) 
           .Global = False 
           .Pattern = "\\u[0-9a-fA-F]{4}" 
           Do While .Test(vTransfer) 
            vTransfer = .Replace(vTransfer, ChrW(("&H" & Right(.Execute(vTransfer)(0).Value, 4)) * 1)) 
           Loop 
          Case "d" 
           vTransfer = Evaluate(sTokenValue) 
          Case "c" 
           Select Case LCase(sTokenValue) 
            Case "true" 
             vTransfer = True 
            Case "false" 
             vTransfer = False 
            Case "null" 
             vTransfer = Null 
           End Select 
         End Select 
        End With 
    
    End Sub 
    
    Function Serialize(vJSON As Variant) As String 
    
        Set oChunks = CreateObject("Scripting.Dictionary") 
        SerializeElement vJSON, "" 
        Serialize = Join(oChunks.Items(), "") 
        Set oChunks = Nothing 
    
    End Function 
    
    Private Sub SerializeElement(vElement As Variant, ByVal sIndent As String) 
    
        Dim aKeys() As Variant 
        Dim i As Long 
    
        With oChunks 
         Select Case VarType(vElement) 
          Case vbObject 
           If vElement.Count = 0 Then 
            .Item(.Count) = "{}" 
           Else 
            .Item(.Count) = "{" & vbCrLf 
            aKeys = vElement.Keys 
            For i = 0 To UBound(aKeys) 
             .Item(.Count) = sIndent & vbTab & """" & aKeys(i) & """" & ": " 
             SerializeElement vElement(aKeys(i)), sIndent & vbTab 
             If Not (i = UBound(aKeys)) Then .Item(.Count) = "," 
             .Item(.Count) = vbCrLf 
            Next 
            .Item(.Count) = sIndent & "}" 
           End If 
          Case Is >= vbArray 
           If UBound(vElement) = -1 Then 
            .Item(.Count) = "[]" 
           Else 
            .Item(.Count) = "[" & vbCrLf 
            For i = 0 To UBound(vElement) 
             .Item(.Count) = sIndent & vbTab 
             SerializeElement vElement(i), sIndent & vbTab 
             If Not (i = UBound(vElement)) Then .Item(.Count) = "," 'sResult = sResult & "," 
             .Item(.Count) = vbCrLf 
            Next 
            .Item(.Count) = sIndent & "]" 
           End If 
          Case vbInteger, vbLong 
           .Item(.Count) = vElement 
          Case vbSingle, vbDouble 
           .Item(.Count) = Replace(vElement, ",", ".") 
          Case vbNull 
           .Item(.Count) = "null" 
          Case vbBoolean 
           .Item(.Count) = IIf(vElement, "true", "false") 
          Case Else 
           .Item(.Count) = """" & _ 
            Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(vElement, _ 
             "\", "\\"), _ 
             """", "\"""), _ 
             "/", "\/"), _ 
             Chr(8), "\b"), _ 
             Chr(12), "\f"), _ 
             vbLf, "\n"), _ 
             vbCr, "\r"), _ 
             vbTab, "\t") & _ 
            """" 
         End Select 
        End With 
    
    End Sub 
    
    Function ToString(vJSON As Variant) As String 
    
        Select Case VarType(vJSON) 
         Case vbObject, Is >= vbArray 
          Set oChunks = CreateObject("Scripting.Dictionary") 
          ToStringElement vJSON, "" 
          oChunks.Remove 0 
          ToString = Join(oChunks.Items(), "") 
          Set oChunks = Nothing 
         Case vbNull 
          ToString = "Null" 
         Case vbBoolean 
          ToString = IIf(vJSON, "True", "False") 
         Case Else 
          ToString = CStr(vJSON) 
        End Select 
    
    End Function 
    
    Private Sub ToStringElement(vElement As Variant, ByVal sIndent As String) 
    
        Dim aKeys() As Variant 
        Dim i As Long 
    
        With oChunks 
         Select Case VarType(vElement) 
          Case vbObject 
           If vElement.Count = 0 Then 
            .Item(.Count) = "''" 
           Else 
            .Item(.Count) = vbCrLf 
            aKeys = vElement.Keys 
            For i = 0 To UBound(aKeys) 
             .Item(.Count) = sIndent & aKeys(i) & ": " 
             ToStringElement vElement(aKeys(i)), sIndent & vbTab 
             If Not (i = UBound(aKeys)) Then .Item(.Count) = vbCrLf 
            Next 
           End If 
          Case Is >= vbArray 
           If UBound(vElement) = -1 Then 
            .Item(.Count) = "''" 
           Else 
            .Item(.Count) = vbCrLf 
            For i = 0 To UBound(vElement) 
             .Item(.Count) = sIndent & i & ": " 
             ToStringElement vElement(i), sIndent & vbTab 
             If Not (i = UBound(vElement)) Then .Item(.Count) = vbCrLf 
            Next 
           End If 
          Case vbNull 
           .Item(.Count) = "Null" 
          Case vbBoolean 
           .Item(.Count) = IIf(vElement, "True", "False") 
          Case Else 
           .Item(.Count) = CStr(vElement) 
         End Select 
        End With 
    
    End Sub 
    
    Sub ToArray(vJSON As Variant, aRows() As Variant, aHeader() As Variant) 
    
        ' Input: 
        ' vJSON - Array or Object which contains rows data 
        ' Output: 
        ' aData - 2d array representing JSON data 
        ' aHeader - 1d array of property names 
    
        Dim sName As Variant 
    
        Set oHeader = CreateObject("Scripting.Dictionary") 
        Select Case VarType(vJSON) 
         Case vbObject 
          If vJSON.Count > 0 Then 
           ReDim aData(0 To vJSON.Count - 1, 0 To 0) 
           oHeader("#") = 0 
           i = 0 
           For Each sName In vJSON 
            aData(i, 0) = "#" & sName 
            ToArrayElement vJSON(sName), "" 
            i = i + 1 
           Next 
          Else 
           ReDim aData(0 To 0, 0 To 0) 
          End If 
         Case Is >= vbArray 
          If UBound(vJSON) >= 0 Then 
           ReDim aData(0 To UBound(vJSON), 0 To 0) 
           For i = 0 To UBound(vJSON) 
            ToArrayElement vJSON(i), "" 
           Next 
          Else 
           ReDim aData(0 To 0, 0 To 0) 
          End If 
         Case Else 
          ReDim aData(0 To 0, 0 To 0) 
          aData(0, 0) = ToString(vJSON) 
        End Select 
        aHeader = oHeader.Keys() 
        Set oHeader = Nothing 
        aRows = aData 
        Erase aData 
    
    End Sub 
    
    Private Sub ToArrayElement(vElement As Variant, sFieldName As String) 
    
        Dim sName As Variant 
        Dim j As Long 
    
        Select Case VarType(vElement) 
         Case vbObject ' collection of objects 
          For Each sName In vElement 
           ToArrayElement vElement(sName), sFieldName & IIf(sFieldName = "", "", "_") & sName 
          Next 
         Case Is >= vbArray ' collection of arrays 
          For j = 0 To UBound(vElement) 
           ToArrayElement vElement(j), sFieldName & IIf(sFieldName = "", "", "_") & "#" & j 
          Next 
         Case Else 
          If Not oHeader.Exists(sFieldName) Then 
           oHeader(sFieldName) = oHeader.Count 
           If UBound(aData, 2) < oHeader.Count - 1 Then ReDim Preserve aData(0 To UBound(aData, 1), 0 To oHeader.Count - 1) 
          End If 
          j = oHeader(sFieldName) 
          aData(i, j) = ToString(vElement) 
        End Select 
    
    End Sub 
    
    +0

    @MartínPiña你有沒有試過這個代碼? – omegastripes