2017-06-13 93 views
1

將 get/messages.json 返回的數據與使用 Yammer 數據導出檢索到的數據進行比較時,發現兩者不一致:get/messages.json 只返回 6,300 條記錄,而數據導出返回 10,469 條記錄。與此帖子(post)的情況類似,我也在使用 older_than 參數。該帖子中有一條評論提出這可能是速率限制問題。我可以向你保證,我沒有超過速率限制,因爲我在每 10 次請求後暫停 15 秒。標題:Yammer API get/messages.json 返回的結果不完整

爲了得到這不完整的 6,300 行記錄,我做了以下步驟:

  1. 使用導出API來獲取所有的組

  2. 遍歷該組列表,並使用 https://www.yammer.com/api/v1/messages/in_group/:group_id.json 下載每個組的消息

  3. 然後我使用https://www.yammer.com/api/v1/messages.json和older_than參數來獲取所有公司提要中的所有消息。

問題似乎出在步驟 3。

以下是上面概述的步驟 3 所對應的代碼:

''' <summary>
''' Downloads the all-company message feed page by page into
''' <c>allCompanyPath</c> (one numbered .json file per request), then merges
''' every batch file into a single "Yammer-All-Company-Messages.json" under
''' <c>outputJSONpath</c>.
''' </summary>
''' <remarks>
''' Paging is driven by getOlderThanID: the id it returns for a batch is sent
''' as the <c>older_than</c> query parameter of the next request, and a return
''' value of 0 ends the paging. Any exception is reported through ErrorHandler
''' and not rethrown.
''' </remarks>
Sub GetAllCompanyMessages()
    Try
        Console.WriteLine("Getting All Company Messages")

        ' Start from an empty batch directory. allCompanyPath is a directory, so
        ' it must be probed with Directory.Exists; File.Exists is always False
        ' for a directory, which would leave batches from a previous run in
        ' place to be merged into the new output.
        If Directory.Exists(allCompanyPath) Then
            For Each oldFile As String In Directory.GetFiles(allCompanyPath)
                File.Delete(oldFile)
            Next
        Else
            Directory.CreateDirectory(allCompanyPath)
        End If

        ' One client for the whole run, disposed deterministically.
        Using client As New WebClient()
            Console.WriteLine("Getting All Company Batch 1")
            Dim batchNumber As Int32 = 1
            Dim batchFile As String = Path.Combine(allCompanyPath, "1.json")
            ' Set (not Add) so the header is present exactly once per request,
            ' whether or not the client keeps headers between requests.
            client.Headers.Set("Authorization", "Bearer " & accessToken)
            client.DownloadFile(allCompanyMessagesURL, batchFile)

            Dim nextOlderThanID As Int32 = getOlderThanID(batchFile)

            ' Only page further while the previous batch reported older
            ' messages; this avoids issuing a request with older_than=0.
            Do While nextOlderThanID <> 0
                batchNumber += 1

                ' HANDLES 10 REQUESTS IN 10 SECONDS LIMIT:
                ' wait 15 seconds before every 10th request.
                If batchNumber Mod 10 = 0 Then
                    Console.WriteLine("Sleeping for 15 seconds")
                    System.Threading.Thread.Sleep(15000)
                End If

                Console.WriteLine("Getting All Company Batch " & batchNumber & " olderthanID " & nextOlderThanID)
                batchFile = Path.Combine(allCompanyPath, batchNumber & ".json")
                client.Headers.Set("Authorization", "Bearer " & accessToken)
                client.DownloadFile(allCompanyMessagesURL & "?older_than=" & nextOlderThanID, batchFile)

                nextOlderThanID = getOlderThanID(batchFile)
            Loop
        End Using

        Console.WriteLine("Concatenating All Company Batches")
        Dim masterJobject As JObject = JObject.Parse("{""messages"":[]}")
        ' Concat keeps every array element from every batch (no de-duplication).
        Dim jms As New JsonMergeSettings
        jms.MergeArrayHandling = MergeArrayHandling.Concat
        For Each batchPath As String In Directory.GetFiles(allCompanyPath, "*.json")
            Console.WriteLine("Concatenating All Company Batch: " & batchPath)
            ' Parse each batch file and append its content to the master object.
            Dim batchObject As JObject = JObject.Parse(File.ReadAllText(batchPath))
            masterJobject.Merge(batchObject, jms)
        Next

        Console.WriteLine("Building Yammer-All-Company-Messages.json")
        ' Only the merged "messages" array is written to the output file.
        File.WriteAllText(outputJSONpath & "Yammer-All-Company-Messages.json", "{ ""messages"":" & masterJobject("messages").ToString() & "}")

    Catch ex As Exception
        ErrorHandler("ERROR GetAllCompanyMessages: " & ex.Message)
    End Try
End Sub

''' <summary>
''' Reads a downloaded messages batch and returns the id to use as the
''' <c>older_than</c> value for the next request.
''' </summary>
''' <param name="jsonPath">Path of the batch .json file to inspect.</param>
''' <returns>
''' The "id" of the last element of the "messages" array when
''' "meta.older_available" is true; otherwise 0. Also 0 when the file cannot
''' be read or parsed (the error is reported through ErrorHandler).
''' </returns>
Function getOlderThanID(ByVal jsonPath As String) As Int32
    Try
        Dim page As JObject = JObject.Parse(File.ReadAllText(jsonPath))

        ' No "meta" object or no "older_available" flag: treat as the last page
        ' rather than relying on a NullReferenceException.
        Dim meta As JToken = page("meta")
        If meta Is Nothing OrElse Not meta.HasValues Then
            Return 0
        End If

        Dim olderAvailable As JToken = meta("older_available")
        If olderAvailable Is Nothing OrElse Not CBool(olderAvailable) Then
            Return 0
        End If

        ' An empty or missing "messages" array has no last element to read.
        Dim messages As JArray = TryCast(page("messages"), JArray)
        If messages Is Nothing OrElse messages.Count = 0 Then
            Return 0
        End If

        Dim lastId As JToken = messages(messages.Count - 1)("id")
        If lastId Is Nothing Then
            Return 0
        End If

        ' NOTE(review): an id above Int32.MaxValue makes this conversion throw,
        ' which is reported below and ends paging early - confirm the id range.
        Return CInt(lastId)
    Catch ex As Exception
        ErrorHandler("ERROR getOlderThanID: " & ex.Message)
    End Try
    Return 0
End Function

我希望有人能就 get/messages.json API 端點可能存在的問題提供見解,並告訴我應如何修改代碼來解決此問題。

回答

0

REST API 返回的消息條目數量存在技術限制。這些端點是爲需要最新數據的客戶端應用程序設計的。最好的選擇是通過 REST API 對消息端點(api_url 字段)進行單獨調用,來填補存檔中的任何空白。請確保所有數據都以持久化的方式存儲。

+0

你如何以編程方式知道「空白」在哪裏? – s15199d