2013-06-04 52 views
3

我正在自學 F#(純粹出於興趣,與工作或學校無關),並嘗試編寫一個簡單的解析器,用來統計某個 Windows Phone 應用程序在多個市場中的評論數。毫無疑問,我目前的代碼很難看,但我正試圖改進它,並遵循函數式編程範式。由於我來自 C、C++、C# 的世界,這對我來說並不容易。(問題標題:使用 F# 下載 Windows Phone 應用程序評論)

  • 來自 C 的世界,我習慣使用空值(null)。我知道函數式編程/F# 不鼓勵使用 null,但是我找不到不使用它的方法。例如,在函數 parse 中就有一個空值檢查。我怎樣才能避免這樣做?

  • 現在,我的代碼只計算第一頁上的評論數量,但一個應用程序可能有超過 10 條評論,因此可能會出現多個頁面。我該如何遞歸遍歷所有頁面(在函數 downloadReviews 或 parse 中)?

  • 我們如何將這段代碼擴展爲完全異步?

以下是我到目前爲止的代碼。除了上面的問題之外,我真的很希望有人能夠幫助我,並告訴我如何改進我的代碼的整體結構。

open System 
open System.IO 
open System.Xml 
open System.Xml.Linq 
open Printf 

/// Outcome of a single page download.
type DownloadPageResult =
    { /// Address that was requested.
      Uri: System.Uri
      /// True when the download failed.
      ErrorOccured: bool
      /// Downloaded text.
      Source: string }

/// Review information extracted from one downloaded page.
type ReviewData =
    { /// Address of the page the data was taken from.
      CurrentPageUri: System.Uri
      /// Address of the following page.
      NextPageUri: System.Uri
      /// Number of reviews counted on the page.
      NumberOfReviews: int }

/// Helpers that build the marketplace URLs used to fetch reviews.
module ReviewUrl =
    /// Appends `path` to the marketplace host prefix and wraps the result in a Uri.
    let getBaseUri path =
        new Uri("http://cdn.marketplaceedgeservice.windowsphone.com/" + path)

    /// Builds the reviews URL for application `appId`, passing `country` as the
    /// `cc` parameter and `locale` as the `lang` parameter.
    let getUri country locale appId =
        let route = sprintf "/v8/ratings/product/%s/reviews" appId
        let queryString =
            sprintf "os=8.0.0.0&cc=%s&oc=&lang=%s&hw=520170499&dm=Test&chunksize=10" country locale
        getBaseUri (route + "?" + queryString)

/// Downloads the document at `uri` as a string.
/// Prints the requested address to standard output before downloading.
/// Returns a DownloadPageResult; if any exception is raised the result has
/// ErrorOccured = true and an empty Source instead of propagating the error.
let downloadPage (uri: System.Uri) =
    try
        use webClient = new System.Net.WebClient()
        printfn "%s" (uri.ToString())
        webClient.Headers.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
        // No "Accept-Encoding" header is sent: WebClient does not decompress
        // response bodies, so advertising compressed encodings could make
        // DownloadString return compressed bytes decoded as text.
        webClient.Headers.Add("Accept-Language", "en-US,en;q=0.8,fr;q=0.6")
        webClient.Headers.Add("User-Agent", "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1482.0 Safari/537.36")
        { Uri = uri; Source = webClient.DownloadString(uri); ErrorOccured = false }
    with _ ->
        // Deliberate best effort: every failure is reported through the result record.
        { Uri = uri; Source = String.Empty; ErrorOccured = true }

/// Builds the reviews URL for the given country, locale and application id
/// and downloads that page.
let downloadReview country locale appId =
    ReviewUrl.getUri country locale appId |> downloadPage

/// Extracts review data from a downloaded page.
/// A failed download yields zero reviews and a null NextPageUri.
/// Otherwise the source is loaded as XML, the Atom `entry` elements are
/// counted, and the first Atom `link` element with rel="next" supplies the
/// next page address. NextPageUri is null when no such link (or no target) exists.
let parse(pageResult: DownloadPageResult) =
    if pageResult.ErrorOccured then
        { CurrentPageUri = pageResult.Uri; NextPageUri = null; NumberOfReviews = 0 }
    else
        // `use` disposes the reader once parsing is finished.
        use reader = new StringReader(pageResult.Source)
        let doc = XDocument.Load(reader)
        let ns = XNamespace.Get("http://www.w3.org/2005/Atom")

        // A link without a `rel` attribute must not crash the parser:
        // XElement.Attribute returns null when the attribute is absent.
        let isNextLink (link: XElement) =
            match link.Attribute(XName.Get("rel")) with
            | null -> false
            | rel -> rel.Value = "next"

        // Atom links carry their target in the `href` attribute; the element
        // text is kept as a fallback for feeds that put the URL there.
        let linkTarget (link: XElement) =
            match link.Attribute(XName.Get("href")) with
            | null -> link.Value
            | href -> href.Value

        let nextPageUri =
            match doc.Descendants(ns + "link") |> Seq.tryFind isNextLink with
            | Some link ->
                let target = linkTarget link
                // An empty target means there is no usable next page.
                if String.IsNullOrEmpty target then null
                else ReviewUrl.getBaseUri(target)
            | None -> null

        { CurrentPageUri = pageResult.Uri
          NextPageUri = nextPageUri
          NumberOfReviews = doc.Descendants(ns + "entry") |> Seq.length }

/// Downloads and parses the first reviews page for `locale`.
/// The country code is the part of `locale` starting at index 3
/// (for example "US" in "en-US"); the application id is fixed.
let downloadReviews(locale: string) =
    let appId = "4e08377c-1240-4f80-9c35-0bacde2c66b6"
    downloadReview (locale.Substring(3)) locale appId |> parse


/// Program entry point: downloads the review data for each locale in turn,
/// prints the resulting array and exits with code 0.
[<EntryPoint>]
let main argv =
    [| "en-US"; "en-GB"; |]
    |> Array.map downloadReviews
    |> printfn "%A"
    0
+0

關於不使用空值的問題,請查看 F# 中的 Option 類型:http://msdn.microsoft.com/en-us/library/dd233245.aspx。一旦你開始不必再編寫檢查空值的代碼,就再也不想回頭了。 –

回答

10

我對這個問題多做了一些嘗試,使用了 F# Data 庫中的 XML 類型提供程序(type provider)及其他功能。以下代碼並不完整,但應該足以讓你了解思路(同時也展示了類型提供程序有多好用 :-)):

首先,我需要一些參考:

#r "System.Xml.Linq.dll" 
#r "FSharp.Data.dll" 
open FSharp.Data 
open FSharp.Net 

接下來,我寫了下面的代碼來下載一個示例頁面:

// Downloads one sample reviews page (US market, en-US language) so that it
// can be saved and used as the sample document for the XML type provider.
// The argument tuple is indented under Http.Request so that it is parsed
// as the call's arguments rather than as a separate expression.
let data =
    Http.Request
        ("http://cdn.marketplaceedgeservice.windowsphone.com//v8/ratings/product/4e08377c-1240-4f80-9c35-0bacde2c66b6/reviews",
         query=["os", "8.0.0.0"; "cc", "US"; "lang", "en-US"; "hw", "520170499"; "dm", "Test"; "chunksize", "10" ],
         headers=["User-Agent", "F#"])

我把這個示例保存爲 D:\temp\appstore.xml,然後使用 XML 類型提供程序得到一個方便解析頁面的類型:

// Type generated by the XML type provider from the sample document saved at
// D:\temp\appstore.xml; it is used to parse downloaded review pages.
type PageDocument = XmlProvider< @"D:\temp\appstore.xml" > 

然後你就可以像這樣下載並解析頁面(這裏演示如何獲得評論數量以及下一頁鏈接的信息):

/// Asynchronously downloads and parses the first reviews page of application
/// `appId` for `locale`. The country code is the part of `locale` starting
/// at index 3 (for example "US" in "en-US").
/// Returns a pair: the number of review entries on the page and, when a link
/// with Rel = "next" exists, Some of its Href (None otherwise).
let parseAsync (locale:string) appId = async {
    let country = locale.Substring(3)
    let url =
        "http://cdn.marketplaceedgeservice.windowsphone.com//v8/ratings/product/" + appId + "/reviews"

    // Make the request (asynchronously) using the parameters specified
    let! data =
        Http.AsyncRequest
            (url,
             query=[ "os", "8.0.0.0"; "cc", country; "lang", locale;
                     "hw", "520170499"; "dm", "Test"; "chunksize", "10" ],
             headers=["User-Agent", "F#"])

    // Parse the result using the type-provider generated type
    let page = PageDocument.Parse(data)

    // Now you can type 'page' followed by '.' and explore the results!
    // page.GetLinks() returns all links and page.GetEntries() returns
    // review entries. Each link also has 'Rel' and 'Href' properties:
    let nextLink =
        page.GetLinks()
        |> Seq.tryFind (fun link -> link.Rel = "next")
        |> Option.map (fun link -> link.Href)
    let reviewsCount = page.GetEntries().Length
    return (reviewsCount, nextLink) }
+0

哇!謝謝 Tomas,這段代碼非常簡潔,也很有趣! :) 但是,當我嘗試運行 parseAsync 時,出現以下錯誤:類型約束不匹配。類型 '(string -> Async<int * string option>) []' 與類型 'seq<Async<'a>>' 不兼容。類型 'Async<'a>' 與類型 'string -> Async<int * string option>' 不匹配。我仍在努力理解這些錯誤信息。你知道這是什麼意思嗎? – Martin

+0

此片段只下載一個區域設置,因此你可以使用 `parseAsync "en-US" "{... guid ...}" |> Async.RunSynchronously` 來運行它。 –

+0

有關異步工作流的更多信息,請查看在此MSDN頁面上:http://msdn.microsoft.com/en-us/library/dd233250.aspx –

2

讓代碼異步化的一般模式是:先(在調用樹的某處)找到執行昂貴 I/O 的原始操作,然後從那裏逐層「向上」,把使用它的代碼也改成異步的,直到到達你需要阻塞等待結果的位置。

在你的例子中,原始操作是下載,所以首先要把 downloadPage 改成異步的:

/// Asynchronous download of the document at `uri`.
/// Prints the requested address, then awaits the download; any exception is
/// turned into a result with ErrorOccured = true and an empty Source.
let downloadPage (uri: System.Uri) = async {
    try
        use client = new System.Net.WebClient()
        uri.ToString() |> printfn "%s"
        // (Headers omitted)
        let! body = client.AsyncDownloadString(uri)
        return { Uri = uri; Source = body; ErrorOccured = false }
    with _ ->
        return { Uri = uri; Source = String.Empty; ErrorOccured = true } }

你需要用 async { ... } 塊包裝代碼,使用 let! 調用 DownloadString 的異步版本,並使用 return 返回結果(兩個分支都要)。

然後你需要把 downloadReview、downloadReviews 等函數也改成異步的(同樣用 async 塊包裝,並使用 let! 或 return! 調用其他異步操作,例如 downloadPage)。最後,如果你編寫的是控制檯應用程序,就需要在某處阻塞等待,但可以並行運行針對不同語言環境的下載。假設 downloadReviews 是異步的:

// Locales whose reviews are downloaded.
let locales = [| "en-US"; "en-GB"; |]
// Builds one asynchronous computation per locale, composes them into a single
// parallel computation, then runs it and waits for all results.
let results =
    Async.RunSynchronously (Async.Parallel (Array.map downloadReviews locales))

至於其他問題:我覺得在上面的例子中使用 null 可能沒什麼問題(你調用的 LINQ 方法會返回 null,所以沒有簡單的方法避免這種情況)。實際上也可以改用 option 類型,但這有點棘手——如果你感興趣,請參閱這個代碼片段。

此外,你可以使用 F# Data 庫中的 Http.AsyncRequest 方法,它能讓你更簡單地構建複雜的 HTTP 請求(不過我是該庫的貢獻者之一,所以我有偏見!)

2

正如Tomas所說,創建一個基於異步的版本DownloadString(或者只是使用他的FSharp.Data庫來處理它)會更「功能性」。

你還可以將 FSharp.Data 與 ExtCore 結合使用,以充分利用 ExtCore 中的 asyncMaybe 和 asyncChoice 工作流。這些工作流在普通的 async 工作流之上提供了非常易用的錯誤處理。

無論如何,我花了幾分鐘清理你的代碼。這不是很多,但它確實簡化了幾處代碼:

open System 
open System.IO 
open System.Xml 
open System.Xml.Linq 
open Printf 

/// Outcome of a single page download.
type DownloadPageResult =
    { /// Address that was requested.
      Uri : System.Uri
      /// True when the download failed.
      ErrorOccured : bool
      /// Downloaded text.
      Source : string }

/// Review information extracted from one downloaded page.
type ReviewData =
    { /// Address of the page the data was taken from.
      CurrentPageUri : System.Uri
      /// Address of the following page, if there is one.
      NextPageUri : System.Uri option
      /// Number of reviews counted on the page.
      NumberOfReviews : uint32 }

/// Helpers that build the marketplace URLs used to fetch reviews.
module ReviewUrl =
    /// Absolute address of the marketplace host.
    let baseUri = Uri ("http://cdn.marketplaceedgeservice.windowsphone.com/", UriKind.Absolute)

    /// Builds the reviews URL for application `appId` (formatted with the "D"
    /// specifier), passing `country` as the `cc` parameter and `locale` as the
    /// `lang` parameter. The path is resolved against `baseUri`.
    let getUri country locale (appId : System.Guid) =
        let relative =
            sprintf "/v8/ratings/product/%s/reviews?os=8.0.0.0&cc=%s&oc=&lang=%s&hw=520170499&dm=Test&chunksize=10"
                (appId.ToString "D") country locale
        Uri (baseUri, relative)

/// Downloads the document at `uri` as a string.
/// Prints the requested address to standard output before downloading.
/// Returns a DownloadPageResult; if any exception is raised the result has
/// ErrorOccured = true and an empty Source instead of propagating the error.
let downloadPage (uri : System.Uri) =
    try
        use webClient = new System.Net.WebClient()
        printfn "%s" (uri.ToString())
        webClient.Headers.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
        // No "Accept-Encoding" header is sent: WebClient does not decompress
        // response bodies, so advertising compressed encodings could make
        // DownloadString return compressed bytes decoded as text.
        webClient.Headers.Add("Accept-Language", "en-US,en;q=0.8,fr;q=0.6")
        webClient.Headers.Add("User-Agent", "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1482.0 Safari/537.36")
        { Uri = uri; Source = webClient.DownloadString uri; ErrorOccured = false }
    with _ ->
        // Deliberate best effort: every failure is reported through the result record.
        { Uri = uri; Source = String.Empty; ErrorOccured = true }


/// Extracts review data from a downloaded page.
/// A failed download yields zero reviews and no next page.
/// Otherwise the source is loaded as XML, the Atom `entry` elements are
/// counted, and the first Atom `link` element with rel="next" supplies the
/// next page address, resolved against ReviewUrl.baseUri.
let parse (pageResult : DownloadPageResult) =
    if pageResult.ErrorOccured then
        { CurrentPageUri = pageResult.Uri; NextPageUri = None; NumberOfReviews = 0u }
    else
        use reader = new StringReader (pageResult.Source)
        let doc = XDocument.Load(reader)
        let ns = XNamespace.Get "http://www.w3.org/2005/Atom"

        // A link without a `rel` attribute must not crash the parser:
        // XElement.Attribute returns null when the attribute is absent.
        let isNextLink (link : XElement) =
            match link.Attribute(XName.Get("rel")) with
            | null -> false
            | rel -> rel.Value = "next"

        // Atom links carry their target in the `href` attribute; the element
        // text is kept as a fallback for feeds that put the URL there.
        let linkTarget (link : XElement) =
            match link.Attribute(XName.Get("href")) with
            | null -> link.Value
            | href -> href.Value

        let nextPageUri =
            match doc.Descendants(ns + "link") |> Seq.tryFind isNextLink with
            | Some link ->
                let target = linkTarget link
                // An empty target means there is no usable next page.
                if System.String.IsNullOrEmpty target then None
                else Some (Uri (ReviewUrl.baseUri, target))
            | None -> None

        { CurrentPageUri = pageResult.Uri
          NextPageUri = nextPageUri
          NumberOfReviews = doc.Descendants(ns + "entry") |> Seq.length |> uint32 }

/// Downloads and parses the first reviews page for `locale`.
/// The country code is the part of `locale` starting at index 3
/// (for example "US" in "en-US"); the application id is fixed.
let downloadReviews (locale : string) =
    let appId = System.Guid "4e08377c-1240-4f80-9c35-0bacde2c66b6"
    let country = locale.Substring 3
    let uri = ReviewUrl.getUri country locale appId
    parse (downloadPage uri)


/// Program entry point: downloads the review data for each locale in turn,
/// prints the resulting array and exits with code 0.
[<EntryPoint>]
let main argv =
    let results =
        [| for locale in [| "en-US"; "en-GB"; |] -> downloadReviews locale |]
    printfn "%A" results
    0
相關問題