2008-11-17 158 views
5

我仍在努力研究F#的事情 - 試圖弄清楚如何在F#中「思考」,而不是僅僅從我認識的其他語言翻譯出來。F#中的移動平均值計算#

我最近一直在考慮你之前和之後沒有1:1地圖的情況。 List.map掉落的情況。

其中一個例子是移動平均值,其中通常在n項中取平均值時,您將獲得長度爲len的列表的len-n + 1個結果。

對於那裏的高手,這是一個很好的方法來做到這一點(使用隊列捏Jomo Fisher)?

//Immutable queue, with added Length member 
type Fifo<'a> = 
    new()={xs=[];rxs=[]} 
    new(xs,rxs)={xs=xs;rxs=rxs} 

    val xs: 'a list; 
    val rxs: 'a list; 

    static member Empty() = new Fifo<'a>() 
    member q.IsEmpty = (q.xs = []) && (q.rxs = []) 
    member q.Enqueue(x) = Fifo(q.xs,x::q.rxs) 
    member q.Length() = (List.length q.xs) + (List.length q.rxs) 
    member q.Take() = 
     if q.IsEmpty then failwith "fifo.Take: empty queue" 
     else match q.xs with 
       | [] -> (Fifo(List.rev q.rxs,[])).Take() 
       | y::ys -> (Fifo(ys, q.rxs)),y 

//List module, add function to split one list into two parts (not safe if n > lst length) 
module List = 
    let splitat n lst = 
     let rec loop acc n lst = 
      if List.length acc = n then 
       (List.rev acc, lst) 
      else 
       loop (List.hd lst :: acc) n (List.tl lst) 
     loop [] n lst 

//Return list with moving average accross len elements of lst 
let MovingAverage (len:int) (lst:float list) = 
    //ugly mean - including this in Fifo kills genericity 
    let qMean (q:Fifo<float>) = ((List.sum q.xs) + (List.sum q.rxs))/(float (q.Length())) 

    //get first part of list to initialise queue 
    let (init, rest) = List.splitat len lst 

    //initialise queue with first n items 
    let q = new Fifo<float>([], init) 

    //loop through input list, use fifo to push/pull values as they come 
    let rec loop (acc:float list) ls (q:Fifo<float>) = 
     match ls with 
     | [] -> List.rev acc 
     | h::t -> 
      let nq = q.Enqueue(h) //enqueue new value 
      let (nq, _) = nq.Take() //drop old value 
      loop ((qMean nq)::acc) t nq //tail recursion 

    loop [qMean q] rest q 

//Example usage  
MovingAverage 3 [1.;1.;1.;1.;1.;2.;2.;2.;2.;2.] 

(也許一個更好的方式是由先進先出繼承來實現MovingAverageQueue?)

回答

7

如果你不在乎太多的表現,這裏是一個非常簡單的解決辦法:

#light 

let MovingAverage n s = 
    Seq.windowed n s 
    |> Seq.map Array.average 

let avgs = MovingAverage 5000 (Seq.map float [|1..999999|]) 

for avg in avgs do 
    printfn "%f" avg 
    System.Console.ReadKey() |> ignore 

這將重新計算從頭每個「窗口」的平均值,因此,如果窗戶是大是很差。

在任何情況下,請Seq.windowed:

http://research.microsoft.com/projects/cambridge/fsharp/manual/FSharp.Core/Microsoft.FSharp.Collections.Seq.html

,因爲它很方便的在你這樣的事情後面的口袋。

+0

非常好,這是幫助我「成長」的那種答案 - 即發現已經存在的東西,而不是重新發明輪子! – Benjol 2008-11-18 06:38:13

+1

死鏈接,我想現在所有的文檔都被移到了msdn,所以類似的頁面將是http://msdn.microsoft.com/en-us/library/dd233209(VS.100).aspx或http:// msdn .Microsoft.com/en-us/library/ee353635(VS.100).aspx – 2010-01-20 02:35:35

-2

據我所看到的,你的代碼是充滿了let聲明。我不熟悉F#,但做過一些Haskell。功能範式意味着不考慮「如何」,而是考慮「什麼」:你認爲Fifo,但實際上應該指定移動平均的語義。

-- the limited average of a list 
limitedaverage 0 _ = 0 
limited verage n (x:xs) = (x/n) + (limited average (n-1) xs) 

-- a list, transformed into a sequence of moving averages of 
movingaverages n [] = [] 
movingaverages n (x:xs) = (movingaverage n (x:xs) : movingaverages n xs) 
+0

嗯,測試? 我不是一個Haskellite,但有限平均每次除以最初的n,否則結果是錯誤的。 移動平均值應讀取(限制平均數n(x:xs):限制平均數n xs)? – Benjol 2008-11-17 13:36:45

1

這裏的哈斯克爾解決方案的(糾正,我希望)F#版本提出here

編輯:現在尾遞歸,沒有任何快,但不與爆炸N = 50000(請參閱非尾遞歸版本的編輯歷史)

let LimitedAverage n ls = 
    let rec loop acc i n ls = 
     match i with 
     | 0 -> acc //i counts down from n to 0, when we hit 0 we stop 
     | _ -> match ls with 
       | [] -> acc //if we hit empty list before end of n, we stop too 
       | x::xs -> (loop (acc + (x/float n)) (i - 1) n xs) //divide this value by n, perform average on 'rest' of list 
    loop 0. n n ls 

LimitedAverage 50000 (List.map float [1..9999999]) 
//>val it : float = 25000.5 

let rec MovingAverage3 n ls = 
    let rec acc loop i n ls = 
     match i with 
     | 0 -> List.rev acc //i counts down from n to 0, when we hit 0 we stop 
     | _ -> match ls with 
       | [] -> List.rev acc //if we hit empty list before end of n, we stop too 
       | x::xs -> loop (LimitedAverage2 n ls :: acc) (i - 1) n xs // limited average on whole list, then moving average on tail 
    loop [] (n + 1) n ls 

MovingAverage3 50000 (List.map float [1..9999999]) 
//>val it : float list = [25000.5; 25001.5; 25002.5; ...] 
+1

@Jon Harrop,我看到你:)。我的downvote評論在哪裏? – Benjol 2010-08-11 13:22:05

2

如果做一下性能護理,那麼你就可以計算出一個平均有效利用這樣的移動(假設我們正在計算在3天的窗口移動平均)

 
Numbers[n] Running Total[n] 
---------  --------------- 
n[0] = 7  7 = Numbers[0] 
n[1] = 1  8 = RunningTotal[1-1] + Numbers[1] 
n[2] = 2  10 = RunningTotal[2-1] + Numbers[2] 
n[3] = 8  11 = RunningTotal[3-1] + Numbers[3] - Numbers[3-3] 
n[4] = 4  14 = RunningTotal[4-1] + Numbers[4] - Numbers[4-3] 
n[5] = 1  13 = RunningTotal[5-1] + Numbers[5] - Numbers[5-3] 
n[6] = 9  14 = RunningTotal[6-1] + Numbers[6] - Numbers[6-3] 
... 
N    RunningTotal[N] = RunningTotal[N-1] + Numbers[N] - Numbers[N-3] 

硬部分關於這是持有您以前的運行總數和號碼 N窗口。我想出了下面的代碼:

let movingAverage days l = 
    seq { 
     let queue = new Queue<_>(days : int) 
     let divisor = decimal days 

     let total = ref 0m 
     for cur in l do 
      queue.Enqueue(cur) 
      total := !total + cur 
      if queue.Count < days then 
       yield (cur, 0m) 
      else 
       yield (cur, !total/divisor) 
       total := !total - (queue.Dequeue()) 
    } 

這個版本是不是好看的Haskell代碼,但應避免在每次運行時重新計算你的「窗口」,相關的性能問題。它保持運行總數並將先前使用的數字保存在隊列中,所以它應該非常快。

只是爲了好玩,我寫了一個簡單的基準:

#light 
open System 
open System.Collections.Generic 
open System.Diagnostics; 

let windowAverage days (l : #seq<decimal>) = Seq.windowed days l |> Seq.map (Seq.average) 

let princessAverage days l = 
    seq { 
     let queue = new Queue<_>(days : int) 
     let divisor = decimal days 

     let total = ref 0m 
     for cur in l do 
      queue.Enqueue(cur) 
      total := !total + cur 
      if queue.Count < days then 
       yield (cur, 0m) 
      else 
       yield (cur, !total/divisor) 
       total := !total - (queue.Dequeue()) 
    } 

let testData = 
    let rnd = new System.Random() 
    seq { for a in 1 .. 1000000 -> decimal (rnd.Next(1000)) } 

let benchmark msg f iterations = 
    let rec loop = function 
     | 0 ->() 
     | n -> f 3 testData |> ignore; loop (n - 1) 

    let stopWatch = Stopwatch.StartNew() 
    loop iterations 
    stopWatch.Stop() 
    printfn "%s: %f" msg stopWatch.Elapsed.TotalMilliseconds 

let _ = 
    let iterations = 10000000 
    benchmark "princessAverage" princessAverage iterations 
    benchmark "windowAverage" windowAverage iterations 
    printfn "Done" 

結果:

princessAverage: 1670.791800 
windowAverage: 2986.146900

我的版本是〜1.79x較快。

1

如果你關心性能,像優雅的代碼,然後嘗試

module MovingAverage = 
    let selfZip n l = 
     Seq.skip n l |> Seq.zip l 

    let runTotal i z = 
     Seq.scan (fun sum (s, e) -> sum - s + e) i z 

    let average n l:seq<'a> = 
     Seq.skip n l 
     |> selfZip n 
     |> runTotal (l |> Seq.take n |> Seq.sum) 
     |> Seq.map (fun sum -> decimal sum/decimal n) 

let ma = MovingAverage.average 2 myseq 

使用FSUnit我們可以測試它

let myseq = seq { for i in 0..10 do yield i } 

Seq.nth 0 ma |> should equal 0.5 
    Seq.nth 1 ma |> should equal 1.5 
    Seq.nth 2 ma |> should equal 2.5 
    Seq.nth 3 ma |> should equal 3.5 

算法的訣竅是第一和第n個數和 然後通過添加窗口的頭部 並減去窗口的尾部來保持運行總計。滑動窗口爲 ,通過對該序列執行自拉伸實現,但第二個參數爲按窗口大小進行拉伸的第一個 參數。

在管道的末端,我們只是將窗口的運行總量除以窗口 的大小。

注意掃描就像摺疊,但產生的每個版本的狀態爲 一個序列。

一個更優雅的解決方案,雖然可能與性能命中是 ,使觀察,如果我們零填充序列,我們不需要 來計算初始和。

namespace Utils 

module MovingAverage = 
    let selfZip n l = 
     Seq.skip n l |> Seq.zip l 

    let rec zeros = 
     seq { yield 0.0; yield! zeros} 

    // Create a running total given 
    let runTotal z = 
     Seq.scan (fun sum (s,e) -> sum - s + e) 0.0 z 

    let average n l = 
     Seq.concat [(Seq.take n zeros); l] 
     |> selfZip n 
     |> runTotal 
     |> Seq.map (fun sum -> sum/float n) 
     |> Seq.skip n 

有可能是由於命中相關 包裝兩個序列的第二間接性能,但也許它的窗口​​

0

的大小取決於 這是我的版本是不顯著。

let sma list n = 
    let len = List.length list 
    let rec loop acc sum cnt = 
     if cnt >= len then List.rev acc 
     else if cnt < n-1 then loop (0.0::acc) (sum + List.nth list cnt) (cnt+1) 
     else loop (((sum + List.nth list cnt)/(float n))::acc) (sum + (List.nth list cnt) - (List.nth list (cnt-n+1))) (cnt+1) 
    loop [] 0.0 0 

實施例:

sma (List.map float [5..50]) 5 
[0, 0, 0, 0, 7, 8, 9, ...]