我正試圖寫入一個非常大量的數據到恆定內存中的文件。爲什麼不能在常量內存中運行?
import qualified Data.ByteString.Lazy as B
{- Creates and writes num grids of dimensions aa x aa -}
writeGrids :: Int -> Int -> IO()
writeGrids num aa = do
rng <- newPureMT
let (grids,shuffleds) = createGrids rng aa
createDirectoryIfMissing True "data/grids/"
B.writeFile (gridFileName num aa)
(encode (take num grids))
B.writeFile (shuffledFileName num aa)
(encode (take num shuffleds))
但是,這會消耗與num
大小成正比的內存。我知道createGrids
是一個足夠懶惰的函數,因爲我已經通過將error "not lazy enough"
(如Haskell wiki here建議的)附加到它返回的列表末尾來測試它,並且不會引發錯誤。 take
是在Data.List
中定義的惰性函數。 encode
也是在Data.Binary
中定義的懶惰函數。 B.writeFile
在Data.ByteString.Lazy
中定義。
下面是完整的代碼,以便能夠執行它:
import Control.Arrow (first)
import Data.Binary
import GHC.Float (double2Float)
import System.Random (next)
import System.Random.Mersenne.Pure64 (PureMT, newPureMT, randomDouble)
import System.Random.Shuffle (shuffle')
import qualified Data.ByteString.Lazy as B
main :: IO()
main = writeGrids 1000 64
{- Creates and writes num grids of dimensions aa x aa -}
writeGrids :: Int -> Int -> IO()
writeGrids num aa = do
rng <- newPureMT
let (grids,shuffleds) = createGrids rng aa
B.writeFile "grids.bin" (encode (take num grids))
B.writeFile "shuffleds.bin" (encode (take num shuffleds))
{- a random number generator, dimension of grids to make
returns a pair of lists, the first is a list of grids of dimensions
aa x aa, the second is a list of the shuffled grids corresponding to the first list -}
createGrids :: PureMT -> Int -> ([[(Float,Float)]],[[(Float,Float)]])
createGrids rng aa = (grids,shuffleds) where
rs = randomFloats rng
grids = map (getGridR aa) (chunksOf (2 * aa * aa) rs)
shuffleds = shuffler (aa * aa) rng grids
{- length of each grid, a random number generator, a list of grids
returns a the list with each grid shuffled -}
shuffler :: Int -> PureMT -> [[(Float,Float)]] -> [[(Float,Float)]]
shuffler n rng (xs:xss) = shuffle' xs n rng : shuffler n (snd (next rng)) xss
shuffler _ _ [] = []
{- divides list into chunks of size n -}
chunksOf :: Int -> [a] -> [[a]]
chunksOf n = go
where go xs = case splitAt n xs of
(ys,zs) | null ys -> []
| otherwise -> ys : go zs
{- dimension of grid, list of random floats [0,1]
returns a list of (x,y) points of length n^2 such that all
points are in the range [0,1] and the points are a randomly
perturbed regular grid -}
getGridR :: Int -> [Float] -> [(Float,Float)]
getGridR n rs = pts where
nn = n * n
(irs,jrs) = splitAt nn rs
n' = fromIntegral n
grid = [ (p,q) | p <- [0..n'-1], q <- [0..n'-1] ]
pts = zipWith (\(p,q) (ir,jr) -> ((p+ir)/n',(q+jr)/n')) grid (zip irs jrs)
{- an infinite list of random floats in range [0,1] -}
randomFloats :: PureMT -> [Float]
randomFloats rng = let (d,rng') = first double2Float (randomDouble rng)
in d : randomFloats rng'
所需的軟件包是: ,字節串 ,二進制 ,隨機 ,梅森 - 隨機pure64 ,隨機洗牌
重新第一點:該'Binary'實例'[α]'迫使列表與'length' – jberryman
二進制編碼字節串的格式的脊柱'word64與大端長度|| data'。這解釋了jberryman提到的嚴格脊柱。人們可以很容易地對塊進行序列化並終止布爾值,從而使編碼形式像「Word16 big endian chunk size ||塊||假||塊|| .. || TRUE'。這應該能夠以O(塊大小)運行。要保持以前的格式,你必須重寫第一個字段,使你的IO稍微複雜一些。 –