Haskell/效能示例
目標:用實際發生的示例逐步解釋最佳化。
dons:像 C 一樣快速編寫 Haskell:利用嚴格性、惰性和遞迴。
dons:像 C 一樣快速編寫 Haskell:利用嚴格性、惰性和遞迴。
haskell-cafe:另一個新手效能問題 我希望他不介意我在這裡釋出他的程式碼,我還是得問他。 -- apfeλmus 08:46, 18 May 2008 (UTC)
type CSV = [[String]]
main = do
args <- getArgs
file <- readFile (head args)
writeFile (head args ++ "2") (processFile (args !! 1) file)
processFile s = writeCSV . doInteraction s . readCSV
doInteraction line csv = insertLine (show line) (length csv - 1) csv
writeCSV = (\x -> x ++ "\n") . concat . intersperse "\n" . (map (concat . intersperse "," . (map show)))
insertLine line pos csv = (take pos csv) ++ [readCSVLine line] ++ drop pos csv
readCSVLine = read . (\x -> "["++x++"]")
readCSV = map readCSVLine . lines
我認為在郵件列表中還有另一個 cvs 解析執行緒我認為很合適,但我記不起來了。
jkff 在 #haskell 中詢問了一些分析日誌檔案的程式碼。基本上,它是在構建直方圖
foldl' (\m (x,y) -> insertWith' x (\[y] ys -> y:ys) [y] m) M.empty [(ByteString.copy foo, ByteString.copy bar) | (foo,bar) <- map (match regex) lines]
輸入是一個 1GB 的日誌檔案,程式佔用了可用記憶體,主要是因為 ByteString.copy 沒有被強制,整個檔案都停留在記憶體中。
此程式碼演示了您的 CPU 可能在預測程式碼分支(即 if 子句)時成功或失敗。該程式碼生成了一個很大的隨機 Int 的 Vector。由此,我們計算所有大於 999 的數字之和。如果資料已排序,CPU 可以輕鬆推斷出下一個分支的可能結果。如果資料未排序,這將更加困難且成功率更低。不幸的是,為一個目的而排序很少是一個好主意。該程式碼使用 criterion 包進行時間測量。例如,使用 -Wall -keep-llvm-files -O2 -optlo-O3 -fllvm -rtsopts -threaded -eventlog -feager-blackholing -fmax-simplifier-iterations=20 -fsimplifier-phases=3 -fno-liberate-case -fspecialise-aggressively 編譯此程式碼。這個想法來自 github.com/Kobzol/hardware-effects。(tkx68hamburg)
{-# LANGUAGE FlexibleInstances, RankNTypes, BangPatterns, CPP, PartialTypeSignatures, UnicodeSyntax, OverloadedStrings #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Main (main) where
import Control.DeepSeq
import Control.Exception (evaluate)
import Control.Monad.ST
import Criterion.Main
import qualified Data.Vector.Algorithms.Radix as Radix
import qualified Data.Vector.Unboxed as UV
import System.Random
#ifdef DEFINE_NFDATA_VECTOR
instance (NFData a) => NFData (V.Vector a) where
rnf v = V.foldr (\a b -> rnf a `seq` b) () v
#endif
setupUnsorted :: IO [UV.Vector Int]
setupUnsorted = do
small <- randomSampleUVVector 1024
med <- randomSampleUVVector $ 1024*1024
large <- randomSampleUVVector $ 20*1024*1024
return [small, med, large]
setupSorted :: IO [UV.Vector Int]
setupSorted = fmap (fmap sortUVec) setupUnsorted
main :: IO ()
main = do
defaultMainWith
defaultConfig [
env setupUnsorted
(\ ~[small, med, large] -> bgroup "Branch without sort" [
bench "small" $ whnf core1 small,
bench "medium" $ whnf core1 med,
bench "large" $ whnf core1 large
]
),
env setupUnsorted
(\ ~[small, med, large] -> bgroup "Branch without sort using filter" [
bench "small" $ whnf core1a small,
bench "medium" $ whnf core1a med,
bench "large" $ whnf core1a large
]
),
env setupUnsorted
(\ ~[small, med, large] -> bgroup "Branch including sort" [
bench "small" $ whnf core2 small,
bench "medium" $ whnf core2 med,
bench "large" $ whnf core2 large
]
),
env setupSorted
(\ ~[small, med, large] -> bgroup "Branch after sort" [
bench "small" $ whnf core1 small,
bench "medium" $ whnf core1 med,
bench "large" $ whnf core1 large
]
)
]
{- INLINE -}
core1 :: UV.Vector Int -> Int
core1 = UV.foldl' condSum 0
{- INLINE -}
core2 :: UV.Vector Int -> Int
core2 = UV.foldl' condSum 0 . sortUVec
{- INLINE -}
core1a :: UV.Vector Int -> Int
core1a = UV.sum . (UV.filter (>999))
{- INLINE -}
condSum :: Int -> Int -> Int
condSum !x !y = if y>999 then x+y else x
randomSampleUVVector :: Int -> IO (UV.Vector Int)
randomSampleUVVector i = evaluate $ force $ UV.fromList (take i (randoms (mkStdGen 0) :: [Int]))
sortUVec :: forall a . (Ord a, Radix.Radix a, UV.Unbox a) => UV.Vector a -> UV.Vector a
sortUVec vec =
runST
(do mv <- UV.thaw vec
Radix.sort mv
UV.unsafeFreeze mv)