From d7575f8220b4cdeab341a60823c4c85b1473c47e Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Mon, 15 Jun 2026 20:06:12 +0530 Subject: [PATCH 01/24] Flatten the list of FileSystem.Handle benchmarks --- .../Benchmark/FileSystem/Handle/Read.hs | 71 ++++++++----------- .../Benchmark/FileSystem/Handle/ReadWrite.hs | 57 ++++++--------- 2 files changed, 49 insertions(+), 79 deletions(-) diff --git a/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs b/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs index 9a4794fba0..5da851d97e 100644 --- a/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs +++ b/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs @@ -151,31 +151,6 @@ inspect $ hasNoTypeClasses 'readDecodeUtf8 -- inspect $ 'readDecodeUtf8Lax `hasNoType` ''Step #endif -o_1_space_reduce_read :: BenchEnv -> [Benchmark] -o_1_space_reduce_read env = - -- read raw bytes without any decoding - [ mkBench "S.drain" env $ \inh _ -> - readDrain inh - , mkBench "S.last" env $ \inh _ -> - readLast inh - , mkBench "S.sum" env $ \inh _ -> - readSumBytes inh - - -- read with Latin1 decoding - , mkBench "SS.decodeLatin1" env $ \inh _ -> - readDecodeLatin1 inh - , mkBench "S.length" env $ \inh _ -> - readCountBytes inh - , mkBench "US.lines . SS.decodeLatin1" env $ \inh _ -> - readCountLines inh - , mkBench "US.words . SS.decodeLatin1" env $ \inh _ -> - readCountWords inh - - -- read with utf8 decoding - , mkBenchSmall "SS.decodeUtf8" env $ \inh _ -> - readDecodeUtf8 inh - ] - ------------------------------------------------------------------------------- -- stream toBytes ------------------------------------------------------------------------------- @@ -195,12 +170,6 @@ inspect $ 'getChunksConcatUnfoldCountLines `hasNoType` ''Step inspect $ 'getChunksConcatUnfoldCountLines `hasNoType` ''Producer.ConcatState #endif -o_1_space_reduce_toBytes :: BenchEnv -> [Benchmark] -o_1_space_reduce_toBytes env = - [ mkBench "toBytes/US.lines . SS.decodeLatin1" env $ \inh _ -> - getChunksConcatUnfoldCountLines inh - ] - ------------------------------------------------------------------------------- -- reduce after grouping in chunks ------------------------------------------------------------------------------- @@ -244,10 +213,35 @@ chunksOf :: Int -> Handle -> IO Int chunksOf n inh = S.fold Fold.length $ A.chunksOf n (S.unfold FH.reader inh) --- XXX all these require @-fspec-constr-recursive=12@. -o_1_space_reduce_read_grouped :: BenchEnv -> [Benchmark] -o_1_space_reduce_read_grouped env = - [ mkBench ("S.groupsOf " ++ show (bigSize env) ++ " FL.sum") env $ +allBenchmarks :: BenchEnv -> [Benchmark] +allBenchmarks env = + -- read raw bytes without any decoding + [ mkBench "S.drain" env $ \inh _ -> + readDrain inh + , mkBench "S.last" env $ \inh _ -> + readLast inh + , mkBench "S.sum" env $ \inh _ -> + readSumBytes inh + + -- read with Latin1 decoding + , mkBench "SS.decodeLatin1" env $ \inh _ -> + readDecodeLatin1 inh + , mkBench "S.length" env $ \inh _ -> + readCountBytes inh + , mkBench "US.lines . SS.decodeLatin1" env $ \inh _ -> + readCountLines inh + , mkBench "US.words . SS.decodeLatin1" env $ \inh _ -> + readCountWords inh + + -- read with utf8 decoding + , mkBenchSmall "SS.decodeUtf8" env $ \inh _ -> + readDecodeUtf8 inh + + , mkBench "toBytes/US.lines . SS.decodeLatin1" env $ \inh _ -> + getChunksConcatUnfoldCountLines inh + + -- XXX all these require @-fspec-constr-recursive=12@. + , mkBench ("S.groupsOf " ++ show (bigSize env) ++ " FL.sum") env $ \inh _ -> chunksOfSum (bigSize env) inh , mkBench "S.groupsOf 1 FL.sum" env $ \inh _ -> @@ -288,10 +282,3 @@ o_1_space_reduce_read_grouped env = , mkBench "A.chunksOf 1000" env $ \inh _ -> chunksOf 1000 inh ] - -allBenchmarks :: BenchEnv -> [Benchmark] -allBenchmarks env = Prelude.concat - [ o_1_space_reduce_read env - , o_1_space_reduce_toBytes env - , o_1_space_reduce_read_grouped env - ] diff --git a/benchmark/Streamly/Benchmark/FileSystem/Handle/ReadWrite.hs b/benchmark/Streamly/Benchmark/FileSystem/Handle/ReadWrite.hs index ab1fc51400..34ab5459ff 100644 --- a/benchmark/Streamly/Benchmark/FileSystem/Handle/ReadWrite.hs +++ b/benchmark/Streamly/Benchmark/FileSystem/Handle/ReadWrite.hs @@ -60,14 +60,6 @@ inspect $ hasNoTypeClasses 'copyChunks inspect $ 'copyChunks `hasNoType` ''Step #endif -o_1_space_copy_chunked :: BenchEnv -> [Benchmark] -o_1_space_copy_chunked env = - [ mkBench "toNull" env $ \inH _ -> - copyChunks inH (nullH env) - , mkBench "raw" env $ \inH outH -> - copyChunks inH outH - ] - ------------------------------------------------------------------------------- -- copy unfold ------------------------------------------------------------------------------- @@ -85,14 +77,6 @@ inspect $ 'copyStream `hasNoType` ''MutArray.ArrayUnsafe -- FH.write/writeNUnsaf inspect $ 'copyStream `hasNoType` ''Strict.Tuple3' -- FH.write/chunksOf #endif -o_1_space_copy_read :: BenchEnv -> [Benchmark] -o_1_space_copy_read env = - [ mkBench "rawToNull" env $ \inh _ -> - copyStream inh (nullH env) - , mkBench "rawToFile" env $ \inh outh -> - copyStream inh outh - ] - ------------------------------------------------------------------------------- -- copy stream ------------------------------------------------------------------------------- @@ -143,14 +127,6 @@ _readChunksWith inh devNull = IUF.fold fld unf (defaultChunkSize, inh) fld = FH.write devNull unf = IUF.unfoldEach A.reader FH.chunkReaderWith -o_1_space_copy_fromBytes :: BenchEnv -> [Benchmark] -o_1_space_copy_fromBytes env = - [ mkBench "putBytes rawToNull" env $ \inh _ -> - readFromBytesNull inh (nullH env) - , mkBench "FH.readWith" env $ \inh _ -> - readWithFromBytesNull inh (nullH env) - ] - -- | Send the file contents ('defaultChunkSize') to /dev/null writeReadWith :: Handle -> Handle -> IO () writeReadWith inh devNull = IUF.fold fld unf (defaultChunkSize, inh) @@ -185,22 +161,29 @@ inspect $ 'writeRead `hasNoType` ''MutArray.ArrayUnsafe -- FH.write/writeNUnsafe -- FH.read/A.read #endif -o_1_space_copy :: BenchEnv -> [Benchmark] -o_1_space_copy env = - [ mkBench "FH.write . FH.read" env $ \inh _ -> - writeRead inh (nullH env) - , mkBench "FH.writeWith . FH.readWith" env $ \inh _ -> - writeReadWith inh (nullH env) - ] - ------------------------------------------------------------------------------- -- ------------------------------------------------------------------------------- allBenchmarks :: BenchEnv -> [Benchmark] -allBenchmarks env = Prelude.concat - [ o_1_space_copy_chunked env - , o_1_space_copy_read env - , o_1_space_copy_fromBytes env - , o_1_space_copy env +allBenchmarks env = + [ mkBench "toNull" env $ \inH _ -> + copyChunks inH (nullH env) + , mkBench "raw" env $ \inH outH -> + copyChunks inH outH + + , mkBench "rawToNull" env $ \inh _ -> + copyStream inh (nullH env) + , mkBench "rawToFile" env $ \inh outh -> + copyStream inh outh + + , mkBench "putBytes rawToNull" env $ \inh _ -> + readFromBytesNull inh (nullH env) + , mkBench "FH.readWith" env $ \inh _ -> + readWithFromBytesNull inh (nullH env) + + , mkBench "FH.write . FH.read" env $ \inh _ -> + writeRead inh (nullH env) + , mkBench "FH.writeWith . FH.readWith" env $ \inh _ -> + writeReadWith inh (nullH env) ] From e1153383132c1f15318d7dc440e4555245d3d6c3 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Mon, 15 Jun 2026 20:57:34 +0530 Subject: [PATCH 02/24] Rename FileSystem.Handle benchmark names with better description --- .../Benchmark/FileSystem/Handle/Read.hs | 48 ++++++++++--------- .../Benchmark/FileSystem/Handle/ReadWrite.hs | 16 +++---- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs b/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs index 5da851d97e..785c6c6898 100644 --- a/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs +++ b/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs @@ -213,72 +213,76 @@ chunksOf :: Int -> Handle -> IO Int chunksOf n inh = S.fold Fold.length $ A.chunksOf n (S.unfold FH.reader inh) +-- Benchmarks a mix of file reading and stream operations. The purpose is +-- twofold: (1) verify that file read operations fuse with downstream stream +-- operations, and (2) measure end-to-end performance of common real-world +-- pipelines (e.g. line count, word count) that combine file I/O with stream +-- processing. allBenchmarks :: BenchEnv -> [Benchmark] allBenchmarks env = -- read raw bytes without any decoding - [ mkBench "S.drain" env $ \inh _ -> + [ mkBench "Fold.drain" env $ \inh _ -> readDrain inh - , mkBench "S.last" env $ \inh _ -> + , mkBench "Fold.latest" env $ \inh _ -> readLast inh - , mkBench "S.sum" env $ \inh _ -> + , mkBench "Fold.sum" env $ \inh _ -> readSumBytes inh -- read with Latin1 decoding - , mkBench "SS.decodeLatin1" env $ \inh _ -> + , mkBench "Unicode.decodeLatin1" env $ \inh _ -> readDecodeLatin1 inh - , mkBench "S.length" env $ \inh _ -> + , mkBench "Fold.length (wc -c)" env $ \inh _ -> readCountBytes inh - , mkBench "US.lines . SS.decodeLatin1" env $ \inh _ -> + , mkBench "Unicode.lines . Unicode.decodeLatin1 (wc -l)" env $ \inh _ -> readCountLines inh - , mkBench "US.words . SS.decodeLatin1" env $ \inh _ -> + , mkBench "Unicode.words . Unicode.decodeLatin1 (wc -w)" env $ \inh _ -> readCountWords inh -- read with utf8 decoding - , mkBenchSmall "SS.decodeUtf8" env $ \inh _ -> + , mkBenchSmall "Unicode.decodeUtf8" env $ \inh _ -> readDecodeUtf8 inh - , mkBench "toBytes/US.lines . SS.decodeLatin1" env $ \inh _ -> + , mkBench "Handle.readChunks . Unicode.lines . Unicode.decodeLatin1 (wc -l)" env $ \inh _ -> getChunksConcatUnfoldCountLines inh -- XXX all these require @-fspec-constr-recursive=12@. - , mkBench ("S.groupsOf " ++ show (bigSize env) ++ " FL.sum") env $ + , mkBench ("Stream.groupsOf " ++ show (bigSize env) ++ " . Fold.sum") env $ \inh _ -> chunksOfSum (bigSize env) inh - , mkBench "S.groupsOf 1 FL.sum" env $ \inh _ -> + , mkBench "Stream.groupsOf 1 . Fold.sum" env $ \inh _ -> chunksOfSum 1 inh -- XXX investigate why we need inline/noinline in these cases (GHC) - -- Chunk using parsers , mkBench - ("S.foldMany1 (FL.take " ++ show (bigSize env) ++ " FL.sum)") + ("Stream.foldManyPost " ++ show (bigSize env) ++ " . Fold.sum") env $ \inh _ -> noinline foldMany1ChunksOfSum (bigSize env) inh , mkBench - "S.foldMany1 (FL.take 1 FL.sum)" + "Stream.foldManyPost 1 . Fold.sum" env $ \inh _ -> inline foldMany1ChunksOfSum 1 inh , mkBench - ("S.foldMany (FL.take " ++ show (bigSize env) ++ " FL.sum)") + ("Stream.foldMany " ++ show (bigSize env) ++ " . Fold.sum") env $ \inh _ -> noinline foldManyChunksOfSum (bigSize env) inh , mkBench - "S.foldMany (FL.take 1 FL.sum)" + "Stream.foldMany 1 . Fold.sum" env $ \inh _ -> inline foldManyChunksOfSum 1 inh -- folding chunks to arrays - , mkBenchSmall "S.groupsOf 1" env $ \inh _ -> + , mkBenchSmall "Stream.groupsOf 1 . Array.unsafeCreateOf" env $ \inh _ -> groupsOf 1 inh - , mkBench "S.groupsOf 10" env $ \inh _ -> + , mkBench "Stream.groupsOf 10 . Array.unsafeCreateOf" env $ \inh _ -> groupsOf 10 inh - , mkBench "S.groupsOf 1000" env $ \inh _ -> + , mkBench "Stream.groupsOf 1000 . Array.unsafeCreateOf" env $ \inh _ -> groupsOf 1000 inh -- chunksOf may use a different impl than groupsOf - , mkBenchSmall "A.chunksOf 1" env $ \inh _ -> + , mkBenchSmall "Array.chunksOf 1" env $ \inh _ -> chunksOf 1 inh - , mkBench "A.chunksOf 10" env $ \inh _ -> + , mkBench "Array.chunksOf 10" env $ \inh _ -> chunksOf 10 inh - , mkBench "A.chunksOf 1000" env $ \inh _ -> + , mkBench "Array.chunksOf 1000" env $ \inh _ -> chunksOf 1000 inh ] diff --git a/benchmark/Streamly/Benchmark/FileSystem/Handle/ReadWrite.hs b/benchmark/Streamly/Benchmark/FileSystem/Handle/ReadWrite.hs index 34ab5459ff..ce4c6c5472 100644 --- a/benchmark/Streamly/Benchmark/FileSystem/Handle/ReadWrite.hs +++ b/benchmark/Streamly/Benchmark/FileSystem/Handle/ReadWrite.hs @@ -167,23 +167,23 @@ inspect $ 'writeRead `hasNoType` ''MutArray.ArrayUnsafe -- FH.write/writeNUnsafe allBenchmarks :: BenchEnv -> [Benchmark] allBenchmarks env = - [ mkBench "toNull" env $ \inH _ -> + [ mkBench "Handle.readChunks . Handle.writeChunks" env $ \inH _ -> copyChunks inH (nullH env) - , mkBench "raw" env $ \inH outH -> + , mkBench "Handle.readChunks . Handle.writeChunks (cat)" env $ \inH outH -> copyChunks inH outH - , mkBench "rawToNull" env $ \inh _ -> + , mkBench "Handle.reader . Handle.write" env $ \inh _ -> copyStream inh (nullH env) - , mkBench "rawToFile" env $ \inh outh -> + , mkBench "Handle.reader . Handle.write (cat)" env $ \inh outh -> copyStream inh outh - , mkBench "putBytes rawToNull" env $ \inh _ -> + , mkBench "Handle.reader . Handle.putBytes" env $ \inh _ -> readFromBytesNull inh (nullH env) - , mkBench "FH.readWith" env $ \inh _ -> + , mkBench "Handle.readerWith . Handle.putBytes" env $ \inh _ -> readWithFromBytesNull inh (nullH env) - , mkBench "FH.write . FH.read" env $ \inh _ -> + , mkBench "Handle.reader . Handle.write (Unfold.fold)" env $ \inh _ -> writeRead inh (nullH env) - , mkBench "FH.writeWith . FH.readWith" env $ \inh _ -> + , mkBench "Handle.readerWith . Handle.writeWith (Unfold.fold)" env $ \inh _ -> writeReadWith inh (nullH env) ] From a9c5d65d8764a3eaf07f2013fd7f98e74c2d9ad4 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Mon, 15 Jun 2026 23:32:22 +0530 Subject: [PATCH 03/24] Flatten the list of benchmarks in Unicode.Stream --- .../Streamly/Benchmark/Unicode/Stream.hs | 98 ++++++++----------- 1 file changed, 42 insertions(+), 56 deletions(-) diff --git a/benchmark/Streamly/Benchmark/Unicode/Stream.hs b/benchmark/Streamly/Benchmark/Unicode/Stream.hs index 9755e85e25..c34e20f3d2 100644 --- a/benchmark/Streamly/Benchmark/Unicode/Stream.hs +++ b/benchmark/Streamly/Benchmark/Unicode/Stream.hs @@ -32,7 +32,6 @@ import System.IO (Handle) import qualified Streamly.Data.Array as Array import qualified Streamly.Data.Fold as Fold -import qualified Streamly.Internal.Data.Producer as Producer import qualified Streamly.Internal.Data.Stream as Stream import qualified Streamly.Internal.Data.Unfold as Unfold import qualified Streamly.Internal.FileSystem.Handle as Handle @@ -49,6 +48,7 @@ import Streamly.Internal.Data.MutByteArray (Unbox) import Streamly.Internal.Data.Stream (Step(..)) import qualified Streamly.Internal.Data.MutArray as MutArray import qualified Streamly.Internal.Data.Fold as Fold +import qualified Streamly.Internal.Data.Producer as Producer import qualified Streamly.Internal.Data.Tuple.Strict as Strict import Test.Inspection @@ -71,12 +71,6 @@ inspect $ hasNoTypeClasses 'copyCodecUtf8ArraysLenient -- inspect $ 'copyCodecUtf8ArraysLenient `hasNoType` ''Step #endif -o_1_space_decode_encode_chunked :: BenchEnv -> [Benchmark] -o_1_space_decode_encode_chunked env = - [ mkBenchSmall "encodeUtf8' . decodeUtf8Arrays" env $ \inH outH -> - copyCodecUtf8ArraysLenient inH outH - ] - ------------------------------------------------------------------------------- -- copy with group/ungroup transformations ------------------------------------------------------------------------------- @@ -187,24 +181,6 @@ wordsUnwordsCharArrayCopy inh outh = $ Unicode.decodeLatin1 $ Stream.unfold Handle.reader inh --- XXX all these require @-fspec-constr-recursive=12@. -o_1_space_copy_read_group_ungroup :: BenchEnv -> [Benchmark] -o_1_space_copy_read_group_ungroup env = - [ mkBenchSmall "unlines . splitOnSuffix ([Word8])" env - $ \inh outh -> linesUnlinesCopy inh outh - , mkBenchSmall "interposeSuffix . splitOnSuffix (Array Word8)" env - $ \inh outh -> linesUnlinesArrayWord8Copy inh outh - , mkBenchSmall "UnicodeArr.unlines . UnicodeArr.lines (Array Char)" env - $ \inh outh -> linesUnlinesArrayCharCopy inh outh - - , mkBenchSmall "interposeSuffix . wordsBy ([Word8])" env - $ \inh outh -> wordsUnwordsCopyWord8 inh outh - , mkBenchSmall "unwords . wordsBy ([Char])" env - $ \inh outh -> wordsUnwordsCopy inh outh - , mkBenchSmall "UnicodeArr.unwords . UnicodeArr.words (Array Char)" env - $ \inh outh -> wordsUnwordsCharArrayCopy inh outh - ] - ------------------------------------------------------------------------------- -- copy unfold ------------------------------------------------------------------------------- @@ -314,29 +290,49 @@ _copyStreamUtf8Parser inh outh = (Unicode.parseCharUtf8With Unicode.TransliterateCodingFailure) $ Stream.unfold Handle.reader inh --- XXX all these require @-fspec-constr-recursive=12@. -o_1_space_decode_encode_read :: BenchEnv -> [Benchmark] -o_1_space_decode_encode_read env = - -- This needs an ascii file, as decode just errors out. - [ mkBench "encodeLatin1' . decodeLatin1" env $ \inh outh -> - copyStreamLatin1' inh outh - , mkBench "encodeLatin1 . decodeLatin1" env $ \inh outh -> - copyStreamLatin1 inh outh +allBenchmarks :: BenchEnv -> [Benchmark] +allBenchmarks env = + [ bgroup (o_1_space_prefix moduleName) + [ -- XXX all these require @-fspec-constr-recursive=12@. + mkBenchSmall "unlines . splitOnSuffix ([Word8])" env + $ \inh outh -> linesUnlinesCopy inh outh + , mkBenchSmall "interposeSuffix . splitOnSuffix (Array Word8)" env + $ \inh outh -> linesUnlinesArrayWord8Copy inh outh + , mkBenchSmall "UnicodeArr.unlines . UnicodeArr.lines (Array Char)" env + $ \inh outh -> linesUnlinesArrayCharCopy inh outh + + , mkBenchSmall "interposeSuffix . wordsBy ([Word8])" env + $ \inh outh -> wordsUnwordsCopyWord8 inh outh + , mkBenchSmall "unwords . wordsBy ([Char])" env + $ \inh outh -> wordsUnwordsCopy inh outh + , mkBenchSmall "UnicodeArr.unwords . UnicodeArr.words (Array Char)" env + $ \inh outh -> wordsUnwordsCharArrayCopy inh outh + + , mkBenchSmall "encodeUtf8' . decodeUtf8Arrays" env $ \inH outH -> + copyCodecUtf8ArraysLenient inH outH + + -- XXX all these require @-fspec-constr-recursive=12@. + -- This needs an ascii file, as decode just errors out. + , mkBench "encodeLatin1' . decodeLatin1" env $ \inh outh -> + copyStreamLatin1' inh outh + , mkBench "encodeLatin1 . decodeLatin1" env $ \inh outh -> + copyStreamLatin1 inh outh #ifdef INCLUDE_STRICT_UTF8 - -- Requires valid unicode input - , mkBench "encodeUtf8' . decodeUtf8'" env $ \inh outh -> - _copyStreamUtf8' inh outh - , mkBench "encodeUtf8' . foldMany writeCharUtf8'" env $ \inh outh -> - _copyStreamUtf8'Fold inh outh + -- Requires valid unicode input + , mkBench "encodeUtf8' . decodeUtf8'" env $ \inh outh -> + _copyStreamUtf8' inh outh + , mkBench "encodeUtf8' . foldMany writeCharUtf8'" env $ \inh outh -> + _copyStreamUtf8'Fold inh outh #endif - , mkBenchSmall "encodeUtf8 . parseMany parseCharUtf8" env - $ \inh outh -> _copyStreamUtf8Parser inh outh - , mkBenchSmall "encodeUtf8 . decodeUtf8" env $ \inh outh -> - copyStreamUtf8 inh outh - {- - , mkBenchSmall "encodeUtf16 . decodeUtf16" env $ \inh outh -> - copyStreamUtf16 inh outh - -} + , mkBenchSmall "encodeUtf8 . parseMany parseCharUtf8" env + $ \inh outh -> _copyStreamUtf8Parser inh outh + , mkBenchSmall "encodeUtf8 . decodeUtf8" env $ \inh outh -> + copyStreamUtf8 inh outh + {- + , mkBenchSmall "encodeUtf16 . decodeUtf16" env $ \inh outh -> + copyStreamUtf16 inh outh + -} + ] ] main :: IO () @@ -344,16 +340,6 @@ main = do #ifndef FUSION_CHECK env <- mkHandleBenchEnv defaultMain (allBenchmarks env) - - where - - allBenchmarks env = - [ bgroup (o_1_space_prefix moduleName) $ Prelude.concat - [ o_1_space_copy_read_group_ungroup env - , o_1_space_decode_encode_chunked env - , o_1_space_decode_encode_read env - ] - ] #else -- Enable FUSION_CHECK macro at the beginning of the file -- Enable one benchmark below, and run the benchmark From 18b537264a1891610520501019baa08bb03a03d5 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Mon, 15 Jun 2026 23:51:50 +0530 Subject: [PATCH 04/24] Improve benchmark names in Unicode.Stream --- .../Streamly/Benchmark/Unicode/Stream.hs | 31 ++++++++++--------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/benchmark/Streamly/Benchmark/Unicode/Stream.hs b/benchmark/Streamly/Benchmark/Unicode/Stream.hs index c34e20f3d2..0911053c6b 100644 --- a/benchmark/Streamly/Benchmark/Unicode/Stream.hs +++ b/benchmark/Streamly/Benchmark/Unicode/Stream.hs @@ -294,42 +294,45 @@ allBenchmarks :: BenchEnv -> [Benchmark] allBenchmarks env = [ bgroup (o_1_space_prefix moduleName) [ -- XXX all these require @-fspec-constr-recursive=12@. - mkBenchSmall "unlines . splitOnSuffix ([Word8])" env + -- lines/unlines + mkBenchSmall "Unicode.unlines . Stream.splitOnSuffix ([Word8])" env $ \inh outh -> linesUnlinesCopy inh outh - , mkBenchSmall "interposeSuffix . splitOnSuffix (Array Word8)" env + , mkBenchSmall "Stream.unfoldEachEndBy . Stream.splitOnSuffix (Array Word8)" env $ \inh outh -> linesUnlinesArrayWord8Copy inh outh - , mkBenchSmall "UnicodeArr.unlines . UnicodeArr.lines (Array Char)" env + , mkBenchSmall "UnicodeArray.unlines . UnicodeArray.lines (Array Char)" env $ \inh outh -> linesUnlinesArrayCharCopy inh outh - , mkBenchSmall "interposeSuffix . wordsBy ([Word8])" env + -- words/unwords + , mkBenchSmall "Stream.unfoldEachEndBy . Stream.wordsBy ([Word8])" env $ \inh outh -> wordsUnwordsCopyWord8 inh outh - , mkBenchSmall "unwords . wordsBy ([Char])" env + , mkBenchSmall "Unicode.unwords . Stream.wordsBy ([Char])" env $ \inh outh -> wordsUnwordsCopy inh outh - , mkBenchSmall "UnicodeArr.unwords . UnicodeArr.words (Array Char)" env + , mkBenchSmall "UnicodeArray.unwords . UnicodeArray.words (Array Char)" env $ \inh outh -> wordsUnwordsCharArrayCopy inh outh - , mkBenchSmall "encodeUtf8' . decodeUtf8Arrays" env $ \inH outH -> + -- decode/encode + , mkBenchSmall "Handle.readChunks . Unicode.decodeUtf8Chunks . Unicode.encodeUtf8' (copy)" env $ \inH outH -> copyCodecUtf8ArraysLenient inH outH -- XXX all these require @-fspec-constr-recursive=12@. -- This needs an ascii file, as decode just errors out. - , mkBench "encodeLatin1' . decodeLatin1" env $ \inh outh -> + , mkBench "Unicode.encodeLatin1' . Unicode.decodeLatin1 (copy)" env $ \inh outh -> copyStreamLatin1' inh outh - , mkBench "encodeLatin1 . decodeLatin1" env $ \inh outh -> + , mkBench "Unicode.encodeLatin1 . Unicode.decodeLatin1 (copy)" env $ \inh outh -> copyStreamLatin1 inh outh #ifdef INCLUDE_STRICT_UTF8 -- Requires valid unicode input - , mkBench "encodeUtf8' . decodeUtf8'" env $ \inh outh -> + , mkBench "Unicode.encodeUtf8' . Unicode.decodeUtf8' (copy)" env $ \inh outh -> _copyStreamUtf8' inh outh - , mkBench "encodeUtf8' . foldMany writeCharUtf8'" env $ \inh outh -> + , mkBench "Unicode.encodeUtf8 . Stream.parseMany Unicode.writeCharUtf8' (copy)" env $ \inh outh -> _copyStreamUtf8'Fold inh outh #endif - , mkBenchSmall "encodeUtf8 . parseMany parseCharUtf8" env + , mkBenchSmall "Unicode.encodeUtf8 . Stream.parseMany Unicode.parseCharUtf8 (copy)" env $ \inh outh -> _copyStreamUtf8Parser inh outh - , mkBenchSmall "encodeUtf8 . decodeUtf8" env $ \inh outh -> + , mkBenchSmall "Unicode.encodeUtf8 . Unicode.decodeUtf8 (copy)" env $ \inh outh -> copyStreamUtf8 inh outh {- - , mkBenchSmall "encodeUtf16 . decodeUtf16" env $ \inh outh -> + , mkBenchSmall "Unicode.encodeUtf16le' . Unicode.decodeUtf16le (copy)" env $ \inh outh -> copyStreamUtf16 inh outh -} ] From fc7db6b349bba12d7a3807d6431e6faf0651ba60 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Tue, 16 Jun 2026 01:05:17 +0530 Subject: [PATCH 05/24] Move benchmarks from FileSystem.Handle to Unicode.Stream --- .../Benchmark/FileSystem/Handle/Read.hs | 84 ------------------- .../Streamly/Benchmark/Unicode/Stream.hs | 67 ++++++++++++++- 2 files changed, 64 insertions(+), 87 deletions(-) diff --git a/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs b/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs index 785c6c6898..279166ff48 100644 --- a/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs +++ b/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs @@ -33,10 +33,7 @@ import qualified Streamly.FileSystem.Handle as FH import qualified Streamly.Internal.Data.Array as A import qualified Streamly.Internal.Data.Fold as FL import qualified Streamly.Internal.Data.Stream as IP -import qualified Streamly.Internal.FileSystem.Handle as IFH -import qualified Streamly.Internal.Unicode.Stream as IUS import qualified Streamly.Data.Stream.Prelude as S -import qualified Streamly.Unicode.Stream as SS import Test.Tasty.Bench hiding (env) import Prelude hiding (last, length) @@ -68,7 +65,6 @@ inspect $ 'readLast `hasNoType` ''Producer.ConcatState -- FH.read/UF.many inspect $ 'readLast `hasNoType` ''MutArray.ArrayUnsafe -- FH.read/A.read #endif --- assert that flattenArrays constructors are not present -- | Count the number of bytes in a file. readCountBytes :: Handle -> IO Int readCountBytes = S.fold Fold.length . S.unfold FH.reader @@ -80,34 +76,6 @@ inspect $ 'readCountBytes `hasNoType` ''Producer.ConcatState -- FH.read/UF.many inspect $ 'readCountBytes `hasNoType` ''MutArray.ArrayUnsafe -- FH.read/A.read #endif --- | Count the number of lines in a file. -readCountLines :: Handle -> IO Int -readCountLines = - S.fold Fold.length - . IUS.lines FL.drain - . SS.decodeLatin1 - . S.unfold FH.reader - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'readCountLines -inspect $ 'readCountLines `hasNoType` ''Step -inspect $ 'readCountLines `hasNoType` ''Producer.ConcatState -- FH.read/UF.many -inspect $ 'readCountLines `hasNoType` ''MutArray.ArrayUnsafe -- FH.read/A.read -#endif - --- | Count the number of words in a file. -readCountWords :: Handle -> IO Int -readCountWords = - S.fold Fold.length - . IUS.words FL.drain - . SS.decodeLatin1 - . S.unfold FH.reader - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'readCountWords --- inspect $ 'readCountWords `hasNoType` ''Step -#endif - -- | Sum the bytes in a file. readSumBytes :: Handle -> IO Word8 readSumBytes = S.fold Fold.sum . S.unfold FH.reader @@ -132,44 +100,6 @@ inspect $ 'readSumBytes `hasNoType` ''MutArray.ArrayUnsafe -- FH.read/A.read readDrain :: Handle -> IO () readDrain inh = S.fold Fold.drain $ S.unfold FH.reader inh --- XXX investigate why we need an INLINE in this case (GHC) -{-# INLINE readDecodeLatin1 #-} -readDecodeLatin1 :: Handle -> IO () -readDecodeLatin1 inh = - S.fold Fold.drain - $ SS.decodeLatin1 - $ S.unfold FH.reader inh - -readDecodeUtf8 :: Handle -> IO () -readDecodeUtf8 inh = - S.fold Fold.drain - $ SS.decodeUtf8 - $ S.unfold FH.reader inh - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'readDecodeUtf8 --- inspect $ 'readDecodeUtf8Lax `hasNoType` ''Step -#endif - -------------------------------------------------------------------------------- --- stream toBytes -------------------------------------------------------------------------------- - --- | Count the number of lines in a file. -getChunksConcatUnfoldCountLines :: Handle -> IO Int -getChunksConcatUnfoldCountLines inh = - S.fold Fold.length - $ IUS.lines FL.drain - $ SS.decodeLatin1 - -- XXX replace with toBytes - $ S.unfoldEach A.reader (IFH.readChunks inh) - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'getChunksConcatUnfoldCountLines -inspect $ 'getChunksConcatUnfoldCountLines `hasNoType` ''Step -inspect $ 'getChunksConcatUnfoldCountLines `hasNoType` ''Producer.ConcatState -#endif - ------------------------------------------------------------------------------- -- reduce after grouping in chunks ------------------------------------------------------------------------------- @@ -228,22 +158,8 @@ allBenchmarks env = , mkBench "Fold.sum" env $ \inh _ -> readSumBytes inh - -- read with Latin1 decoding - , mkBench "Unicode.decodeLatin1" env $ \inh _ -> - readDecodeLatin1 inh , mkBench "Fold.length (wc -c)" env $ \inh _ -> readCountBytes inh - , mkBench "Unicode.lines . Unicode.decodeLatin1 (wc -l)" env $ \inh _ -> - readCountLines inh - , mkBench "Unicode.words . Unicode.decodeLatin1 (wc -w)" env $ \inh _ -> - readCountWords inh - - -- read with utf8 decoding - , mkBenchSmall "Unicode.decodeUtf8" env $ \inh _ -> - readDecodeUtf8 inh - - , mkBench "Handle.readChunks . Unicode.lines . Unicode.decodeLatin1 (wc -l)" env $ \inh _ -> - getChunksConcatUnfoldCountLines inh -- XXX all these require @-fspec-constr-recursive=12@. , mkBench ("Stream.groupsOf " ++ show (bigSize env) ++ " . Fold.sum") env $ diff --git a/benchmark/Streamly/Benchmark/Unicode/Stream.hs b/benchmark/Streamly/Benchmark/Unicode/Stream.hs index 0911053c6b..7ef6c2669c 100644 --- a/benchmark/Streamly/Benchmark/Unicode/Stream.hs +++ b/benchmark/Streamly/Benchmark/Unicode/Stream.hs @@ -290,12 +290,73 @@ _copyStreamUtf8Parser inh outh = (Unicode.parseCharUtf8With Unicode.TransliterateCodingFailure) $ Stream.unfold Handle.reader inh +------------------------------------------------------------------------------- +-- read and decode +------------------------------------------------------------------------------- + +-- XXX investigate why we need an INLINE in this case (GHC) +{-# INLINE readDecodeLatin1 #-} +readDecodeLatin1 :: Handle -> IO () +readDecodeLatin1 inh = + Stream.fold Fold.drain + $ Unicode.decodeLatin1 + $ Handle.read inh + +readCountLines :: Handle -> IO Int +readCountLines = + Stream.fold Fold.length + . Unicode.lines Fold.drain + . Unicode.decodeLatin1 + . Handle.read + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'readCountLines +inspect $ 'readCountLines `hasNoType` ''Step +inspect $ 'readCountLines `hasNoType` ''Producer.ConcatState -- Handle.read/UF.many +inspect $ 'readCountLines `hasNoType` ''MutArray.ArrayUnsafe -- Handle.read/Array.read +#endif + +readCountWords :: Handle -> IO Int +readCountWords = + Stream.fold Fold.length + . Unicode.words Fold.drain + . Unicode.decodeLatin1 + . Handle.read + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'readCountWords +-- inspect $ 'readCountWords `hasNoType` ''Step +#endif + +readDecodeUtf8 :: Handle -> IO () +readDecodeUtf8 inh = + Stream.fold Fold.drain + $ Unicode.decodeUtf8 + $ Handle.read inh + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'readDecodeUtf8 +-- inspect $ 'readDecodeUtf8Lax `hasNoType` ''Step +#endif + allBenchmarks :: BenchEnv -> [Benchmark] allBenchmarks env = [ bgroup (o_1_space_prefix moduleName) - [ -- XXX all these require @-fspec-constr-recursive=12@. + [ -- read with Latin1 decoding + mkBench "Unicode.decodeLatin1 (read file)" env + $ \inh _ -> readDecodeLatin1 inh + , mkBench "Unicode.lines . Unicode.decodeLatin1 (wc -l)" env + $ \inh _ -> readCountLines inh + , mkBench "Unicode.words . Unicode.decodeLatin1 (wc -w)" env + $ \inh _ -> readCountWords inh + + -- read with utf8 decoding + , mkBenchSmall "Unicode.decodeUtf8" env $ \inh _ -> + readDecodeUtf8 inh + + -- XXX all these require @-fspec-constr-recursive=12@. -- lines/unlines - mkBenchSmall "Unicode.unlines . Stream.splitOnSuffix ([Word8])" env + , mkBenchSmall "Unicode.unlines . Stream.splitOnSuffix ([Word8])" env $ \inh outh -> linesUnlinesCopy inh outh , mkBenchSmall "Stream.unfoldEachEndBy . Stream.splitOnSuffix (Array Word8)" env $ \inh outh -> linesUnlinesArrayWord8Copy inh outh @@ -311,7 +372,7 @@ allBenchmarks env = $ \inh outh -> wordsUnwordsCharArrayCopy inh outh -- decode/encode - , mkBenchSmall "Handle.readChunks . Unicode.decodeUtf8Chunks . Unicode.encodeUtf8' (copy)" env $ \inH outH -> + , mkBenchSmall "Unicode.decodeUtf8Chunks . Unicode.encodeUtf8' (copy)" env $ \inH outH -> copyCodecUtf8ArraysLenient inH outH -- XXX all these require @-fspec-constr-recursive=12@. From b8870a5820897d8ed4633a9dae2f105fbac31392 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Tue, 16 Jun 2026 02:31:43 +0530 Subject: [PATCH 06/24] Move the cross module benchmarks to CrossModule suite --- benchmark/Streamly/Benchmark/CrossModule.hs | 190 ++++++++++++++++++ .../Benchmark/FileSystem/Handle/Read.hs | 155 +------------- benchmark/streamly-benchmarks.cabal | 6 + targets/Targets.hs | 1 + 4 files changed, 199 insertions(+), 153 deletions(-) create mode 100644 benchmark/Streamly/Benchmark/CrossModule.hs diff --git a/benchmark/Streamly/Benchmark/CrossModule.hs b/benchmark/Streamly/Benchmark/CrossModule.hs new file mode 100644 index 0000000000..995969fe61 --- /dev/null +++ b/benchmark/Streamly/Benchmark/CrossModule.hs @@ -0,0 +1,190 @@ +-- +-- Module : Streamly.Benchmark.CrossModule +-- Copyright : (c) 2019 Composewell Technologies +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com +-- Stability : experimental +-- Portability : GHC +-- +-- Benchmarks that combine operations from multiple library modules, primarily +-- to test fusion and performance across module boundaries (e.g. file I/O +-- fused with stream folds and chunking operations). + +{-# LANGUAGE CPP #-} +{-# LANGUAGE ScopedTypeVariables #-} + +#ifdef __HADDOCK_VERSION__ +#undef INSPECTION +#endif + +#ifdef INSPECTION +{-# LANGUAGE TemplateHaskell #-} +{-# OPTIONS_GHC -fplugin Test.Inspection.Plugin #-} +#endif + +import Data.Word (Word8) +import GHC.Magic (inline) +import GHC.Magic (noinline) +import System.IO (Handle) + +import qualified Streamly.Data.Fold as Fold +import qualified Streamly.FileSystem.Handle as FH +import qualified Streamly.Internal.Data.Array as A +import qualified Streamly.Internal.Data.Fold as FL +import qualified Streamly.Internal.Data.Stream as IP +import qualified Streamly.Data.Stream.Prelude as S + +import Test.Tasty.Bench hiding (env) +import Prelude hiding (last, length) +import Streamly.Benchmark.Common +import Streamly.Benchmark.Common.Handle + +#ifdef INSPECTION +import Streamly.Internal.Data.Stream (Step(..), FoldMany) + +import qualified Streamly.Internal.Data.MutArray as MutArray +import qualified Streamly.Internal.Data.Producer as Producer + +import Test.Inspection +#endif + +moduleName :: String +moduleName = "CrossModule" + +------------------------------------------------------------------------------- +-- Handle read +------------------------------------------------------------------------------- + +-- | Get the last byte from a file bytestream. +readLast :: Handle -> IO (Maybe Word8) +readLast = S.fold Fold.latest . S.unfold FH.reader + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'readLast +inspect $ 'readLast `hasNoType` ''Step -- S.unfold +inspect $ 'readLast `hasNoType` ''Producer.ConcatState -- FH.read/UF.many +inspect $ 'readLast `hasNoType` ''MutArray.ArrayUnsafe -- FH.read/A.read +#endif + +-- | Count the number of bytes in a file. +readCountBytes :: Handle -> IO Int +readCountBytes = S.fold Fold.length . S.unfold FH.reader + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'readCountBytes +inspect $ 'readCountBytes `hasNoType` ''Step -- S.unfold +inspect $ 'readCountBytes `hasNoType` ''Producer.ConcatState -- FH.read/UF.many +inspect $ 'readCountBytes `hasNoType` ''MutArray.ArrayUnsafe -- FH.read/A.read +#endif + +-- | Sum the bytes in a file. +readSumBytes :: Handle -> IO Word8 +readSumBytes = S.fold Fold.sum . S.unfold FH.reader + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'readSumBytes +inspect $ 'readSumBytes `hasNoType` ''Step +inspect $ 'readSumBytes `hasNoType` ''Producer.ConcatState -- FH.read/UF.many +inspect $ 'readSumBytes `hasNoType` ''MutArray.ArrayUnsafe -- FH.read/A.read +#endif + +------------------------------------------------------------------------------- +-- reduce after grouping in chunks +------------------------------------------------------------------------------- + +chunksOfSum :: Int -> Handle -> IO Int +chunksOfSum n inh = + S.fold Fold.length $ IP.groupsOf n FL.sum (S.unfold FH.reader inh) + +foldMany1ChunksOfSum :: Int -> Handle -> IO Int +foldMany1ChunksOfSum n inh = + S.fold Fold.length + $ IP.foldManyPost (FL.take n FL.sum) (S.unfold FH.reader inh) + +foldManyChunksOfSum :: Int -> Handle -> IO Int +foldManyChunksOfSum n inh = + S.fold Fold.length + $ IP.foldMany (FL.take n FL.sum) (S.unfold FH.reader inh) + +-- XXX investigate why we need an INLINE in this case (GHC) +-- Even though allocations remain the same in both cases inlining improves time +-- by 4x. +-- | Slice in chunks of size n and get the count of chunks. +{-# INLINE groupsOf #-} +groupsOf :: Int -> Handle -> IO Int +groupsOf n inh = + -- writeNUnsafe gives 2.5x boost here over writeN. + S.fold Fold.length + $ IP.groupsOf n (A.unsafeCreateOf n) (S.unfold FH.reader inh) + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'groupsOf +inspect $ 'groupsOf `hasNoType` ''Step +inspect $ 'groupsOf `hasNoType` ''FoldMany +inspect $ 'groupsOf `hasNoType` ''MutArray.ArrayUnsafe -- AT.writeNUnsafe + -- FH.read/A.read +inspect $ 'groupsOf `hasNoType` ''Producer.ConcatState -- FH.read/UF.many +#endif + +{-# INLINE chunksOf #-} +chunksOf :: Int -> Handle -> IO Int +chunksOf n inh = + S.fold Fold.length $ A.chunksOf n (S.unfold FH.reader inh) + +allBenchmarks :: BenchEnv -> [Benchmark] +allBenchmarks env = + [ bgroup (o_1_space_prefix moduleName) + [ mkBench "Fold.latest" env $ \inh _ -> + readLast inh + , mkBench "Fold.sum" env $ \inh _ -> + readSumBytes inh + , mkBench "Fold.length (wc -c)" env $ \inh _ -> + readCountBytes inh + + -- XXX all these require @-fspec-constr-recursive=12@. + , mkBench ("Stream.groupsOf " ++ show (bigSize env) ++ " . Fold.sum") env $ + \inh _ -> + chunksOfSum (bigSize env) inh + , mkBench "Stream.groupsOf 1 . Fold.sum" env $ \inh _ -> + chunksOfSum 1 inh + + -- XXX investigate why we need inline/noinline in these cases (GHC) + , mkBench + ("Stream.foldManyPost " ++ show (bigSize env) ++ " . Fold.sum") + env + $ \inh _ -> noinline foldMany1ChunksOfSum (bigSize env) inh + , mkBench + "Stream.foldManyPost 1 . Fold.sum" + env + $ \inh _ -> inline foldMany1ChunksOfSum 1 inh + , mkBench + ("Stream.foldMany " ++ show (bigSize env) ++ " . Fold.sum") + env + $ \inh _ -> noinline foldManyChunksOfSum (bigSize env) inh + , mkBench + "Stream.foldMany 1 . Fold.sum" + env + $ \inh _ -> inline foldManyChunksOfSum 1 inh + + -- folding chunks to arrays + , mkBenchSmall "Stream.groupsOf 1 . Array.unsafeCreateOf" env $ \inh _ -> + groupsOf 1 inh + , mkBench "Stream.groupsOf 10 . Array.unsafeCreateOf" env $ \inh _ -> + groupsOf 10 inh + , mkBench "Stream.groupsOf 1000 . Array.unsafeCreateOf" env $ \inh _ -> + groupsOf 1000 inh + + -- chunksOf may use a different impl than groupsOf + , mkBenchSmall "Array.chunksOf 1" env $ \inh _ -> + chunksOf 1 inh + , mkBench "Array.chunksOf 10" env $ \inh _ -> + chunksOf 10 inh + , mkBench "Array.chunksOf 1000" env $ \inh _ -> + chunksOf 1000 inh + ] + ] + +main :: IO () +main = do + env <- mkHandleBenchEnv + defaultMain (allBenchmarks env) diff --git a/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs b/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs index 279166ff48..5228688261 100644 --- a/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs +++ b/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs @@ -10,83 +10,26 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE ScopedTypeVariables #-} -#ifdef __HADDOCK_VERSION__ -#undef INSPECTION -#endif - -#ifdef INSPECTION -{-# LANGUAGE TemplateHaskell #-} -{-# OPTIONS_GHC -fplugin Test.Inspection.Plugin #-} -#endif - module Handle.Read (allBenchmarks) where -import Data.Word (Word8) -import GHC.Magic (inline) -import GHC.Magic (noinline) import System.IO (Handle) import qualified Streamly.Data.Fold as Fold import qualified Streamly.FileSystem.Handle as FH -import qualified Streamly.Internal.Data.Array as A -import qualified Streamly.Internal.Data.Fold as FL -import qualified Streamly.Internal.Data.Stream as IP import qualified Streamly.Data.Stream.Prelude as S import Test.Tasty.Bench hiding (env) import Prelude hiding (last, length) import Streamly.Benchmark.Common.Handle -#ifdef INSPECTION -import Streamly.Internal.Data.Stream (Step(..), FoldMany) - -import qualified Streamly.Internal.Data.MutArray as MutArray -import qualified Streamly.Internal.Data.Producer as Producer - -import Test.Inspection -#endif - -- TBD reading with unfold ------------------------------------------------------------------------------- -- unfold read ------------------------------------------------------------------------------- --- | Get the last byte from a file bytestream. -readLast :: Handle -> IO (Maybe Word8) -readLast = S.fold Fold.latest . S.unfold FH.reader - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'readLast -inspect $ 'readLast `hasNoType` ''Step -- S.unfold -inspect $ 'readLast `hasNoType` ''Producer.ConcatState -- FH.read/UF.many -inspect $ 'readLast `hasNoType` ''MutArray.ArrayUnsafe -- FH.read/A.read -#endif - --- | Count the number of bytes in a file. -readCountBytes :: Handle -> IO Int -readCountBytes = S.fold Fold.length . S.unfold FH.reader - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'readCountBytes -inspect $ 'readCountBytes `hasNoType` ''Step -- S.unfold -inspect $ 'readCountBytes `hasNoType` ''Producer.ConcatState -- FH.read/UF.many -inspect $ 'readCountBytes `hasNoType` ''MutArray.ArrayUnsafe -- FH.read/A.read -#endif - --- | Sum the bytes in a file. -readSumBytes :: Handle -> IO Word8 -readSumBytes = S.fold Fold.sum . S.unfold FH.reader - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'readSumBytes -inspect $ 'readSumBytes `hasNoType` ''Step -inspect $ 'readSumBytes `hasNoType` ''Producer.ConcatState -- FH.read/UF.many -inspect $ 'readSumBytes `hasNoType` ''MutArray.ArrayUnsafe -- FH.read/A.read -#endif - -- XXX When we mark this with INLINE and we have two benchmarks using S.drain -- in one benchmark group then somehow GHC ends up delaying the inlining of -- readDrain. Since S.drain has an INLINE[2] for proper rule firing, that does @@ -100,105 +43,11 @@ inspect $ 'readSumBytes `hasNoType` ''MutArray.ArrayUnsafe -- FH.read/A.read readDrain :: Handle -> IO () readDrain inh = S.fold Fold.drain $ S.unfold FH.reader inh -------------------------------------------------------------------------------- --- reduce after grouping in chunks -------------------------------------------------------------------------------- - -chunksOfSum :: Int -> Handle -> IO Int -chunksOfSum n inh = - S.fold Fold.length $ IP.groupsOf n FL.sum (S.unfold FH.reader inh) - -foldMany1ChunksOfSum :: Int -> Handle -> IO Int -foldMany1ChunksOfSum n inh = - S.fold Fold.length - $ IP.foldManyPost (FL.take n FL.sum) (S.unfold FH.reader inh) - -foldManyChunksOfSum :: Int -> Handle -> IO Int -foldManyChunksOfSum n inh = - S.fold Fold.length - $ IP.foldMany (FL.take n FL.sum) (S.unfold FH.reader inh) - --- XXX investigate why we need an INLINE in this case (GHC) --- Even though allocations remain the same in both cases inlining improves time --- by 4x. --- | Slice in chunks of size n and get the count of chunks. -{-# INLINE groupsOf #-} -groupsOf :: Int -> Handle -> IO Int -groupsOf n inh = - -- writeNUnsafe gives 2.5x boost here over writeN. - S.fold Fold.length - $ IP.groupsOf n (A.unsafeCreateOf n) (S.unfold FH.reader inh) - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'groupsOf -inspect $ 'groupsOf `hasNoType` ''Step -inspect $ 'groupsOf `hasNoType` ''FoldMany -inspect $ 'groupsOf `hasNoType` ''MutArray.ArrayUnsafe -- AT.writeNUnsafe - -- FH.read/A.read -inspect $ 'groupsOf `hasNoType` ''Producer.ConcatState -- FH.read/UF.many -#endif - -{-# INLINE chunksOf #-} -chunksOf :: Int -> Handle -> IO Int -chunksOf n inh = - S.fold Fold.length $ A.chunksOf n (S.unfold FH.reader inh) - --- Benchmarks a mix of file reading and stream operations. The purpose is --- twofold: (1) verify that file read operations fuse with downstream stream --- operations, and (2) measure end-to-end performance of common real-world --- pipelines (e.g. line count, word count) that combine file I/O with stream --- processing. +-- Benchmarks file reading fused with a trivial drain fold, primarily to +-- measure the raw read throughput of FH.reader and verify fusion. allBenchmarks :: BenchEnv -> [Benchmark] allBenchmarks env = -- read raw bytes without any decoding [ mkBench "Fold.drain" env $ \inh _ -> readDrain inh - , mkBench "Fold.latest" env $ \inh _ -> - readLast inh - , mkBench "Fold.sum" env $ \inh _ -> - readSumBytes inh - - , mkBench "Fold.length (wc -c)" env $ \inh _ -> - readCountBytes inh - - -- XXX all these require @-fspec-constr-recursive=12@. - , mkBench ("Stream.groupsOf " ++ show (bigSize env) ++ " . Fold.sum") env $ - \inh _ -> - chunksOfSum (bigSize env) inh - , mkBench "Stream.groupsOf 1 . Fold.sum" env $ \inh _ -> - chunksOfSum 1 inh - - -- XXX investigate why we need inline/noinline in these cases (GHC) - , mkBench - ("Stream.foldManyPost " ++ show (bigSize env) ++ " . Fold.sum") - env - $ \inh _ -> noinline foldMany1ChunksOfSum (bigSize env) inh - , mkBench - "Stream.foldManyPost 1 . Fold.sum" - env - $ \inh _ -> inline foldMany1ChunksOfSum 1 inh - , mkBench - ("Stream.foldMany " ++ show (bigSize env) ++ " . Fold.sum") - env - $ \inh _ -> noinline foldManyChunksOfSum (bigSize env) inh - , mkBench - "Stream.foldMany 1 . Fold.sum" - env - $ \inh _ -> inline foldManyChunksOfSum 1 inh - - -- folding chunks to arrays - , mkBenchSmall "Stream.groupsOf 1 . Array.unsafeCreateOf" env $ \inh _ -> - groupsOf 1 inh - , mkBench "Stream.groupsOf 10 . Array.unsafeCreateOf" env $ \inh _ -> - groupsOf 10 inh - , mkBench "Stream.groupsOf 1000 . Array.unsafeCreateOf" env $ \inh _ -> - groupsOf 1000 inh - - -- chunksOf may use a different impl than groupsOf - , mkBenchSmall "Array.chunksOf 1" env $ \inh _ -> - chunksOf 1 inh - , mkBench "Array.chunksOf 10" env $ \inh _ -> - chunksOf 10 inh - , mkBench "Array.chunksOf 1000" env $ \inh _ -> - chunksOf 1000 inh ] diff --git a/benchmark/streamly-benchmarks.cabal b/benchmark/streamly-benchmarks.cabal index 7d2219c56c..e84cbdd470 100644 --- a/benchmark/streamly-benchmarks.cabal +++ b/benchmark/streamly-benchmarks.cabal @@ -622,6 +622,12 @@ benchmark Data.Unfold.Prelude else buildable: True +benchmark CrossModule + import: bench-options + type: exitcode-stdio-1.0 + hs-source-dirs: Streamly/Benchmark + main-is: CrossModule.hs + benchmark FileSystem.DirIO import: bench-options type: exitcode-stdio-1.0 diff --git a/targets/Targets.hs b/targets/Targets.hs index 13741f42de..da817730cc 100644 --- a/targets/Targets.hs +++ b/targets/Targets.hs @@ -225,6 +225,7 @@ targets = , "noTest" ] ) + , ("CrossModule", ["noTest"]) , ("FileSystem.DirIO", []) , ("FileSystem.Event", [ "noBench" From bb85d6f2ec6f2cb10c7764878d091457cc4cd836 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Tue, 16 Jun 2026 02:54:09 +0530 Subject: [PATCH 07/24] Consolidate FileSystem.Handle benchmark files --- .../Streamly/Benchmark/FileSystem/Handle.hs | 193 ++++++++++++++++-- .../Benchmark/FileSystem/Handle/Read.hs | 53 ----- .../Benchmark/FileSystem/Handle/ReadWrite.hs | 189 ----------------- benchmark/streamly-benchmarks.cabal | 3 - streamly.cabal | 1 - 5 files changed, 181 insertions(+), 258 deletions(-) delete mode 100644 benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs delete mode 100644 benchmark/Streamly/Benchmark/FileSystem/Handle/ReadWrite.hs diff --git a/benchmark/Streamly/Benchmark/FileSystem/Handle.hs b/benchmark/Streamly/Benchmark/FileSystem/Handle.hs index 9dee2782fd..85f9756810 100644 --- a/benchmark/Streamly/Benchmark/FileSystem/Handle.hs +++ b/benchmark/Streamly/Benchmark/FileSystem/Handle.hs @@ -17,14 +17,163 @@ {-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fplugin Test.Inspection.Plugin #-} #endif -import Streamly.Benchmark.Common.Handle (mkHandleBenchEnv) -import qualified Handle.ReadWrite as RW -import qualified Handle.Read as RO +import System.IO (Handle) +import Streamly.Internal.System.IO (defaultChunkSize) + +import qualified Streamly.Data.Array as A +import qualified Streamly.Data.Fold as Fold +import qualified Streamly.Data.Stream.Prelude as S +import qualified Streamly.FileSystem.Handle as FH +import qualified Streamly.Internal.Data.Unfold as IUF +import qualified Streamly.Internal.FileSystem.Handle as IFH import Test.Tasty.Bench hiding (env) import Prelude hiding (last, length) import Streamly.Benchmark.Common +import Streamly.Benchmark.Common.Handle + +#ifdef INSPECTION +import Streamly.Internal.Data.Stream (Step(..)) + +import qualified Streamly.Internal.Data.Producer as Producer +import qualified Streamly.Internal.Data.Stream as D +import qualified Streamly.Internal.Data.Tuple.Strict as Strict +import qualified Streamly.Internal.Data.MutArray as MutArray + +import Test.Inspection +#endif + +------------------------------------------------------------------------------- +-- Handle read +------------------------------------------------------------------------------- + +-- XXX When we mark this with INLINE and we have two benchmarks using S.drain +-- in one benchmark group then somehow GHC ends up delaying the inlining of +-- readDrain. Since S.drain has an INLINE[2] for proper rule firing, that does +-- not work well because of delyaed inlining and the code does not fuse. We +-- need some way of propagating the inline phase information up so that we can +-- expedite inlining of the callers too automatically. The minimal example for +-- the problem can be created by using just two benchmarks in a bench group +-- both using "readDrain". Either GHC should be fixed or we can use +-- fusion-plugin to propagate INLINE phase information such that this problem +-- does not occur. +readDrain :: Handle -> IO () +readDrain inh = S.fold Fold.drain $ S.unfold FH.reader inh + +------------------------------------------------------------------------------- +-- copy chunked +------------------------------------------------------------------------------- + +-- | Copy file +copyChunks :: Handle -> Handle -> IO () +copyChunks inh outh = S.fold (IFH.writeChunks outh) $ IFH.readChunks inh + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'copyChunks +inspect $ 'copyChunks `hasNoType` ''Step +#endif + +------------------------------------------------------------------------------- +-- copy unfold +------------------------------------------------------------------------------- + +-- | Copy file +copyStream :: Handle -> Handle -> IO () +copyStream inh outh = S.fold (FH.write outh) (S.unfold FH.reader inh) + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'copyStream +inspect $ 'copyStream `hasNoType` ''Step -- S.unfold +inspect $ 'copyStream `hasNoType` ''Producer.ConcatState -- FH.read/UF.many +inspect $ 'copyStream `hasNoType` ''MutArray.ArrayUnsafe -- FH.write/writeNUnsafe + -- FH.read/A.read +inspect $ 'copyStream `hasNoType` ''Strict.Tuple3' -- FH.write/chunksOf +#endif + +------------------------------------------------------------------------------- +-- copy stream +------------------------------------------------------------------------------- + +-- | Send the file contents to /dev/null +readFromBytesNull :: Handle -> Handle -> IO () +readFromBytesNull inh devNull = IFH.putBytes devNull $ S.unfold FH.reader inh + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'readFromBytesNull +inspect $ 'readFromBytesNull `hasNoType` ''Step +inspect $ 'readFromBytesNull `hasNoType` ''MutArray.SpliceState +inspect $ 'readFromBytesNull `hasNoType` ''MutArray.ArrayUnsafe -- FH.fromBytes/S.chunksOf +inspect $ 'readFromBytesNull `hasNoType` ''D.FoldMany +#endif + +-- | Send the file contents ('defaultChunkSize') to /dev/null +readWithFromBytesNull :: Handle -> Handle -> IO () +readWithFromBytesNull inh devNull = + IFH.putBytes devNull + $ S.unfold FH.readerWith (defaultChunkSize, inh) + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'readWithFromBytesNull +inspect $ 'readWithFromBytesNull `hasNoType` ''Step +inspect $ 'readWithFromBytesNull `hasNoType` ''MutArray.SpliceState +inspect $ 'readWithFromBytesNull `hasNoType` ''MutArray.ArrayUnsafe -- FH.fromBytes/S.chunksOf +inspect $ 'readWithFromBytesNull `hasNoType` ''D.FoldMany +#endif + +-- | Send the chunk content ('defaultChunkSize') to /dev/null +-- Implicitly benchmarked via 'readFromBytesNull' +_readChunks :: Handle -> Handle -> IO () +_readChunks inh devNull = IUF.fold fld unf inh + + where + + fld = FH.write devNull + unf = IUF.unfoldEach A.reader FH.chunkReader + +-- | Send the chunk content to /dev/null +-- Implicitly benchmarked via 'readWithFromBytesNull' +_readChunksWith :: Handle -> Handle -> IO () +_readChunksWith inh devNull = IUF.fold fld unf (defaultChunkSize, inh) + + where + + fld = FH.write devNull + unf = IUF.unfoldEach A.reader FH.chunkReaderWith + +-- | Send the file contents ('defaultChunkSize') to /dev/null +writeReadWith :: Handle -> Handle -> IO () +writeReadWith inh devNull = IUF.fold fld unf (defaultChunkSize, inh) + + where + + fld = FH.writeWith defaultChunkSize devNull + unf = FH.readerWith + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'writeReadWith +inspect $ 'writeReadWith `hasNoType` ''Step +inspect $ 'writeReadWith `hasNoType` ''Producer.ConcatState -- FH.read/UF.many +inspect $ 'writeReadWith `hasNoType` ''MutArray.ArrayUnsafe -- FH.write/writeNUnsafe + -- FH.read/A.read +#endif + +-- | Send the file contents ('AT.defaultChunkSize') to /dev/null +writeRead :: Handle -> Handle -> IO () +writeRead inh devNull = IUF.fold fld unf inh + + where + + fld = FH.write devNull + unf = FH.reader + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'writeRead +inspect $ 'writeRead `hasNoType` ''Step +inspect $ 'writeRead `hasNoType` ''Producer.ConcatState -- FH.read/UF.many +inspect $ 'writeRead `hasNoType` ''MutArray.ArrayUnsafe -- FH.write/writeNUnsafe + -- FH.read/A.read +#endif ------------------------------------------------------------------------------- -- @@ -33,6 +182,33 @@ import Streamly.Benchmark.Common moduleName :: String moduleName = "FileSystem.Handle" +allBenchmarks :: BenchEnv -> [Benchmark] +allBenchmarks env = + -- read raw bytes without any decoding + [ mkBench "Fold.drain" env $ \inh _ -> + readDrain inh + + , mkBench "Handle.readChunks . Handle.writeChunks" env $ \inH _ -> + copyChunks inH (nullH env) + , mkBench "Handle.readChunks . Handle.writeChunks (cat)" env $ \inH outH -> + copyChunks inH outH + + , mkBench "Handle.reader . Handle.write" env $ \inh _ -> + copyStream inh (nullH env) + , mkBench "Handle.reader . Handle.write (cat)" env $ \inh outh -> + copyStream inh outh + + , mkBench "Handle.reader . Handle.putBytes" env $ \inh _ -> + readFromBytesNull inh (nullH env) + , mkBench "Handle.readerWith . Handle.putBytes" env $ \inh _ -> + readWithFromBytesNull inh (nullH env) + + , mkBench "Handle.reader . Handle.write (Unfold.fold)" env $ \inh _ -> + writeRead inh (nullH env) + , mkBench "Handle.readerWith . Handle.writeWith (Unfold.fold)" env $ \inh _ -> + writeReadWith inh (nullH env) + ] + ------------------------------------------------------------------------------- -- ------------------------------------------------------------------------------- @@ -40,13 +216,6 @@ moduleName = "FileSystem.Handle" main :: IO () main = do env <- mkHandleBenchEnv - defaultMain (allBenchmarks env) - - where - - allBenchmarks env = - [ bgroup (o_1_space_prefix moduleName) $ Prelude.concat - [ RO.allBenchmarks env - , RW.allBenchmarks env - ] + defaultMain + [ bgroup (o_1_space_prefix moduleName) (allBenchmarks env) ] diff --git a/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs b/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs deleted file mode 100644 index 5228688261..0000000000 --- a/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs +++ /dev/null @@ -1,53 +0,0 @@ - --- | --- Module : Streamly.Benchmark.FileSystem.Handle --- Copyright : (c) 2019 Composewell Technologies --- License : BSD-3-Clause --- Maintainer : streamly@composewell.com --- Stability : experimental --- Portability : GHC - -{-# LANGUAGE CPP #-} -{-# LANGUAGE ScopedTypeVariables #-} - -module Handle.Read - (allBenchmarks) -where - -import System.IO (Handle) - -import qualified Streamly.Data.Fold as Fold -import qualified Streamly.FileSystem.Handle as FH -import qualified Streamly.Data.Stream.Prelude as S - -import Test.Tasty.Bench hiding (env) -import Prelude hiding (last, length) -import Streamly.Benchmark.Common.Handle - --- TBD reading with unfold - -------------------------------------------------------------------------------- --- unfold read -------------------------------------------------------------------------------- - --- XXX When we mark this with INLINE and we have two benchmarks using S.drain --- in one benchmark group then somehow GHC ends up delaying the inlining of --- readDrain. Since S.drain has an INLINE[2] for proper rule firing, that does --- not work well because of delyaed inlining and the code does not fuse. We --- need some way of propagating the inline phase information up so that we can --- expedite inlining of the callers too automatically. The minimal example for --- the problem can be created by using just two benchmarks in a bench group --- both using "readDrain". Either GHC should be fixed or we can use --- fusion-plugin to propagate INLINE phase information such that this problem --- does not occur. -readDrain :: Handle -> IO () -readDrain inh = S.fold Fold.drain $ S.unfold FH.reader inh - --- Benchmarks file reading fused with a trivial drain fold, primarily to --- measure the raw read throughput of FH.reader and verify fusion. -allBenchmarks :: BenchEnv -> [Benchmark] -allBenchmarks env = - -- read raw bytes without any decoding - [ mkBench "Fold.drain" env $ \inh _ -> - readDrain inh - ] diff --git a/benchmark/Streamly/Benchmark/FileSystem/Handle/ReadWrite.hs b/benchmark/Streamly/Benchmark/FileSystem/Handle/ReadWrite.hs deleted file mode 100644 index ce4c6c5472..0000000000 --- a/benchmark/Streamly/Benchmark/FileSystem/Handle/ReadWrite.hs +++ /dev/null @@ -1,189 +0,0 @@ - --- | --- Module : Streamly.Benchmark.FileSystem.Handle --- Copyright : (c) 2019 Composewell Technologies --- License : BSD3-3-Clause --- Maintainer : streamly@composewell.com --- Stability : experimental --- Portability : GHC - -{-# LANGUAGE CPP #-} -{-# LANGUAGE ScopedTypeVariables #-} - -#ifdef __HADDOCK_VERSION__ -#undef INSPECTION -#endif - -#ifdef INSPECTION -{-# LANGUAGE TemplateHaskell #-} -{-# OPTIONS_GHC -fplugin Test.Inspection.Plugin #-} -#endif - -module Handle.ReadWrite - (allBenchmarks) -where - -import System.IO (Handle) -import Prelude hiding (last, length) -import Streamly.Internal.System.IO (defaultChunkSize) - -import qualified Streamly.FileSystem.Handle as FH -import qualified Streamly.Internal.Data.Unfold as IUF -import qualified Streamly.Internal.FileSystem.Handle as IFH -import qualified Streamly.Data.Array as A -import qualified Streamly.Data.Stream.Prelude as S - -import Test.Tasty.Bench hiding (env) -import Streamly.Benchmark.Common.Handle - -#ifdef INSPECTION -import Streamly.Internal.Data.Stream (Step(..)) - -import qualified Streamly.Internal.Data.Producer as Producer -import qualified Streamly.Internal.Data.Stream as D -import qualified Streamly.Internal.Data.Tuple.Strict as Strict -import qualified Streamly.Internal.Data.MutArray as MutArray - -import Test.Inspection -#endif - -------------------------------------------------------------------------------- --- copy chunked -------------------------------------------------------------------------------- - --- | Copy file -copyChunks :: Handle -> Handle -> IO () -copyChunks inh outh = S.fold (IFH.writeChunks outh) $ IFH.readChunks inh - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'copyChunks -inspect $ 'copyChunks `hasNoType` ''Step -#endif - -------------------------------------------------------------------------------- --- copy unfold -------------------------------------------------------------------------------- - --- | Copy file -copyStream :: Handle -> Handle -> IO () -copyStream inh outh = S.fold (FH.write outh) (S.unfold FH.reader inh) - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'copyStream -inspect $ 'copyStream `hasNoType` ''Step -- S.unfold -inspect $ 'copyStream `hasNoType` ''Producer.ConcatState -- FH.read/UF.many -inspect $ 'copyStream `hasNoType` ''MutArray.ArrayUnsafe -- FH.write/writeNUnsafe - -- FH.read/A.read -inspect $ 'copyStream `hasNoType` ''Strict.Tuple3' -- FH.write/chunksOf -#endif - -------------------------------------------------------------------------------- --- copy stream -------------------------------------------------------------------------------- - --- | Send the file contents to /dev/null -readFromBytesNull :: Handle -> Handle -> IO () -readFromBytesNull inh devNull = IFH.putBytes devNull $ S.unfold FH.reader inh - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'readFromBytesNull -inspect $ 'readFromBytesNull `hasNoType` ''Step -inspect $ 'readFromBytesNull `hasNoType` ''MutArray.SpliceState -inspect $ 'readFromBytesNull `hasNoType` ''MutArray.ArrayUnsafe -- FH.fromBytes/S.chunksOf -inspect $ 'readFromBytesNull `hasNoType` ''D.FoldMany -#endif - --- | Send the file contents ('defaultChunkSize') to /dev/null -readWithFromBytesNull :: Handle -> Handle -> IO () -readWithFromBytesNull inh devNull = - IFH.putBytes devNull - $ S.unfold FH.readerWith (defaultChunkSize, inh) - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'readWithFromBytesNull -inspect $ 'readWithFromBytesNull `hasNoType` ''Step -inspect $ 'readWithFromBytesNull `hasNoType` ''MutArray.SpliceState -inspect $ 'readWithFromBytesNull `hasNoType` ''MutArray.ArrayUnsafe -- FH.fromBytes/S.chunksOf -inspect $ 'readWithFromBytesNull `hasNoType` ''D.FoldMany -#endif - --- | Send the chunk content ('defaultChunkSize') to /dev/null --- Implicitly benchmarked via 'readFromBytesNull' -_readChunks :: Handle -> Handle -> IO () -_readChunks inh devNull = IUF.fold fld unf inh - - where - - fld = FH.write devNull - unf = IUF.unfoldEach A.reader FH.chunkReader - --- | Send the chunk content to /dev/null --- Implicitly benchmarked via 'readWithFromBytesNull' -_readChunksWith :: Handle -> Handle -> IO () -_readChunksWith inh devNull = IUF.fold fld unf (defaultChunkSize, inh) - - where - - fld = FH.write devNull - unf = IUF.unfoldEach A.reader FH.chunkReaderWith - --- | Send the file contents ('defaultChunkSize') to /dev/null -writeReadWith :: Handle -> Handle -> IO () -writeReadWith inh devNull = IUF.fold fld unf (defaultChunkSize, inh) - - where - - fld = FH.writeWith defaultChunkSize devNull - unf = FH.readerWith - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'writeReadWith -inspect $ 'writeReadWith `hasNoType` ''Step -inspect $ 'writeReadWith `hasNoType` ''Producer.ConcatState -- FH.read/UF.many -inspect $ 'writeReadWith `hasNoType` ''MutArray.ArrayUnsafe -- FH.write/writeNUnsafe - -- FH.read/A.read -#endif - --- | Send the file contents ('AT.defaultChunkSize') to /dev/null -writeRead :: Handle -> Handle -> IO () -writeRead inh devNull = IUF.fold fld unf inh - - where - - fld = FH.write devNull - unf = FH.reader - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'writeRead -inspect $ 'writeRead `hasNoType` ''Step -inspect $ 'writeRead `hasNoType` ''Producer.ConcatState -- FH.read/UF.many -inspect $ 'writeRead `hasNoType` ''MutArray.ArrayUnsafe -- FH.write/writeNUnsafe - -- FH.read/A.read -#endif - -------------------------------------------------------------------------------- --- -------------------------------------------------------------------------------- - -allBenchmarks :: BenchEnv -> [Benchmark] -allBenchmarks env = - [ mkBench "Handle.readChunks . Handle.writeChunks" env $ \inH _ -> - copyChunks inH (nullH env) - , mkBench "Handle.readChunks . Handle.writeChunks (cat)" env $ \inH outH -> - copyChunks inH outH - - , mkBench "Handle.reader . Handle.write" env $ \inh _ -> - copyStream inh (nullH env) - , mkBench "Handle.reader . Handle.write (cat)" env $ \inh outh -> - copyStream inh outh - - , mkBench "Handle.reader . Handle.putBytes" env $ \inh _ -> - readFromBytesNull inh (nullH env) - , mkBench "Handle.readerWith . Handle.putBytes" env $ \inh _ -> - readWithFromBytesNull inh (nullH env) - - , mkBench "Handle.reader . Handle.write (Unfold.fold)" env $ \inh _ -> - writeRead inh (nullH env) - , mkBench "Handle.readerWith . Handle.writeWith (Unfold.fold)" env $ \inh _ -> - writeReadWith inh (nullH env) - ] diff --git a/benchmark/streamly-benchmarks.cabal b/benchmark/streamly-benchmarks.cabal index e84cbdd470..039668c8bf 100644 --- a/benchmark/streamly-benchmarks.cabal +++ b/benchmark/streamly-benchmarks.cabal @@ -646,9 +646,6 @@ benchmark FileSystem.Handle type: exitcode-stdio-1.0 hs-source-dirs: Streamly/Benchmark/FileSystem main-is: Handle.hs - other-modules: - Handle.Read - , Handle.ReadWrite if flag(use-streamly-core) buildable: False else diff --git a/streamly.cabal b/streamly.cabal index b8c59480a7..b3a6c394bd 100644 --- a/streamly.cabal +++ b/streamly.cabal @@ -89,7 +89,6 @@ extra-source-files: benchmark/Streamly/Benchmark/Data/StreamK/*.hs benchmark/Streamly/Benchmark/Data/Unfold/*.hs benchmark/Streamly/Benchmark/FileSystem/*.hs - benchmark/Streamly/Benchmark/FileSystem/Handle/*.hs benchmark/Streamly/Benchmark/Unicode/*.hs benchmark/Streamly/Benchmark/Unicode/data/AllChars.txt benchmark/Streamly/Benchmark/Unicode/data/Devanagari.txt From 2df7e3e41406f030fe633f456dd1aa8f55b84c77 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Tue, 16 Jun 2026 05:06:54 +0530 Subject: [PATCH 08/24] Move file based parseMany benchmarks to CrossModule suite --- benchmark/Streamly/Benchmark/CrossModule.hs | 16 +++++++++++ benchmark/Streamly/Benchmark/Data/Parser.hs | 3 +-- .../Benchmark/Data/Parser/Sequence.hs | 27 ------------------- 3 files changed, 17 insertions(+), 29 deletions(-) diff --git a/benchmark/Streamly/Benchmark/CrossModule.hs b/benchmark/Streamly/Benchmark/CrossModule.hs index 995969fe61..c22606a918 100644 --- a/benchmark/Streamly/Benchmark/CrossModule.hs +++ b/benchmark/Streamly/Benchmark/CrossModule.hs @@ -31,6 +31,7 @@ import qualified Streamly.Data.Fold as Fold import qualified Streamly.FileSystem.Handle as FH import qualified Streamly.Internal.Data.Array as A import qualified Streamly.Internal.Data.Fold as FL +import qualified Streamly.Internal.Data.Parser as PR import qualified Streamly.Internal.Data.Stream as IP import qualified Streamly.Data.Stream.Prelude as S @@ -106,6 +107,11 @@ foldManyChunksOfSum n inh = S.fold Fold.length $ IP.foldMany (FL.take n FL.sum) (S.unfold FH.reader inh) +parseManyChunksOfSum :: Int -> Handle -> IO Int +parseManyChunksOfSum n inh = + S.fold Fold.length + $ IP.parseMany (PR.fromFold $ FL.take n FL.sum) (S.unfold FH.reader inh) + -- XXX investigate why we need an INLINE in this case (GHC) -- Even though allocations remain the same in both cases inlining improves time -- by 4x. @@ -166,6 +172,16 @@ allBenchmarks env = env $ \inh _ -> inline foldManyChunksOfSum 1 inh + -- parseMany with file IO + , mkBench + ("Stream.parseMany (Fold.take " ++ show (bigSize env) ++ " Fold.sum)") + env + $ \inh _ -> noinline parseManyChunksOfSum (bigSize env) inh + , mkBench + "Stream.parseMany (Fold.take 1 Fold.sum)" + env + $ \inh _ -> inline parseManyChunksOfSum 1 inh + -- folding chunks to arrays , mkBenchSmall "Stream.groupsOf 1 . Array.unsafeCreateOf" env $ \inh _ -> groupsOf 1 inh diff --git a/benchmark/Streamly/Benchmark/Data/Parser.hs b/benchmark/Streamly/Benchmark/Data/Parser.hs index c37cb9ee7e..e418de0486 100644 --- a/benchmark/Streamly/Benchmark/Data/Parser.hs +++ b/benchmark/Streamly/Benchmark/Data/Parser.hs @@ -45,7 +45,7 @@ benchmarkList :: -> BenchEnv -> [Array.Array Int] -> [(SpaceComplexity, Benchmark)] -benchmarkList value env _arrays = +benchmarkList value _env _arrays = -- Benchmarks for lower level Parser/Type.hs source module Alternative.benchmarks value ++ Applicative.benchmarks value @@ -53,7 +53,6 @@ benchmarkList value env _arrays = -- Benchmarks for Higher level Parser.hs source module. ++ Sequence.benchmarks value - ++ Sequence.benchmarksFileIO env ++ Groups.benchmarks value ++ Interleave.benchmarks value diff --git a/benchmark/Streamly/Benchmark/Data/Parser/Sequence.hs b/benchmark/Streamly/Benchmark/Data/Parser/Sequence.hs index 2d263e3f1e..0b666be5c8 100644 --- a/benchmark/Streamly/Benchmark/Data/Parser/Sequence.hs +++ b/benchmark/Streamly/Benchmark/Data/Parser/Sequence.hs @@ -32,26 +32,20 @@ module Streamly.Benchmark.Data.Parser.Sequence ( benchmarks - , benchmarksFileIO ) where import Control.DeepSeq (NFData(..)) import Data.Monoid (Sum(..)) -import GHC.Magic (inline) -import GHC.Magic (noinline) -import System.IO (Handle) import System.Random (randomRIO) import Streamly.Internal.Data.Parser (ParseError(..)) import Streamly.Internal.Data.Stream (Stream) -import qualified Streamly.FileSystem.Handle as Handle import qualified Streamly.Internal.Data.Fold as Fold import qualified Streamly.Internal.Data.Parser as PR import qualified Streamly.Internal.Data.Stream as Stream import Test.Tasty.Bench hiding (env) import Streamly.Benchmark.Common -import Streamly.Benchmark.Common.Handle #ifdef INSPECTION import GHC.Types (SPEC(..)) @@ -215,24 +209,3 @@ benchmarks value = {-# NOINLINE parseManyGroupsRollingEitherAlt1 #-} parseManyGroupsRollingEitherAlt1 = parseManyGroupsRollingEitherAlt (>) value - -------------------------------------------------------------------------------- --- parseMany with FileIO -------------------------------------------------------------------------------- - -parseManyChunksOfSum :: Int -> Handle -> IO Int -parseManyChunksOfSum n inh = - Stream.fold Fold.length - $ Stream.parseMany - (PR.fromFold $ Fold.take n Fold.sum) - (Stream.unfold Handle.reader inh) - -benchmarksFileIO :: BenchEnv -> [(SpaceComplexity, Benchmark)] -benchmarksFileIO env = - [ - -- parseMany with file IO - (SpaceO_1, mkBench ("parseMany (Fold.take " ++ show (bigSize env) ++ " Fold.sum)") env - $ \inh _ -> noinline parseManyChunksOfSum (bigSize env) inh) - , (SpaceO_1, mkBench "parseMany (Fold.take 1 Fold.sum)" env - $ \inh _ -> inline parseManyChunksOfSum 1 inh) - ] From 9bba096c174a7ce2cd1a02e3a7f139a013bf50f2 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Tue, 16 Jun 2026 06:06:01 +0530 Subject: [PATCH 09/24] Move benchmarks from Array/Stream to CrossModule --- benchmark/Streamly/Benchmark/CrossModule.hs | 124 ++++++++++++++ .../Streamly/Benchmark/Data/Array/Stream.hs | 151 +----------------- .../Streamly/Benchmark/Unicode/Stream.hs | 11 ++ 3 files changed, 137 insertions(+), 149 deletions(-) diff --git a/benchmark/Streamly/Benchmark/CrossModule.hs b/benchmark/Streamly/Benchmark/CrossModule.hs index c22606a918..e6fad288d3 100644 --- a/benchmark/Streamly/Benchmark/CrossModule.hs +++ b/benchmark/Streamly/Benchmark/CrossModule.hs @@ -22,6 +22,7 @@ {-# OPTIONS_GHC -fplugin Test.Inspection.Plugin #-} #endif +import Data.Functor.Identity (runIdentity) import Data.Word (Word8) import GHC.Magic (inline) import GHC.Magic (noinline) @@ -41,6 +42,7 @@ import Streamly.Benchmark.Common import Streamly.Benchmark.Common.Handle #ifdef INSPECTION +import Streamly.Internal.Data.MutByteArray (Unbox) import Streamly.Internal.Data.Stream (Step(..), FoldMany) import qualified Streamly.Internal.Data.MutArray as MutArray @@ -137,6 +139,107 @@ chunksOf :: Int -> Handle -> IO Int chunksOf n inh = S.fold Fold.length $ A.chunksOf n (S.unfold FH.reader inh) +------------------------------------------------------------------------------- +-- read chunked using toChunks +------------------------------------------------------------------------------- + +-- | Get the last byte from a file bytestream. +toChunksLast :: Handle -> IO (Maybe Word8) +toChunksLast inh = do + let s = FH.readChunks inh + larr <- IP.last s + return $ case larr of + Nothing -> Nothing + Just arr -> A.getIndex (A.length arr - 1) arr + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'toChunksLast +inspect $ 'toChunksLast `hasNoType` ''Step +#endif + +-- | Count the number of bytes in a file. +toChunksSumLengths :: Handle -> IO Int +toChunksSumLengths inh = + let s = FH.readChunks inh + in IP.fold Fold.sum (IP.map A.length s) + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'toChunksSumLengths +inspect $ 'toChunksSumLengths `hasNoType` ''Step +#endif + +-- | Sum the bytes in a file. +toChunksCountBytes :: Handle -> IO Word8 +toChunksCountBytes inh = do + let foldlArr' f z = runIdentity . IP.foldl' f z . A.read + let s = FH.readChunks inh + IP.foldl' (\acc arr -> acc + foldlArr' (+) 0 arr) 0 s + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'toChunksCountBytes +inspect $ 'toChunksCountBytes `hasNoType` ''Step +#endif + +------------------------------------------------------------------------------- +-- Splitting +------------------------------------------------------------------------------- + +-- | Count the number of lines in a file. +toChunksSplitOnSuffix :: Handle -> IO Int +toChunksSplitOnSuffix = + IP.fold Fold.length + . A.compactEndByByte_ 10 + . FH.readChunks + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'toChunksSplitOnSuffix +inspect $ 'toChunksSplitOnSuffix `hasNoType` ''Step +#endif + +-- | Count the number of words in a file. +toChunksSplitOn :: Handle -> IO Int +toChunksSplitOn = + IP.fold Fold.length + . A.compactSepByByte_ 32 + . FH.readChunks + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'toChunksSplitOn +inspect $ 'toChunksSplitOn `hasNoType` ''Step +#endif + +------------------------------------------------------------------------------- +-- copy with group/ungroup transformations +------------------------------------------------------------------------------- + +-- | Lines and unlines +{-# NOINLINE copyChunksSplitInterposeSuffix #-} +copyChunksSplitInterposeSuffix :: Handle -> Handle -> IO () +copyChunksSplitInterposeSuffix inh outh = + IP.fold (FH.write outh) + $ A.concatEndBy 10 . A.compactEndByByte_ 10 + $ FH.readChunks inh + +#ifdef INSPECTION +inspect $ hasNoTypeClassesExcept 'copyChunksSplitInterposeSuffix [''Unbox] +inspect $ 'copyChunksSplitInterposeSuffix `hasNoType` ''Step +#endif + +-- | Words and unwords +{-# NOINLINE copyChunksSplitInterpose #-} +copyChunksSplitInterpose :: Handle -> Handle -> IO () +copyChunksSplitInterpose inh outh = + IP.fold (FH.write outh) + -- XXX requires @-fspec-constr-recursive=12@. + -- XXX this is not correct word splitting combinator + $ A.concatSepBy 32 . A.compactSepByByte_ 32 + $ FH.readChunks inh + +#ifdef INSPECTION +inspect $ hasNoTypeClassesExcept 'copyChunksSplitInterpose [''Unbox] +inspect $ 'copyChunksSplitInterpose `hasNoType` ''Step +#endif + allBenchmarks :: BenchEnv -> [Benchmark] allBenchmarks env = [ bgroup (o_1_space_prefix moduleName) @@ -197,6 +300,27 @@ allBenchmarks env = chunksOf 10 inh , mkBench "Array.chunksOf 1000" env $ \inh _ -> chunksOf 1000 inh + + -- read chunked using toChunks + , mkBench "Stream.last" env $ \inh _ -> + toChunksLast inh + -- Note: this cannot be fairly compared with GNU wc -c or wc -m as + -- wc uses lseek to just determine the file size rather than reading + -- and counting characters. + , mkBench "Stream.sum . Stream.map Array.length" env $ \inh _ -> + toChunksSumLengths inh + , mkBench "splitOnSuffix" env $ \inh _ -> + toChunksSplitOnSuffix inh + , mkBench "splitOn" env $ \inh _ -> + toChunksSplitOn inh + , mkBench "countBytes" env $ \inh _ -> + toChunksCountBytes inh + + -- copy with group/ungroup transformations + , mkBench "interposeSuffix . splitOnSuffix" env $ \inh outh -> + copyChunksSplitInterposeSuffix inh outh + , mkBenchSmall "interpose . splitOn" env $ \inh outh -> + copyChunksSplitInterpose inh outh ] ] diff --git a/benchmark/Streamly/Benchmark/Data/Array/Stream.hs b/benchmark/Streamly/Benchmark/Data/Array/Stream.hs index ab5a947807..4085e6fe79 100644 --- a/benchmark/Streamly/Benchmark/Data/Array/Stream.hs +++ b/benchmark/Streamly/Benchmark/Data/Array/Stream.hs @@ -27,12 +27,9 @@ module Main import Control.DeepSeq (NFData(..)) import Control.Monad (void, when) import Control.Monad.Catch (MonadCatch) -import Data.Functor.Identity (runIdentity) import Data.Maybe (isJust) -import Data.Word (Word8) import Streamly.Internal.Data.Stream (Stream) import Streamly.Internal.Data.StreamK (StreamK) -import System.IO (Handle) import System.Random (randomRIO) import Prelude hiding () @@ -40,22 +37,13 @@ import qualified Streamly.Data.Stream as Stream import qualified Streamly.Internal.Data.Array as Array import qualified Streamly.Internal.Data.Fold as Fold import qualified Streamly.Internal.Data.Parser as Parser -import qualified Streamly.Internal.Data.Stream as Stream import qualified Streamly.Internal.Data.StreamK as StreamK -import qualified Streamly.Internal.FileSystem.Handle as Handle -import qualified Streamly.Internal.Unicode.Stream as Unicode import Test.Tasty.Bench hiding (env) import Streamly.Benchmark.Common import Streamly.Benchmark.Common.Handle import Control.Monad.IO.Class (MonadIO) -#ifdef INSPECTION -import Streamly.Internal.Data.MutByteArray (Unbox) -import Streamly.Internal.Data.Stream (Step(..)) -import Test.Inspection -#endif - ------------------------------------------------------------------------------- -- Utilities ------------------------------------------------------------------------------- @@ -78,119 +66,6 @@ benchIO benchIO name src sink = bench name $ nfIO $ randomRIO (1,1) >>= sink . src -------------------------------------------------------------------------------- --- read chunked using toChunks -------------------------------------------------------------------------------- - --- | Get the last byte from a file bytestream. -toChunksLast :: Handle -> IO (Maybe Word8) -toChunksLast inh = do - let s = Handle.readChunks inh - larr <- Stream.last s - return $ case larr of - Nothing -> Nothing - Just arr -> Array.getIndex (Array.length arr - 1) arr - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'toChunksLast -inspect $ 'toChunksLast `hasNoType` ''Step -#endif - --- | Count the number of bytes in a file. -toChunksSumLengths :: Handle -> IO Int -toChunksSumLengths inh = - let s = Handle.readChunks inh - in Stream.fold Fold.sum (Stream.map Array.length s) - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'toChunksSumLengths -inspect $ 'toChunksSumLengths `hasNoType` ''Step -#endif - --- | Sum the bytes in a file. -toChunksCountBytes :: Handle -> IO Word8 -toChunksCountBytes inh = do - let foldlArr' f z = runIdentity . Stream.foldl' f z . Array.read - let s = Handle.readChunks inh - Stream.foldl' (\acc arr -> acc + foldlArr' (+) 0 arr) 0 s - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'toChunksCountBytes -inspect $ 'toChunksCountBytes `hasNoType` ''Step -#endif - -toChunksDecodeUtf8Arrays :: Handle -> IO () -toChunksDecodeUtf8Arrays = - Stream.drain . Unicode.decodeUtf8Chunks . Handle.readChunks - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'toChunksDecodeUtf8Arrays --- inspect $ 'toChunksDecodeUtf8ArraysLenient `hasNoType` ''Step -#endif - -------------------------------------------------------------------------------- --- Splitting -------------------------------------------------------------------------------- - --- | Count the number of lines in a file. -toChunksSplitOnSuffix :: Handle -> IO Int -toChunksSplitOnSuffix = - Stream.fold Fold.length - . Array.compactEndByByte_ 10 - . Handle.readChunks - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'toChunksSplitOnSuffix -inspect $ 'toChunksSplitOnSuffix `hasNoType` ''Step -#endif - --- XXX use a word splitting combinator instead of splitOn and test it. --- | Count the number of words in a file. -toChunksSplitOn :: Handle -> IO Int -toChunksSplitOn = - Stream.fold Fold.length - . Array.compactSepByByte_ 32 - . Handle.readChunks - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'toChunksSplitOn -inspect $ 'toChunksSplitOn `hasNoType` ''Step -#endif - - -------------------------------------------------------------------------------- --- copy with group/ungroup transformations -------------------------------------------------------------------------------- - --- | Lines and unlines -{-# NOINLINE copyChunksSplitInterposeSuffix #-} -copyChunksSplitInterposeSuffix :: Handle -> Handle -> IO () -copyChunksSplitInterposeSuffix inh outh = - Stream.fold (Handle.write outh) - $ Array.concatEndBy 10 . Array.compactEndByByte_ 10 - $ Handle.readChunks inh - -#ifdef INSPECTION -inspect $ hasNoTypeClassesExcept 'copyChunksSplitInterposeSuffix [''Unbox] -inspect $ 'copyChunksSplitInterposeSuffix `hasNoType` ''Step -#endif - --- | Words and unwords -{-# NOINLINE copyChunksSplitInterpose #-} -copyChunksSplitInterpose :: Handle -> Handle -> IO () -copyChunksSplitInterpose inh outh = - Stream.fold (Handle.write outh) - -- XXX requires @-fspec-constr-recursive=12@. - -- XXX this is not correct word splitting combinator - $ Array.concatSepBy 32 . Array.compactSepByByte_ 32 - $ Handle.readChunks inh - -#ifdef INSPECTION -inspect $ hasNoTypeClassesExcept 'copyChunksSplitInterpose [''Unbox] -inspect $ 'copyChunksSplitInterpose `hasNoType` ''Step -#endif - - ------------------------------------------------------------------------------- -- Parsers ------------------------------------------------------------------------------- @@ -252,27 +127,10 @@ main = do big <- Stream.toList $ Array.chunksOf value $ sourceUnfoldrM value 0 return (small, big) - benchmarks env arrays value = + benchmarks _env arrays value = let (arraysSmall, arraysBig) = arrays in - -- read using toChunks instead of read - [ (SpaceO_1, mkBench "Stream.last" env $ \inH _ -> - toChunksLast inH) - -- Note: this cannot be fairly compared with GNU wc -c or wc -m as - -- wc uses lseek to just determine the file size rather than reading - -- and counting characters. - , (SpaceO_1, mkBench "Stream.sum . Stream.map Array.length" env $ \inH _ -> - toChunksSumLengths inH) - , (SpaceO_1, mkBench "splitOnSuffix" env $ \inH _ -> - toChunksSplitOnSuffix inH) - , (SpaceO_1, mkBench "splitOn" env $ \inH _ -> - toChunksSplitOn inH) - , (SpaceO_1, mkBench "countBytes" env $ \inH _ -> - toChunksCountBytes inH) - , (SpaceO_1, mkBenchSmall "decodeUtf8Arrays" env $ \inH _ -> - toChunksDecodeUtf8Arrays inH) - - , (SpaceO_1, benchIO "fold (of 100)" (\_ -> Stream.fromList arraysSmall) fold) + [ (SpaceO_1, benchIO "fold (of 100)" (\_ -> Stream.fromList arraysSmall) fold) , (SpaceO_1, benchIO "fold (single)" (\_ -> Stream.fromList arraysBig) fold) , (SpaceO_1, benchIO "foldBreak (recursive, small arrays)" @@ -286,11 +144,6 @@ main = do "parseBreak (recursive, small arrays)" (\_ -> Stream.fromList arraysSmall) (parseBreak . StreamK.fromStream)) - - , (SpaceO_1, mkBench "interposeSuffix . splitOnSuffix" env $ \inh outh -> - copyChunksSplitInterposeSuffix inh outh) - , (SpaceO_1, mkBenchSmall "interpose . splitOn" env $ \inh outh -> - copyChunksSplitInterpose inh outh) ] allBenchmarks env arrays value = diff --git a/benchmark/Streamly/Benchmark/Unicode/Stream.hs b/benchmark/Streamly/Benchmark/Unicode/Stream.hs index 7ef6c2669c..7897aa22b3 100644 --- a/benchmark/Streamly/Benchmark/Unicode/Stream.hs +++ b/benchmark/Streamly/Benchmark/Unicode/Stream.hs @@ -339,6 +339,15 @@ inspect $ hasNoTypeClasses 'readDecodeUtf8 -- inspect $ 'readDecodeUtf8Lax `hasNoType` ''Step #endif +toChunksDecodeUtf8Arrays :: Handle -> IO () +toChunksDecodeUtf8Arrays = + Stream.drain . Unicode.decodeUtf8Chunks . Handle.readChunks + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'toChunksDecodeUtf8Arrays +-- inspect $ 'toChunksDecodeUtf8Arrays `hasNoType` ''Step +#endif + allBenchmarks :: BenchEnv -> [Benchmark] allBenchmarks env = [ bgroup (o_1_space_prefix moduleName) @@ -353,6 +362,8 @@ allBenchmarks env = -- read with utf8 decoding , mkBenchSmall "Unicode.decodeUtf8" env $ \inh _ -> readDecodeUtf8 inh + , mkBenchSmall "Unicode.decodeUtf8Chunks" env $ \inh _ -> + toChunksDecodeUtf8Arrays inh -- XXX all these require @-fspec-constr-recursive=12@. -- lines/unlines From fceb4f53ec5c38539b491b50bb5d04a3fadcdbd5 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Tue, 16 Jun 2026 06:49:43 +0530 Subject: [PATCH 10/24] Move Split/SplitChunks files to CrossModule benchmark --- .../FileSystem.hs} | 19 ++++++------------- .../Stream/Parse => CrossModule}/Split.hs | 4 ++-- .../Parse => CrossModule}/SplitChunks.hs | 4 ++-- .../Streamly/Benchmark/Data/Stream/Parse.hs | 6 +----- .../Benchmark/Data/Stream/Parse/Group.hs | 3 --- benchmark/streamly-benchmarks.cabal | 12 +++++++----- 6 files changed, 18 insertions(+), 30 deletions(-) rename benchmark/Streamly/Benchmark/{CrossModule.hs => CrossModule/FileSystem.hs} (97%) rename benchmark/Streamly/Benchmark/{Data/Stream/Parse => CrossModule}/Split.hs (99%) rename benchmark/Streamly/Benchmark/{Data/Stream/Parse => CrossModule}/SplitChunks.hs (96%) diff --git a/benchmark/Streamly/Benchmark/CrossModule.hs b/benchmark/Streamly/Benchmark/CrossModule/FileSystem.hs similarity index 97% rename from benchmark/Streamly/Benchmark/CrossModule.hs rename to benchmark/Streamly/Benchmark/CrossModule/FileSystem.hs index e6fad288d3..d87958be4c 100644 --- a/benchmark/Streamly/Benchmark/CrossModule.hs +++ b/benchmark/Streamly/Benchmark/CrossModule/FileSystem.hs @@ -1,5 +1,5 @@ -- --- Module : Streamly.Benchmark.CrossModule +-- Module : CrossModule.FileSystem -- Copyright : (c) 2019 Composewell Technologies -- License : BSD-3-Clause -- Maintainer : streamly@composewell.com @@ -22,6 +22,8 @@ {-# OPTIONS_GHC -fplugin Test.Inspection.Plugin #-} #endif +module CrossModule.FileSystem (benchmarks) where + import Data.Functor.Identity (runIdentity) import Data.Word (Word8) import GHC.Magic (inline) @@ -51,9 +53,6 @@ import qualified Streamly.Internal.Data.Producer as Producer import Test.Inspection #endif -moduleName :: String -moduleName = "CrossModule" - ------------------------------------------------------------------------------- -- Handle read ------------------------------------------------------------------------------- @@ -240,9 +239,9 @@ inspect $ hasNoTypeClassesExcept 'copyChunksSplitInterpose [''Unbox] inspect $ 'copyChunksSplitInterpose `hasNoType` ''Step #endif -allBenchmarks :: BenchEnv -> [Benchmark] -allBenchmarks env = - [ bgroup (o_1_space_prefix moduleName) +benchmarks :: BenchEnv -> [(SpaceComplexity, Benchmark)] +benchmarks env = + map (\b -> (SpaceO_1, b)) [ mkBench "Fold.latest" env $ \inh _ -> readLast inh , mkBench "Fold.sum" env $ \inh _ -> @@ -322,9 +321,3 @@ allBenchmarks env = , mkBenchSmall "interpose . splitOn" env $ \inh outh -> copyChunksSplitInterpose inh outh ] - ] - -main :: IO () -main = do - env <- mkHandleBenchEnv - defaultMain (allBenchmarks env) diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Parse/Split.hs b/benchmark/Streamly/Benchmark/CrossModule/Split.hs similarity index 99% rename from benchmark/Streamly/Benchmark/Data/Stream/Parse/Split.hs rename to benchmark/Streamly/Benchmark/CrossModule/Split.hs index 71be2343ee..b8588a587b 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Parse/Split.hs +++ b/benchmark/Streamly/Benchmark/CrossModule/Split.hs @@ -1,6 +1,6 @@ -- | --- Module : Stream.Split +-- Module : CrossModule.Split -- Copyright : (c) 2019 Composewell Technologies -- License : BSD-3-Clause -- Maintainer : streamly@composewell.com @@ -20,7 +20,7 @@ {-# OPTIONS_GHC -fplugin Test.Inspection.Plugin #-} #endif -module Stream.Parse.Split (benchmarks) where +module CrossModule.Split (benchmarks) where import Data.Char (ord) import Data.Word (Word8) diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Parse/SplitChunks.hs b/benchmark/Streamly/Benchmark/CrossModule/SplitChunks.hs similarity index 96% rename from benchmark/Streamly/Benchmark/Data/Stream/Parse/SplitChunks.hs rename to benchmark/Streamly/Benchmark/CrossModule/SplitChunks.hs index dee978cc82..f9ba96cbbd 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Parse/SplitChunks.hs +++ b/benchmark/Streamly/Benchmark/CrossModule/SplitChunks.hs @@ -1,6 +1,6 @@ -- | --- Module : Stream.SplitChunks +-- Module : CrossModule.SplitChunks -- Copyright : (c) 2019 Composewell Technologies -- License : BSD-3-Clause -- Maintainer : streamly@composewell.com @@ -20,7 +20,7 @@ {-# OPTIONS_GHC -fplugin Test.Inspection.Plugin #-} #endif -module Stream.Parse.SplitChunks (benchmarks) where +module CrossModule.SplitChunks (benchmarks) where import System.IO (Handle) diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Parse.hs b/benchmark/Streamly/Benchmark/Data/Stream/Parse.hs index 703712d356..6d50af6480 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Parse.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream/Parse.hs @@ -7,15 +7,11 @@ module Stream.Parse (benchmarks) where import qualified Stream.Parse.Group as Group -import qualified Stream.Parse.Split as Split -import qualified Stream.Parse.SplitChunks as SplitChunks import Test.Tasty.Bench (Benchmark) import Streamly.Benchmark.Common import Streamly.Benchmark.Common.Handle (BenchEnv) benchmarks :: Int -> BenchEnv -> [(SpaceComplexity, Benchmark)] -benchmarks size env = +benchmarks size _env = Group.benchmarks size - ++ Split.benchmarks env - ++ SplitChunks.benchmarks env diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Parse/Group.hs b/benchmark/Streamly/Benchmark/Data/Stream/Parse/Group.hs index 149abcacc3..5fcde8fc4a 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Parse/Group.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream/Parse/Group.hs @@ -121,8 +121,5 @@ benchmarks size = , (SpaceO_1, benchIO "groupsByRollingLT" $ groupsByRollingLT size) , (SpaceO_1, benchIO "groupsByRollingEq" $ groupsByRollingEq size) - -- XXX parseMany/parseIterate benchmarks are in the Parser/ParserD - -- modules we can bring those here. chunksOf benchmarks are in - -- Parser/ParserD/Array.Stream/FileSystem.Handle. , (SpaceO_1, benchIO "foldIterateM" $ foldIterateM size) ] diff --git a/benchmark/streamly-benchmarks.cabal b/benchmark/streamly-benchmarks.cabal index 039668c8bf..ff6adfe9f1 100644 --- a/benchmark/streamly-benchmarks.cabal +++ b/benchmark/streamly-benchmarks.cabal @@ -463,9 +463,6 @@ benchmark Data.Stream Stream.Lift Stream.Parse Stream.Parse.Group - Stream.Parse.Split - -- Note: uses around 400MB during build - Stream.Parse.SplitChunks Stream.Transform Stream.Transform.Basic Stream.Transform.Composed @@ -474,8 +471,6 @@ benchmark Data.Stream Stream.Type.MultiStream Stream.Type.Nested Stream.Type.Logic - if flag(limit-build-mem) - ghc-options: +RTS -M500M -RTS benchmark Data.Stream.Adaptive import: bench-options-threaded @@ -627,6 +622,13 @@ benchmark CrossModule type: exitcode-stdio-1.0 hs-source-dirs: Streamly/Benchmark main-is: CrossModule.hs + other-modules: + CrossModule.FileSystem + CrossModule.Split + -- Note: uses around 400MB during build + CrossModule.SplitChunks + if flag(limit-build-mem) + ghc-options: +RTS -M500M -RTS benchmark FileSystem.DirIO import: bench-options From d44d3ed92026df403b9be38acc964659ea3a928a Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Tue, 16 Jun 2026 19:25:25 +0530 Subject: [PATCH 11/24] Commit the missing file CrossModule.hs --- benchmark/Streamly/Benchmark/CrossModule.hs | 40 +++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 benchmark/Streamly/Benchmark/CrossModule.hs diff --git a/benchmark/Streamly/Benchmark/CrossModule.hs b/benchmark/Streamly/Benchmark/CrossModule.hs new file mode 100644 index 0000000000..433630100e --- /dev/null +++ b/benchmark/Streamly/Benchmark/CrossModule.hs @@ -0,0 +1,40 @@ +-- +-- Module : CrossModule +-- Copyright : (c) 2019 Composewell Technologies +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com +-- Stability : experimental +-- Portability : GHC +-- +-- Driver for benchmarks that combine operations from multiple library modules, +-- primarily to test fusion and performance across module boundaries (e.g. file +-- I/O fused with stream folds, chunking and splitting operations). + +import Test.Tasty.Bench hiding (env) +import Streamly.Benchmark.Common +import Streamly.Benchmark.Common.Handle + +import qualified CrossModule.FileSystem as FileSystem +import qualified CrossModule.Split as Split +import qualified CrossModule.SplitChunks as SplitChunks + +moduleName :: String +moduleName = "CrossModule" + +main :: IO () +main = do + env <- mkHandleBenchEnv + defaultMain (allBenchmarks env) + + where + + allBenchmarks env = + let allBenches = + FileSystem.benchmarks env + ++ Split.benchmarks env + ++ SplitChunks.benchmarks env + get x = map snd $ filter ((==) x . fst) allBenches + o_1_space = get SpaceO_1 + in + [ bgroup (o_1_space_prefix moduleName) o_1_space + ] From b3230fbf43b1c51f658845fa338e110c3a411638 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Wed, 17 Jun 2026 00:59:15 +0530 Subject: [PATCH 12/24] Use "streamly-core" import instead of "streamly" --- benchmark/Streamly/Benchmark/CrossModule/FileSystem.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/Streamly/Benchmark/CrossModule/FileSystem.hs b/benchmark/Streamly/Benchmark/CrossModule/FileSystem.hs index d87958be4c..152bfecccb 100644 --- a/benchmark/Streamly/Benchmark/CrossModule/FileSystem.hs +++ b/benchmark/Streamly/Benchmark/CrossModule/FileSystem.hs @@ -36,7 +36,7 @@ import qualified Streamly.Internal.Data.Array as A import qualified Streamly.Internal.Data.Fold as FL import qualified Streamly.Internal.Data.Parser as PR import qualified Streamly.Internal.Data.Stream as IP -import qualified Streamly.Data.Stream.Prelude as S +import qualified Streamly.Data.Stream as S import Test.Tasty.Bench hiding (env) import Prelude hiding (last, length) From acf9ca6b0f4aa4c4c4631b5ffdd95b6ab95fec0f Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Wed, 17 Jun 2026 01:42:45 +0530 Subject: [PATCH 13/24] Add CrossModule to extra-source-files --- streamly.cabal | 2 ++ 1 file changed, 2 insertions(+) diff --git a/streamly.cabal b/streamly.cabal index b3a6c394bd..520a6749b8 100644 --- a/streamly.cabal +++ b/streamly.cabal @@ -67,6 +67,8 @@ build-type: Configure extra-source-files: bench-test-lib/bench-test-lib.cabal bench-test-lib/src/BenchTestLib/DirIO.hs + benchmark/Streamly/Benchmark/CrossModule.hs + benchmark/Streamly/Benchmark/CrossModule/*.hs benchmark/Streamly/Benchmark/Data/*.hs benchmark/Streamly/Benchmark/Data/Array.hs benchmark/Streamly/Benchmark/Data/Array/Common.hs From aa0271b6ff50015d62bc59bea4b96e0f79825d95 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Wed, 17 Jun 2026 01:44:33 +0530 Subject: [PATCH 14/24] Increase heap limit for Data.Stream benchmark --- benchmark/streamly-benchmarks.cabal | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benchmark/streamly-benchmarks.cabal b/benchmark/streamly-benchmarks.cabal index ff6adfe9f1..9375cd98d0 100644 --- a/benchmark/streamly-benchmarks.cabal +++ b/benchmark/streamly-benchmarks.cabal @@ -471,6 +471,8 @@ benchmark Data.Stream Stream.Type.MultiStream Stream.Type.Nested Stream.Type.Logic + if flag(limit-build-mem) + ghc-options: +RTS -M500M -RTS benchmark Data.Stream.Adaptive import: bench-options-threaded From 3aa034d456097c6eab41a8a28ffeaf301f554fc3 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Tue, 16 Jun 2026 17:55:36 +0530 Subject: [PATCH 15/24] Merge Data.Array and Data.Array.Stream benchmarks --- benchmark/Streamly/Benchmark/Data/Array.hs | 8 +- .../Streamly/Benchmark/Data/Array/Stream.hs | 87 ++++++++----------- benchmark/streamly-benchmarks.cabal | 13 +-- 3 files changed, 44 insertions(+), 64 deletions(-) diff --git a/benchmark/Streamly/Benchmark/Data/Array.hs b/benchmark/Streamly/Benchmark/Data/Array.hs index 9ed09c1ac5..e878ac80b1 100644 --- a/benchmark/Streamly/Benchmark/Data/Array.hs +++ b/benchmark/Streamly/Benchmark/Data/Array.hs @@ -12,6 +12,8 @@ import Data.Kind (Type) import qualified Streamly.Internal.Data.Array as IA import qualified GHC.Exts as GHC +import qualified Array.Stream as ArrayStream + -- import qualified Streamly.Data.Array as A import qualified Streamly.Internal.Data.Array as A @@ -105,12 +107,12 @@ benchmarks size = ++ commonBenchmarks size main :: IO () -main = runWithCLIOpts defStreamSize allBenchmarks +main = runWithCLIOptsEnv defStreamSize ArrayStream.alloc allBenchmarks where - allBenchmarks size = - let allBenches = benchmarks size + allBenchmarks arrays size = + let allBenches = benchmarks size ++ ArrayStream.benchmarks arrays size get x = fmap snd $ filter ((==) x . fst) allBenches o_1_space = get SpaceO_1 o_n_heap = get HeapO_n diff --git a/benchmark/Streamly/Benchmark/Data/Array/Stream.hs b/benchmark/Streamly/Benchmark/Data/Array/Stream.hs index 4085e6fe79..bf5e5b82b0 100644 --- a/benchmark/Streamly/Benchmark/Data/Array/Stream.hs +++ b/benchmark/Streamly/Benchmark/Data/Array/Stream.hs @@ -1,6 +1,6 @@ -- | --- Module : Streamly.Benchmark.Data.ParserD +-- Module : Array.Stream -- Copyright : (c) 2020 Composewell Technologies -- -- License : BSD-3-Clause @@ -19,9 +19,11 @@ {-# OPTIONS_GHC -fplugin Test.Inspection.Plugin #-} #endif -module Main +module Array.Stream ( - main + Arrays + , alloc + , benchmarks ) where import Control.DeepSeq (NFData(..)) @@ -41,7 +43,6 @@ import qualified Streamly.Internal.Data.StreamK as StreamK import Test.Tasty.Bench hiding (env) import Streamly.Benchmark.Common -import Streamly.Benchmark.Common.Handle import Control.Monad.IO.Class (MonadIO) ------------------------------------------------------------------------------- @@ -105,51 +106,37 @@ parseBreak s = do ------------------------------------------------------------------------------- --- Driver +-- Benchmarks ------------------------------------------------------------------------------- -moduleName :: String -moduleName = "Data.Array.Stream" - -main :: IO () -main = do - env <- mkHandleBenchEnv - runWithCLIOptsEnv defaultStreamSize alloc (allBenchmarks env) - - where - - alloc value = - if value <= 0 - then return (undefined, undefined) - else - do - small <- Stream.toList $ Array.chunksOf 100 $ sourceUnfoldrM value 0 - big <- Stream.toList $ Array.chunksOf value $ sourceUnfoldrM value 0 - return (small, big) - - benchmarks _env arrays value = - let (arraysSmall, arraysBig) = arrays - in - [ (SpaceO_1, benchIO "fold (of 100)" (\_ -> Stream.fromList arraysSmall) fold) - , (SpaceO_1, benchIO "fold (single)" (\_ -> Stream.fromList arraysBig) fold) - , (SpaceO_1, benchIO - "foldBreak (recursive, small arrays)" - (\_ -> Stream.fromList arraysSmall) - (foldBreak . StreamK.fromStream)) - , (SpaceO_1, benchIO "parse (of 100)" (\_ -> Stream.fromList arraysSmall) - $ parse value) - , (SpaceO_1, benchIO "parse (single)" (\_ -> Stream.fromList arraysBig) - $ parse value) - , (SpaceO_1, benchIO - "parseBreak (recursive, small arrays)" - (\_ -> Stream.fromList arraysSmall) - (parseBreak . StreamK.fromStream)) - ] - - allBenchmarks env arrays value = - let allBenches = benchmarks env arrays value - get x = map snd $ filter ((==) x . fst) allBenches - o_1_space = get SpaceO_1 - in - [ bgroup (o_1_space_prefix moduleName) o_1_space - ] +type Arrays = ([Array.Array Int], [Array.Array Int]) + +alloc :: Int -> IO Arrays +alloc value = + if value <= 0 + then return (undefined, undefined) + else + do + small <- Stream.toList $ Array.chunksOf 100 $ sourceUnfoldrM value 0 + big <- Stream.toList $ Array.chunksOf value $ sourceUnfoldrM value 0 + return (small, big) + +benchmarks :: Arrays -> Int -> [(SpaceComplexity, Benchmark)] +benchmarks arrays value = + let (arraysSmall, arraysBig) = arrays + in + [ (SpaceO_1, benchIO "fold (of 100)" (\_ -> Stream.fromList arraysSmall) fold) + , (SpaceO_1, benchIO "fold (single)" (\_ -> Stream.fromList arraysBig) fold) + , (SpaceO_1, benchIO + "foldBreak (recursive, small arrays)" + (\_ -> Stream.fromList arraysSmall) + (foldBreak . StreamK.fromStream)) + , (SpaceO_1, benchIO "parse (of 100)" (\_ -> Stream.fromList arraysSmall) + $ parse value) + , (SpaceO_1, benchIO "parse (single)" (\_ -> Stream.fromList arraysBig) + $ parse value) + , (SpaceO_1, benchIO + "parseBreak (recursive, small arrays)" + (\_ -> Stream.fromList arraysSmall) + (parseBreak . StreamK.fromStream)) + ] diff --git a/benchmark/streamly-benchmarks.cabal b/benchmark/streamly-benchmarks.cabal index 9375cd98d0..a37db81b88 100644 --- a/benchmark/streamly-benchmarks.cabal +++ b/benchmark/streamly-benchmarks.cabal @@ -242,10 +242,12 @@ benchmark Data.Array main-is: Streamly/Benchmark/Data/Array.hs other-modules: Stream.Common + , Array.Stream if flag(use-streamly-core) buildable: False else buildable: True + build-depends: exceptions >= 0.8 && < 0.11 if flag(limit-build-mem) && !flag(fusion-plugin) ghc-options: +RTS -M600M -RTS @@ -263,17 +265,6 @@ benchmark Data.Array.Generic if flag(limit-build-mem) ghc-options: +RTS -M600M -RTS -benchmark Data.Array.Stream - import: bench-options - type: exitcode-stdio-1.0 - hs-source-dirs: Streamly/Benchmark/Data/Array - main-is: Stream.hs - if flag(use-streamly-core) || impl(ghcjs) - buildable: False - else - buildable: True - build-depends: exceptions >= 0.8 && < 0.11 - benchmark Data.Fold import: bench-options type: exitcode-stdio-1.0 From 27afe33ab4ee0f7e0f4aae6b7edcf5b2c6453c11 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Tue, 16 Jun 2026 23:07:35 +0530 Subject: [PATCH 16/24] Separate Array/Type.hs benchmarks from Array.hs --- benchmark/Streamly/Benchmark/Data/Array.hs | 13 +- .../Streamly/Benchmark/Data/Array/Common.hs | 111 +---------------- .../Streamly/Benchmark/Data/Array/Generic.hs | 3 + .../Benchmark/Data/Array/SmallArray.hs | 3 + .../Streamly/Benchmark/Data/Array/Type.hs | 53 ++++++++ .../Benchmark/Data/Array/TypeCommon.hs | 115 ++++++++++++++++++ benchmark/streamly-benchmarks.cabal | 1 + streamly.cabal | 2 + 8 files changed, 182 insertions(+), 119 deletions(-) create mode 100644 benchmark/Streamly/Benchmark/Data/Array/Type.hs create mode 100644 benchmark/Streamly/Benchmark/Data/Array/TypeCommon.hs diff --git a/benchmark/Streamly/Benchmark/Data/Array.hs b/benchmark/Streamly/Benchmark/Data/Array.hs index e878ac80b1..c10739c08a 100644 --- a/benchmark/Streamly/Benchmark/Data/Array.hs +++ b/benchmark/Streamly/Benchmark/Data/Array.hs @@ -17,6 +17,9 @@ import qualified Array.Stream as ArrayStream -- import qualified Streamly.Data.Array as A import qualified Streamly.Internal.Data.Array as A +import Array.Type + (typeCommonBenchmarks, benchIO, withRandomIntIO, withArray, withStream) + #if __GLASGOW_HASKELL__ >= 810 type Arr :: Type -> Type #endif @@ -24,19 +27,10 @@ type Arr = A.Array #include "Streamly/Benchmark/Data/Array/Common.hs" -instance NFData (A.Array a) where - {-# INLINE rnf #-} - rnf _ = () - ------------------------------------------------------------------------------- -- Bench Ops ------------------------------------------------------------------------------- -{-# INLINE sourceIntFromToFromList #-} -sourceIntFromToFromList :: Int -> IO (Arr Int) -sourceIntFromToFromList value = withRandomIntIO $ \n -> - P.return $ A.fromListN value [n..n + value] - {-# INLINE parseInstance #-} parseInstance :: P.String -> Arr Int parseInstance str = @@ -104,6 +98,7 @@ benchmarks size = , (HeapO_n, benchIO "createOfLast.Max" $ createOfLastMax size) ] + ++ typeCommonBenchmarks size ++ commonBenchmarks size main :: IO () diff --git a/benchmark/Streamly/Benchmark/Data/Array/Common.hs b/benchmark/Streamly/Benchmark/Data/Array/Common.hs index 5466d9cafb..027197160a 100644 --- a/benchmark/Streamly/Benchmark/Data/Array/Common.hs +++ b/benchmark/Streamly/Benchmark/Data/Array/Common.hs @@ -1,51 +1,3 @@ -------------------------------------------------------------------------------- --- Benchmark helpers -------------------------------------------------------------------------------- - -{-# INLINE withRandomIntIO #-} -withRandomIntIO :: (Int -> IO b) -> IO b -withRandomIntIO f = randomRIO (1, 1 :: Int) >>= f - -{-# INLINE benchIO #-} -benchIO :: NFData b => String -> IO b -> Benchmark -benchIO name = bench name . nfIO - -{-# INLINE withArray #-} -withArray :: Int -> (Arr Int -> IO b) -> IO b -withArray value f = sourceIntFromTo value >>= f - -{-# INLINE withStream #-} -withStream :: Int -> (S.Stream IO Int -> IO b) -> IO b -withStream value f = withRandomIntIO $ \n -> f $ P.sourceUnfoldrM value n - -------------------------------------------------------------------------------- --- Bench Ops -------------------------------------------------------------------------------- - -{-# INLINE sourceIntFromTo #-} -sourceIntFromTo :: Int -> IO (Arr Int) -sourceIntFromTo value = withRandomIntIO $ \n -> - S.fold (A.createOf value) $ S.enumerateFromTo n (n + value) - -{-# INLINE sourceUnfoldr #-} -sourceUnfoldr :: Int -> IO (Arr Int) -sourceUnfoldr value = withRandomIntIO $ \n -> - let step cnt = - if cnt > n + value - then Nothing - else Just (cnt, cnt + 1) - in S.fold (A.createOf value) $ S.unfoldr step n - -{-# INLINE sourceFromList #-} -sourceFromList :: Int -> IO (Arr Int) -sourceFromList value = withRandomIntIO $ \n -> - S.fold (A.createOf value) $ S.fromList [n..n+value] - - -{-# INLINE showStream #-} -showStream :: Int -> IO P.String -showStream value = withArray value (return . showInstance) - ------------------------------------------------------------------------------- -- Transformation ------------------------------------------------------------------------------- @@ -92,78 +44,17 @@ map value = withArray value $ composeN 1 $ onArray value $ fmap (+1) mapX4 :: Int -> IO (Arr Int) mapX4 value = withArray value $ composeN 4 $ onArray value $ fmap (+1) -{-# INLINE idArr #-} -idArr :: Int -> IO (Arr Int) -idArr value = withArray value return - -{-# INLINE eqInstance #-} -eqInstance :: Int -> IO Bool -eqInstance value = withArray value $ \src -> return (src == src) - -{-# INLINE eqInstanceNotEq #-} -eqInstanceNotEq :: Int -> IO Bool -eqInstanceNotEq value = withArray value $ \src -> return (src P./= src) - -{-# INLINE ordInstance #-} -ordInstance :: Int -> IO Bool -ordInstance value = withArray value $ \src -> return (src P.< src) - -{-# INLINE ordInstanceMin #-} -ordInstanceMin :: Int -> IO (Arr Int) -ordInstanceMin value = withArray value $ \src -> return (P.min src src) - -{-# INLINE showInstance #-} -showInstance :: Arr Int -> P.String -showInstance = P.show - -{-# INLINE pureFoldl' #-} -pureFoldl' :: Int -> IO Int -pureFoldl' value = withArray value $ S.fold (Fold.foldl' (+) 0) . S.unfold A.reader - -------------------------------------------------------------------------------- --- Elimination -------------------------------------------------------------------------------- - -{-# INLINE unfoldReadDrain #-} -unfoldReadDrain :: Int -> IO () -unfoldReadDrain value = withArray value $ S.fold Fold.drain . S.unfold A.reader - -{-# INLINE toStreamRevDrain #-} -toStreamRevDrain :: Int -> IO () -toStreamRevDrain value = withArray value $ S.fold Fold.drain . A.readRev - -{-# INLINE writeN #-} -writeN :: Int -> IO (Arr Int) -writeN value = withStream value (S.fold (A.createOf value)) - ------------------------------------------------------------------------------- -- Bench groups ------------------------------------------------------------------------------- commonBenchmarks :: Int -> [(SpaceComplexity, Benchmark)] commonBenchmarks size = - [ (SpaceO_1, benchIO "writeN . intFromTo" $ sourceIntFromTo size) - , (SpaceO_1, benchIO "fromList . intFromTo" $ sourceIntFromToFromList size) - , (SpaceO_1, benchIO "writeN . unfoldr" $ sourceUnfoldr size) - , (SpaceO_1, benchIO "writeN . fromList" $ sourceFromList size) - , (SpaceO_1, benchIO "show" $ showStream size) - - , (SpaceO_1, benchIO "id" $ idArr size) - , (SpaceO_1, benchIO "==" $ eqInstance size) - , (SpaceO_1, benchIO "/=" $ eqInstanceNotEq size) - , (SpaceO_1, benchIO "<" $ ordInstance size) - , (SpaceO_1, benchIO "min" $ ordInstanceMin size) - , (SpaceO_1, benchIO "foldl'" $ pureFoldl' size) - , (SpaceO_1, benchIO "unfoldRead" $ unfoldReadDrain size) - , (SpaceO_1, benchIO "toStreamRev" $ toStreamRevDrain size) - - , (SpaceO_1, benchIO "scanl'" $ scanl' size) + [ (SpaceO_1, benchIO "scanl'" $ scanl' size) , (SpaceO_1, benchIO "scanl1'" $ scanl1' size) , (SpaceO_1, benchIO "map" $ map size) , (SpaceO_1, benchIO "scanl'X4" $ scanl'X4 size) , (SpaceO_1, benchIO "scanl1'X4" $ scanl1'X4 size) , (SpaceO_1, benchIO "mapX4" $ mapX4 size) - - , (HeapO_n, benchIO "writeN" $ writeN size) ] diff --git a/benchmark/Streamly/Benchmark/Data/Array/Generic.hs b/benchmark/Streamly/Benchmark/Data/Array/Generic.hs index 50f3473405..10403aac56 100644 --- a/benchmark/Streamly/Benchmark/Data/Array/Generic.hs +++ b/benchmark/Streamly/Benchmark/Data/Array/Generic.hs @@ -9,6 +9,8 @@ import qualified Streamly.Internal.Data.Array.Generic as A type Arr = A.Array +#include "Streamly/Benchmark/Data/Array/TypeCommon.hs" + #include "Streamly/Benchmark/Data/Array/Common.hs" instance NFData a => NFData (A.Array a) where @@ -87,6 +89,7 @@ benchmarks size = , (HeapO_n, benchIO "createOfLast.Max" $ createOfLastMax size) ] + ++ typeCommonBenchmarks size ++ commonBenchmarks size main :: IO () diff --git a/benchmark/Streamly/Benchmark/Data/Array/SmallArray.hs b/benchmark/Streamly/Benchmark/Data/Array/SmallArray.hs index e62fb622d1..d066362254 100644 --- a/benchmark/Streamly/Benchmark/Data/Array/SmallArray.hs +++ b/benchmark/Streamly/Benchmark/Data/Array/SmallArray.hs @@ -5,6 +5,8 @@ import qualified Streamly.Internal.Data.SmallArray as A type Arr = A.SmallArray +#include "Streamly/Benchmark/Data/Array/TypeCommon.hs" + #include "Streamly/Benchmark/Data/Array/Common.hs" ------------------------------------------------------------------------------- @@ -72,6 +74,7 @@ benchmarks :: Int -> [(SpaceComplexity, Benchmark)] benchmarks size = [ (SpaceO_1, benchIO "read" $ readInstance size) ] + ++ typeCommonBenchmarks size ++ commonBenchmarks size main :: IO () diff --git a/benchmark/Streamly/Benchmark/Data/Array/Type.hs b/benchmark/Streamly/Benchmark/Data/Array/Type.hs new file mode 100644 index 0000000000..4710138886 --- /dev/null +++ b/benchmark/Streamly/Benchmark/Data/Array/Type.hs @@ -0,0 +1,53 @@ +-- | +-- Module : Array.Type +-- Copyright : (c) 2020 Composewell Technologies +-- +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com + +{-# OPTIONS_GHC -Wno-orphans #-} + +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} + +module Array.Type + ( + typeCommonBenchmarks + , benchIO + , withRandomIntIO + , withArray + , withStream + ) where + +#if __GLASGOW_HASKELL__ >= 810 +import Data.Kind (Type) +#endif + +import Control.DeepSeq (NFData(..)) +import System.Random (randomRIO) + +import qualified Streamly.Data.Fold as Fold +import qualified Streamly.Data.Stream as S +import qualified Streamly.Internal.Data.Array as A + +import Test.Tasty.Bench +import Streamly.Benchmark.Common hiding (benchPureSrc) +import qualified Stream.Common as P + +import Prelude as P + +#if __GLASGOW_HASKELL__ >= 810 +type Arr :: Type -> Type +#endif +type Arr = A.Array + +instance NFData (A.Array a) where + {-# INLINE rnf #-} + rnf _ = () + +{-# INLINE sourceIntFromToFromList #-} +sourceIntFromToFromList :: Int -> IO (Arr Int) +sourceIntFromToFromList value = withRandomIntIO $ \n -> + P.return $ A.fromListN value [n..n + value] + +#include "Streamly/Benchmark/Data/Array/TypeCommon.hs" diff --git a/benchmark/Streamly/Benchmark/Data/Array/TypeCommon.hs b/benchmark/Streamly/Benchmark/Data/Array/TypeCommon.hs new file mode 100644 index 0000000000..59ebf388ea --- /dev/null +++ b/benchmark/Streamly/Benchmark/Data/Array/TypeCommon.hs @@ -0,0 +1,115 @@ +------------------------------------------------------------------------------- +-- Benchmark helpers +------------------------------------------------------------------------------- + +{-# INLINE withRandomIntIO #-} +withRandomIntIO :: (Int -> IO b) -> IO b +withRandomIntIO f = randomRIO (1, 1 :: Int) >>= f + +{-# INLINE benchIO #-} +benchIO :: NFData b => String -> IO b -> Benchmark +benchIO name = bench name . nfIO + +{-# INLINE withArray #-} +withArray :: Int -> (Arr Int -> IO b) -> IO b +withArray value f = sourceIntFromTo value >>= f + +{-# INLINE withStream #-} +withStream :: Int -> (S.Stream IO Int -> IO b) -> IO b +withStream value f = withRandomIntIO $ \n -> f $ P.sourceUnfoldrM value n + +------------------------------------------------------------------------------- +-- Bench Ops +------------------------------------------------------------------------------- + +{-# INLINE sourceIntFromTo #-} +sourceIntFromTo :: Int -> IO (Arr Int) +sourceIntFromTo value = withRandomIntIO $ \n -> + S.fold (A.createOf value) $ S.enumerateFromTo n (n + value) + +{-# INLINE sourceUnfoldr #-} +sourceUnfoldr :: Int -> IO (Arr Int) +sourceUnfoldr value = withRandomIntIO $ \n -> + let step cnt = + if cnt > n + value + then Nothing + else Just (cnt, cnt + 1) + in S.fold (A.createOf value) $ S.unfoldr step n + +{-# INLINE sourceFromList #-} +sourceFromList :: Int -> IO (Arr Int) +sourceFromList value = withRandomIntIO $ \n -> + S.fold (A.createOf value) $ S.fromList [n..n+value] + + +{-# INLINE showStream #-} +showStream :: Int -> IO P.String +showStream value = withArray value (return . showInstance) + +{-# INLINE idArr #-} +idArr :: Int -> IO (Arr Int) +idArr value = withArray value return + +{-# INLINE eqInstance #-} +eqInstance :: Int -> IO Bool +eqInstance value = withArray value $ \src -> return (src == src) + +{-# INLINE eqInstanceNotEq #-} +eqInstanceNotEq :: Int -> IO Bool +eqInstanceNotEq value = withArray value $ \src -> return (src P./= src) + +{-# INLINE ordInstance #-} +ordInstance :: Int -> IO Bool +ordInstance value = withArray value $ \src -> return (src P.< src) + +{-# INLINE ordInstanceMin #-} +ordInstanceMin :: Int -> IO (Arr Int) +ordInstanceMin value = withArray value $ \src -> return (P.min src src) + +{-# INLINE showInstance #-} +showInstance :: Arr Int -> P.String +showInstance = P.show + +{-# INLINE pureFoldl' #-} +pureFoldl' :: Int -> IO Int +pureFoldl' value = withArray value $ S.fold (Fold.foldl' (+) 0) . S.unfold A.reader + +------------------------------------------------------------------------------- +-- Elimination +------------------------------------------------------------------------------- + +{-# INLINE unfoldReadDrain #-} +unfoldReadDrain :: Int -> IO () +unfoldReadDrain value = withArray value $ S.fold Fold.drain . S.unfold A.reader + +{-# INLINE toStreamRevDrain #-} +toStreamRevDrain :: Int -> IO () +toStreamRevDrain value = withArray value $ S.fold Fold.drain . A.readRev + +{-# INLINE writeN #-} +writeN :: Int -> IO (Arr Int) +writeN value = withStream value (S.fold (A.createOf value)) + +------------------------------------------------------------------------------- +-- Bench groups +------------------------------------------------------------------------------- + +typeCommonBenchmarks :: Int -> [(SpaceComplexity, Benchmark)] +typeCommonBenchmarks size = + [ (SpaceO_1, benchIO "writeN . intFromTo" $ sourceIntFromTo size) + , (SpaceO_1, benchIO "fromList . intFromTo" $ sourceIntFromToFromList size) + , (SpaceO_1, benchIO "writeN . unfoldr" $ sourceUnfoldr size) + , (SpaceO_1, benchIO "writeN . fromList" $ sourceFromList size) + , (SpaceO_1, benchIO "show" $ showStream size) + + , (SpaceO_1, benchIO "id" $ idArr size) + , (SpaceO_1, benchIO "==" $ eqInstance size) + , (SpaceO_1, benchIO "/=" $ eqInstanceNotEq size) + , (SpaceO_1, benchIO "<" $ ordInstance size) + , (SpaceO_1, benchIO "min" $ ordInstanceMin size) + , (SpaceO_1, benchIO "foldl'" $ pureFoldl' size) + , (SpaceO_1, benchIO "unfoldRead" $ unfoldReadDrain size) + , (SpaceO_1, benchIO "toStreamRev" $ toStreamRevDrain size) + + , (HeapO_n, benchIO "writeN" $ writeN size) + ] diff --git a/benchmark/streamly-benchmarks.cabal b/benchmark/streamly-benchmarks.cabal index a37db81b88..2d4ac73b82 100644 --- a/benchmark/streamly-benchmarks.cabal +++ b/benchmark/streamly-benchmarks.cabal @@ -243,6 +243,7 @@ benchmark Data.Array other-modules: Stream.Common , Array.Stream + , Array.Type if flag(use-streamly-core) buildable: False else diff --git a/streamly.cabal b/streamly.cabal index 520a6749b8..0b3c759921 100644 --- a/streamly.cabal +++ b/streamly.cabal @@ -76,6 +76,8 @@ extra-source-files: benchmark/Streamly/Benchmark/Data/Array/Generic.hs benchmark/Streamly/Benchmark/Data/Array/SmallArray.hs benchmark/Streamly/Benchmark/Data/Array/Stream.hs + benchmark/Streamly/Benchmark/Data/Array/Type.hs + benchmark/Streamly/Benchmark/Data/Array/TypeCommon.hs benchmark/Streamly/Benchmark/Data/Fold/*.hs benchmark/Streamly/Benchmark/Data/MutArray.hs benchmark/Streamly/Benchmark/Data/MutByteArray/*.hs From a51aafac126f569faa0458be17492556b6ce947f Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Tue, 16 Jun 2026 23:36:21 +0530 Subject: [PATCH 17/24] Breakup MutArray module, create MutArray/Type.hs --- benchmark/Streamly/Benchmark/Data/MutArray.hs | 164 ++------------- .../Streamly/Benchmark/Data/MutArray/Type.hs | 187 ++++++++++++++++++ benchmark/streamly-benchmarks.cabal | 2 + streamly.cabal | 1 + 4 files changed, 205 insertions(+), 149 deletions(-) create mode 100644 benchmark/Streamly/Benchmark/Data/MutArray/Type.hs diff --git a/benchmark/Streamly/Benchmark/Data/MutArray.hs b/benchmark/Streamly/Benchmark/Data/MutArray.hs index 25ea401b6c..74bc940e3e 100644 --- a/benchmark/Streamly/Benchmark/Data/MutArray.hs +++ b/benchmark/Streamly/Benchmark/Data/MutArray.hs @@ -1,7 +1,5 @@ -{-# OPTIONS_GHC -Wno-orphans #-} - -- | --- Module : Streamly.Benchmark.Data.Array.Unboxed.Mut +-- Module : Streamly.Benchmark.Data.MutArray -- Copyright : (c) 2021 Composewell Technologies -- License : BSD-3-Clause -- Maintainer : streamly@composewell.com @@ -19,25 +17,20 @@ {-# OPTIONS_GHC -fplugin Test.Inspection.Plugin #-} #endif -import Control.DeepSeq (NFData(..)) import Control.Monad.IO.Class (MonadIO) #if __GLASGOW_HASKELL__ >= 810 import Data.Kind (Type) #endif -import System.Random (randomRIO) import Prelude ( IO , Int - , Integral(..) - , Eq(..) - , Maybe(..) , Monad(..) , Num(..) - , Ord(..) + , Eq(..) , String , ($) , (.) - , (||) + , (++) , filter , fmap , fst @@ -48,87 +41,19 @@ import Streamly.Internal.Data.MutArray (MutArray) import qualified Streamly.Internal.Data.Array as Array import qualified Streamly.Internal.Data.MutArray as MArray -import qualified Streamly.Internal.Data.Fold as Fold import qualified Streamly.Internal.Data.Scanl as Scanl import qualified Streamly.Internal.Data.Stream as Stream import Test.Tasty.Bench import Streamly.Benchmark.Common hiding (benchPureSrc) +import Streamly.Benchmark.Data.MutArray.Type + (typeCommonBenchmarks, benchIO, withArray, sourceUnfoldrM) #if __GLASGOW_HASKELL__ >= 810 type Stream :: Type -> Type #endif type Stream = MutArray -instance NFData (MutArray a) where - {-# INLINE rnf #-} - rnf _ = () - -------------------------------------------------------------------------------- --- Benchmark helpers -------------------------------------------------------------------------------- - -{-# INLINE withRandomIntIO #-} -withRandomIntIO :: (Int -> IO b) -> IO b -withRandomIntIO f = randomRIO (1, 1 :: Int) >>= f - -{-# INLINE benchIO #-} -benchIO :: NFData b => String -> IO b -> Benchmark -benchIO name = bench name . nfIO - -{-# INLINE withArray #-} -withArray :: Int -> (Stream Int -> IO b) -> IO b -withArray value f = sourceIntFromTo value >>= f - -{-# INLINE withStream #-} -withStream :: Int -> (Stream.Stream IO Int -> IO b) -> IO b -withStream value f = withRandomIntIO $ \n -> f $ sourceUnfoldrM value n - -drain :: Monad m => Stream.Stream m a -> m () -drain = Stream.fold Fold.drain - -------------------------------------------------------------------------------- --- Bench Ops -------------------------------------------------------------------------------- - -{-# INLINE sourceUnfoldr #-} -sourceUnfoldr :: Int -> IO (Stream Int) -sourceUnfoldr value = withRandomIntIO $ \n -> - let step cnt = - if cnt > n + value - then Nothing - else Just (cnt, cnt + 1) - in Stream.fold (MArray.createOf value) $ Stream.unfoldr step n - -{-# INLINE sourceIntFromTo #-} -sourceIntFromTo :: Int -> IO (Stream Int) -sourceIntFromTo value = withRandomIntIO $ \n -> - Stream.fold (MArray.createOf value) $ Stream.enumerateFromTo n (n + value) - -{-# INLINE sourceFromList #-} -sourceFromList :: Int -> IO (Stream Int) -sourceFromList value = withRandomIntIO $ \n -> - Stream.fold (MArray.createOf value) $ Stream.fromList [n .. n + value] - -{-# INLINE sourceIntFromToFromList #-} -sourceIntFromToFromList :: Int -> IO (Stream Int) -sourceIntFromToFromList value = withRandomIntIO $ \n -> - MArray.fromListN value [n..n + value] - -{-# INLINE sourceIntFromToFromStream #-} -sourceIntFromToFromStream :: Int -> IO (Stream Int) -sourceIntFromToFromStream value = withRandomIntIO $ \n -> - Stream.fold MArray.create $ Stream.enumerateFromTo n (n + value) - -{-# INLINE sourceUnfoldrM #-} -sourceUnfoldrM :: Monad m => Int -> Int -> Stream.Stream m Int -sourceUnfoldrM value n = Stream.unfoldrM step n - where - step cnt = - if cnt > n + value - then return Nothing - else return (Just (cnt, cnt + 1)) - ------------------------------------------------------------------------------- -- Transformation ------------------------------------------------------------------------------- @@ -176,38 +101,6 @@ map value = withArray value $ composeN 1 $ onArray value $ fmap (+ 1) mapX4 :: Int -> IO (Stream Int) mapX4 value = withArray value $ composeN 4 $ onArray value $ fmap (+ 1) -{-# INLINE idArr #-} -idArr :: Int -> IO (Stream Int) -idArr value = withArray value return - -------------------------------------------------------------------------------- --- Elimination -------------------------------------------------------------------------------- - -{-# INLINE unfoldReadDrain #-} -unfoldReadDrain :: Int -> IO () -unfoldReadDrain value = withArray value $ drain . Stream.unfold MArray.reader - -{-# INLINE unfoldReadRevDrain #-} -unfoldReadRevDrain :: Int -> IO () -unfoldReadRevDrain value = withArray value $ drain . Stream.unfold MArray.readerRev - -{-# INLINE toStreamDRevDrain #-} -toStreamDRevDrain :: Int -> IO () -toStreamDRevDrain value = withArray value $ drain . MArray.readRev - -{-# INLINE toStreamDDrain #-} -toStreamDDrain :: Int -> IO () -toStreamDDrain value = withArray value $ drain . MArray.read - -{-# INLINE unfoldFold #-} -unfoldFold :: Int -> IO Int -unfoldFold value = withArray value $ Stream.fold (Fold.foldl' (+) 0) . Stream.unfold MArray.reader - -{-# INLINE writeN #-} -writeN :: Int -> IO (Stream Int) -writeN value = withStream value (Stream.fold (MArray.createOf value)) - ------------------------------------------------------------------------------- -- Bench groups ------------------------------------------------------------------------------- @@ -221,43 +114,16 @@ moduleName = "Data.MutArray" benchmarks :: (MutArray Int, Array.Array Int) -> Int -> [(SpaceComplexity, Benchmark)] -benchmarks ~(array, indices) value = - [ (SpaceO_1, benchIO "partitionBy (< 0)" $ MArray.partitionBy (< 0) array) - , (SpaceO_1, benchIO "partitionBy (> 0)" $ MArray.partitionBy (> 0) array) - , (SpaceO_1, benchIO "partitionBy (< value/2)" $ - MArray.partitionBy (< (value `div` 2)) array) - , (SpaceO_1, benchIO "partitionBy (> value/2)" $ - MArray.partitionBy (> (value `div` 2)) array) - , (SpaceO_1, benchIO "strip (< value/2 || > value/2)" $ - MArray.dropAround (\x -> x < value `div` 2 || x > value `div` 2) array) - , (SpaceO_1, benchIO "strip (> 0)" $ MArray.dropAround (> 0) array) - , (SpaceO_1, benchIO "modifyIndices (+ 1)" $ - Stream.fold (MArray.modifyIndices array (\_idx val -> val + 1)) - $ Stream.unfold Array.reader indices) - - , (SpaceO_1, benchIO "createOf . intFromTo" $ sourceIntFromTo value) - , (SpaceO_1, benchIO "fromList . intFromTo" $ sourceIntFromToFromList value) - , (SpaceO_1, benchIO "createOf . unfoldr" $ sourceUnfoldr value) - , (SpaceO_1, benchIO "createOf . fromList" $ sourceFromList value) - , (SpaceO_1, benchIO "write . intFromTo" $ sourceIntFromToFromStream value) - - , (SpaceO_1, benchIO "id" $ idArr value) - , (SpaceO_1, benchIO "foldl'" $ unfoldFold value) - , (SpaceO_1, benchIO "read" $ unfoldReadDrain value) - , (SpaceO_1, benchIO "readRev" $ unfoldReadRevDrain value) - , (SpaceO_1, benchIO "toStream" $ toStreamDDrain value) - , (SpaceO_1, benchIO "toStreamRev" $ toStreamDRevDrain value) - - , (SpaceO_1, benchIO "scanl'" $ scanl' value) - , (SpaceO_1, benchIO "scanl1'" $ scanl1' value) - , (SpaceO_1, benchIO "map" $ map value) - - , (SpaceO_1, benchIO "scanl'X4" $ scanl'X4 value) - , (SpaceO_1, benchIO "scanl1'X4" $ scanl1'X4 value) - , (SpaceO_1, benchIO "mapX4" $ mapX4 value) - - , (HeapO_n, benchIO "createOf" $ writeN value) - ] +benchmarks env value = + typeCommonBenchmarks env value + ++ [ (SpaceO_1, benchIO "scanl'" $ scanl' value) + , (SpaceO_1, benchIO "scanl1'" $ scanl1' value) + , (SpaceO_1, benchIO "map" $ map value) + + , (SpaceO_1, benchIO "scanl'X4" $ scanl'X4 value) + , (SpaceO_1, benchIO "scanl1'X4" $ scanl1'X4 value) + , (SpaceO_1, benchIO "mapX4" $ mapX4 value) + ] main :: IO () main = do diff --git a/benchmark/Streamly/Benchmark/Data/MutArray/Type.hs b/benchmark/Streamly/Benchmark/Data/MutArray/Type.hs new file mode 100644 index 0000000000..7084bf103d --- /dev/null +++ b/benchmark/Streamly/Benchmark/Data/MutArray/Type.hs @@ -0,0 +1,187 @@ +{-# OPTIONS_GHC -Wno-orphans #-} + +-- | +-- Module : Streamly.Benchmark.Data.MutArray.Type +-- Copyright : (c) 2021 Composewell Technologies +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com + +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} + +#ifdef __HADDOCK_VERSION__ +#undef INSPECTION +#endif + +#ifdef INSPECTION +{-# LANGUAGE TemplateHaskell #-} +{-# OPTIONS_GHC -fplugin Test.Inspection.Plugin #-} +#endif + +module Streamly.Benchmark.Data.MutArray.Type + ( + typeCommonBenchmarks + , benchIO + , withArray + , sourceUnfoldrM + ) where + +import Control.DeepSeq (NFData(..)) +#if __GLASGOW_HASKELL__ >= 810 +import Data.Kind (Type) +#endif +import System.Random (randomRIO) +import Prelude + +import Streamly.Internal.Data.MutArray (MutArray) + +import qualified Streamly.Internal.Data.Array as Array +import qualified Streamly.Internal.Data.MutArray as MArray +import qualified Streamly.Internal.Data.Fold as Fold +import qualified Streamly.Internal.Data.Stream as Stream + +import Test.Tasty.Bench +import Streamly.Benchmark.Common hiding (benchPureSrc) + +#if __GLASGOW_HASKELL__ >= 810 +type Stream :: Type -> Type +#endif +type Stream = MutArray + +instance NFData (MutArray a) where + {-# INLINE rnf #-} + rnf _ = () + +------------------------------------------------------------------------------- +-- Benchmark helpers +------------------------------------------------------------------------------- + +{-# INLINE withRandomIntIO #-} +withRandomIntIO :: (Int -> IO b) -> IO b +withRandomIntIO f = randomRIO (1, 1 :: Int) >>= f + +{-# INLINE benchIO #-} +benchIO :: NFData b => String -> IO b -> Benchmark +benchIO name = bench name . nfIO + +{-# INLINE withArray #-} +withArray :: Int -> (Stream Int -> IO b) -> IO b +withArray value f = sourceIntFromTo value >>= f + +{-# INLINE withStream #-} +withStream :: Int -> (Stream.Stream IO Int -> IO b) -> IO b +withStream value f = withRandomIntIO $ \n -> f $ sourceUnfoldrM value n + +drain :: Monad m => Stream.Stream m a -> m () +drain = Stream.fold Fold.drain + +------------------------------------------------------------------------------- +-- Bench Ops +------------------------------------------------------------------------------- + +{-# INLINE sourceUnfoldr #-} +sourceUnfoldr :: Int -> IO (Stream Int) +sourceUnfoldr value = withRandomIntIO $ \n -> + let step cnt = + if cnt > n + value + then Nothing + else Just (cnt, cnt + 1) + in Stream.fold (MArray.createOf value) $ Stream.unfoldr step n + +{-# INLINE sourceIntFromTo #-} +sourceIntFromTo :: Int -> IO (Stream Int) +sourceIntFromTo value = withRandomIntIO $ \n -> + Stream.fold (MArray.createOf value) $ Stream.enumerateFromTo n (n + value) + +{-# INLINE sourceFromList #-} +sourceFromList :: Int -> IO (Stream Int) +sourceFromList value = withRandomIntIO $ \n -> + Stream.fold (MArray.createOf value) $ Stream.fromList [n .. n + value] + +{-# INLINE sourceIntFromToFromList #-} +sourceIntFromToFromList :: Int -> IO (Stream Int) +sourceIntFromToFromList value = withRandomIntIO $ \n -> + MArray.fromListN value [n..n + value] + +{-# INLINE sourceIntFromToFromStream #-} +sourceIntFromToFromStream :: Int -> IO (Stream Int) +sourceIntFromToFromStream value = withRandomIntIO $ \n -> + Stream.fold MArray.create $ Stream.enumerateFromTo n (n + value) + +{-# INLINE sourceUnfoldrM #-} +sourceUnfoldrM :: Monad m => Int -> Int -> Stream.Stream m Int +sourceUnfoldrM value n = Stream.unfoldrM step n + where + step cnt = + if cnt > n + value + then return Nothing + else return (Just (cnt, cnt + 1)) + +{-# INLINE idArr #-} +idArr :: Int -> IO (Stream Int) +idArr value = withArray value return + +------------------------------------------------------------------------------- +-- Elimination +------------------------------------------------------------------------------- + +{-# INLINE unfoldReadDrain #-} +unfoldReadDrain :: Int -> IO () +unfoldReadDrain value = withArray value $ drain . Stream.unfold MArray.reader + +{-# INLINE unfoldReadRevDrain #-} +unfoldReadRevDrain :: Int -> IO () +unfoldReadRevDrain value = withArray value $ drain . Stream.unfold MArray.readerRev + +{-# INLINE toStreamDRevDrain #-} +toStreamDRevDrain :: Int -> IO () +toStreamDRevDrain value = withArray value $ drain . MArray.readRev + +{-# INLINE toStreamDDrain #-} +toStreamDDrain :: Int -> IO () +toStreamDDrain value = withArray value $ drain . MArray.read + +{-# INLINE unfoldFold #-} +unfoldFold :: Int -> IO Int +unfoldFold value = withArray value $ Stream.fold (Fold.foldl' (+) 0) . Stream.unfold MArray.reader + +{-# INLINE writeN #-} +writeN :: Int -> IO (Stream Int) +writeN value = withStream value (Stream.fold (MArray.createOf value)) + +------------------------------------------------------------------------------- +-- Bench groups +------------------------------------------------------------------------------- + +typeCommonBenchmarks :: + (MutArray Int, Array.Array Int) -> Int -> [(SpaceComplexity, Benchmark)] +typeCommonBenchmarks ~(array, indices) value = + [ (SpaceO_1, benchIO "partitionBy (< 0)" $ MArray.partitionBy (< 0) array) + , (SpaceO_1, benchIO "partitionBy (> 0)" $ MArray.partitionBy (> 0) array) + , (SpaceO_1, benchIO "partitionBy (< value/2)" $ + MArray.partitionBy (< (value `div` 2)) array) + , (SpaceO_1, benchIO "partitionBy (> value/2)" $ + MArray.partitionBy (> (value `div` 2)) array) + , (SpaceO_1, benchIO "strip (< value/2 || > value/2)" $ + MArray.dropAround (\x -> x < value `div` 2 || x > value `div` 2) array) + , (SpaceO_1, benchIO "strip (> 0)" $ MArray.dropAround (> 0) array) + , (SpaceO_1, benchIO "modifyIndices (+ 1)" $ + Stream.fold (MArray.modifyIndices array (\_idx val -> val + 1)) + $ Stream.unfold Array.reader indices) + + , (SpaceO_1, benchIO "createOf . intFromTo" $ sourceIntFromTo value) + , (SpaceO_1, benchIO "fromList . intFromTo" $ sourceIntFromToFromList value) + , (SpaceO_1, benchIO "createOf . unfoldr" $ sourceUnfoldr value) + , (SpaceO_1, benchIO "createOf . fromList" $ sourceFromList value) + , (SpaceO_1, benchIO "write . intFromTo" $ sourceIntFromToFromStream value) + + , (SpaceO_1, benchIO "id" $ idArr value) + , (SpaceO_1, benchIO "foldl'" $ unfoldFold value) + , (SpaceO_1, benchIO "read" $ unfoldReadDrain value) + , (SpaceO_1, benchIO "readRev" $ unfoldReadRevDrain value) + , (SpaceO_1, benchIO "toStream" $ toStreamDDrain value) + , (SpaceO_1, benchIO "toStreamRev" $ toStreamDRevDrain value) + + , (HeapO_n, benchIO "createOf" $ writeN value) + ] diff --git a/benchmark/streamly-benchmarks.cabal b/benchmark/streamly-benchmarks.cabal index 2d4ac73b82..4b41936437 100644 --- a/benchmark/streamly-benchmarks.cabal +++ b/benchmark/streamly-benchmarks.cabal @@ -298,6 +298,8 @@ benchmark Data.MutArray import: bench-options type: exitcode-stdio-1.0 main-is: Streamly/Benchmark/Data/MutArray.hs + other-modules: + Streamly.Benchmark.Data.MutArray.Type if flag(limit-build-mem) && !flag(fusion-plugin) ghc-options: +RTS -M500M -RTS if flag(use-streamly-core) diff --git a/streamly.cabal b/streamly.cabal index 0b3c759921..b3812e4366 100644 --- a/streamly.cabal +++ b/streamly.cabal @@ -80,6 +80,7 @@ extra-source-files: benchmark/Streamly/Benchmark/Data/Array/TypeCommon.hs benchmark/Streamly/Benchmark/Data/Fold/*.hs benchmark/Streamly/Benchmark/Data/MutArray.hs + benchmark/Streamly/Benchmark/Data/MutArray/Type.hs benchmark/Streamly/Benchmark/Data/MutByteArray/*.hs benchmark/Streamly/Benchmark/Data/Parser/*.hs benchmark/Streamly/Benchmark/Data/RingArray.hs From 28263be9922326e3770bc53d8bac554dd85b599b Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Wed, 17 Jun 2026 00:17:25 +0530 Subject: [PATCH 18/24] Push common Array benchmarks to common files --- benchmark/Streamly/Benchmark/Data/Array.hs | 50 +++---------------- .../Streamly/Benchmark/Data/Array/Common.hs | 17 +++++++ .../Streamly/Benchmark/Data/Array/Generic.hs | 46 ++--------------- .../Benchmark/Data/Array/TypeCommon.hs | 23 ++++++++- 4 files changed, 52 insertions(+), 84 deletions(-) diff --git a/benchmark/Streamly/Benchmark/Data/Array.hs b/benchmark/Streamly/Benchmark/Data/Array.hs index c10739c08a..9badcddcda 100644 --- a/benchmark/Streamly/Benchmark/Data/Array.hs +++ b/benchmark/Streamly/Benchmark/Data/Array.hs @@ -9,7 +9,6 @@ import Data.Kind (Type) #endif -import qualified Streamly.Internal.Data.Array as IA import qualified GHC.Exts as GHC import qualified Array.Stream as ArrayStream @@ -31,20 +30,6 @@ type Arr = A.Array -- Bench Ops ------------------------------------------------------------------------------- -{-# INLINE parseInstance #-} -parseInstance :: P.String -> Arr Int -parseInstance str = - let r = P.reads str - in case r of - [(x,"")] -> x - _ -> P.error "parseInstance: no parse" - -{-# INLINE readInstance #-} -readInstance :: Int -> IO (Arr Int) -readInstance value = withRandomIntIO $ \n -> - let testStr = "fromList " ++ show [n..n+value] - in return $! parseInstance testStr - {-# INLINE sourceIsList #-} sourceIsList :: Int -> IO (Arr Int) sourceIsList value = withRandomIntIO $ \n -> return $! GHC.fromList [n..n+value] @@ -54,27 +39,10 @@ sourceIsString :: Int -> IO (Arr P.Char) sourceIsString value = withRandomIntIO $ \n -> return $! GHC.fromString (P.replicate (n + value) 'a') -{-# INLINE sourceIntFromToFromStream #-} -sourceIntFromToFromStream :: Int -> IO (Arr Int) -sourceIntFromToFromStream value = withRandomIntIO $ \n -> - S.fold A.create $ S.enumerateFromTo n (n + value) - {-# INLINE toListLength #-} toListLength :: Int -> IO Int toListLength value = withArray value $ \arr -> return $! length (GHC.toList arr) -{-# INLINE createOfLast1 #-} -createOfLast1 :: Int -> IO (Arr Int) -createOfLast1 value = withStream value (S.fold (IA.createOfLast 1)) - -{-# INLINE createOfLast10 #-} -createOfLast10 :: Int -> IO (Arr Int) -createOfLast10 value = withStream value (S.fold (IA.createOfLast 10)) - -{-# INLINE createOfLastMax #-} -createOfLastMax :: Int -> IO (Arr Int) -createOfLastMax value = withStream value (S.fold (IA.createOfLast (value + 1))) - ------------------------------------------------------------------------------- -- Bench groups ------------------------------------------------------------------------------- @@ -87,19 +55,17 @@ defStreamSize = defaultStreamSize benchmarks :: Int -> [(SpaceComplexity, Benchmark)] benchmarks size = - [ (SpaceO_1, benchIO "write . intFromTo" $ sourceIntFromToFromStream size) - , (SpaceO_1, benchIO "read" $ readInstance size) - , (SpaceO_1, benchIO "writeN . IsList.fromList" $ sourceIsList size) + typeCommonBenchmarks size + ++ commonBenchmarks size + ++ + -- Before adding any benchmarks here check if they can be added to + -- typeCommonBenchmarks (Array.Type source module common with + -- Array.Generic) or commonBenchmarks (Array module common with + -- Array.Generic) above. + [ (SpaceO_1, benchIO "writeN . IsList.fromList" $ sourceIsList size) , (SpaceO_1, benchIO "writeN . IsString.fromString" $ sourceIsString size) - , (SpaceO_1, benchIO "length . IsList.toList" $ toListLength size) - , (SpaceO_1, benchIO "createOfLast.1" $ createOfLast1 size) - , (SpaceO_1, benchIO "createOfLast.10" $ createOfLast10 size) - - , (HeapO_n, benchIO "createOfLast.Max" $ createOfLastMax size) ] - ++ typeCommonBenchmarks size - ++ commonBenchmarks size main :: IO () main = runWithCLIOptsEnv defStreamSize ArrayStream.alloc allBenchmarks diff --git a/benchmark/Streamly/Benchmark/Data/Array/Common.hs b/benchmark/Streamly/Benchmark/Data/Array/Common.hs index 027197160a..9b087dce94 100644 --- a/benchmark/Streamly/Benchmark/Data/Array/Common.hs +++ b/benchmark/Streamly/Benchmark/Data/Array/Common.hs @@ -44,6 +44,18 @@ map value = withArray value $ composeN 1 $ onArray value $ fmap (+1) mapX4 :: Int -> IO (Arr Int) mapX4 value = withArray value $ composeN 4 $ onArray value $ fmap (+1) +{-# INLINE createOfLast1 #-} +createOfLast1 :: Int -> IO (Arr Int) +createOfLast1 value = withStream value (S.fold (A.createOfLast 1)) + +{-# INLINE createOfLast10 #-} +createOfLast10 :: Int -> IO (Arr Int) +createOfLast10 value = withStream value (S.fold (A.createOfLast 10)) + +{-# INLINE createOfLastMax #-} +createOfLastMax :: Int -> IO (Arr Int) +createOfLastMax value = withStream value (S.fold (A.createOfLast (value + 1))) + ------------------------------------------------------------------------------- -- Bench groups ------------------------------------------------------------------------------- @@ -57,4 +69,9 @@ commonBenchmarks size = , (SpaceO_1, benchIO "scanl'X4" $ scanl'X4 size) , (SpaceO_1, benchIO "scanl1'X4" $ scanl1'X4 size) , (SpaceO_1, benchIO "mapX4" $ mapX4 size) + + , (SpaceO_1, benchIO "createOfLast.1" $ createOfLast1 size) + , (SpaceO_1, benchIO "createOfLast.10" $ createOfLast10 size) + + , (HeapO_n, benchIO "createOfLast.Max" $ createOfLastMax size) ] diff --git a/benchmark/Streamly/Benchmark/Data/Array/Generic.hs b/benchmark/Streamly/Benchmark/Data/Array/Generic.hs index 10403aac56..feeb5e1792 100644 --- a/benchmark/Streamly/Benchmark/Data/Array/Generic.hs +++ b/benchmark/Streamly/Benchmark/Data/Array/Generic.hs @@ -4,7 +4,6 @@ #include "Streamly/Benchmark/Data/Array/CommonImports.hs" -import qualified Streamly.Internal.Data.Array.Generic as IA import qualified Streamly.Internal.Data.Array.Generic as A type Arr = A.Array @@ -26,20 +25,6 @@ sourceIntFromToFromList :: Int -> IO (Arr Int) sourceIntFromToFromList value = withRandomIntIO $ \n -> P.return $ A.fromListN value [n..n + value] -{-# INLINE parseInstance #-} -parseInstance :: P.String -> Arr Int -parseInstance str = - let r = P.reads str - in case r of - [(x,"")] -> x - _ -> P.error "parseInstance: no parse" - -{-# INLINE readInstance #-} -readInstance :: Int -> IO (Arr Int) -readInstance value = withRandomIntIO $ \n -> - let testStr = "fromList " ++ show [n..n+value] - in return $! parseInstance testStr - #ifdef DEVBUILD {- {-# INLINE foldableFoldl' #-} @@ -52,23 +37,6 @@ foldableSum = P.sum -} #endif -{-# INLINE sourceIntFromToFromStream #-} -sourceIntFromToFromStream :: Int -> IO (Arr Int) -sourceIntFromToFromStream value = withRandomIntIO $ \n -> - S.fold A.create $ S.enumerateFromTo n (n + value) - -{-# INLINE createOfLast1 #-} -createOfLast1 :: Int -> IO (Arr Int) -createOfLast1 value = withStream value (S.fold (IA.createOfLast 1)) - -{-# INLINE createOfLast10 #-} -createOfLast10 :: Int -> IO (Arr Int) -createOfLast10 value = withStream value (S.fold (IA.createOfLast 10)) - -{-# INLINE createOfLastMax #-} -createOfLastMax :: Int -> IO (Arr Int) -createOfLastMax value = withStream value (S.fold (IA.createOfLast (value + 1))) - ------------------------------------------------------------------------------- -- Bench groups ------------------------------------------------------------------------------- @@ -81,16 +49,12 @@ defStreamSize = defaultStreamSize benchmarks :: Int -> [(SpaceComplexity, Benchmark)] benchmarks size = - [ (SpaceO_1, benchIO "write . intFromTo" $ sourceIntFromToFromStream size) - , (SpaceO_1, benchIO "read" $ readInstance size) - - , (SpaceO_1, benchIO "createOfLast.1" $ createOfLast1 size) - , (SpaceO_1, benchIO "createOfLast.10" $ createOfLast10 size) - - , (HeapO_n, benchIO "createOfLast.Max" $ createOfLastMax size) - ] - ++ typeCommonBenchmarks size + typeCommonBenchmarks size ++ commonBenchmarks size + -- Before adding any benchmarks here check if they can be added to + -- typeCommonBenchmarks (Array.Type source module common with + -- Array.Generic) or commonBenchmarks (Array module common with + -- Array.Generic) above. main :: IO () main = runWithCLIOpts defStreamSize allBenchmarks diff --git a/benchmark/Streamly/Benchmark/Data/Array/TypeCommon.hs b/benchmark/Streamly/Benchmark/Data/Array/TypeCommon.hs index 59ebf388ea..e23f88958d 100644 --- a/benchmark/Streamly/Benchmark/Data/Array/TypeCommon.hs +++ b/benchmark/Streamly/Benchmark/Data/Array/TypeCommon.hs @@ -41,6 +41,25 @@ sourceFromList :: Int -> IO (Arr Int) sourceFromList value = withRandomIntIO $ \n -> S.fold (A.createOf value) $ S.fromList [n..n+value] +{-# INLINE sourceIntFromToFromStream #-} +sourceIntFromToFromStream :: Int -> IO (Arr Int) +sourceIntFromToFromStream value = withRandomIntIO $ \n -> + S.fold A.create $ S.enumerateFromTo n (n + value) + +{-# INLINE parseInstance #-} +parseInstance :: P.String -> Arr Int +parseInstance str = + let r = P.reads str + in case r of + [(x,"")] -> x + _ -> P.error "parseInstance: no parse" + +{-# INLINE readInstance #-} +readInstance :: Int -> IO (Arr Int) +readInstance value = withRandomIntIO $ \n -> + let testStr = "fromList " ++ show [n..n+value] + in return $! parseInstance testStr + {-# INLINE showStream #-} showStream :: Int -> IO P.String @@ -96,11 +115,13 @@ writeN value = withStream value (S.fold (A.createOf value)) typeCommonBenchmarks :: Int -> [(SpaceComplexity, Benchmark)] typeCommonBenchmarks size = - [ (SpaceO_1, benchIO "writeN . intFromTo" $ sourceIntFromTo size) + [ (SpaceO_1, benchIO "write . intFromTo" $ sourceIntFromToFromStream size) + , (SpaceO_1, benchIO "writeN . intFromTo" $ sourceIntFromTo size) , (SpaceO_1, benchIO "fromList . intFromTo" $ sourceIntFromToFromList size) , (SpaceO_1, benchIO "writeN . unfoldr" $ sourceUnfoldr size) , (SpaceO_1, benchIO "writeN . fromList" $ sourceFromList size) , (SpaceO_1, benchIO "show" $ showStream size) + , (SpaceO_1, benchIO "read" $ readInstance size) , (SpaceO_1, benchIO "id" $ idArr size) , (SpaceO_1, benchIO "==" $ eqInstance size) From cf20cae387b7c8a5ba3646e5a4c3866ae7a44b03 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Wed, 17 Jun 2026 00:31:56 +0530 Subject: [PATCH 19/24] Make some updates to the benchmarking readme --- docs/Developer/Benchmarks.md | 48 +++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/docs/Developer/Benchmarks.md b/docs/Developer/Benchmarks.md index 26ac7e608c..ff7f5cf03f 100644 --- a/docs/Developer/Benchmarks.md +++ b/docs/Developer/Benchmarks.md @@ -3,12 +3,12 @@ ## Build and run benchmarks directly ``` -$ cabal run bench:Prelude.Serial # run selected -$ cabal run bench:Prelude.Serial -- --help # help on arguments -$ cabal run bench:Prelude.Serial -- --stdev 100000 # specify arguments -$ cabal run bench:Prelude.Serial --flag fusion-plugin # with fusion-plugin +$ cabal run bench:Data.Stream # run selected +$ cabal run bench:Data.Stream -- --help # help on arguments +$ cabal run bench:Data.Stream -- --stdev 100000 # specify arguments +$ cabal run bench:Data.Stream --flag fusion-plugin # with fusion-plugin -$ cabal build bench:Prelude.Serial # build selected +$ cabal build bench:Data.Stream # build selected $ cabal build --enable-benchmarks streamly-benchmarks # build all $ cabal build --enable-benchmarks all # build all, alternate method @@ -17,9 +17,13 @@ $ cabal build --flag "-opt" ... # disable optimization, faster build ## Building and Running Benchmarks with bench-runner -The executable `bench-runner` is the top level driver for -running benchmarks. It runs the requested benchmarks and then creates a -report from the results using the `bench-show` package. +`bench-runner` executable can be used to run benchmarks. Why use it? +* It runs one benchmark pre invocation of the benchmark executable, no + GC interference, more reliable results +* It specifies limits on heap, stack memory to verify the usage. +* It exposes groups of benchmarks to run +* It shows a summary report of all benchmark results +* It shows comparisons of results with previous runs IMPORTANT NOTE: The first time you run this executable it may take a long time because it has to build the `bench-report` executable which has a @@ -27,21 +31,21 @@ lot of dependencies. You can install it once in the root of the repository and use it multiple times. -You can use `cabal.project.report` to install bench-runner like so: -``` -$ cabal install bench-runner --project-file=cabal.project.report --installdir=./ --overwrite-policy=always -$ ./bench-runner -``` - If you're using nix, you can install bench-runner like so: ``` $ cd benchmark/bench-runner -$ nix-shell --run 'cabal install bench-runner --installdir=../../ --overwrite-policy=always' +$ nix develop -c 'cabal install bench-runner --installdir=../../bin --overwrite-policy=always' $ cd ../../ -$ ./bench-runner +$ bin/bench-runner +``` + +You can use `cabal.project.report` to install bench-runner like so: +``` +$ cabal install bench-runner --project-file=cabal.project.report --installdir=bin/ --overwrite-policy=always +$ bin/bench-runner ``` -You can run the `bench-runner` without installing, like so: +To run without installing: ``` $ cabal run bench-runner --project-file=cabal.project.report -- ``` @@ -69,11 +73,11 @@ $ ./bench-runner --targets serial_grp # Run all serial benchmark suites $ ./bench-runner --targets "Prelude.Serial Data.Parser" # run selected suites $ ./bench-runner --no-measure # don't run benchmarks just show previous results -# Run all O(1) space complexity benchmarks in `Prelude.Serial` suite -$ ./bench-runner --targets Prelude.Serial --prefix Prelude.Serial/o-1-space +# Run all O(1) space complexity benchmarks in `Data.Stream` suite +$ ./bench-runner --targets Data.Stream --prefix Data.Stream/o-1-space -# Run a specific benchmark in `Prelude.Serial` suite -$ ./bench-runner --targets Prelude.Serial --prefix Prelude.Serial/o-1-space.generation.unfoldr +# Run a specific benchmark in `Data.Stream` suite +$ ./bench-runner --targets Data.Stream --prefix Data.Stream/o-1-space.generation.unfoldr ``` Note: `bench-runner` enables fusion-plugin by default. @@ -133,7 +137,7 @@ You can specify the stream size (default is 100000) to be used for benchmarking: ``` -$ cabal run bench:Prelude.Serial -- --stream-size 1000000 +$ cabal run bench:Data.Stream -- --stream-size 1000000 ``` ### External input file From 45383e8554a0ede5a72c8e15af1f384901116fc1 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Wed, 17 Jun 2026 00:40:04 +0530 Subject: [PATCH 20/24] Fix warnings in Array benchmarks --- benchmark/Streamly/Benchmark/Data/Array/CommonImports.hs | 4 ---- benchmark/Streamly/Benchmark/Data/Array/Generic.hs | 6 ++++++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/benchmark/Streamly/Benchmark/Data/Array/CommonImports.hs b/benchmark/Streamly/Benchmark/Data/Array/CommonImports.hs index 4ce1a6eef0..520149b37a 100644 --- a/benchmark/Streamly/Benchmark/Data/Array/CommonImports.hs +++ b/benchmark/Streamly/Benchmark/Data/Array/CommonImports.hs @@ -1,16 +1,12 @@ {-# LANGUAGE FlexibleContexts #-} -import Control.DeepSeq (NFData(..)) import Control.Monad.IO.Class (MonadIO) -import System.Random (randomRIO) -import qualified Streamly.Data.Fold as Fold import qualified Streamly.Data.Scanl as Scanl import qualified Streamly.Data.Stream as S import qualified Streamly.Internal.Data.Stream as Stream import Test.Tasty.Bench import Streamly.Benchmark.Common hiding (benchPureSrc) -import qualified Stream.Common as P import Prelude as P hiding (map) diff --git a/benchmark/Streamly/Benchmark/Data/Array/Generic.hs b/benchmark/Streamly/Benchmark/Data/Array/Generic.hs index feeb5e1792..d8aff492f2 100644 --- a/benchmark/Streamly/Benchmark/Data/Array/Generic.hs +++ b/benchmark/Streamly/Benchmark/Data/Array/Generic.hs @@ -4,6 +4,12 @@ #include "Streamly/Benchmark/Data/Array/CommonImports.hs" +import Control.DeepSeq (NFData(..)) +import System.Random (randomRIO) + +import qualified Streamly.Data.Fold as Fold +import qualified Stream.Common as P + import qualified Streamly.Internal.Data.Array.Generic as A type Arr = A.Array From 108499da1f2fd23af138a15d0f8b7bb40dfa6003 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Tue, 16 Jun 2026 02:55:51 +0530 Subject: [PATCH 21/24] Sync CI benchmark list with current benchmarks Add CrossModule benchmark to CI regression --- .github/workflows/regression-check.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/regression-check.yml b/.github/workflows/regression-check.yml index 051acac986..110ae88913 100644 --- a/.github/workflows/regression-check.yml +++ b/.github/workflows/regression-check.yml @@ -17,7 +17,6 @@ on: #---------------------------------------------------------------------- #-- Benchmarks listed in alphabetical order #---------------------------------------------------------------------- -# Removed Data.Fold.Prelude - was failing for some reason (memory issue?) jobs: build: name: "Comparison: master vs PR (ubuntu-latest ghc-9.14.1)" @@ -27,8 +26,8 @@ jobs: CI_BENCHMARKS_WITH_CUTOFF: >- Data.Array Data.Array.Generic - Data.Array.Stream Data.Fold + Data.Fold.Prelude Data.MutArray Data.MutByteArray.DeriveSerialize Data.MutByteArray.DeriveUnboxGeneric @@ -37,7 +36,9 @@ jobs: Data.ParserK Data.ParserK.Chunked Data.ParserK.Chunked.Generic + Data.Pipe Data.RingArray + Data.Scan Data.Scanl Data.Scanl.Concurrent Data.Stream @@ -49,6 +50,7 @@ jobs: Data.StreamK:6 Data.Unfold Data.Unfold.Prelude + CrossModule FileSystem.DirIO FileSystem.Handle Unicode.Parser From d3820b9a01fad49c3f4d56240f8d8bdbbbc673e6 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Wed, 17 Jun 2026 00:59:33 +0530 Subject: [PATCH 22/24] Sync Targets with current state of benchmarks --- targets/Targets.hs | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/targets/Targets.hs b/targets/Targets.hs index da817730cc..8bcb875b71 100644 --- a/targets/Targets.hs +++ b/targets/Targets.hs @@ -28,11 +28,6 @@ targets = , "array_cmp" ] ) - , ("Data.Array.Stream", - [ "infinite_grp" - , "serial_stream_grp" - ] - ) , ("Data.Binary", [ "noBench" ] @@ -43,6 +38,11 @@ targets = , "streamly_core_grp" ] ) + , ("Data.Fold.Prelude", + [ "fold_parser_grp" + , "noTest" + ] + ) , ("Data.List", [ "list_grp" @@ -90,11 +90,23 @@ targets = , "noTest" ] ) + , ("Data.Pipe", + [ "streamly_core_grp" + , "noTest" + ] + ) , ("Data.RingArray", [ "array_grp" , "streamly_core_grp" ] ) + , ("Data.Scan", + [ "infinite_grp" + , "fold_parser_grp" + , "streamly_core_grp" + , "noTest" + ] + ) , ("Data.Scanl", [ "infinite_grp" , "fold_parser_grp" @@ -177,6 +189,11 @@ targets = , "noTest" ] ) + , ("Data.Stream.Prelude", + [ "infinite_grp" + , "concurrent_stream_grp" + ] + ) , ("Data.Stream.Rate", [ "infinite_grp" , "concurrent_stream_grp" From c63d0b45759e34c44918387aa1ed074669ff2f9b Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Wed, 17 Jun 2026 01:26:08 +0530 Subject: [PATCH 23/24] Make the names of Array Stream benchmarks more intuitive --- benchmark/Streamly/Benchmark/Data/Array/Stream.hs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmark/Streamly/Benchmark/Data/Array/Stream.hs b/benchmark/Streamly/Benchmark/Data/Array/Stream.hs index bf5e5b82b0..53c7732932 100644 --- a/benchmark/Streamly/Benchmark/Data/Array/Stream.hs +++ b/benchmark/Streamly/Benchmark/Data/Array/Stream.hs @@ -125,18 +125,18 @@ benchmarks :: Arrays -> Int -> [(SpaceComplexity, Benchmark)] benchmarks arrays value = let (arraysSmall, arraysBig) = arrays in - [ (SpaceO_1, benchIO "fold (of 100)" (\_ -> Stream.fromList arraysSmall) fold) - , (SpaceO_1, benchIO "fold (single)" (\_ -> Stream.fromList arraysBig) fold) + [ (SpaceO_1, benchIO "foldBreak drain (100-elem arrays)" (\_ -> Stream.fromList arraysSmall) fold) + , (SpaceO_1, benchIO "foldBreak drain (one large array)" (\_ -> Stream.fromList arraysBig) fold) , (SpaceO_1, benchIO - "foldBreak (recursive, small arrays)" + "foldBreak (recursive, one at a time, 100-elem arrays)" (\_ -> Stream.fromList arraysSmall) (foldBreak . StreamK.fromStream)) - , (SpaceO_1, benchIO "parse (of 100)" (\_ -> Stream.fromList arraysSmall) + , (SpaceO_1, benchIO "parseBreak drain (100-elem arrays)" (\_ -> Stream.fromList arraysSmall) $ parse value) - , (SpaceO_1, benchIO "parse (single)" (\_ -> Stream.fromList arraysBig) + , (SpaceO_1, benchIO "parseBreak drain (one large array)" (\_ -> Stream.fromList arraysBig) $ parse value) , (SpaceO_1, benchIO - "parseBreak (recursive, small arrays)" + "parseBreak (recursive, one at a time, 100-elem arrays)" (\_ -> Stream.fromList arraysSmall) (parseBreak . StreamK.fromStream)) ] From 4118616890b938f9ccf5ac01fdb7f7bbede79605 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Wed, 17 Jun 2026 01:26:30 +0530 Subject: [PATCH 24/24] Fix memory requirements of benchmarks --- benchmark/bench-runner/Main.hs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmark/bench-runner/Main.hs b/benchmark/bench-runner/Main.hs index 2d06ec3b44..cf1892d59a 100644 --- a/benchmark/bench-runner/Main.hs +++ b/benchmark/bench-runner/Main.hs @@ -37,20 +37,20 @@ rtsOpts exeName benchName0 = unwords [general, exeSpecific, benchSpecific] -- XXX GHC 9.6 onwards needs 64M, earlier it was 32M | "Data.Array" `isPrefixOf` benchName && "/o-1-space.show" `isSuffixOf` benchName = "-M64M" + -- XXX For --long option, need to check why so much heap is required. + | "Data.Array/o-1-space.foldBreak" + `isPrefixOf` benchName = "-K4M -M512M" + | "Data.Array/o-1-space.parseBreak" + `isPrefixOf` benchName = "-K4M -M512M" -- XXX GHC 9.6 onwards needs 64M, earlier it was 32M | "Data.Array.Generic/o-1-space.mapX4" `isPrefixOf` benchName = "-M64M" - -- XXX For --long option, need to check why so much heap is required. - -- Note, if we remove the chunked stream module we need to keep the - -- chunked stream benchmarks in the stream module. - | "Data.Array.Stream/o-1-space" - `isPrefixOf` benchName = "-K4M -M512M" ---------------------------------------------------------------------- -- GHC-9.6 requires 64M, earlier it was 16M - | "Data.Fold/o-n-heap.toHashMapIO (max buckets) sum" + | "Data.Fold.Prelude/o-n-heap.toHashMapIO (max buckets) sum" == benchName = "-M64M" ----------------------------------------------------------------------