From 301f13fa516642c3aba52abc96544310b5655343 Mon Sep 17 00:00:00 2001 From: Kristian Larsson Date: Sun, 24 Sep 2023 14:22:03 +0200 Subject: [PATCH] Use flat for .ty serialization This switches the serialization and deserialization format for our .ty files to use the flat library. Flat is one of the fastest serialization formats out there and it does indeed provide a speedup for Acton as well. Unfortunately, I started this work based on a vague memory I had of having seen that reading the .ty files took up a significant portion of total compilation time, in particular when a lot of files are just cached / up to date... and so I set out to speed it up. After having implemented flat, I see that I misremembered. Reading .ty files is actually very fast, the largest one I have takes 4ms. Using flat we cut that in half, down to 2ms. If I turn off zlib compression we can get it down to 1ms but at the expense of quite a lot larger files (like 4-5x). The files are about 30% larger with flat than before, but since we're on KB or tens of KB level here, that's not much of a deal. A 100% increase in speed is worth slightly larger files on disk. If we want ever more sp33d, we can consider removing zlib. --- compiler/Acton/Syntax.hs | 27 ++++++++++++++------------- compiler/InterfaceFiles.hs | 34 ++++++++++++++++++++++------------ compiler/Utils.hs | 3 ++- compiler/package.yaml.in | 3 ++- 4 files changed, 40 insertions(+), 27 deletions(-) diff --git a/compiler/Acton/Syntax.hs b/compiler/Acton/Syntax.hs index 5db04c680..68e7cadb6 100644 --- a/compiler/Acton/Syntax.hs +++ b/compiler/Acton/Syntax.hs @@ -14,6 +14,7 @@ {-# LANGUAGE MultiParamTypeClasses, FlexibleInstances, FlexibleContexts, DeriveGeneric, DeriveAnyClass #-} module Acton.Syntax where +import Flat import Utils import qualified Data.Binary import qualified Data.Set @@ -116,9 +117,9 @@ data Pattern = PWild { ploc::SrcLoc, pann::Maybe Type } type Target = Expr data Prefix = Globvar | Kindvar | Xistvar | Typevar | Tempvar | Witness | NormPass | CPSPass | LLiftPass - deriving (Eq,Ord,Show,Read,Generic,NFData) + deriving (Eq,Ord,Show,Read,Generic,NFData,Flat) -data Name = Name SrcLoc String | Derived Name Name | Internal Prefix String Int deriving (Generic,Show,NFData) +data Name = Name SrcLoc String | Derived Name Name | Internal Prefix String Int deriving (Generic,Show,NFData,Flat) nloc (Name l _) = l nloc _ = NoLoc @@ -154,7 +155,7 @@ globalName s = Internal Globvar s 0 globalNames s = map (Internal Globvar s) [1..] -data ModName = ModName [Name] deriving (Show,Read,Eq,Generic,NFData) +data ModName = ModName [Name] deriving (Show,Read,Eq,Generic,NFData,Flat) modName ss = ModName (map name ss) @@ -165,7 +166,7 @@ modCat (ModName ns) n = ModName (ns++[n]) instance Ord ModName where compare a b = compare (modPath a) (modPath b) -data QName = QName { mname::ModName, noq::Name } | NoQ { noq::Name } | GName { mname::ModName, noq::Name } deriving (Show,Read,Eq,Ord,Generic,NFData) +data QName = QName { mname::ModName, noq::Name } | NoQ { noq::Name } | GName { mname::ModName, noq::Name } deriving (Show,Read,Eq,Ord,Generic,NFData,Flat) qName ss s = QName (modName ss) (name s) @@ -206,19 +207,19 @@ data Binary = Or|And|Plus|Minus|Mult|Pow|Div|Mod|EuDiv|BOr|BXor|BAnd|ShiftL| data Aug = PlusA|MinusA|MultA|PowA|DivA|ModA|EuDivA|BOrA|BXorA|BAndA|ShiftLA|ShiftRA|MMultA deriving (Show,Eq) data Comparison = Eq|NEq|LtGt|Lt|Gt|GE|LE|In|NotIn|Is|IsNot deriving (Show,Eq) -data Deco = NoDec | Property | Static deriving (Eq,Show,Read,Generic,NFData) +data Deco = NoDec | Property | Static deriving (Eq,Show,Read,Generic,NFData,Flat) -data Kind = KType | KProto | KFX | PRow | KRow | KFun [Kind] Kind | KVar Name | KWild deriving (Eq,Ord,Show,Read,Generic,NFData) +data Kind = KType | KProto | KFX | PRow | KRow | KFun [Kind] Kind | KVar Name | KWild deriving (Eq,Ord,Show,Read,Generic,NFData,Flat) -data TSchema = TSchema { scloc::SrcLoc, scbind::QBinds, sctype::Type } deriving (Show,Read,Generic,NFData) +data TSchema = TSchema { scloc::SrcLoc, scbind::QBinds, sctype::Type } deriving (Show,Read,Generic,NFData,Flat) -data TVar = TV { tvkind::Kind, tvname::Name } deriving (Show,Read,Generic,NFData) -- the Name is an uppercase letter, optionally followed by digits. +data TVar = TV { tvkind::Kind, tvname::Name } deriving (Show,Read,Generic,NFData,Flat) -- the Name is an uppercase letter, optionally followed by digits. -data TCon = TC { tcname::QName, tcargs::[Type] } deriving (Eq,Show,Read,Generic,NFData) +data TCon = TC { tcname::QName, tcargs::[Type] } deriving (Eq,Show,Read,Generic,NFData,Flat) -data FX = FXPure | FXMut | FXProc | FXAction deriving (Eq,Show,Read,Generic,NFData) +data FX = FXPure | FXMut | FXProc | FXAction deriving (Eq,Show,Read,Generic,NFData,Flat) -data QBind = Quant TVar [TCon] deriving (Eq,Show,Read,Generic,NFData) +data QBind = Quant TVar [TCon] deriving (Eq,Show,Read,Generic,NFData,Flat) type QBinds = [QBind] @@ -237,7 +238,7 @@ data Type = TVar { tloc::SrcLoc, tvar::TVar } | TNil { tloc::SrcLoc, rkind::Kind } | TRow { tloc::SrcLoc, rkind::Kind, label::Name, rtype::Type, rtail::TRow } | TFX { tloc::SrcLoc, tfx::FX } - deriving (Show,Read,Generic,NFData) + deriving (Show,Read,Generic,NFData,Flat) type TFX = Type type PosRow = Type @@ -431,7 +432,7 @@ data NameInfo = NVar Type | NMAlias ModName | NModule TEnv | NReserved - deriving (Eq,Show,Read,Generic) + deriving (Eq,Show,Read,Generic,Flat) data Witness = WClass { binds::QBinds, wtype::Type, proto::PCon, wname::QName, wsteps::WPath } | WInst { binds::QBinds, wtype::Type, proto::PCon, wname::QName, wsteps::WPath } diff --git a/compiler/InterfaceFiles.hs b/compiler/InterfaceFiles.hs index 406661375..4863078a2 100644 --- a/compiler/InterfaceFiles.hs +++ b/compiler/InterfaceFiles.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE ScopedTypeVariables #-} -- Copyright (C) 2019-2021 Data Ductus AB -- -- Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -13,24 +14,33 @@ module InterfaceFiles where -import Data.Binary -import qualified Data.ByteString.Lazy +import Flat +import Flat.Decoder +import qualified Data.ByteString.Lazy as BL import Codec.Compression.Zlib import qualified System.Exit import qualified Acton.Syntax import System.IO writeFile :: FilePath -> [Acton.Syntax.ModName] -> Acton.Syntax.TEnv -> IO () -writeFile f ms a = do h <- openFile f WriteMode - Data.ByteString.Lazy.hPut h (compress (encode (Acton.Syntax.version, (ms,a)))) - hClose h +writeFile f ms a = do + h <- openFile f WriteMode + BL.hPut h (compress $ BL.fromStrict $ flat (Acton.Syntax.version, (ms, a))) + hClose h readFile :: FilePath -> IO ([Acton.Syntax.ModName], Acton.Syntax.TEnv) readFile f = do - h <- openFile f ReadMode - bs <- Data.ByteString.Lazy.hGetContents h - let (vs,a) = decode (decompress bs) - if vs == Acton.Syntax.version then do hClose h - return a - else do putStrLn ("Interface file has version "++show vs++"; current version is "++show Acton.Syntax.version) - System.Exit.exitFailure \ No newline at end of file + h <- openFile f ReadMode + bs <- BL.hGetContents h + let decoded = unflat $ BL.toStrict $ decompress bs + case decoded of + Left (e :: DecodeException) -> do + putStrLn "Error during deserialization" + System.Exit.exitFailure + Right (vs, a) -> + if vs == Acton.Syntax.version then do + hClose h + return a + else do + putStrLn $ "Interface file has version " ++ show vs ++ "; current version is " ++ show Acton.Syntax.version + System.Exit.exitFailure diff --git a/compiler/Utils.hs b/compiler/Utils.hs index a5540855c..09fd8e6f0 100644 --- a/compiler/Utils.hs +++ b/compiler/Utils.hs @@ -14,6 +14,7 @@ {-# LANGUAGE DeriveDataTypeable, DeriveGeneric, DeriveAnyClass #-} module Utils(module Utils, module SrcLocation, module Data.List, module Data.Maybe, module Debug.Trace) where +import Flat import Debug.Trace import Data.List hiding ((\\)) import Data.Maybe @@ -26,7 +27,7 @@ import Pretty import Control.DeepSeq import Prelude hiding((<>)) -data SrcLoc = Loc Int Int | NoLoc deriving (Eq,Ord,Show,Read,Generic,NFData) +data SrcLoc = Loc Int Int | NoLoc deriving (Eq,Ord,Show,Read,Generic,NFData,Flat) instance Data.Binary.Binary SrcLoc diff --git a/compiler/package.yaml.in b/compiler/package.yaml.in index c7b239243..566fccb16 100644 --- a/compiler/package.yaml.in +++ b/compiler/package.yaml.in @@ -13,7 +13,7 @@ copyright: "2018 Author name here" # To avoid duplicated efforts in documentation and dealing with the # complications of embedding Haddock markup inside cabal files, it is # common to point users to the README.md file. -description: Please see the README on Github at +description: Please see the README on Github at executables: actonc: @@ -35,6 +35,7 @@ executables: - directory >= 1.3.1 - filelock - filepath + - flat - hashable - megaparsec >= 9 - mtl