intmap-based priority queue change

speedup is significant. Now does >5MB RAW images with ease
This commit is contained in:
Jack Wines 2024-04-14 23:26:33 -07:00
parent ab7880216e
commit b8cb909fc9
No known key found for this signature in database
GPG key ID: 25B20640600571E6
5 changed files with 63 additions and 66 deletions

1
.gitignore vendored
View file

@ -2,3 +2,4 @@
/result /result
/.direnv/ /.direnv/
**/.DS_Store **/.DS_Store
/cabal.project.local

View file

@ -50,4 +50,8 @@ executable compress
default-language: default-language:
GHC2021 GHC2021
other-modules: other-modules:
Data.SortedList Data.PQueue
ghc-options:
-fprof-auto
-fprof-late
"-with-rtsopts=-p -hc"

26
src/Data/PQueue.hs Normal file
View file

@ -0,0 +1,26 @@
module Data.PQueue where
import Data.Bifunctor qualified as Bi
import Data.IntMap.Strict qualified as IM
import Data.List qualified as L
import Data.List.NonEmpty qualified as NE
newtype PQueue a = PQueue
{ toMap :: IM.IntMap (NE.NonEmpty a)
}
deriving (Functor, Show)
fromList :: [(Int, a)] -> PQueue a
fromList = PQueue . IM.fromListWith NE.append . map (Bi.second NE.singleton)
singleton :: Int -> a -> PQueue a
singleton key a = PQueue $ IM.singleton key [a]
minView :: PQueue a -> Maybe (Int, (a, PQueue a))
minView (PQueue m) = case IM.lookupMin m of
Nothing -> Nothing
(Just (key, x NE.:| (x' : xs))) -> Just (key, (x, PQueue $ IM.insert key (x' NE.:| xs) m))
(Just (key, x NE.:| [])) -> Just (key, (x, PQueue $ IM.delete key m))
insert :: Int -> p -> PQueue p -> PQueue p
insert key a = PQueue . IM.insertWith NE.append key (NE.singleton a) . toMap

View file

@ -1,27 +0,0 @@
module Data.SortedList where
import qualified Data.List as L
data SortedList a = Ord a => SortedList {
toList :: [a]
}
deriving instance Show a => Show (SortedList a)
map :: Ord a1 => ([a2] -> [a1]) -> SortedList a2 -> SortedList a1
map f (SortedList a) = SortedList . f $ a
fromList :: Ord a => [a] -> SortedList a
fromList = SortedList . L.sort
singleton :: Ord a => a -> SortedList a
singleton a = SortedList [a]
minView :: SortedList a -> Maybe (a, SortedList a)
minView (SortedList (x:xs)) = Just (x, SortedList xs)
minView (SortedList _) = Nothing
insert :: p -> SortedList p -> SortedList p
insert x (SortedList []) = singleton x
insert x (SortedList (x' : xs))
| x <= x' = SortedList (x : x' : xs)
| otherwise = Data.SortedList.map (x' :) . insert x . SortedList $ xs

View file

@ -7,13 +7,14 @@ import Data.Bits (Bits ((.|.)))
import Data.Bits qualified as B import Data.Bits qualified as B
import Data.ByteString (fromFilePath) import Data.ByteString (fromFilePath)
import Data.ByteString qualified as BS import Data.ByteString qualified as BS
import Data.Data qualified as D
import Data.Foldable qualified as F import Data.Foldable qualified as F
import Data.IntMap.Strict qualified as IM import Data.IntMap.Strict qualified as IM
import Data.Map.Strict qualified as M import Data.Map.Strict qualified as M
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import Data.Maybe qualified as My import Data.Maybe qualified as My
import Data.PQueue qualified as PQ
import Data.Serialize qualified as C import Data.Serialize qualified as C
import Data.SortedList qualified as SL
import Data.Text qualified as T import Data.Text qualified as T
import Data.Text.IO qualified as TIO import Data.Text.IO qualified as TIO
import Data.Vector.Unboxed qualified as V import Data.Vector.Unboxed qualified as V
@ -29,12 +30,6 @@ data HuffmanTree a
} }
deriving (Eq, Ord, Show, Generic, C.Serialize) deriving (Eq, Ord, Show, Generic, C.Serialize)
data SizedHuffmanTree a = SizedHuffmanTree
{ size :: Int
, tree :: HuffmanTree a
}
deriving (Eq, Ord, Show)
data TreeDir = L | R deriving (Eq, Ord, Show, Generic, C.Serialize) data TreeDir = L | R deriving (Eq, Ord, Show, Generic, C.Serialize)
findTreeDirections :: forall a. (Ord a) => HuffmanTree a -> M.Map a [TreeDir] findTreeDirections :: forall a. (Ord a) => HuffmanTree a -> M.Map a [TreeDir]
@ -44,48 +39,34 @@ findTreeDirections (Node{..}) = M.union (rec' L left) (rec' R right)
rec' :: TreeDir -> HuffmanTree a -> M.Map a [TreeDir] rec' :: TreeDir -> HuffmanTree a -> M.Map a [TreeDir]
rec' dir = M.map (dir :) . findTreeDirections rec' dir = M.map (dir :) . findTreeDirections
mergeSizedTrees tree0 tree1 =
SizedHuffmanTree
{ size = (size tree0) + (size tree1)
, tree = Node (tree tree0) (tree tree1)
}
-- compress :: BS.ByteString -> Maybe (SizedHuffmanTree BS.ByteString)
compress :: forall a. (Ord a, Integral a, B.FiniteBits a) => BS.ByteString -> Maybe ([TreeDir], HuffmanTree a) compress :: forall a. (Ord a, Integral a, B.FiniteBits a) => BS.ByteString -> Maybe ([TreeDir], HuffmanTree a)
compress bs = compress bs =
liftA2 (,) treeDirections unsizedTree liftA2 (,) treeDirections mergedHuffmanTrees
where where
unsizedTree :: Maybe (HuffmanTree a)
unsizedTree = tree <$> mergedHuffmanTrees
treeDirections = concat <$> mapM (treeDirMap M.!?) dividedByteString treeDirections = concat <$> mapM (treeDirMap M.!?) dividedByteString
mergedHuffmanTrees = mergedHuffmanTrees =
mergeHuffmanTrees mergeHuffmanTrees
. SL.fromList . PQ.fromList
. map (uncurry (flip SizedHuffmanTree) . Bi.first Leaf) . map (uncurry (flip (,)) . Bi.first Leaf)
. counts . counts
$ dividedByteString $ dividedByteString
treeDirMap :: M.Map a [TreeDir] treeDirMap :: M.Map a [TreeDir]
treeDirMap = My.maybe M.empty (findTreeDirections . tree) mergedHuffmanTrees treeDirMap = My.maybe M.empty findTreeDirections mergedHuffmanTrees
dividedByteString = toBitsList bs dividedByteString = toBitsList bs
encodeCompressed :: C.Serialize a => Maybe ([TreeDir], HuffmanTree a) -> BS.ByteString encodeCompressed :: (C.Serialize a) => Maybe ([TreeDir], HuffmanTree a) -> BS.ByteString
encodeCompressed = C.encode . fmap (Bi.first encodeTreeDirs) encodeCompressed = C.encode . fmap (Bi.first encodeTreeDirs)
where where
encodeTreeDirs = BV.cloneToByteString . V.fromList . map (BV.Bit . (== R)) encodeTreeDirs = BV.cloneToByteString . V.fromList . map (BV.Bit . (== R))
mergeHuffmanTrees :: SL.SortedList (SizedHuffmanTree a) -> Maybe (SizedHuffmanTree a) mergeHuffmanTrees :: PQ.PQueue (HuffmanTree a) -> Maybe (HuffmanTree a)
mergeHuffmanTrees (SL.SortedList (tree0 : tree1 : xs)) = mergeHuffmanTrees queue = case (Bi.second . Bi.second $ PQ.minView) <$> PQ.minView queue of
mergeHuffmanTrees Nothing -> Nothing
. SL.insert (mergeSizedTrees tree0 tree1) Just (size, (x, Nothing)) -> Just x
. SL.SortedList Just (size, (x, Just (size1, (x', rest)))) -> mergeHuffmanTrees $ PQ.insert (size + size1) (Node x x') rest
$ xs
mergeHuffmanTrees (SL.SortedList [x]) = Just x
mergeHuffmanTrees (SL.SortedList []) = Nothing
counts :: (Ord a) => [a] -> [(a, Int)] counts :: (Ord a) => [a] -> [(a, Int)]
counts = M.toList . F.foldl' combiningInsert M.empty counts = M.toList . F.foldl' combiningInsert M.empty
@ -121,14 +102,26 @@ finiteBitUncons bs =
rest :: BS.ByteString rest :: BS.ByteString
(takenBytes, rest) = BS.splitAt numBytes bs (takenBytes, rest) = BS.splitAt numBytes bs
compressionRatioFor ::
forall a.
(Integral a, B.FiniteBits a, Ord a, C.Serialize a) =>
D.Proxy a ->
BS.ByteString ->
Double
compressionRatioFor proxy bs =
(/ (fromIntegral . BS.length $ bs))
. fromIntegral
. BS.length
. encodeCompressed
. (compress :: BS.ByteString -> Maybe ([TreeDir], HuffmanTree a))
$ bs
main :: IO () main :: IO ()
main = do main = do
[filePath] <- SE.getArgs [filePath] <- SE.getArgs
f <- BS.readFile filePath f <- BS.readFile filePath
TIO.putStrLn "original:" TIO.putStrLn "compression ratios:"
print . BS.length $ f print . ("Word64",) . compressionRatioFor (D.Proxy :: D.Proxy Word64) $ f
TIO.putStrLn "rest:" print . ("Word32",) . compressionRatioFor (D.Proxy :: D.Proxy Word32) $ f
print . ("Word64",) . BS.length . encodeCompressed . (compress :: BS.ByteString -> Maybe ([TreeDir], HuffmanTree Word64)) $ f print . ("Word16",) . compressionRatioFor (D.Proxy :: D.Proxy Word16) $ f
print . ("Word32",) . BS.length . encodeCompressed . (compress :: BS.ByteString -> Maybe ([TreeDir], HuffmanTree Word32)) $ f print . ("Word8",) . compressionRatioFor (D.Proxy :: D.Proxy Word8) $ f
print . ("Word16",) . BS.length . encodeCompressed . (compress :: BS.ByteString -> Maybe ([TreeDir], HuffmanTree Word16)) $ f
print . ("Word8",) . BS.length . encodeCompressed . (compress :: BS.ByteString -> Maybe ([TreeDir], HuffmanTree Word8)) $ f