intmap-based priority queue change
speedup is significant. Now does >5MB RAW images with ease
This commit is contained in:
parent
ab7880216e
commit
b8cb909fc9
5 changed files with 63 additions and 66 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -2,3 +2,4 @@
|
||||||
/result
|
/result
|
||||||
/.direnv/
|
/.direnv/
|
||||||
**/.DS_Store
|
**/.DS_Store
|
||||||
|
/cabal.project.local
|
||||||
|
|
|
||||||
|
|
@ -50,4 +50,8 @@ executable compress
|
||||||
default-language:
|
default-language:
|
||||||
GHC2021
|
GHC2021
|
||||||
other-modules:
|
other-modules:
|
||||||
Data.SortedList
|
Data.PQueue
|
||||||
|
ghc-options:
|
||||||
|
-fprof-auto
|
||||||
|
-fprof-late
|
||||||
|
"-with-rtsopts=-p -hc"
|
||||||
|
|
|
||||||
26
src/Data/PQueue.hs
Normal file
26
src/Data/PQueue.hs
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
module Data.PQueue where
|
||||||
|
|
||||||
|
import Data.Bifunctor qualified as Bi
|
||||||
|
import Data.IntMap.Strict qualified as IM
|
||||||
|
import Data.List qualified as L
|
||||||
|
import Data.List.NonEmpty qualified as NE
|
||||||
|
|
||||||
|
newtype PQueue a = PQueue
|
||||||
|
{ toMap :: IM.IntMap (NE.NonEmpty a)
|
||||||
|
}
|
||||||
|
deriving (Functor, Show)
|
||||||
|
|
||||||
|
fromList :: [(Int, a)] -> PQueue a
|
||||||
|
fromList = PQueue . IM.fromListWith NE.append . map (Bi.second NE.singleton)
|
||||||
|
|
||||||
|
singleton :: Int -> a -> PQueue a
|
||||||
|
singleton key a = PQueue $ IM.singleton key [a]
|
||||||
|
|
||||||
|
minView :: PQueue a -> Maybe (Int, (a, PQueue a))
|
||||||
|
minView (PQueue m) = case IM.lookupMin m of
|
||||||
|
Nothing -> Nothing
|
||||||
|
(Just (key, x NE.:| (x' : xs))) -> Just (key, (x, PQueue $ IM.insert key (x' NE.:| xs) m))
|
||||||
|
(Just (key, x NE.:| [])) -> Just (key, (x, PQueue $ IM.delete key m))
|
||||||
|
|
||||||
|
insert :: Int -> p -> PQueue p -> PQueue p
|
||||||
|
insert key a = PQueue . IM.insertWith NE.append key (NE.singleton a) . toMap
|
||||||
|
|
@ -1,27 +0,0 @@
|
||||||
module Data.SortedList where
|
|
||||||
import qualified Data.List as L
|
|
||||||
|
|
||||||
data SortedList a = Ord a => SortedList {
|
|
||||||
toList :: [a]
|
|
||||||
}
|
|
||||||
|
|
||||||
deriving instance Show a => Show (SortedList a)
|
|
||||||
|
|
||||||
map :: Ord a1 => ([a2] -> [a1]) -> SortedList a2 -> SortedList a1
|
|
||||||
map f (SortedList a) = SortedList . f $ a
|
|
||||||
|
|
||||||
fromList :: Ord a => [a] -> SortedList a
|
|
||||||
fromList = SortedList . L.sort
|
|
||||||
|
|
||||||
singleton :: Ord a => a -> SortedList a
|
|
||||||
singleton a = SortedList [a]
|
|
||||||
|
|
||||||
minView :: SortedList a -> Maybe (a, SortedList a)
|
|
||||||
minView (SortedList (x:xs)) = Just (x, SortedList xs)
|
|
||||||
minView (SortedList _) = Nothing
|
|
||||||
|
|
||||||
insert :: p -> SortedList p -> SortedList p
|
|
||||||
insert x (SortedList []) = singleton x
|
|
||||||
insert x (SortedList (x' : xs))
|
|
||||||
| x <= x' = SortedList (x : x' : xs)
|
|
||||||
| otherwise = Data.SortedList.map (x' :) . insert x . SortedList $ xs
|
|
||||||
69
src/Main.hs
69
src/Main.hs
|
|
@ -7,13 +7,14 @@ import Data.Bits (Bits ((.|.)))
|
||||||
import Data.Bits qualified as B
|
import Data.Bits qualified as B
|
||||||
import Data.ByteString (fromFilePath)
|
import Data.ByteString (fromFilePath)
|
||||||
import Data.ByteString qualified as BS
|
import Data.ByteString qualified as BS
|
||||||
|
import Data.Data qualified as D
|
||||||
import Data.Foldable qualified as F
|
import Data.Foldable qualified as F
|
||||||
import Data.IntMap.Strict qualified as IM
|
import Data.IntMap.Strict qualified as IM
|
||||||
import Data.Map.Strict qualified as M
|
import Data.Map.Strict qualified as M
|
||||||
import Data.Maybe (fromMaybe)
|
import Data.Maybe (fromMaybe)
|
||||||
import Data.Maybe qualified as My
|
import Data.Maybe qualified as My
|
||||||
|
import Data.PQueue qualified as PQ
|
||||||
import Data.Serialize qualified as C
|
import Data.Serialize qualified as C
|
||||||
import Data.SortedList qualified as SL
|
|
||||||
import Data.Text qualified as T
|
import Data.Text qualified as T
|
||||||
import Data.Text.IO qualified as TIO
|
import Data.Text.IO qualified as TIO
|
||||||
import Data.Vector.Unboxed qualified as V
|
import Data.Vector.Unboxed qualified as V
|
||||||
|
|
@ -29,12 +30,6 @@ data HuffmanTree a
|
||||||
}
|
}
|
||||||
deriving (Eq, Ord, Show, Generic, C.Serialize)
|
deriving (Eq, Ord, Show, Generic, C.Serialize)
|
||||||
|
|
||||||
data SizedHuffmanTree a = SizedHuffmanTree
|
|
||||||
{ size :: Int
|
|
||||||
, tree :: HuffmanTree a
|
|
||||||
}
|
|
||||||
deriving (Eq, Ord, Show)
|
|
||||||
|
|
||||||
data TreeDir = L | R deriving (Eq, Ord, Show, Generic, C.Serialize)
|
data TreeDir = L | R deriving (Eq, Ord, Show, Generic, C.Serialize)
|
||||||
|
|
||||||
findTreeDirections :: forall a. (Ord a) => HuffmanTree a -> M.Map a [TreeDir]
|
findTreeDirections :: forall a. (Ord a) => HuffmanTree a -> M.Map a [TreeDir]
|
||||||
|
|
@ -44,48 +39,34 @@ findTreeDirections (Node{..}) = M.union (rec' L left) (rec' R right)
|
||||||
rec' :: TreeDir -> HuffmanTree a -> M.Map a [TreeDir]
|
rec' :: TreeDir -> HuffmanTree a -> M.Map a [TreeDir]
|
||||||
rec' dir = M.map (dir :) . findTreeDirections
|
rec' dir = M.map (dir :) . findTreeDirections
|
||||||
|
|
||||||
mergeSizedTrees tree0 tree1 =
|
|
||||||
SizedHuffmanTree
|
|
||||||
{ size = (size tree0) + (size tree1)
|
|
||||||
, tree = Node (tree tree0) (tree tree1)
|
|
||||||
}
|
|
||||||
|
|
||||||
-- compress :: BS.ByteString -> Maybe (SizedHuffmanTree BS.ByteString)
|
|
||||||
compress :: forall a. (Ord a, Integral a, B.FiniteBits a) => BS.ByteString -> Maybe ([TreeDir], HuffmanTree a)
|
compress :: forall a. (Ord a, Integral a, B.FiniteBits a) => BS.ByteString -> Maybe ([TreeDir], HuffmanTree a)
|
||||||
compress bs =
|
compress bs =
|
||||||
liftA2 (,) treeDirections unsizedTree
|
liftA2 (,) treeDirections mergedHuffmanTrees
|
||||||
where
|
where
|
||||||
unsizedTree :: Maybe (HuffmanTree a)
|
|
||||||
unsizedTree = tree <$> mergedHuffmanTrees
|
|
||||||
|
|
||||||
treeDirections = concat <$> mapM (treeDirMap M.!?) dividedByteString
|
treeDirections = concat <$> mapM (treeDirMap M.!?) dividedByteString
|
||||||
|
|
||||||
mergedHuffmanTrees =
|
mergedHuffmanTrees =
|
||||||
mergeHuffmanTrees
|
mergeHuffmanTrees
|
||||||
. SL.fromList
|
. PQ.fromList
|
||||||
. map (uncurry (flip SizedHuffmanTree) . Bi.first Leaf)
|
. map (uncurry (flip (,)) . Bi.first Leaf)
|
||||||
. counts
|
. counts
|
||||||
$ dividedByteString
|
$ dividedByteString
|
||||||
|
|
||||||
treeDirMap :: M.Map a [TreeDir]
|
treeDirMap :: M.Map a [TreeDir]
|
||||||
treeDirMap = My.maybe M.empty (findTreeDirections . tree) mergedHuffmanTrees
|
treeDirMap = My.maybe M.empty findTreeDirections mergedHuffmanTrees
|
||||||
|
|
||||||
dividedByteString = toBitsList bs
|
dividedByteString = toBitsList bs
|
||||||
|
|
||||||
encodeCompressed :: C.Serialize a => Maybe ([TreeDir], HuffmanTree a) -> BS.ByteString
|
encodeCompressed :: (C.Serialize a) => Maybe ([TreeDir], HuffmanTree a) -> BS.ByteString
|
||||||
encodeCompressed = C.encode . fmap (Bi.first encodeTreeDirs)
|
encodeCompressed = C.encode . fmap (Bi.first encodeTreeDirs)
|
||||||
where
|
where
|
||||||
encodeTreeDirs = BV.cloneToByteString . V.fromList . map (BV.Bit . (== R))
|
encodeTreeDirs = BV.cloneToByteString . V.fromList . map (BV.Bit . (== R))
|
||||||
|
|
||||||
mergeHuffmanTrees :: SL.SortedList (SizedHuffmanTree a) -> Maybe (SizedHuffmanTree a)
|
mergeHuffmanTrees :: PQ.PQueue (HuffmanTree a) -> Maybe (HuffmanTree a)
|
||||||
mergeHuffmanTrees (SL.SortedList (tree0 : tree1 : xs)) =
|
mergeHuffmanTrees queue = case (Bi.second . Bi.second $ PQ.minView) <$> PQ.minView queue of
|
||||||
mergeHuffmanTrees
|
Nothing -> Nothing
|
||||||
. SL.insert (mergeSizedTrees tree0 tree1)
|
Just (size, (x, Nothing)) -> Just x
|
||||||
. SL.SortedList
|
Just (size, (x, Just (size1, (x', rest)))) -> mergeHuffmanTrees $ PQ.insert (size + size1) (Node x x') rest
|
||||||
$ xs
|
|
||||||
|
|
||||||
mergeHuffmanTrees (SL.SortedList [x]) = Just x
|
|
||||||
mergeHuffmanTrees (SL.SortedList []) = Nothing
|
|
||||||
|
|
||||||
counts :: (Ord a) => [a] -> [(a, Int)]
|
counts :: (Ord a) => [a] -> [(a, Int)]
|
||||||
counts = M.toList . F.foldl' combiningInsert M.empty
|
counts = M.toList . F.foldl' combiningInsert M.empty
|
||||||
|
|
@ -121,14 +102,26 @@ finiteBitUncons bs =
|
||||||
rest :: BS.ByteString
|
rest :: BS.ByteString
|
||||||
(takenBytes, rest) = BS.splitAt numBytes bs
|
(takenBytes, rest) = BS.splitAt numBytes bs
|
||||||
|
|
||||||
|
compressionRatioFor ::
|
||||||
|
forall a.
|
||||||
|
(Integral a, B.FiniteBits a, Ord a, C.Serialize a) =>
|
||||||
|
D.Proxy a ->
|
||||||
|
BS.ByteString ->
|
||||||
|
Double
|
||||||
|
compressionRatioFor proxy bs =
|
||||||
|
(/ (fromIntegral . BS.length $ bs))
|
||||||
|
. fromIntegral
|
||||||
|
. BS.length
|
||||||
|
. encodeCompressed
|
||||||
|
. (compress :: BS.ByteString -> Maybe ([TreeDir], HuffmanTree a))
|
||||||
|
$ bs
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
main = do
|
main = do
|
||||||
[filePath] <- SE.getArgs
|
[filePath] <- SE.getArgs
|
||||||
f <- BS.readFile filePath
|
f <- BS.readFile filePath
|
||||||
TIO.putStrLn "original:"
|
TIO.putStrLn "compression ratios:"
|
||||||
print . BS.length $ f
|
print . ("Word64",) . compressionRatioFor (D.Proxy :: D.Proxy Word64) $ f
|
||||||
TIO.putStrLn "rest:"
|
print . ("Word32",) . compressionRatioFor (D.Proxy :: D.Proxy Word32) $ f
|
||||||
print . ("Word64",) . BS.length . encodeCompressed . (compress :: BS.ByteString -> Maybe ([TreeDir], HuffmanTree Word64)) $ f
|
print . ("Word16",) . compressionRatioFor (D.Proxy :: D.Proxy Word16) $ f
|
||||||
print . ("Word32",) . BS.length . encodeCompressed . (compress :: BS.ByteString -> Maybe ([TreeDir], HuffmanTree Word32)) $ f
|
print . ("Word8",) . compressionRatioFor (D.Proxy :: D.Proxy Word8) $ f
|
||||||
print . ("Word16",) . BS.length . encodeCompressed . (compress :: BS.ByteString -> Maybe ([TreeDir], HuffmanTree Word16)) $ f
|
|
||||||
print . ("Word8",) . BS.length . encodeCompressed . (compress :: BS.ByteString -> Maybe ([TreeDir], HuffmanTree Word8)) $ f
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue