compress/src/Main.hs
Jack Wines b8cb909fc9
intmap-based priority queue change
speedup is significant. Now does >5MB RAW images with ease
2024-04-14 23:34:16 -07:00

127 lines
4.1 KiB
Haskell

module Main where
import Data.Bifunctor qualified as Bi
import Data.Bit qualified as B
import Data.Bit qualified as BV
import Data.Bits (Bits ((.|.)))
import Data.Bits qualified as B
import Data.ByteString (fromFilePath)
import Data.ByteString qualified as BS
import Data.Data qualified as D
import Data.Foldable qualified as F
import Data.IntMap.Strict qualified as IM
import Data.Map.Strict qualified as M
import Data.Maybe (fromMaybe)
import Data.Maybe qualified as My
import Data.PQueue qualified as PQ
import Data.Serialize qualified as C
import Data.Text qualified as T
import Data.Text.IO qualified as TIO
import Data.Vector.Unboxed qualified as V
import Data.Word
import GHC.Generics (Generic)
import System.Environment qualified as SE
data HuffmanTree a
= Leaf a
| Node
{ left :: HuffmanTree a
, right :: HuffmanTree a
}
deriving (Eq, Ord, Show, Generic, C.Serialize)
data TreeDir = L | R deriving (Eq, Ord, Show, Generic, C.Serialize)
findTreeDirections :: forall a. (Ord a) => HuffmanTree a -> M.Map a [TreeDir]
findTreeDirections (Leaf a) = M.singleton a []
findTreeDirections (Node{..}) = M.union (rec' L left) (rec' R right)
where
rec' :: TreeDir -> HuffmanTree a -> M.Map a [TreeDir]
rec' dir = M.map (dir :) . findTreeDirections
compress :: forall a. (Ord a, Integral a, B.FiniteBits a) => BS.ByteString -> Maybe ([TreeDir], HuffmanTree a)
compress bs =
liftA2 (,) treeDirections mergedHuffmanTrees
where
treeDirections = concat <$> mapM (treeDirMap M.!?) dividedByteString
mergedHuffmanTrees =
mergeHuffmanTrees
. PQ.fromList
. map (uncurry (flip (,)) . Bi.first Leaf)
. counts
$ dividedByteString
treeDirMap :: M.Map a [TreeDir]
treeDirMap = My.maybe M.empty findTreeDirections mergedHuffmanTrees
dividedByteString = toBitsList bs
encodeCompressed :: (C.Serialize a) => Maybe ([TreeDir], HuffmanTree a) -> BS.ByteString
encodeCompressed = C.encode . fmap (Bi.first encodeTreeDirs)
where
encodeTreeDirs = BV.cloneToByteString . V.fromList . map (BV.Bit . (== R))
mergeHuffmanTrees :: PQ.PQueue (HuffmanTree a) -> Maybe (HuffmanTree a)
mergeHuffmanTrees queue = case (Bi.second . Bi.second $ PQ.minView) <$> PQ.minView queue of
Nothing -> Nothing
Just (size, (x, Nothing)) -> Just x
Just (size, (x, Just (size1, (x', rest)))) -> mergeHuffmanTrees $ PQ.insert (size + size1) (Node x x') rest
counts :: (Ord a) => [a] -> [(a, Int)]
counts = M.toList . F.foldl' combiningInsert M.empty
where
combiningInsert m key = M.insertWith (+) key 1 m
divideByteString :: Int -> BS.ByteString -> [BS.ByteString]
divideByteString n [] = []
divideByteString n bs = x : divideByteString n xs
where
(x, xs) = BS.splitAt n bs
toBitsList :: forall a. (Integral a, B.FiniteBits a) => BS.ByteString -> [a]
toBitsList bs = case finiteBitUncons bs of
Nothing -> []
(Just (x, xs)) -> x : (toBitsList xs)
finiteBitUncons :: forall a. (Integral a, B.FiniteBits a) => BS.ByteString -> Maybe (a, BS.ByteString)
finiteBitUncons [] = Nothing
finiteBitUncons bs =
Just
. (,rest)
. F.foldl' (.|.) 0
. zipWith (flip B.shiftL) [0, 8 ..]
. reverse
. map (fromIntegral :: Word8 -> a)
. BS.unpack
$ takenBytes
where
numBytes = (`div` 8) . B.finiteBitSize $ (B.zeroBits :: a)
takenBytes :: BS.ByteString
rest :: BS.ByteString
(takenBytes, rest) = BS.splitAt numBytes bs
compressionRatioFor ::
forall a.
(Integral a, B.FiniteBits a, Ord a, C.Serialize a) =>
D.Proxy a ->
BS.ByteString ->
Double
compressionRatioFor proxy bs =
(/ (fromIntegral . BS.length $ bs))
. fromIntegral
. BS.length
. encodeCompressed
. (compress :: BS.ByteString -> Maybe ([TreeDir], HuffmanTree a))
$ bs
main :: IO ()
main = do
[filePath] <- SE.getArgs
f <- BS.readFile filePath
TIO.putStrLn "compression ratios:"
print . ("Word64",) . compressionRatioFor (D.Proxy :: D.Proxy Word64) $ f
print . ("Word32",) . compressionRatioFor (D.Proxy :: D.Proxy Word32) $ f
print . ("Word16",) . compressionRatioFor (D.Proxy :: D.Proxy Word16) $ f
print . ("Word8",) . compressionRatioFor (D.Proxy :: D.Proxy Word8) $ f