decompression works
I also added a cli interface
This commit is contained in:
parent
5bb8afb672
commit
43afd380c4
3 changed files with 69 additions and 27 deletions
12
README.md
12
README.md
|
|
@ -1,13 +1,19 @@
|
|||
Compresses files using [Huffman coding](https://en.wikipedia.org/wiki/Huffman_coding). Please note that decompression isn't fully implemented yet. It is however implemented to the point where we know compression works correctly.
|
||||
Compresses files using [Huffman coding](https://en.wikipedia.org/wiki/Huffman_coding).
|
||||
|
||||
On [The Great Gatsby in plain text](https://www.gutenberg.org/cache/epub/64317/pg64317.txt) on project Gutenberg, it achieves a 0.526 compression ratio when using a word size of two bytes. On an arbitrary RAW image, it peaks at .622 with a 32 bit word size and .645 with a 16 bit one.
|
||||
|
||||
To run with [Nix](nixos.org):
|
||||
``` sh
|
||||
nix run ".#" --experimental-features "nix-command flakes" -- myFile
|
||||
nix run ".#" --experimental-features "nix-command flakes" -- compress < myFile > myFile.compressed
|
||||
nix run ".#" --experimental-features "nix-command flakes" -- decompress < myFile.compressed > myFile-decompressed
|
||||
sha256sum myFile myFile-decompressed
|
||||
wc -c myFile.compressed myFile
|
||||
```
|
||||
|
||||
To run with [cabal](https://www.haskell.org/ghcup/):
|
||||
``` sh
|
||||
cabal run . -- myFile
|
||||
cabal run . -- compress < myFile > myFile.compressed
|
||||
cabal run . -- decompress < myFile > myFile-decompressed
|
||||
sha256sum myFile myFile-decompressed
|
||||
wc -c myFile.compressed myFile
|
||||
```
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ executable compress
|
|||
StandaloneDeriving,
|
||||
StrictData,
|
||||
TemplateHaskell,
|
||||
LambdaCase,
|
||||
TupleSections,
|
||||
TypeApplications,
|
||||
TypeFamilies,
|
||||
|
|
@ -46,12 +47,13 @@ executable compress
|
|||
containers,
|
||||
text,
|
||||
uuid,
|
||||
optparse-generic,
|
||||
vector
|
||||
default-language:
|
||||
GHC2021
|
||||
other-modules:
|
||||
Data.PQueue
|
||||
ghc-options:
|
||||
-fprof-auto
|
||||
-fprof-late
|
||||
"-with-rtsopts=-p -hc"
|
||||
-- ghc-options:
|
||||
-- -fprof-auto
|
||||
-- -fprof-late
|
||||
-- "-with-rtsopts=-p -hc"
|
||||
|
|
|
|||
74
src/Main.hs
74
src/Main.hs
|
|
@ -1,6 +1,7 @@
|
|||
module Main where
|
||||
|
||||
import Data.Bifunctor qualified as Bi
|
||||
import Data.Bit (cloneToByteString)
|
||||
import Data.Bit qualified as B
|
||||
import Data.Bit qualified as BV
|
||||
import Data.Bits (Bits ((.|.)))
|
||||
|
|
@ -19,9 +20,15 @@ import Data.Text qualified as T
|
|||
import Data.Text.IO qualified as TIO
|
||||
import Data.Vector.Unboxed qualified as V
|
||||
import Data.Word
|
||||
import Debug.Trace qualified as D
|
||||
import GHC.Generics (Generic)
|
||||
import Options.Generic qualified as O
|
||||
import System.Environment qualified as SE
|
||||
|
||||
data CompressOrDecompress = Compress | Decompress deriving (Show, Generic)
|
||||
|
||||
instance O.ParseRecord CompressOrDecompress
|
||||
|
||||
data HuffmanTree a
|
||||
= Leaf a
|
||||
| Node
|
||||
|
|
@ -37,9 +44,9 @@ findTreeDirections (Node {..}) = M.union (rec' L left) (rec' R right)
|
|||
rec' :: TreeDir -> HuffmanTree a -> M.Map a [TreeDir]
|
||||
rec' dir = M.map (dir :) . findTreeDirections
|
||||
|
||||
decompress :: forall a. (Ord a, Integral a, B.FiniteBits a) => Maybe ([TreeDir], HuffmanTree a) -> Maybe [a]
|
||||
decompress :: forall a. (Ord a, Integral a, B.FiniteBits a) => Maybe ([TreeDir], HuffmanTree a) -> Maybe BS.ByteString
|
||||
decompress Nothing = Just []
|
||||
decompress (Just (treeDirs, tree)) = decompress' treeDirs
|
||||
decompress (Just (treeDirs, tree)) = BS.concat . map toByteString <$> decompress' treeDirs
|
||||
where
|
||||
decompress' :: [TreeDir] -> Maybe [a]
|
||||
decompress' [] = Just []
|
||||
|
|
@ -71,13 +78,31 @@ compress bs =
|
|||
|
||||
dividedByteString = toBitsList bs
|
||||
|
||||
testCompression :: forall a. (Ord a, Eq a, Integral a, B.FiniteBits a) => D.Proxy a -> BS.ByteString -> Bool
|
||||
testCompression _ bs = ((Just . toBitsList $ bs) :: Maybe [a]) == (decompress . compress $ bs)
|
||||
testCompression :: forall a. (Ord a, Eq a, Integral a, B.FiniteBits a, C.Serialize a) => D.Proxy a -> BS.ByteString -> Bool
|
||||
testCompression _ bs =
|
||||
((Right . Just $ bs) ==)
|
||||
. Bi.second (decompress :: Maybe ([TreeDir], HuffmanTree a) -> Maybe BS.ByteString)
|
||||
-- . D.traceShowWith (Bi.second (fmap fst))
|
||||
. (decodeCompressed :: BS.ByteString -> Either String (Maybe ([TreeDir], HuffmanTree a)))
|
||||
. encodeCompressed
|
||||
-- . D.traceShowWith (fmap fst)
|
||||
. (compress :: BS.ByteString -> Maybe ([TreeDir], HuffmanTree a))
|
||||
$ bs
|
||||
|
||||
encodeCompressed :: (C.Serialize a) => Maybe ([TreeDir], HuffmanTree a) -> BS.ByteString
|
||||
encodeCompressed :: (C.Serialize a) => Maybe ([TreeDir], HuffmanTree a) -> (BS.ByteString)
|
||||
encodeCompressed = C.encode . fmap (Bi.first encodeTreeDirs)
|
||||
where
|
||||
encodeTreeDirs = BV.cloneToByteString . V.fromList . map (BV.Bit . (== R))
|
||||
encodeTreeDirs = cloneToByteStringWithLen . V.fromList . map (BV.Bit . (== R))
|
||||
|
||||
cloneToByteStringWithLen vec = (BV.cloneToByteString vec, V.length vec)
|
||||
|
||||
cloneFromByteStringWithLen (bs, len) = V.take len . BV.cloneFromByteString $ bs
|
||||
|
||||
decodeTreeDirs :: (BS.ByteString, Int) -> [TreeDir]
|
||||
decodeTreeDirs = map (\x -> if BV.unBit x then R else L) . V.toList . cloneFromByteStringWithLen
|
||||
|
||||
decodeCompressed :: forall a. (Ord a, Integral a, B.FiniteBits a, C.Serialize a) => BS.ByteString -> Either String (Maybe ([TreeDir], HuffmanTree a))
|
||||
decodeCompressed = Bi.second (fmap (Bi.first decodeTreeDirs)) . C.decode
|
||||
|
||||
mergeHuffmanTrees :: PQ.PQueue (HuffmanTree a) -> Maybe (HuffmanTree a)
|
||||
mergeHuffmanTrees queue = case (Bi.second . Bi.second $ PQ.minView) <$> PQ.minView queue of
|
||||
|
|
@ -101,23 +126,28 @@ toBitsList bs = case finiteBitUncons bs of
|
|||
Nothing -> []
|
||||
(Just (x, xs)) -> x : (toBitsList xs)
|
||||
|
||||
toByteString :: forall a. (Integral a, B.FiniteBits a) => a -> BS.ByteString
|
||||
toByteString n = BS.pack . take numBytes . map (fromIntegral . (n `B.rotateL`)) $ [8, 16 ..]
|
||||
where
|
||||
numBytes = numBytesIn (D.Proxy :: D.Proxy a)
|
||||
|
||||
finiteBitUncons :: forall a. (Integral a, B.FiniteBits a) => BS.ByteString -> Maybe (a, BS.ByteString)
|
||||
finiteBitUncons [] = Nothing
|
||||
finiteBitUncons bs =
|
||||
Just
|
||||
. (,rest)
|
||||
. F.foldl' (.|.) 0
|
||||
. zipWith (flip B.shiftL) [0, 8 ..]
|
||||
. reverse
|
||||
. zipWith (flip B.rotateR) [8, 16 ..]
|
||||
. map (fromIntegral :: Word8 -> a)
|
||||
. BS.unpack
|
||||
$ takenBytes
|
||||
where
|
||||
numBytes = (`div` 8) . B.finiteBitSize $ (B.zeroBits :: a)
|
||||
|
||||
takenBytes :: BS.ByteString
|
||||
rest :: BS.ByteString
|
||||
(takenBytes, rest) = BS.splitAt numBytes bs
|
||||
(takenBytes, rest) = BS.splitAt (numBytesIn (D.Proxy :: D.Proxy a)) bs
|
||||
|
||||
numBytesIn :: forall a. (B.FiniteBits a) => D.Proxy a -> Int
|
||||
numBytesIn _ = (`div` 8) . B.finiteBitSize $ (B.zeroBits :: a)
|
||||
|
||||
compressionRatioFor
|
||||
:: forall a
|
||||
|
|
@ -133,14 +163,18 @@ compressionRatioFor proxy bs =
|
|||
. (compress :: BS.ByteString -> Maybe ([TreeDir], HuffmanTree a))
|
||||
$ bs
|
||||
|
||||
applyCompressionOptions :: CompressOrDecompress -> BS.ByteString -> BS.ByteString
|
||||
applyCompressionOptions Compress f = encodeCompressed . (compress :: BS.ByteString -> Maybe ([TreeDir], HuffmanTree Word8)) $ f
|
||||
applyCompressionOptions Decompress f = case Bi.second decompress
|
||||
. (decodeCompressed :: BS.ByteString -> Either String (Maybe ([TreeDir], HuffmanTree Word8)))
|
||||
$ f of
|
||||
-- TODO: write errors to stderr
|
||||
(Left _) -> []
|
||||
(Right Nothing) -> []
|
||||
(Right (Just bs)) -> bs
|
||||
|
||||
main :: IO ()
|
||||
main = do
|
||||
[filePath] <- SE.getArgs
|
||||
f <- BS.readFile filePath
|
||||
TIO.putStrLn "does decompress . compress == id ?"
|
||||
print . ("Word64",) . testCompression (D.Proxy :: D.Proxy Word16) $ f
|
||||
TIO.putStrLn "compression ratios:"
|
||||
print . ("Word64",) . compressionRatioFor (D.Proxy :: D.Proxy Word64) $ f
|
||||
print . ("Word32",) . compressionRatioFor (D.Proxy :: D.Proxy Word32) $ f
|
||||
print . ("Word16",) . compressionRatioFor (D.Proxy :: D.Proxy Word16) $ f
|
||||
print . ("Word8",) . compressionRatioFor (D.Proxy :: D.Proxy Word8) $ f
|
||||
compresionOrDecompression :: CompressOrDecompress <- O.getRecord "compression/decompression"
|
||||
f <- BS.getContents
|
||||
BS.putStr . applyCompressionOptions compresionOrDecompression $ f
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue