97 lines
4.5 KiB
Haskell
97 lines
4.5 KiB
Haskell
module Main where
|
|
|
|
import Compress.Huffman
|
|
import Compress.Huffman qualified as HT
|
|
import Compress.PrefixTree qualified as PT
|
|
import Compress.Arithmetic qualified as A
|
|
import Data.Bifunctor qualified as Bi
|
|
import Data.Bit qualified as B
|
|
import Data.Bits (Bits ((.|.)))
|
|
import Data.Bits qualified as B
|
|
import Data.ByteString qualified as BS
|
|
import Data.HuffmanTree
|
|
import Data.Proxy qualified as P
|
|
import Codec.Winery qualified as C
|
|
import Data.Word
|
|
import GHC.Generics (Generic)
|
|
import Options.Generic qualified as O
|
|
import qualified Data.FiniteBit as FB
|
|
import qualified Relude.Unsafe as U
|
|
import qualified Data.ByteString.Lazy as BSL
|
|
-- import Data.Word4 (Word4(Word4))
|
|
import Compress.BurrowsWheeler
|
|
import qualified Control.Monad.Par as P
|
|
import qualified Compress.LengthDistancePairs as LDP
|
|
|
|
data CompressOrDecompress = Compress | Decompress deriving (Show, Generic, O.ParseField, O.ParseFields, O.ParseRecord, Read)
|
|
|
|
data CompressionStrategy = Huffman | MarkovHuffman deriving (Show, Generic, O.ParseField, O.ParseFields, O.ParseRecord, Read)
|
|
|
|
data CLIOpts = CLIOpts
|
|
{ task :: CompressOrDecompress,
|
|
strategy :: CompressionStrategy
|
|
}
|
|
deriving (Show, Generic, O.ParseRecord)
|
|
|
|
-- applyCompressionOptions ::
|
|
-- forall a.
|
|
-- (Integral a, B.Bits a, B.FiniteBits a, Ord a, C.Serialise a) =>
|
|
-- P.Proxy a ->
|
|
-- CLIOpts ->
|
|
-- BS.By(++ []) . teString ->
|
|
-- BS.ByteString
|
|
-- applyCompressionOptions _ (CLIOpts Compress Huffman) f =
|
|
-- C.serialise . (compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanTree a)) $ f
|
|
-- applyCompressionOptions _ (CLIOpts Compress MarkovHuffman) f =
|
|
-- C.encode . (PT.compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanPrefixTree a a, a)) $ f
|
|
-- applyCompressionOptions _ (CLIOpts Decompress Huffman) f =
|
|
-- handleError $ Bi.second decompress . (C.decode :: BS.ByteString -> Either String (TreeDirs, HuffmanTree a)) $ f
|
|
-- applyCompressionOptions _ (CLIOpts Decompress MarkovHuffman) f =
|
|
-- handleError $ Bi.second PT.decompress . (C.decode :: BS.ByteString -> Either String (TreeDirs, HuffmanPrefixTree a a, a)) $ f
|
|
|
|
handleError (Right (Just bs)) = bs
|
|
handleError _ = []
|
|
|
|
|
|
|
|
main :: IO ()
|
|
main = do
|
|
-- cliOpts :: CLIOpts <- O.getRecord "compress/decompress & strategy"
|
|
f <- BS.getContents
|
|
let f' = BS.unpack f
|
|
print . BS.pack . A.decompress . A.compress $ f'
|
|
-- print =<< (P.runParIO $ do
|
|
-- let fCounts = (toCounts :: [Word8] -> [(Word8, Word64)]) . {- toBurrowsWheeler . -} (FB.toWordsList :: BS.ByteString -> [ Word8 ]) $ f
|
|
-- let fAsWords = map fst fCounts
|
|
-- -- let compressedUnencoded {- :: (A.Compressed Word8 (A.Word4MapSerialized Word8)) -} = (A.compress fAsWords)
|
|
-- -- print . A.talkAboutCompressed $ compressedUnencoded
|
|
-- -- let compressed = C.serialise (compressedUnencoded, A.compress . map snd $ fCounts)
|
|
-- -- huffmanCompressionRatio' <- P.spawnP $ (compressionRatioFor (Proxy :: Proxy Word8) (P.Proxy :: Proxy Word8) f)
|
|
-- -- arithmaticCodingCompressionRatio' <- P.spawnP $ ((fromIntegral . BS.length $ compressed) / (fromIntegral . BS.length $ f))
|
|
-- -- huffmanCompressionRatio <- P.get huffmanCompressionRatio'
|
|
-- -- arithmaticCodingCompressionRatio <- P.get arithmaticCodingCompressionRatio'
|
|
-- -- let lengthDistancePairsCompressedSize = fromIntegral . BS.length . C.serialise . (LDP.encode :: ByteString -> [LDP.LengthDistancePair Word16]) $ f
|
|
-- -- let lengthDistancePairsCompressionRatio :: Double = lengthDistancePairsCompressedSize / (fromIntegral $ BS.length f)
|
|
-- pure ()) -- (lengthDistancePairsCompressionRatio))
|
|
-- let decompressed = (A.decompress compressedUnencoded)
|
|
-- print ("huffman coding", huffmanCompressionRatio)
|
|
-- print ("compression ratio (arithmetic coding)", arithmaticCodingCompressionRatio)
|
|
-- print ("works?", decompressed == fAsWords)
|
|
|
|
|
|
-- print . take 10 . drop 70 . zip fAsWords $ decompressed
|
|
|
|
-- print . ("original length", ) . length $ fAsWords
|
|
|
|
-- let f = "hello tehre"
|
|
-- f <- BS.readFile "pg64317.txt"
|
|
-- let (compressed :: Maybe (TreeDirs, PT.HuffmanPrefixTree Word8 Word8, Word8)) = PT.compress f
|
|
-- print $ BS.length . C.encode $ compressed
|
|
-- print $ BS.length . C.encode . (compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanTree Word8)) $ f
|
|
-- print $ BS.length . C.encode . (compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanTree Word16)) $ f
|
|
-- BS.writeFile "outin.txt" decompressed
|
|
-- print (decompressed, f)
|
|
-- print $ BS.length decompressed
|
|
-- print $ BS.length f
|
|
-- print (decompressed == f)
|
|
-- BS.putStr . applyCompressionOptions (P.Proxy :: P.Proxy Word16) cliOpts $ f
|