add arithmetic coding
This commit is contained in:
parent
6d00525334
commit
791fff6107
11 changed files with 667 additions and 194 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -1,4 +1,5 @@
|
|||
/dist-newstyle/
|
||||
/haddocks/
|
||||
/result
|
||||
/.direnv/
|
||||
**/.DS_Store
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ executable compress
|
|||
src
|
||||
build-depends:
|
||||
base,
|
||||
basement,
|
||||
bitvec,
|
||||
bytestring,
|
||||
cereal,
|
||||
|
|
@ -50,12 +51,20 @@ executable compress
|
|||
text,
|
||||
uuid,
|
||||
optparse-generic,
|
||||
vector
|
||||
vector,
|
||||
nonempty-containers,
|
||||
primes
|
||||
default-language:
|
||||
GHC2021
|
||||
other-modules:
|
||||
Data.PQueue
|
||||
-- ghc-options:
|
||||
-- -fprof-auto
|
||||
-- -fprof-late
|
||||
-- "-with-rtsopts=-p -hc"
|
||||
Data.FiniteBit
|
||||
Compress.Huffman
|
||||
Compress.PrefixTree
|
||||
Data.HuffmanTree
|
||||
Compress.Arithmetic
|
||||
ghc-options:
|
||||
-threaded
|
||||
-fprof-auto
|
||||
-fprof-late
|
||||
"-with-rtsopts=-p -hc"
|
||||
|
|
|
|||
42
flake.lock
generated
42
flake.lock
generated
|
|
@ -5,11 +5,11 @@
|
|||
"nixpkgs-lib": "nixpkgs-lib"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1712014858,
|
||||
"narHash": "sha256-sB4SWl2lX95bExY2gMFG5HIzvva5AVMJd4Igm+GpZNw=",
|
||||
"lastModified": 1733312601,
|
||||
"narHash": "sha256-4pDvzqnegAfRkPwO3wmwBhVi/Sye1mzps0zHWYnP88c=",
|
||||
"owner": "hercules-ci",
|
||||
"repo": "flake-parts",
|
||||
"rev": "9126214d0a59633752a136528f5f3b9aa8565b7d",
|
||||
"rev": "205b12d8b7cd4802fbcb8e8ef6a0f1408781a4f9",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
|
@ -20,11 +20,11 @@
|
|||
},
|
||||
"haskell-flake": {
|
||||
"locked": {
|
||||
"lastModified": 1713084600,
|
||||
"narHash": "sha256-qL7LV2MtwJ+1Xasg1TjSUmoE7yrRuXPqxpPlKjLE0SE=",
|
||||
"lastModified": 1734464164,
|
||||
"narHash": "sha256-5JCCyrgy7IMnipyYMQzIAXncGt2XVlW1aK71A+FTXDs=",
|
||||
"owner": "srid",
|
||||
"repo": "haskell-flake",
|
||||
"rev": "847292fc793a5c15c873e52e7751ee4267ef32a0",
|
||||
"rev": "e280b39efdd72b6a5bdaa982b67f150c819be642",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
|
@ -35,11 +35,11 @@
|
|||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1712963716,
|
||||
"narHash": "sha256-WKm9CvgCldeIVvRz87iOMi8CFVB1apJlkUT4GGvA0iM=",
|
||||
"lastModified": 1734424634,
|
||||
"narHash": "sha256-cHar1vqHOOyC7f1+tVycPoWTfKIaqkoe1Q6TnKzuti4=",
|
||||
"owner": "nixos",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "cfd6b5fc90b15709b780a5a1619695a88505a176",
|
||||
"rev": "d3c42f187194c26d9f0309a8ecc469d6c878ce33",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
|
@ -51,20 +51,14 @@
|
|||
},
|
||||
"nixpkgs-lib": {
|
||||
"locked": {
|
||||
"dir": "lib",
|
||||
"lastModified": 1711703276,
|
||||
"narHash": "sha256-iMUFArF0WCatKK6RzfUJknjem0H9m4KgorO/p3Dopkk=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "d8fe5e6c92d0d190646fb9f1056741a229980089",
|
||||
"type": "github"
|
||||
"lastModified": 1733096140,
|
||||
"narHash": "sha256-1qRH7uAUsyQI7R1Uwl4T+XvdNv778H0Nb5njNrqvylY=",
|
||||
"type": "tarball",
|
||||
"url": "https://github.com/NixOS/nixpkgs/archive/5487e69da40cbd611ab2cadee0b4637225f7cfae.tar.gz"
|
||||
},
|
||||
"original": {
|
||||
"dir": "lib",
|
||||
"owner": "NixOS",
|
||||
"ref": "nixos-unstable",
|
||||
"repo": "nixpkgs",
|
||||
"type": "github"
|
||||
"type": "tarball",
|
||||
"url": "https://github.com/NixOS/nixpkgs/archive/5487e69da40cbd611ab2cadee0b4637225f7cfae.tar.gz"
|
||||
}
|
||||
},
|
||||
"root": {
|
||||
|
|
@ -98,11 +92,11 @@
|
|||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1711963903,
|
||||
"narHash": "sha256-N3QDhoaX+paWXHbEXZapqd1r95mdshxToGowtjtYkGI=",
|
||||
"lastModified": 1733761991,
|
||||
"narHash": "sha256-s4DalCDepD22jtKL5Nw6f4LP5UwoMcPzPZgHWjAfqbQ=",
|
||||
"owner": "numtide",
|
||||
"repo": "treefmt-nix",
|
||||
"rev": "49dc4a92b02b8e68798abd99184f228243b6e3ac",
|
||||
"rev": "0ce9d149d99bc383d1f2d85f31f6ebd146e46085",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
|
|
|||
11
flake.nix
11
flake.nix
|
|
@ -22,7 +22,7 @@
|
|||
# See https://github.com/srid/haskell-flake/blob/master/example/flake.nix
|
||||
haskellProjects.default = {
|
||||
# The base package set (this value is the default)
|
||||
# basePackages = pkgs.haskellPackages;
|
||||
basePackages = pkgs.haskell.packages.ghc96;
|
||||
|
||||
# Packages to add on top of `basePackages`
|
||||
packages = {
|
||||
|
|
@ -34,6 +34,13 @@
|
|||
|
||||
# Add your package overrides here
|
||||
settings = {
|
||||
uuid ={
|
||||
jailbreak = true;
|
||||
};
|
||||
|
||||
# hlint = {
|
||||
# jailbreak = true;
|
||||
# };
|
||||
# barbies-th = {
|
||||
# broken = false;
|
||||
# jailbreak = true;
|
||||
|
|
@ -57,7 +64,7 @@
|
|||
programs.ormolu.enable = true;
|
||||
programs.nixpkgs-fmt.enable = true;
|
||||
programs.cabal-fmt.enable = true;
|
||||
programs.hlint.enable = true;
|
||||
programs.hlint.enable = false;
|
||||
|
||||
# We use fourmolu
|
||||
programs.ormolu.package = pkgs.haskellPackages.fourmolu;
|
||||
|
|
|
|||
149
src/Compress/Arithmetic.hs
Normal file
149
src/Compress/Arithmetic.hs
Normal file
|
|
@ -0,0 +1,149 @@
|
|||
-- | https://en.wikipedia.org/wiki/Arithmetic_coding
|
||||
module Compress.Arithmetic where
|
||||
|
||||
import Basement.Bits qualified as B
|
||||
import Basement.Compat.Base (Word16, Word32, Word64, Word8)
|
||||
import Basement.Compat.Bifunctor qualified as Bi
|
||||
import Control.Arrow qualified as Ar
|
||||
import Data.ByteString qualified as By
|
||||
import Data.FiniteBit qualified as Fi
|
||||
import Data.Foldable as F
|
||||
import Data.List (genericLength)
|
||||
import Data.List qualified as L
|
||||
import Data.Map.Strict qualified as M
|
||||
import Data.Maybe qualified as My
|
||||
import Data.Numbers.Primes
|
||||
import Data.Ord
|
||||
import Data.Ratio
|
||||
import Data.Serialize qualified as C
|
||||
import GHC.Generics (Generic)
|
||||
import GHC.Natural (Natural)
|
||||
|
||||
-- withPrimeDenums = concatMap (\x -> map (% x) [1 .. (pred x)]) primes
|
||||
|
||||
-- shortestNumberBetween :: Ratio Integer -> Ratio Integer -> Maybe (Ratio Integer)
|
||||
-- shortestNumberBetween xMin xMax = find (\candidate -> candidate >= xMin && candidate < xMax) withPrimeDenums
|
||||
|
||||
data WordMarkovStats = WordMarkovStats
|
||||
{ location :: Ratio Integer,
|
||||
size :: Ratio Integer
|
||||
}
|
||||
deriving (Show, Eq, Ord, Generic, C.Serialize)
|
||||
|
||||
toRing :: forall a k. (Ord k, Integral a, Bounded a) => M.Map k a -> M.Map k WordMarkovStats
|
||||
toRing m = M.fromList . zip (map fst asList) $ zipWith WordMarkovStats (scanl (+) 0 . map snd $ asList) (map snd asList)
|
||||
where
|
||||
asList = M.toList $ m'
|
||||
sum' =
|
||||
sum
|
||||
. map fromIntegral
|
||||
. M.elems
|
||||
$ m
|
||||
m' = M.map ((% sum') . fromIntegral) m
|
||||
|
||||
maxBound' :: Integer
|
||||
maxBound' = fromIntegral (maxBound :: a)
|
||||
|
||||
twoByteMarkov :: forall k b. (Num b, Integral b, Bounded b, Ord k) => [k] -> M.Map k (M.Map k b)
|
||||
twoByteMarkov xs =
|
||||
M.map sizeAsFraction
|
||||
. M.fromListWith (M.unionWith (+))
|
||||
. zip xs
|
||||
. map (`M.singleton` (1 :: Integer))
|
||||
. tail
|
||||
$ xs
|
||||
where
|
||||
sizeAsFraction m = M.map (max 1 . floor . fromRational . (* maxBound') . (% sum')) m
|
||||
where
|
||||
sum' = sum . M.elems $ m
|
||||
|
||||
toInteger :: (Integral a) => a -> Integer
|
||||
toInteger = fromIntegral
|
||||
|
||||
maxBound' = fromIntegral (maxBound :: b)
|
||||
|
||||
data Compressed a = Compressed
|
||||
{ markovs :: M.Map a (M.Map a (Word8)),
|
||||
location :: Ratio Integer,
|
||||
start :: a,
|
||||
length :: Int
|
||||
}
|
||||
deriving (Eq, Ord, Show, Generic, C.Serialize)
|
||||
|
||||
decompress ::
|
||||
forall a.
|
||||
(Integral a, B.FiniteBitsOps a, B.BitOps a, Show a) =>
|
||||
Compressed a ->
|
||||
[a]
|
||||
decompress (Compressed {..}) = take length $ map snd . L.iterate decompress' $ (location, start)
|
||||
where
|
||||
decompress' :: (Ratio Integer, a) -> (Ratio Integer, a)
|
||||
decompress' (loc, prev) = ((loc - ansLoc) / newSize, newVal)
|
||||
where
|
||||
(ansLoc, (newVal, newSize)) = My.fromJust . M.lookupLE loc $ (rings M.! prev)
|
||||
|
||||
rings = M.map (M.fromList . map toDecompressionRing . M.toList . toRing) markovs
|
||||
|
||||
toDecompressionRing (key, (WordMarkovStats {..})) = (location, (key, size))
|
||||
|
||||
sanityCheck :: forall k. (Ord k) => M.Map k (M.Map k Word8) -> [Word8]
|
||||
sanityCheck = map (sum . M.elems) . M.elems
|
||||
|
||||
compress ::
|
||||
forall a.
|
||||
(Integral a, B.FiniteBitsOps a, B.BitOps a, Show a) =>
|
||||
[a] ->
|
||||
Compressed a
|
||||
compress toCompress = Compressed twoByteMarkovs (shortestLocation endStats) (head toCompress) (genericLength toCompress)
|
||||
where
|
||||
twoByteMarkovs = twoByteMarkov toCompress
|
||||
|
||||
rings = M.map toRing twoByteMarkovs
|
||||
|
||||
pairs = zip toCompress . tail $ toCompress
|
||||
|
||||
shortestLocation (WordMarkovStats {..}) = simplestBetween location (location + size)
|
||||
|
||||
endStats = pyramidFold addWordMarkovStats . map statsFor $ pairs
|
||||
|
||||
addWordMarkovStats
|
||||
(WordMarkovStats {location = prevLoc, size = prevSize})
|
||||
(WordMarkovStats {location = nextLoc, size = nextSize}) =
|
||||
WordMarkovStats
|
||||
{ location = prevLoc + (prevSize * nextLoc),
|
||||
size = prevSize * nextSize
|
||||
}
|
||||
|
||||
statsFor (x0, x1) = (rings M.! x0) M.! x1
|
||||
|
||||
pyramidFold f = pyramid
|
||||
where
|
||||
pyramid [x] = x
|
||||
pyramid xs = pyramid $ pyramidFold' xs
|
||||
|
||||
pyramidFold' [x] = [x]
|
||||
pyramidFold' (x0 : x1 : []) = [f x0 x1]
|
||||
pyramidFold' (x0 : x1 : xs) = (f x0 x1) : (pyramidFold' xs)
|
||||
|
||||
-- borrowed and slightly changed from Data.Ratio source
|
||||
simplestBetween :: Rational -> Rational -> Rational
|
||||
simplestBetween x y
|
||||
| x == y = x
|
||||
| x > 0 = simplestBetween' n d n' d'
|
||||
| otherwise = 0 % 1
|
||||
where
|
||||
n = numerator x
|
||||
d = denominator x
|
||||
n' = numerator y
|
||||
d' = denominator y
|
||||
|
||||
simplestBetween' n d n' d' -- assumes 0 < n%d < n'%d'
|
||||
| r == 0 = q % 1
|
||||
| q /= q' = (q + 1) % 1
|
||||
| otherwise = (q * n'' + d'') % n''
|
||||
where
|
||||
(q, r) = quotRem n d
|
||||
(q', r') = quotRem n' d'
|
||||
nd'' = simplestBetween' d' r' d r
|
||||
n'' = numerator nd''
|
||||
d'' = denominator nd''
|
||||
125
src/Compress/Huffman.hs
Normal file
125
src/Compress/Huffman.hs
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
module Compress.Huffman where
|
||||
|
||||
|
||||
import Data.Bifunctor qualified as Bi
|
||||
import Data.FiniteBit
|
||||
import Data.Bit (cloneToByteString)
|
||||
import Data.Bit qualified as B
|
||||
import Data.Bit qualified as BV
|
||||
import Data.ByteString (fromFilePath)
|
||||
import Data.ByteString qualified as BS
|
||||
import Data.Data qualified as D
|
||||
import Data.Foldable qualified as F
|
||||
import Data.IntMap.Strict qualified as IM
|
||||
import qualified Data.Proxy as D
|
||||
import qualified Basement.From as F
|
||||
import Data.Map.Strict qualified as M
|
||||
import Basement.Bits as B
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Data.Maybe qualified as My
|
||||
import Data.PQueue qualified as PQ
|
||||
import Data.Serialize qualified as C
|
||||
import Data.Text qualified as T
|
||||
import Data.Text.IO qualified as TIO
|
||||
import Data.Vector.Unboxed qualified as V
|
||||
import Data.Word
|
||||
import Debug.Trace qualified as D
|
||||
import GHC.Generics (Generic)
|
||||
import Options.Generic qualified as O
|
||||
import System.Environment qualified as SE
|
||||
import Data.HuffmanTree
|
||||
|
||||
decompress
|
||||
:: forall a
|
||||
. (Ord a, Integral a, B.FiniteBitsOps a)
|
||||
=> (TreeDirs, HuffmanTree a)
|
||||
-> Maybe BS.ByteString
|
||||
decompress (TreeDirs treeDirs, tree) = BS.concat . map toByteString <$> decompress' treeDirs
|
||||
where
|
||||
decompress' :: [TreeDir] -> Maybe [a]
|
||||
decompress' [] = Just []
|
||||
decompress' xs = case nextLeaf xs tree of
|
||||
Nothing -> Nothing
|
||||
Just (x, remainingDirs) -> (x :) <$> decompress' remainingDirs
|
||||
|
||||
nextLeaf :: [TreeDir] -> HuffmanTree a -> Maybe (a, [TreeDir])
|
||||
nextLeaf xs (Leaf a) = Just (a, xs)
|
||||
nextLeaf [] _ = Nothing
|
||||
nextLeaf (L : xs) (Node {..}) = nextLeaf xs left
|
||||
nextLeaf (R : xs) (Node {..}) = nextLeaf xs right
|
||||
|
||||
compress
|
||||
:: forall a
|
||||
. (Ord a, Integral a, B.FiniteBitsOps a, B.BitOps a)
|
||||
=> BS.ByteString
|
||||
-> Maybe (TreeDirs, HuffmanTree a)
|
||||
compress bs =
|
||||
liftA2 (,) (TreeDirs <$> treeDirections) mergedHuffmanTrees
|
||||
where
|
||||
treeDirections = concat <$> mapM (treeDirMap M.!?) dividedByteString
|
||||
|
||||
mergedHuffmanTrees =
|
||||
mergeHuffmanTrees
|
||||
. PQ.fromList
|
||||
. map (uncurry (flip (,)) . Bi.first Leaf)
|
||||
. counts
|
||||
$ dividedByteString
|
||||
|
||||
treeDirMap :: M.Map a [TreeDir]
|
||||
treeDirMap = My.maybe M.empty findTreeDirections mergedHuffmanTrees
|
||||
|
||||
dividedByteString = toWordsList bs
|
||||
|
||||
-- testCompression
|
||||
-- :: forall a
|
||||
-- . (Ord a, Eq a, Integral a, B.FiniteBitsOps a, B.BitOps a, C.Serialize a)
|
||||
-- => D.Proxy a
|
||||
-- -> BS.ByteString
|
||||
-- -> Bool
|
||||
-- testCompression _ bs =
|
||||
-- ((Right . Just $ bs) ==)
|
||||
-- . Bi.second (decompress :: Maybe (TreeDirs, HuffmanTree a) -> Maybe BS.ByteString)
|
||||
-- -- . D.traceShowWith (Bi.second (fmap fst))
|
||||
-- . (decodeCompressed :: BS.ByteString -> Either String (Maybe (TreeDirs, HuffmanTree a)))
|
||||
-- . encodeCompressed
|
||||
-- -- . D.traceShowWith (fmap fst)
|
||||
-- . (compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanTree a))
|
||||
-- $ bs
|
||||
|
||||
-- cloneToByteStringWithLen :: V.Vector BV.Bit -> (BS.ByteString, Int)
|
||||
-- cloneToByteStringWithLen vec = (BV.cloneToByteString vec, V.length vec)
|
||||
|
||||
-- cloneFromByteStringWithLen :: (BS.ByteString, Int) -> V.Vector BV.Bit
|
||||
-- cloneFromByteStringWithLen (bs, len) = V.take len . BV.cloneFromByteString $ bs
|
||||
|
||||
-- decodeTreeDirs :: (BS.ByteString, Int) -> [TreeDir]
|
||||
-- decodeTreeDirs = map (\x -> if BV.unBit x then R else L) . V.toList . cloneFromByteStringWithLen
|
||||
|
||||
-- decodeCompressed :: forall a. (Ord a, Integral a, B.FiniteBitsOps a, C.Serialize a) => BS.ByteString -> Either String (Maybe (TreeDirs, HuffmanTree a))
|
||||
-- decodeCompressed = Bi.second (fmap (Bi.first decodeTreeDirs)) . C.decode
|
||||
|
||||
counts :: (Ord a) => [a] -> [(a, Int)]
|
||||
counts = M.toList . F.foldl' combiningInsert M.empty
|
||||
where
|
||||
combiningInsert m key = M.insertWith (+) key 1 m
|
||||
|
||||
divideByteString :: Int -> BS.ByteString -> [BS.ByteString]
|
||||
divideByteString n [] = []
|
||||
divideByteString n bs = x : divideByteString n xs
|
||||
where
|
||||
(x, xs) = BS.splitAt n bs
|
||||
|
||||
|
||||
compressionRatioFor
|
||||
:: forall a
|
||||
. (Integral a, B.FiniteBitsOps a, B.BitOps a, Ord a, C.Serialize a)
|
||||
=> D.Proxy a
|
||||
-> BS.ByteString
|
||||
-> Double
|
||||
compressionRatioFor proxy bs =
|
||||
(/ (fromIntegral . BS.length $ bs))
|
||||
. fromIntegral
|
||||
. BS.length
|
||||
. C.encode
|
||||
. (compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanTree a))
|
||||
$ bs
|
||||
188
src/Compress/PrefixTree.hs
Normal file
188
src/Compress/PrefixTree.hs
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
module Compress.PrefixTree where
|
||||
|
||||
import Basement.Bits qualified as B
|
||||
import qualified Basement.From as F
|
||||
import Compress.Huffman qualified as H
|
||||
import Control.Applicative qualified as A
|
||||
import Data.Bifunctor qualified as Bi
|
||||
import Data.ByteString qualified as BS
|
||||
import Data.Foldable qualified as F
|
||||
import Data.HuffmanTree as HT
|
||||
import Data.List qualified as L
|
||||
import Data.List.NonEmpty qualified as NE
|
||||
import Data.Map.Strict qualified as M
|
||||
import Data.Maybe qualified as My
|
||||
import Data.Ord qualified as O
|
||||
import Data.PQueue qualified as PQ
|
||||
import Debug.Trace qualified as D
|
||||
import Debug.Trace qualified as T
|
||||
import Basement.Bits (FiniteBitsOps(numberOfBits))
|
||||
import GHC.Generics
|
||||
import qualified Data.Serialize as C
|
||||
import Data.FiniteBit
|
||||
|
||||
data Tree a = (Ord a) =>
|
||||
Tree
|
||||
{ children :: M.Map a (Tree a)
|
||||
}
|
||||
|
||||
newtype HuffmanPrefixTree a b = HuffmanPrefixTree
|
||||
{ inner :: M.Map a (HuffmanTree b)
|
||||
} deriving (Eq, Ord, Show, Generic, C.Serialize)
|
||||
|
||||
finiteBitTupleUncons ::
|
||||
forall a b.
|
||||
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
|
||||
BS.ByteString ->
|
||||
Maybe ((a, b), BS.ByteString)
|
||||
finiteBitTupleUncons bs = case finiteBitUncons bs of
|
||||
Just (a, bs') -> case finiteBitUncons bs' of
|
||||
Just (b, _) -> Just ((a, b), bs')
|
||||
_ -> Nothing
|
||||
_ -> Nothing
|
||||
|
||||
fromByteString ::
|
||||
forall a b.
|
||||
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
|
||||
BS.ByteString ->
|
||||
[(a, b)]
|
||||
fromByteString bs = case finiteBitTupleUncons bs of
|
||||
Just ((a, b), bs') -> (a, b) : fromByteString bs'
|
||||
Nothing -> []
|
||||
|
||||
toHuffmanTree ::
|
||||
forall a b.
|
||||
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
|
||||
M.Map (a, b) Word ->
|
||||
HuffmanPrefixTree a b
|
||||
toHuffmanTree =
|
||||
HuffmanPrefixTree
|
||||
. M.mapMaybe HT.fromList
|
||||
. M.fromListWith (++)
|
||||
. map (\((a, b), count) -> (a, [(fromIntegral count, b)]))
|
||||
. M.assocs
|
||||
|
||||
decompress ::
|
||||
forall a .
|
||||
(Integral a, B.FiniteBitsOps a, B.BitOps a) =>
|
||||
(TreeDirs, HuffmanPrefixTree a a, a)
|
||||
-> Maybe BS.ByteString
|
||||
decompress (TreeDirs treeDirs'', HuffmanPrefixTree prefixTree, initial') = BS.concat . map toByteString . (initial' :) <$> decompress' treeDirs'' initial'
|
||||
where
|
||||
decompress' :: [TreeDir] -> a -> Maybe [a]
|
||||
decompress' treeDirs initial = case HT.lookup (prefixTree M.! initial) treeDirs of
|
||||
Nothing -> Nothing
|
||||
Just (ans, []) -> Just [ans]
|
||||
Just (ans, treeDirs') -> (ans :) <$> decompress' treeDirs' ans
|
||||
|
||||
compress ::
|
||||
forall a b.
|
||||
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
|
||||
BS.ByteString -> Maybe (TreeDirs, HuffmanPrefixTree a b, a)
|
||||
compress bs = (TreeDirs $ concatMap treeDirsFor asFiniteBitPairs, tree, ) <$> initial
|
||||
where
|
||||
tree :: HuffmanPrefixTree a b
|
||||
tree = toHuffmanTree . nGramCounts $ bs
|
||||
|
||||
treeDirMap :: M.Map a (M.Map b [TreeDir])
|
||||
treeDirMap = M.map HT.findTreeDirections . Compress.PrefixTree.inner $ tree
|
||||
|
||||
initial :: Maybe a
|
||||
initial = fst <$> finiteBitUncons bs
|
||||
|
||||
asFiniteBitPairs :: [(a,b)]
|
||||
asFiniteBitPairs = fromByteString bs
|
||||
|
||||
treeDirsFor :: (a, b) -> [TreeDir]
|
||||
treeDirsFor (a, b) = (treeDirMap M.! a) M.! b
|
||||
|
||||
|
||||
|
||||
-- | all (M.null . children) . M.elems . children $ tree =
|
||||
-- fmap End
|
||||
-- . HT.fromList
|
||||
-- . map (\x -> (prefixCounts x, x))
|
||||
-- . M.keys
|
||||
-- . children
|
||||
-- $ tree
|
||||
-- | otherwise =
|
||||
-- Just
|
||||
-- . Layer
|
||||
-- . M.mapMaybeWithKey (\key val -> toHuffmanTree' (key : soFar) val)
|
||||
-- . children
|
||||
-- $ tree
|
||||
-- where
|
||||
-- prefixCounts :: a -> Int
|
||||
-- prefixCounts x =
|
||||
-- fromIntegral
|
||||
-- . sum
|
||||
-- . M.elems
|
||||
-- . M.filterWithKey (\key val -> L.isPrefixOf (reverse . (x :) $ soFar) key)
|
||||
-- $ nGrams
|
||||
|
||||
-- toHuffmanTree :: Tree a -> p1 -> HuffmanTree a
|
||||
-- toHuffmanTree :: forall a . Tree a -> M.Map [a] Word -> HuffmanTree [a]
|
||||
-- toHuffmanTree (Tree {..}) nGrams soFar | M.size children == 1 = Leaf . map (reverse . (: soFar)) . M.keys $ children
|
||||
-- toHuffmanTree (Tree {..}) nGrams soFar = Leaf . map (reverse . (: soFar)) . M.keys $ children
|
||||
-- where
|
||||
-- sorted = L.sortBy (prefixCounts . fst) . M.toList $ children
|
||||
|
||||
nGramCounts ::
|
||||
forall a b.
|
||||
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
|
||||
BS.ByteString ->
|
||||
M.Map (a, b) Word
|
||||
nGramCounts =
|
||||
M.fromListWith (+)
|
||||
. map (,1)
|
||||
. My.mapMaybe (My.listToMaybe . fromByteString)
|
||||
. takeWhile ((== len) . BS.length)
|
||||
. map (BS.take len)
|
||||
. BS.tails
|
||||
where
|
||||
len = (`div` 8) . F.from $ numberOfBits (0 :: a) + numberOfBits (0 :: b)
|
||||
|
||||
empty :: (Ord a) => Tree a
|
||||
empty = Tree M.empty
|
||||
|
||||
singleton :: (Ord a) => a -> Tree a
|
||||
singleton x = Tree $ M.singleton x empty
|
||||
|
||||
fromSingleList :: (Ord a) => [a] -> Tree a
|
||||
fromSingleList [] = empty
|
||||
fromSingleList (x : xs) = Tree . M.singleton x . fromSingleList $ xs
|
||||
|
||||
fromList :: (Ord a) => [[a]] -> Tree a
|
||||
fromList = F.foldl' merge empty . map fromSingleList
|
||||
|
||||
-- insert :: Ord a => Tree a -> [a] -> Tree a
|
||||
-- insert (Tree {..}) (x:xs) =
|
||||
|
||||
merge :: Tree a -> Tree a -> Tree a
|
||||
merge (Tree children0) (Tree children1) = Tree $ M.unionWith merge children0 children1
|
||||
|
||||
-- deriving instance Eq (Tree a)
|
||||
|
||||
-- deriving instance Ord (Tree a)
|
||||
|
||||
-- deriving instance (Show a) => Show (Tree a)
|
||||
|
||||
-- empty :: (Ord a) => Tree a
|
||||
-- empty = Tree M.empty
|
||||
|
||||
-- fromList :: (Ord a, F.Foldable t) => t [a] -> Tree a
|
||||
-- fromList = F.foldl' insert empty
|
||||
|
||||
-- insert :: Tree a -> [a] -> Tree a
|
||||
-- insert (Tree {..}) [] = Tree M.empty
|
||||
-- insert (Tree {..}) (x : xs) =
|
||||
-- Tree
|
||||
-- . flip (M.insert x) children
|
||||
-- . flip insert xs
|
||||
-- . My.fromMaybe empty
|
||||
-- . M.lookup x
|
||||
-- $ children
|
||||
|
||||
-- lookup :: (Ord a) => [a] -> Tree a -> Bool
|
||||
-- lookup [] = const True
|
||||
-- lookup (x : xs) = maybe False (Compress.PrefixTree.lookup xs) . M.lookup x . children
|
||||
45
src/Data/FiniteBit.hs
Normal file
45
src/Data/FiniteBit.hs
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
module Data.FiniteBit where
|
||||
|
||||
import Data.Bit (cloneToByteString)
|
||||
import Data.Bit qualified as B
|
||||
import Data.Bit qualified as BV
|
||||
import Data.ByteString qualified as BS
|
||||
import qualified Data.Proxy as D
|
||||
import qualified Basement.From as F
|
||||
import Data.Foldable qualified as F
|
||||
import Data.Word
|
||||
import Basement.Bits as B
|
||||
|
||||
|
||||
numBytesIn :: forall a. (B.FiniteBitsOps a, Integral a) => D.Proxy a -> Int
|
||||
numBytesIn _ = (`div` 8) . F.from . B.numberOfBits $ (0 :: a)
|
||||
|
||||
|
||||
toWordsList :: forall a. (Integral a, B.FiniteBitsOps a, B.BitOps a) => BS.ByteString -> [a]
|
||||
toWordsList bs = case finiteBitUncons bs of
|
||||
Nothing -> []
|
||||
(Just (x, xs)) -> x : toWordsList xs
|
||||
|
||||
toByteString :: forall a. (Integral a, B.FiniteBitsOps a) => a -> BS.ByteString
|
||||
toByteString n = BS.pack . take numBytes . map (fromIntegral . (n `B.rotateL`)) $ [8, 16 ..]
|
||||
where
|
||||
numBytes = numBytesIn (D.Proxy :: D.Proxy a)
|
||||
|
||||
finiteBitUncons
|
||||
:: forall a
|
||||
. (Integral a, B.FiniteBitsOps a, B.BitOps a)
|
||||
=> BS.ByteString
|
||||
-> Maybe (a, BS.ByteString)
|
||||
finiteBitUncons [] = Nothing
|
||||
finiteBitUncons bs =
|
||||
Just
|
||||
. (,rest)
|
||||
. F.foldl' (.|.) 0
|
||||
. zipWith (flip B.rotateR) [8, 16 ..]
|
||||
. map (fromIntegral :: Word8 -> a)
|
||||
. BS.unpack
|
||||
$ takenBytes
|
||||
where
|
||||
takenBytes :: BS.ByteString
|
||||
rest :: BS.ByteString
|
||||
(takenBytes, rest) = BS.splitAt (numBytesIn (D.Proxy :: D.Proxy a)) bs
|
||||
67
src/Data/HuffmanTree.hs
Normal file
67
src/Data/HuffmanTree.hs
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
module Data.HuffmanTree where
|
||||
|
||||
import Data.Bifunctor qualified as Bi
|
||||
import Data.Map.Strict qualified as M
|
||||
import Data.PQueue qualified as PQ
|
||||
import Data.Serialize qualified as C
|
||||
import GHC.Generics (Generic)
|
||||
import Data.Vector.Unboxed qualified as V
|
||||
import Data.Bit qualified as B
|
||||
import Data.Bit qualified as BV
|
||||
import Data.ByteString qualified as BS
|
||||
|
||||
data HuffmanTree a
|
||||
= Leaf a
|
||||
| Node
|
||||
{ left :: HuffmanTree a,
|
||||
right :: HuffmanTree a
|
||||
}
|
||||
deriving (Eq, Ord, Show, Generic, C.Serialize, Functor)
|
||||
|
||||
|
||||
-- here so we can define our own Serialize instance
|
||||
newtype TreeDirs = TreeDirs {
|
||||
inner :: [TreeDir]
|
||||
} deriving (Eq, Ord, Show)
|
||||
|
||||
data TreeDir = L | R deriving (Eq, Ord, Show)
|
||||
|
||||
instance C.Serialize TreeDirs where
|
||||
put :: C.Putter TreeDirs
|
||||
put = C.put . Bi.first BV.cloneToByteString . (\x -> (x, V.length x)) . V.fromList . map (BV.Bit . (== R)) . (inner :: TreeDirs -> [TreeDir])
|
||||
|
||||
get :: C.Get TreeDirs
|
||||
get = do
|
||||
(bs, len) <- C.get
|
||||
pure . TreeDirs . map (\x -> if BV.unBit x then R else L) . V.toList . V.take len . BV.cloneFromByteString $ bs
|
||||
|
||||
lookup ::
|
||||
forall a.
|
||||
(Ord a) =>
|
||||
HuffmanTree a ->
|
||||
[TreeDir] ->
|
||||
Maybe (a, [TreeDir])
|
||||
lookup (Node {..}) (L : xs) = Data.HuffmanTree.lookup left xs
|
||||
lookup (Node {..}) (R : xs) = Data.HuffmanTree.lookup right xs
|
||||
lookup (Leaf a) xs = Just (a, xs)
|
||||
lookup _ [] = Nothing
|
||||
|
||||
findTreeDirections ::
|
||||
forall a.
|
||||
(Ord a) =>
|
||||
HuffmanTree a ->
|
||||
M.Map a [TreeDir]
|
||||
findTreeDirections (Leaf a) = M.singleton a []
|
||||
findTreeDirections (Node {..}) = M.union (rec' L left) (rec' R right)
|
||||
where
|
||||
rec' :: TreeDir -> HuffmanTree a -> M.Map a [TreeDir]
|
||||
rec' dir = M.map (dir :) . findTreeDirections
|
||||
|
||||
mergeHuffmanTrees :: PQ.PQueue (HuffmanTree a) -> Maybe (HuffmanTree a)
|
||||
mergeHuffmanTrees queue = case (Bi.second . Bi.second $ PQ.minView) <$> PQ.minView queue of
|
||||
Nothing -> Nothing
|
||||
Just (size, (x, Nothing)) -> Just x
|
||||
Just (size, (x, Just (size1, (x', rest)))) -> mergeHuffmanTrees $ PQ.insert (size + size1) (Node x x') rest
|
||||
|
||||
fromList :: [(Int, a)] -> Maybe (HuffmanTree a)
|
||||
fromList = mergeHuffmanTrees . PQ.fromList . map (Bi.second Leaf)
|
||||
|
|
@ -16,7 +16,10 @@ fromList = PQueue . IM.fromListWith NE.append . map (Bi.second NE.singleton)
|
|||
singleton :: Int -> a -> PQueue a
|
||||
singleton key a = PQueue $ IM.singleton key [a]
|
||||
|
||||
abstractView :: (IM.IntMap (NE.NonEmpty a) -> Maybe (IM.Key, NE.NonEmpty a)) -> PQueue a -> Maybe (IM.Key, (a, PQueue a))
|
||||
abstractView
|
||||
:: (IM.IntMap (NE.NonEmpty a) -> Maybe (IM.Key, NE.NonEmpty a))
|
||||
-> PQueue a
|
||||
-> Maybe (IM.Key, (a, PQueue a))
|
||||
abstractView f (PQueue m) = case f m of
|
||||
Nothing -> Nothing
|
||||
(Just (key, x NE.:| (x' : xs))) -> Just (key, (x, PQueue $ IM.insert key (x' NE.:| xs) m))
|
||||
|
|
|
|||
209
src/Main.hs
209
src/Main.hs
|
|
@ -1,182 +1,67 @@
|
|||
module Main where
|
||||
|
||||
import Basement.Bits qualified as B
|
||||
import Compress.Huffman
|
||||
import Compress.Huffman qualified as HT
|
||||
import Compress.PrefixTree (HuffmanPrefixTree (HuffmanPrefixTree))
|
||||
import Compress.PrefixTree qualified as PT
|
||||
import Compress.Arithmetic qualified as A
|
||||
import Data.Bifunctor qualified as Bi
|
||||
import Data.Bit (cloneToByteString)
|
||||
import Data.Bit qualified as B
|
||||
import Data.Bit qualified as BV
|
||||
import Data.Bits (Bits ((.|.)))
|
||||
import Data.Bits qualified as B
|
||||
import Data.ByteString (fromFilePath)
|
||||
import Data.ByteString qualified as BS
|
||||
import Data.Data qualified as D
|
||||
import Data.Foldable qualified as F
|
||||
import Data.IntMap.Strict qualified as IM
|
||||
import Data.Map.Strict qualified as M
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Data.Maybe qualified as My
|
||||
import Data.PQueue qualified as PQ
|
||||
import Data.HuffmanTree
|
||||
import Data.Proxy qualified as P
|
||||
import Data.Serialize qualified as C
|
||||
import Data.Text qualified as T
|
||||
import Data.Text.IO qualified as TIO
|
||||
import Data.Vector.Unboxed qualified as V
|
||||
import Data.Word
|
||||
import Debug.Trace qualified as D
|
||||
import GHC.Generics (Generic)
|
||||
import Options.Generic qualified as O
|
||||
import System.Environment qualified as SE
|
||||
import qualified Data.FiniteBit as FB
|
||||
|
||||
data CompressOrDecompress = Compress | Decompress deriving (Show, Generic)
|
||||
data CompressOrDecompress = Compress | Decompress deriving (Show, Generic, O.ParseField, O.ParseFields, O.ParseRecord, Read)
|
||||
|
||||
instance O.ParseRecord CompressOrDecompress
|
||||
data CompressionStrategy = Huffman | MarkovHuffman deriving (Show, Generic, O.ParseField, O.ParseFields, O.ParseRecord, Read)
|
||||
|
||||
data HuffmanTree a
|
||||
= Leaf a
|
||||
| Node
|
||||
{left :: HuffmanTree a, right :: HuffmanTree a}
|
||||
deriving (Eq, Ord, Show, Generic, C.Serialize)
|
||||
data CLIOpts = CLIOpts
|
||||
{ task :: CompressOrDecompress,
|
||||
strategy :: CompressionStrategy
|
||||
}
|
||||
deriving (Show, Generic, O.ParseRecord)
|
||||
|
||||
data TreeDir = L | R deriving (Eq, Ord, Show, Generic, C.Serialize)
|
||||
applyCompressionOptions ::
|
||||
forall a.
|
||||
(Integral a, B.BitOps a, B.FiniteBitsOps a, Ord a, C.Serialize a) =>
|
||||
P.Proxy a ->
|
||||
CLIOpts ->
|
||||
BS.ByteString ->
|
||||
BS.ByteString
|
||||
applyCompressionOptions _ (CLIOpts Compress Huffman) f =
|
||||
C.encode . (compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanTree a)) $ f
|
||||
applyCompressionOptions _ (CLIOpts Compress MarkovHuffman) f =
|
||||
C.encode . (PT.compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanPrefixTree a a, a)) $ f
|
||||
applyCompressionOptions _ (CLIOpts Decompress Huffman) f =
|
||||
handleError $ Bi.second decompress . (C.decode :: BS.ByteString -> Either String (TreeDirs, HuffmanTree a)) $ f
|
||||
applyCompressionOptions _ (CLIOpts Decompress MarkovHuffman) f =
|
||||
handleError $ Bi.second PT.decompress . (C.decode :: BS.ByteString -> Either String (TreeDirs, HuffmanPrefixTree a a, a)) $ f
|
||||
|
||||
findTreeDirections :: forall a. (Ord a) => HuffmanTree a -> M.Map a [TreeDir]
|
||||
findTreeDirections (Leaf a) = M.singleton a []
|
||||
findTreeDirections (Node {..}) = M.union (rec' L left) (rec' R right)
|
||||
where
|
||||
rec' :: TreeDir -> HuffmanTree a -> M.Map a [TreeDir]
|
||||
rec' dir = M.map (dir :) . findTreeDirections
|
||||
|
||||
decompress :: forall a. (Ord a, Integral a, B.FiniteBits a) => Maybe ([TreeDir], HuffmanTree a) -> Maybe BS.ByteString
|
||||
decompress Nothing = Just []
|
||||
decompress (Just (treeDirs, tree)) = BS.concat . map toByteString <$> decompress' treeDirs
|
||||
where
|
||||
decompress' :: [TreeDir] -> Maybe [a]
|
||||
decompress' [] = Just []
|
||||
decompress' xs = case nextLeaf xs tree of
|
||||
Nothing -> Nothing
|
||||
Just (x, remainingDirs) -> (x :) <$> decompress' remainingDirs
|
||||
|
||||
nextLeaf :: [TreeDir] -> HuffmanTree a -> Maybe (a, [TreeDir])
|
||||
nextLeaf xs (Leaf a) = Just (a, xs)
|
||||
nextLeaf [] _ = Nothing
|
||||
nextLeaf (L : xs) (Node {..}) = nextLeaf xs left
|
||||
nextLeaf (R : xs) (Node {..}) = nextLeaf xs right
|
||||
|
||||
compress :: forall a. (Ord a, Integral a, B.FiniteBits a) => BS.ByteString -> Maybe ([TreeDir], HuffmanTree a)
|
||||
compress bs =
|
||||
liftA2 (,) treeDirections mergedHuffmanTrees
|
||||
where
|
||||
treeDirections = concat <$> mapM (treeDirMap M.!?) dividedByteString
|
||||
|
||||
mergedHuffmanTrees =
|
||||
mergeHuffmanTrees
|
||||
. PQ.fromList
|
||||
. map (uncurry (flip (,)) . Bi.first Leaf)
|
||||
. counts
|
||||
$ dividedByteString
|
||||
|
||||
treeDirMap :: M.Map a [TreeDir]
|
||||
treeDirMap = My.maybe M.empty findTreeDirections mergedHuffmanTrees
|
||||
|
||||
dividedByteString = toBitsList bs
|
||||
|
||||
testCompression :: forall a. (Ord a, Eq a, Integral a, B.FiniteBits a, C.Serialize a) => D.Proxy a -> BS.ByteString -> Bool
|
||||
testCompression _ bs =
|
||||
((Right . Just $ bs) ==)
|
||||
. Bi.second (decompress :: Maybe ([TreeDir], HuffmanTree a) -> Maybe BS.ByteString)
|
||||
-- . D.traceShowWith (Bi.second (fmap fst))
|
||||
. (decodeCompressed :: BS.ByteString -> Either String (Maybe ([TreeDir], HuffmanTree a)))
|
||||
. encodeCompressed
|
||||
-- . D.traceShowWith (fmap fst)
|
||||
. (compress :: BS.ByteString -> Maybe ([TreeDir], HuffmanTree a))
|
||||
$ bs
|
||||
|
||||
encodeCompressed :: (C.Serialize a) => Maybe ([TreeDir], HuffmanTree a) -> BS.ByteString
|
||||
encodeCompressed = C.encode . fmap (Bi.first encodeTreeDirs)
|
||||
where
|
||||
encodeTreeDirs = cloneToByteStringWithLen . V.fromList . map (BV.Bit . (== R))
|
||||
|
||||
cloneToByteStringWithLen :: V.Vector BV.Bit -> (BS.ByteString, Int)
|
||||
cloneToByteStringWithLen vec = (BV.cloneToByteString vec, V.length vec)
|
||||
|
||||
cloneFromByteStringWithLen :: (BS.ByteString, Int) -> V.Vector BV.Bit
|
||||
cloneFromByteStringWithLen (bs, len) = V.take len . BV.cloneFromByteString $ bs
|
||||
|
||||
decodeTreeDirs :: (BS.ByteString, Int) -> [TreeDir]
|
||||
decodeTreeDirs = map (\x -> if BV.unBit x then R else L) . V.toList . cloneFromByteStringWithLen
|
||||
|
||||
decodeCompressed :: forall a. (Ord a, Integral a, B.FiniteBits a, C.Serialize a) => BS.ByteString -> Either String (Maybe ([TreeDir], HuffmanTree a))
|
||||
decodeCompressed = Bi.second (fmap (Bi.first decodeTreeDirs)) . C.decode
|
||||
|
||||
mergeHuffmanTrees :: PQ.PQueue (HuffmanTree a) -> Maybe (HuffmanTree a)
|
||||
mergeHuffmanTrees queue = case (Bi.second . Bi.second $ PQ.minView) <$> PQ.minView queue of
|
||||
Nothing -> Nothing
|
||||
Just (size, (x, Nothing)) -> Just x
|
||||
Just (size, (x, Just (size1, (x', rest)))) -> mergeHuffmanTrees $ PQ.insert (size + size1) (Node x x') rest
|
||||
|
||||
counts :: (Ord a) => [a] -> [(a, Int)]
|
||||
counts = M.toList . F.foldl' combiningInsert M.empty
|
||||
where
|
||||
combiningInsert m key = M.insertWith (+) key 1 m
|
||||
|
||||
divideByteString :: Int -> BS.ByteString -> [BS.ByteString]
|
||||
divideByteString n [] = []
|
||||
divideByteString n bs = x : divideByteString n xs
|
||||
where
|
||||
(x, xs) = BS.splitAt n bs
|
||||
|
||||
toBitsList :: forall a. (Integral a, B.FiniteBits a) => BS.ByteString -> [a]
|
||||
toBitsList bs = case finiteBitUncons bs of
|
||||
Nothing -> []
|
||||
(Just (x, xs)) -> x : (toBitsList xs)
|
||||
|
||||
toByteString :: forall a. (Integral a, B.FiniteBits a) => a -> BS.ByteString
|
||||
toByteString n = BS.pack . take numBytes . map (fromIntegral . (n `B.rotateL`)) $ [8, 16 ..]
|
||||
where
|
||||
numBytes = numBytesIn (D.Proxy :: D.Proxy a)
|
||||
|
||||
finiteBitUncons :: forall a. (Integral a, B.FiniteBits a) => BS.ByteString -> Maybe (a, BS.ByteString)
|
||||
finiteBitUncons [] = Nothing
|
||||
finiteBitUncons bs =
|
||||
Just
|
||||
. (,rest)
|
||||
. F.foldl' (.|.) 0
|
||||
. zipWith (flip B.rotateR) [8, 16 ..]
|
||||
. map (fromIntegral :: Word8 -> a)
|
||||
. BS.unpack
|
||||
$ takenBytes
|
||||
where
|
||||
takenBytes :: BS.ByteString
|
||||
rest :: BS.ByteString
|
||||
(takenBytes, rest) = BS.splitAt (numBytesIn (D.Proxy :: D.Proxy a)) bs
|
||||
|
||||
numBytesIn :: forall a. (B.FiniteBits a) => D.Proxy a -> Int
|
||||
numBytesIn _ = (`div` 8) . B.finiteBitSize $ (B.zeroBits :: a)
|
||||
|
||||
compressionRatioFor
|
||||
:: forall a
|
||||
. (Integral a, B.FiniteBits a, Ord a, C.Serialize a)
|
||||
=> D.Proxy a
|
||||
-> BS.ByteString
|
||||
-> Double
|
||||
compressionRatioFor proxy bs =
|
||||
(/ (fromIntegral . BS.length $ bs))
|
||||
. fromIntegral
|
||||
. BS.length
|
||||
. encodeCompressed
|
||||
. (compress :: BS.ByteString -> Maybe ([TreeDir], HuffmanTree a))
|
||||
$ bs
|
||||
|
||||
applyCompressionOptions :: CompressOrDecompress -> BS.ByteString -> BS.ByteString
|
||||
applyCompressionOptions Compress f = encodeCompressed . (compress :: BS.ByteString -> Maybe ([TreeDir], HuffmanTree Word8)) $ f
|
||||
applyCompressionOptions Decompress f = case Bi.second decompress
|
||||
. (decodeCompressed :: BS.ByteString -> Either String (Maybe ([TreeDir], HuffmanTree Word8)))
|
||||
$ f of
|
||||
-- TODO: write errors to stderr
|
||||
(Left _) -> []
|
||||
(Right Nothing) -> []
|
||||
(Right (Just bs)) -> bs
|
||||
handleError (Right (Just bs)) = bs
|
||||
handleError _ = []
|
||||
|
||||
main :: IO ()
|
||||
main = do
|
||||
compresionOrDecompression :: CompressOrDecompress <- O.getRecord "compression/decompression"
|
||||
-- cliOpts :: CLIOpts <- O.getRecord "compress/decompress & strategy"
|
||||
f <- BS.getContents
|
||||
BS.putStr . applyCompressionOptions compresionOrDecompression $ f
|
||||
BS.putStr . C.encode . A.compress . (FB.toWordsList :: BS.ByteString -> [ Word8 ]) $ f
|
||||
-- let f = "hello tehre"
|
||||
-- f <- BS.readFile "pg64317.txt"
|
||||
-- let (compressed :: Maybe (TreeDirs, PT.HuffmanPrefixTree Word8 Word8, Word8)) = PT.compress f
|
||||
-- print $ BS.length . C.encode $ compressed
|
||||
-- print $ BS.length . C.encode . (compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanTree Word8)) $ f
|
||||
-- print $ BS.length . C.encode . (compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanTree Word16)) $ f
|
||||
-- BS.writeFile "outin.txt" decompressed
|
||||
-- print (decompressed, f)
|
||||
-- print $ BS.length decompressed
|
||||
-- print $ BS.length f
|
||||
-- print (decompressed == f)
|
||||
-- BS.putStr . applyCompressionOptions (P.Proxy :: P.Proxy Word16) cliOpts $ f
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue