add arithmetic coding
This commit is contained in:
parent
6d00525334
commit
791fff6107
11 changed files with 667 additions and 194 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -1,4 +1,5 @@
|
||||||
/dist-newstyle/
|
/dist-newstyle/
|
||||||
|
/haddocks/
|
||||||
/result
|
/result
|
||||||
/.direnv/
|
/.direnv/
|
||||||
**/.DS_Store
|
**/.DS_Store
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,7 @@ executable compress
|
||||||
src
|
src
|
||||||
build-depends:
|
build-depends:
|
||||||
base,
|
base,
|
||||||
|
basement,
|
||||||
bitvec,
|
bitvec,
|
||||||
bytestring,
|
bytestring,
|
||||||
cereal,
|
cereal,
|
||||||
|
|
@ -50,12 +51,20 @@ executable compress
|
||||||
text,
|
text,
|
||||||
uuid,
|
uuid,
|
||||||
optparse-generic,
|
optparse-generic,
|
||||||
vector
|
vector,
|
||||||
|
nonempty-containers,
|
||||||
|
primes
|
||||||
default-language:
|
default-language:
|
||||||
GHC2021
|
GHC2021
|
||||||
other-modules:
|
other-modules:
|
||||||
Data.PQueue
|
Data.PQueue
|
||||||
-- ghc-options:
|
Data.FiniteBit
|
||||||
-- -fprof-auto
|
Compress.Huffman
|
||||||
-- -fprof-late
|
Compress.PrefixTree
|
||||||
-- "-with-rtsopts=-p -hc"
|
Data.HuffmanTree
|
||||||
|
Compress.Arithmetic
|
||||||
|
ghc-options:
|
||||||
|
-threaded
|
||||||
|
-fprof-auto
|
||||||
|
-fprof-late
|
||||||
|
"-with-rtsopts=-p -hc"
|
||||||
|
|
|
||||||
42
flake.lock
generated
42
flake.lock
generated
|
|
@ -5,11 +5,11 @@
|
||||||
"nixpkgs-lib": "nixpkgs-lib"
|
"nixpkgs-lib": "nixpkgs-lib"
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1712014858,
|
"lastModified": 1733312601,
|
||||||
"narHash": "sha256-sB4SWl2lX95bExY2gMFG5HIzvva5AVMJd4Igm+GpZNw=",
|
"narHash": "sha256-4pDvzqnegAfRkPwO3wmwBhVi/Sye1mzps0zHWYnP88c=",
|
||||||
"owner": "hercules-ci",
|
"owner": "hercules-ci",
|
||||||
"repo": "flake-parts",
|
"repo": "flake-parts",
|
||||||
"rev": "9126214d0a59633752a136528f5f3b9aa8565b7d",
|
"rev": "205b12d8b7cd4802fbcb8e8ef6a0f1408781a4f9",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
|
@ -20,11 +20,11 @@
|
||||||
},
|
},
|
||||||
"haskell-flake": {
|
"haskell-flake": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1713084600,
|
"lastModified": 1734464164,
|
||||||
"narHash": "sha256-qL7LV2MtwJ+1Xasg1TjSUmoE7yrRuXPqxpPlKjLE0SE=",
|
"narHash": "sha256-5JCCyrgy7IMnipyYMQzIAXncGt2XVlW1aK71A+FTXDs=",
|
||||||
"owner": "srid",
|
"owner": "srid",
|
||||||
"repo": "haskell-flake",
|
"repo": "haskell-flake",
|
||||||
"rev": "847292fc793a5c15c873e52e7751ee4267ef32a0",
|
"rev": "e280b39efdd72b6a5bdaa982b67f150c819be642",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
|
@ -35,11 +35,11 @@
|
||||||
},
|
},
|
||||||
"nixpkgs": {
|
"nixpkgs": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1712963716,
|
"lastModified": 1734424634,
|
||||||
"narHash": "sha256-WKm9CvgCldeIVvRz87iOMi8CFVB1apJlkUT4GGvA0iM=",
|
"narHash": "sha256-cHar1vqHOOyC7f1+tVycPoWTfKIaqkoe1Q6TnKzuti4=",
|
||||||
"owner": "nixos",
|
"owner": "nixos",
|
||||||
"repo": "nixpkgs",
|
"repo": "nixpkgs",
|
||||||
"rev": "cfd6b5fc90b15709b780a5a1619695a88505a176",
|
"rev": "d3c42f187194c26d9f0309a8ecc469d6c878ce33",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
|
@ -51,20 +51,14 @@
|
||||||
},
|
},
|
||||||
"nixpkgs-lib": {
|
"nixpkgs-lib": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"dir": "lib",
|
"lastModified": 1733096140,
|
||||||
"lastModified": 1711703276,
|
"narHash": "sha256-1qRH7uAUsyQI7R1Uwl4T+XvdNv778H0Nb5njNrqvylY=",
|
||||||
"narHash": "sha256-iMUFArF0WCatKK6RzfUJknjem0H9m4KgorO/p3Dopkk=",
|
"type": "tarball",
|
||||||
"owner": "NixOS",
|
"url": "https://github.com/NixOS/nixpkgs/archive/5487e69da40cbd611ab2cadee0b4637225f7cfae.tar.gz"
|
||||||
"repo": "nixpkgs",
|
|
||||||
"rev": "d8fe5e6c92d0d190646fb9f1056741a229980089",
|
|
||||||
"type": "github"
|
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
"dir": "lib",
|
"type": "tarball",
|
||||||
"owner": "NixOS",
|
"url": "https://github.com/NixOS/nixpkgs/archive/5487e69da40cbd611ab2cadee0b4637225f7cfae.tar.gz"
|
||||||
"ref": "nixos-unstable",
|
|
||||||
"repo": "nixpkgs",
|
|
||||||
"type": "github"
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"root": {
|
"root": {
|
||||||
|
|
@ -98,11 +92,11 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1711963903,
|
"lastModified": 1733761991,
|
||||||
"narHash": "sha256-N3QDhoaX+paWXHbEXZapqd1r95mdshxToGowtjtYkGI=",
|
"narHash": "sha256-s4DalCDepD22jtKL5Nw6f4LP5UwoMcPzPZgHWjAfqbQ=",
|
||||||
"owner": "numtide",
|
"owner": "numtide",
|
||||||
"repo": "treefmt-nix",
|
"repo": "treefmt-nix",
|
||||||
"rev": "49dc4a92b02b8e68798abd99184f228243b6e3ac",
|
"rev": "0ce9d149d99bc383d1f2d85f31f6ebd146e46085",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
|
|
||||||
11
flake.nix
11
flake.nix
|
|
@ -22,7 +22,7 @@
|
||||||
# See https://github.com/srid/haskell-flake/blob/master/example/flake.nix
|
# See https://github.com/srid/haskell-flake/blob/master/example/flake.nix
|
||||||
haskellProjects.default = {
|
haskellProjects.default = {
|
||||||
# The base package set (this value is the default)
|
# The base package set (this value is the default)
|
||||||
# basePackages = pkgs.haskellPackages;
|
basePackages = pkgs.haskell.packages.ghc96;
|
||||||
|
|
||||||
# Packages to add on top of `basePackages`
|
# Packages to add on top of `basePackages`
|
||||||
packages = {
|
packages = {
|
||||||
|
|
@ -34,6 +34,13 @@
|
||||||
|
|
||||||
# Add your package overrides here
|
# Add your package overrides here
|
||||||
settings = {
|
settings = {
|
||||||
|
uuid ={
|
||||||
|
jailbreak = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
# hlint = {
|
||||||
|
# jailbreak = true;
|
||||||
|
# };
|
||||||
# barbies-th = {
|
# barbies-th = {
|
||||||
# broken = false;
|
# broken = false;
|
||||||
# jailbreak = true;
|
# jailbreak = true;
|
||||||
|
|
@ -57,7 +64,7 @@
|
||||||
programs.ormolu.enable = true;
|
programs.ormolu.enable = true;
|
||||||
programs.nixpkgs-fmt.enable = true;
|
programs.nixpkgs-fmt.enable = true;
|
||||||
programs.cabal-fmt.enable = true;
|
programs.cabal-fmt.enable = true;
|
||||||
programs.hlint.enable = true;
|
programs.hlint.enable = false;
|
||||||
|
|
||||||
# We use fourmolu
|
# We use fourmolu
|
||||||
programs.ormolu.package = pkgs.haskellPackages.fourmolu;
|
programs.ormolu.package = pkgs.haskellPackages.fourmolu;
|
||||||
|
|
|
||||||
149
src/Compress/Arithmetic.hs
Normal file
149
src/Compress/Arithmetic.hs
Normal file
|
|
@ -0,0 +1,149 @@
|
||||||
|
-- | https://en.wikipedia.org/wiki/Arithmetic_coding
|
||||||
|
module Compress.Arithmetic where
|
||||||
|
|
||||||
|
import Basement.Bits qualified as B
|
||||||
|
import Basement.Compat.Base (Word16, Word32, Word64, Word8)
|
||||||
|
import Basement.Compat.Bifunctor qualified as Bi
|
||||||
|
import Control.Arrow qualified as Ar
|
||||||
|
import Data.ByteString qualified as By
|
||||||
|
import Data.FiniteBit qualified as Fi
|
||||||
|
import Data.Foldable as F
|
||||||
|
import Data.List (genericLength)
|
||||||
|
import Data.List qualified as L
|
||||||
|
import Data.Map.Strict qualified as M
|
||||||
|
import Data.Maybe qualified as My
|
||||||
|
import Data.Numbers.Primes
|
||||||
|
import Data.Ord
|
||||||
|
import Data.Ratio
|
||||||
|
import Data.Serialize qualified as C
|
||||||
|
import GHC.Generics (Generic)
|
||||||
|
import GHC.Natural (Natural)
|
||||||
|
|
||||||
|
-- withPrimeDenums = concatMap (\x -> map (% x) [1 .. (pred x)]) primes
|
||||||
|
|
||||||
|
-- shortestNumberBetween :: Ratio Integer -> Ratio Integer -> Maybe (Ratio Integer)
|
||||||
|
-- shortestNumberBetween xMin xMax = find (\candidate -> candidate >= xMin && candidate < xMax) withPrimeDenums
|
||||||
|
|
||||||
|
data WordMarkovStats = WordMarkovStats
|
||||||
|
{ location :: Ratio Integer,
|
||||||
|
size :: Ratio Integer
|
||||||
|
}
|
||||||
|
deriving (Show, Eq, Ord, Generic, C.Serialize)
|
||||||
|
|
||||||
|
toRing :: forall a k. (Ord k, Integral a, Bounded a) => M.Map k a -> M.Map k WordMarkovStats
|
||||||
|
toRing m = M.fromList . zip (map fst asList) $ zipWith WordMarkovStats (scanl (+) 0 . map snd $ asList) (map snd asList)
|
||||||
|
where
|
||||||
|
asList = M.toList $ m'
|
||||||
|
sum' =
|
||||||
|
sum
|
||||||
|
. map fromIntegral
|
||||||
|
. M.elems
|
||||||
|
$ m
|
||||||
|
m' = M.map ((% sum') . fromIntegral) m
|
||||||
|
|
||||||
|
maxBound' :: Integer
|
||||||
|
maxBound' = fromIntegral (maxBound :: a)
|
||||||
|
|
||||||
|
twoByteMarkov :: forall k b. (Num b, Integral b, Bounded b, Ord k) => [k] -> M.Map k (M.Map k b)
|
||||||
|
twoByteMarkov xs =
|
||||||
|
M.map sizeAsFraction
|
||||||
|
. M.fromListWith (M.unionWith (+))
|
||||||
|
. zip xs
|
||||||
|
. map (`M.singleton` (1 :: Integer))
|
||||||
|
. tail
|
||||||
|
$ xs
|
||||||
|
where
|
||||||
|
sizeAsFraction m = M.map (max 1 . floor . fromRational . (* maxBound') . (% sum')) m
|
||||||
|
where
|
||||||
|
sum' = sum . M.elems $ m
|
||||||
|
|
||||||
|
toInteger :: (Integral a) => a -> Integer
|
||||||
|
toInteger = fromIntegral
|
||||||
|
|
||||||
|
maxBound' = fromIntegral (maxBound :: b)
|
||||||
|
|
||||||
|
data Compressed a = Compressed
|
||||||
|
{ markovs :: M.Map a (M.Map a (Word8)),
|
||||||
|
location :: Ratio Integer,
|
||||||
|
start :: a,
|
||||||
|
length :: Int
|
||||||
|
}
|
||||||
|
deriving (Eq, Ord, Show, Generic, C.Serialize)
|
||||||
|
|
||||||
|
decompress ::
|
||||||
|
forall a.
|
||||||
|
(Integral a, B.FiniteBitsOps a, B.BitOps a, Show a) =>
|
||||||
|
Compressed a ->
|
||||||
|
[a]
|
||||||
|
decompress (Compressed {..}) = take length $ map snd . L.iterate decompress' $ (location, start)
|
||||||
|
where
|
||||||
|
decompress' :: (Ratio Integer, a) -> (Ratio Integer, a)
|
||||||
|
decompress' (loc, prev) = ((loc - ansLoc) / newSize, newVal)
|
||||||
|
where
|
||||||
|
(ansLoc, (newVal, newSize)) = My.fromJust . M.lookupLE loc $ (rings M.! prev)
|
||||||
|
|
||||||
|
rings = M.map (M.fromList . map toDecompressionRing . M.toList . toRing) markovs
|
||||||
|
|
||||||
|
toDecompressionRing (key, (WordMarkovStats {..})) = (location, (key, size))
|
||||||
|
|
||||||
|
sanityCheck :: forall k. (Ord k) => M.Map k (M.Map k Word8) -> [Word8]
|
||||||
|
sanityCheck = map (sum . M.elems) . M.elems
|
||||||
|
|
||||||
|
compress ::
|
||||||
|
forall a.
|
||||||
|
(Integral a, B.FiniteBitsOps a, B.BitOps a, Show a) =>
|
||||||
|
[a] ->
|
||||||
|
Compressed a
|
||||||
|
compress toCompress = Compressed twoByteMarkovs (shortestLocation endStats) (head toCompress) (genericLength toCompress)
|
||||||
|
where
|
||||||
|
twoByteMarkovs = twoByteMarkov toCompress
|
||||||
|
|
||||||
|
rings = M.map toRing twoByteMarkovs
|
||||||
|
|
||||||
|
pairs = zip toCompress . tail $ toCompress
|
||||||
|
|
||||||
|
shortestLocation (WordMarkovStats {..}) = simplestBetween location (location + size)
|
||||||
|
|
||||||
|
endStats = pyramidFold addWordMarkovStats . map statsFor $ pairs
|
||||||
|
|
||||||
|
addWordMarkovStats
|
||||||
|
(WordMarkovStats {location = prevLoc, size = prevSize})
|
||||||
|
(WordMarkovStats {location = nextLoc, size = nextSize}) =
|
||||||
|
WordMarkovStats
|
||||||
|
{ location = prevLoc + (prevSize * nextLoc),
|
||||||
|
size = prevSize * nextSize
|
||||||
|
}
|
||||||
|
|
||||||
|
statsFor (x0, x1) = (rings M.! x0) M.! x1
|
||||||
|
|
||||||
|
pyramidFold f = pyramid
|
||||||
|
where
|
||||||
|
pyramid [x] = x
|
||||||
|
pyramid xs = pyramid $ pyramidFold' xs
|
||||||
|
|
||||||
|
pyramidFold' [x] = [x]
|
||||||
|
pyramidFold' (x0 : x1 : []) = [f x0 x1]
|
||||||
|
pyramidFold' (x0 : x1 : xs) = (f x0 x1) : (pyramidFold' xs)
|
||||||
|
|
||||||
|
-- borrowed and slightly changed from Data.Ratio source
|
||||||
|
simplestBetween :: Rational -> Rational -> Rational
|
||||||
|
simplestBetween x y
|
||||||
|
| x == y = x
|
||||||
|
| x > 0 = simplestBetween' n d n' d'
|
||||||
|
| otherwise = 0 % 1
|
||||||
|
where
|
||||||
|
n = numerator x
|
||||||
|
d = denominator x
|
||||||
|
n' = numerator y
|
||||||
|
d' = denominator y
|
||||||
|
|
||||||
|
simplestBetween' n d n' d' -- assumes 0 < n%d < n'%d'
|
||||||
|
| r == 0 = q % 1
|
||||||
|
| q /= q' = (q + 1) % 1
|
||||||
|
| otherwise = (q * n'' + d'') % n''
|
||||||
|
where
|
||||||
|
(q, r) = quotRem n d
|
||||||
|
(q', r') = quotRem n' d'
|
||||||
|
nd'' = simplestBetween' d' r' d r
|
||||||
|
n'' = numerator nd''
|
||||||
|
d'' = denominator nd''
|
||||||
125
src/Compress/Huffman.hs
Normal file
125
src/Compress/Huffman.hs
Normal file
|
|
@ -0,0 +1,125 @@
|
||||||
|
module Compress.Huffman where
|
||||||
|
|
||||||
|
|
||||||
|
import Data.Bifunctor qualified as Bi
|
||||||
|
import Data.FiniteBit
|
||||||
|
import Data.Bit (cloneToByteString)
|
||||||
|
import Data.Bit qualified as B
|
||||||
|
import Data.Bit qualified as BV
|
||||||
|
import Data.ByteString (fromFilePath)
|
||||||
|
import Data.ByteString qualified as BS
|
||||||
|
import Data.Data qualified as D
|
||||||
|
import Data.Foldable qualified as F
|
||||||
|
import Data.IntMap.Strict qualified as IM
|
||||||
|
import qualified Data.Proxy as D
|
||||||
|
import qualified Basement.From as F
|
||||||
|
import Data.Map.Strict qualified as M
|
||||||
|
import Basement.Bits as B
|
||||||
|
import Data.Maybe (fromMaybe)
|
||||||
|
import Data.Maybe qualified as My
|
||||||
|
import Data.PQueue qualified as PQ
|
||||||
|
import Data.Serialize qualified as C
|
||||||
|
import Data.Text qualified as T
|
||||||
|
import Data.Text.IO qualified as TIO
|
||||||
|
import Data.Vector.Unboxed qualified as V
|
||||||
|
import Data.Word
|
||||||
|
import Debug.Trace qualified as D
|
||||||
|
import GHC.Generics (Generic)
|
||||||
|
import Options.Generic qualified as O
|
||||||
|
import System.Environment qualified as SE
|
||||||
|
import Data.HuffmanTree
|
||||||
|
|
||||||
|
decompress
|
||||||
|
:: forall a
|
||||||
|
. (Ord a, Integral a, B.FiniteBitsOps a)
|
||||||
|
=> (TreeDirs, HuffmanTree a)
|
||||||
|
-> Maybe BS.ByteString
|
||||||
|
decompress (TreeDirs treeDirs, tree) = BS.concat . map toByteString <$> decompress' treeDirs
|
||||||
|
where
|
||||||
|
decompress' :: [TreeDir] -> Maybe [a]
|
||||||
|
decompress' [] = Just []
|
||||||
|
decompress' xs = case nextLeaf xs tree of
|
||||||
|
Nothing -> Nothing
|
||||||
|
Just (x, remainingDirs) -> (x :) <$> decompress' remainingDirs
|
||||||
|
|
||||||
|
nextLeaf :: [TreeDir] -> HuffmanTree a -> Maybe (a, [TreeDir])
|
||||||
|
nextLeaf xs (Leaf a) = Just (a, xs)
|
||||||
|
nextLeaf [] _ = Nothing
|
||||||
|
nextLeaf (L : xs) (Node {..}) = nextLeaf xs left
|
||||||
|
nextLeaf (R : xs) (Node {..}) = nextLeaf xs right
|
||||||
|
|
||||||
|
compress
|
||||||
|
:: forall a
|
||||||
|
. (Ord a, Integral a, B.FiniteBitsOps a, B.BitOps a)
|
||||||
|
=> BS.ByteString
|
||||||
|
-> Maybe (TreeDirs, HuffmanTree a)
|
||||||
|
compress bs =
|
||||||
|
liftA2 (,) (TreeDirs <$> treeDirections) mergedHuffmanTrees
|
||||||
|
where
|
||||||
|
treeDirections = concat <$> mapM (treeDirMap M.!?) dividedByteString
|
||||||
|
|
||||||
|
mergedHuffmanTrees =
|
||||||
|
mergeHuffmanTrees
|
||||||
|
. PQ.fromList
|
||||||
|
. map (uncurry (flip (,)) . Bi.first Leaf)
|
||||||
|
. counts
|
||||||
|
$ dividedByteString
|
||||||
|
|
||||||
|
treeDirMap :: M.Map a [TreeDir]
|
||||||
|
treeDirMap = My.maybe M.empty findTreeDirections mergedHuffmanTrees
|
||||||
|
|
||||||
|
dividedByteString = toWordsList bs
|
||||||
|
|
||||||
|
-- testCompression
|
||||||
|
-- :: forall a
|
||||||
|
-- . (Ord a, Eq a, Integral a, B.FiniteBitsOps a, B.BitOps a, C.Serialize a)
|
||||||
|
-- => D.Proxy a
|
||||||
|
-- -> BS.ByteString
|
||||||
|
-- -> Bool
|
||||||
|
-- testCompression _ bs =
|
||||||
|
-- ((Right . Just $ bs) ==)
|
||||||
|
-- . Bi.second (decompress :: Maybe (TreeDirs, HuffmanTree a) -> Maybe BS.ByteString)
|
||||||
|
-- -- . D.traceShowWith (Bi.second (fmap fst))
|
||||||
|
-- . (decodeCompressed :: BS.ByteString -> Either String (Maybe (TreeDirs, HuffmanTree a)))
|
||||||
|
-- . encodeCompressed
|
||||||
|
-- -- . D.traceShowWith (fmap fst)
|
||||||
|
-- . (compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanTree a))
|
||||||
|
-- $ bs
|
||||||
|
|
||||||
|
-- cloneToByteStringWithLen :: V.Vector BV.Bit -> (BS.ByteString, Int)
|
||||||
|
-- cloneToByteStringWithLen vec = (BV.cloneToByteString vec, V.length vec)
|
||||||
|
|
||||||
|
-- cloneFromByteStringWithLen :: (BS.ByteString, Int) -> V.Vector BV.Bit
|
||||||
|
-- cloneFromByteStringWithLen (bs, len) = V.take len . BV.cloneFromByteString $ bs
|
||||||
|
|
||||||
|
-- decodeTreeDirs :: (BS.ByteString, Int) -> [TreeDir]
|
||||||
|
-- decodeTreeDirs = map (\x -> if BV.unBit x then R else L) . V.toList . cloneFromByteStringWithLen
|
||||||
|
|
||||||
|
-- decodeCompressed :: forall a. (Ord a, Integral a, B.FiniteBitsOps a, C.Serialize a) => BS.ByteString -> Either String (Maybe (TreeDirs, HuffmanTree a))
|
||||||
|
-- decodeCompressed = Bi.second (fmap (Bi.first decodeTreeDirs)) . C.decode
|
||||||
|
|
||||||
|
counts :: (Ord a) => [a] -> [(a, Int)]
|
||||||
|
counts = M.toList . F.foldl' combiningInsert M.empty
|
||||||
|
where
|
||||||
|
combiningInsert m key = M.insertWith (+) key 1 m
|
||||||
|
|
||||||
|
divideByteString :: Int -> BS.ByteString -> [BS.ByteString]
|
||||||
|
divideByteString n [] = []
|
||||||
|
divideByteString n bs = x : divideByteString n xs
|
||||||
|
where
|
||||||
|
(x, xs) = BS.splitAt n bs
|
||||||
|
|
||||||
|
|
||||||
|
compressionRatioFor
|
||||||
|
:: forall a
|
||||||
|
. (Integral a, B.FiniteBitsOps a, B.BitOps a, Ord a, C.Serialize a)
|
||||||
|
=> D.Proxy a
|
||||||
|
-> BS.ByteString
|
||||||
|
-> Double
|
||||||
|
compressionRatioFor proxy bs =
|
||||||
|
(/ (fromIntegral . BS.length $ bs))
|
||||||
|
. fromIntegral
|
||||||
|
. BS.length
|
||||||
|
. C.encode
|
||||||
|
. (compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanTree a))
|
||||||
|
$ bs
|
||||||
188
src/Compress/PrefixTree.hs
Normal file
188
src/Compress/PrefixTree.hs
Normal file
|
|
@ -0,0 +1,188 @@
|
||||||
|
module Compress.PrefixTree where
|
||||||
|
|
||||||
|
import Basement.Bits qualified as B
|
||||||
|
import qualified Basement.From as F
|
||||||
|
import Compress.Huffman qualified as H
|
||||||
|
import Control.Applicative qualified as A
|
||||||
|
import Data.Bifunctor qualified as Bi
|
||||||
|
import Data.ByteString qualified as BS
|
||||||
|
import Data.Foldable qualified as F
|
||||||
|
import Data.HuffmanTree as HT
|
||||||
|
import Data.List qualified as L
|
||||||
|
import Data.List.NonEmpty qualified as NE
|
||||||
|
import Data.Map.Strict qualified as M
|
||||||
|
import Data.Maybe qualified as My
|
||||||
|
import Data.Ord qualified as O
|
||||||
|
import Data.PQueue qualified as PQ
|
||||||
|
import Debug.Trace qualified as D
|
||||||
|
import Debug.Trace qualified as T
|
||||||
|
import Basement.Bits (FiniteBitsOps(numberOfBits))
|
||||||
|
import GHC.Generics
|
||||||
|
import qualified Data.Serialize as C
|
||||||
|
import Data.FiniteBit
|
||||||
|
|
||||||
|
data Tree a = (Ord a) =>
|
||||||
|
Tree
|
||||||
|
{ children :: M.Map a (Tree a)
|
||||||
|
}
|
||||||
|
|
||||||
|
newtype HuffmanPrefixTree a b = HuffmanPrefixTree
|
||||||
|
{ inner :: M.Map a (HuffmanTree b)
|
||||||
|
} deriving (Eq, Ord, Show, Generic, C.Serialize)
|
||||||
|
|
||||||
|
finiteBitTupleUncons ::
|
||||||
|
forall a b.
|
||||||
|
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
|
||||||
|
BS.ByteString ->
|
||||||
|
Maybe ((a, b), BS.ByteString)
|
||||||
|
finiteBitTupleUncons bs = case finiteBitUncons bs of
|
||||||
|
Just (a, bs') -> case finiteBitUncons bs' of
|
||||||
|
Just (b, _) -> Just ((a, b), bs')
|
||||||
|
_ -> Nothing
|
||||||
|
_ -> Nothing
|
||||||
|
|
||||||
|
fromByteString ::
|
||||||
|
forall a b.
|
||||||
|
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
|
||||||
|
BS.ByteString ->
|
||||||
|
[(a, b)]
|
||||||
|
fromByteString bs = case finiteBitTupleUncons bs of
|
||||||
|
Just ((a, b), bs') -> (a, b) : fromByteString bs'
|
||||||
|
Nothing -> []
|
||||||
|
|
||||||
|
toHuffmanTree ::
|
||||||
|
forall a b.
|
||||||
|
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
|
||||||
|
M.Map (a, b) Word ->
|
||||||
|
HuffmanPrefixTree a b
|
||||||
|
toHuffmanTree =
|
||||||
|
HuffmanPrefixTree
|
||||||
|
. M.mapMaybe HT.fromList
|
||||||
|
. M.fromListWith (++)
|
||||||
|
. map (\((a, b), count) -> (a, [(fromIntegral count, b)]))
|
||||||
|
. M.assocs
|
||||||
|
|
||||||
|
decompress ::
|
||||||
|
forall a .
|
||||||
|
(Integral a, B.FiniteBitsOps a, B.BitOps a) =>
|
||||||
|
(TreeDirs, HuffmanPrefixTree a a, a)
|
||||||
|
-> Maybe BS.ByteString
|
||||||
|
decompress (TreeDirs treeDirs'', HuffmanPrefixTree prefixTree, initial') = BS.concat . map toByteString . (initial' :) <$> decompress' treeDirs'' initial'
|
||||||
|
where
|
||||||
|
decompress' :: [TreeDir] -> a -> Maybe [a]
|
||||||
|
decompress' treeDirs initial = case HT.lookup (prefixTree M.! initial) treeDirs of
|
||||||
|
Nothing -> Nothing
|
||||||
|
Just (ans, []) -> Just [ans]
|
||||||
|
Just (ans, treeDirs') -> (ans :) <$> decompress' treeDirs' ans
|
||||||
|
|
||||||
|
compress ::
|
||||||
|
forall a b.
|
||||||
|
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
|
||||||
|
BS.ByteString -> Maybe (TreeDirs, HuffmanPrefixTree a b, a)
|
||||||
|
compress bs = (TreeDirs $ concatMap treeDirsFor asFiniteBitPairs, tree, ) <$> initial
|
||||||
|
where
|
||||||
|
tree :: HuffmanPrefixTree a b
|
||||||
|
tree = toHuffmanTree . nGramCounts $ bs
|
||||||
|
|
||||||
|
treeDirMap :: M.Map a (M.Map b [TreeDir])
|
||||||
|
treeDirMap = M.map HT.findTreeDirections . Compress.PrefixTree.inner $ tree
|
||||||
|
|
||||||
|
initial :: Maybe a
|
||||||
|
initial = fst <$> finiteBitUncons bs
|
||||||
|
|
||||||
|
asFiniteBitPairs :: [(a,b)]
|
||||||
|
asFiniteBitPairs = fromByteString bs
|
||||||
|
|
||||||
|
treeDirsFor :: (a, b) -> [TreeDir]
|
||||||
|
treeDirsFor (a, b) = (treeDirMap M.! a) M.! b
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-- | all (M.null . children) . M.elems . children $ tree =
|
||||||
|
-- fmap End
|
||||||
|
-- . HT.fromList
|
||||||
|
-- . map (\x -> (prefixCounts x, x))
|
||||||
|
-- . M.keys
|
||||||
|
-- . children
|
||||||
|
-- $ tree
|
||||||
|
-- | otherwise =
|
||||||
|
-- Just
|
||||||
|
-- . Layer
|
||||||
|
-- . M.mapMaybeWithKey (\key val -> toHuffmanTree' (key : soFar) val)
|
||||||
|
-- . children
|
||||||
|
-- $ tree
|
||||||
|
-- where
|
||||||
|
-- prefixCounts :: a -> Int
|
||||||
|
-- prefixCounts x =
|
||||||
|
-- fromIntegral
|
||||||
|
-- . sum
|
||||||
|
-- . M.elems
|
||||||
|
-- . M.filterWithKey (\key val -> L.isPrefixOf (reverse . (x :) $ soFar) key)
|
||||||
|
-- $ nGrams
|
||||||
|
|
||||||
|
-- toHuffmanTree :: Tree a -> p1 -> HuffmanTree a
|
||||||
|
-- toHuffmanTree :: forall a . Tree a -> M.Map [a] Word -> HuffmanTree [a]
|
||||||
|
-- toHuffmanTree (Tree {..}) nGrams soFar | M.size children == 1 = Leaf . map (reverse . (: soFar)) . M.keys $ children
|
||||||
|
-- toHuffmanTree (Tree {..}) nGrams soFar = Leaf . map (reverse . (: soFar)) . M.keys $ children
|
||||||
|
-- where
|
||||||
|
-- sorted = L.sortBy (prefixCounts . fst) . M.toList $ children
|
||||||
|
|
||||||
|
nGramCounts ::
|
||||||
|
forall a b.
|
||||||
|
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
|
||||||
|
BS.ByteString ->
|
||||||
|
M.Map (a, b) Word
|
||||||
|
nGramCounts =
|
||||||
|
M.fromListWith (+)
|
||||||
|
. map (,1)
|
||||||
|
. My.mapMaybe (My.listToMaybe . fromByteString)
|
||||||
|
. takeWhile ((== len) . BS.length)
|
||||||
|
. map (BS.take len)
|
||||||
|
. BS.tails
|
||||||
|
where
|
||||||
|
len = (`div` 8) . F.from $ numberOfBits (0 :: a) + numberOfBits (0 :: b)
|
||||||
|
|
||||||
|
empty :: (Ord a) => Tree a
|
||||||
|
empty = Tree M.empty
|
||||||
|
|
||||||
|
singleton :: (Ord a) => a -> Tree a
|
||||||
|
singleton x = Tree $ M.singleton x empty
|
||||||
|
|
||||||
|
fromSingleList :: (Ord a) => [a] -> Tree a
|
||||||
|
fromSingleList [] = empty
|
||||||
|
fromSingleList (x : xs) = Tree . M.singleton x . fromSingleList $ xs
|
||||||
|
|
||||||
|
fromList :: (Ord a) => [[a]] -> Tree a
|
||||||
|
fromList = F.foldl' merge empty . map fromSingleList
|
||||||
|
|
||||||
|
-- insert :: Ord a => Tree a -> [a] -> Tree a
|
||||||
|
-- insert (Tree {..}) (x:xs) =
|
||||||
|
|
||||||
|
merge :: Tree a -> Tree a -> Tree a
|
||||||
|
merge (Tree children0) (Tree children1) = Tree $ M.unionWith merge children0 children1
|
||||||
|
|
||||||
|
-- deriving instance Eq (Tree a)
|
||||||
|
|
||||||
|
-- deriving instance Ord (Tree a)
|
||||||
|
|
||||||
|
-- deriving instance (Show a) => Show (Tree a)
|
||||||
|
|
||||||
|
-- empty :: (Ord a) => Tree a
|
||||||
|
-- empty = Tree M.empty
|
||||||
|
|
||||||
|
-- fromList :: (Ord a, F.Foldable t) => t [a] -> Tree a
|
||||||
|
-- fromList = F.foldl' insert empty
|
||||||
|
|
||||||
|
-- insert :: Tree a -> [a] -> Tree a
|
||||||
|
-- insert (Tree {..}) [] = Tree M.empty
|
||||||
|
-- insert (Tree {..}) (x : xs) =
|
||||||
|
-- Tree
|
||||||
|
-- . flip (M.insert x) children
|
||||||
|
-- . flip insert xs
|
||||||
|
-- . My.fromMaybe empty
|
||||||
|
-- . M.lookup x
|
||||||
|
-- $ children
|
||||||
|
|
||||||
|
-- lookup :: (Ord a) => [a] -> Tree a -> Bool
|
||||||
|
-- lookup [] = const True
|
||||||
|
-- lookup (x : xs) = maybe False (Compress.PrefixTree.lookup xs) . M.lookup x . children
|
||||||
45
src/Data/FiniteBit.hs
Normal file
45
src/Data/FiniteBit.hs
Normal file
|
|
@ -0,0 +1,45 @@
|
||||||
|
module Data.FiniteBit where
|
||||||
|
|
||||||
|
import Data.Bit (cloneToByteString)
|
||||||
|
import Data.Bit qualified as B
|
||||||
|
import Data.Bit qualified as BV
|
||||||
|
import Data.ByteString qualified as BS
|
||||||
|
import qualified Data.Proxy as D
|
||||||
|
import qualified Basement.From as F
|
||||||
|
import Data.Foldable qualified as F
|
||||||
|
import Data.Word
|
||||||
|
import Basement.Bits as B
|
||||||
|
|
||||||
|
|
||||||
|
numBytesIn :: forall a. (B.FiniteBitsOps a, Integral a) => D.Proxy a -> Int
|
||||||
|
numBytesIn _ = (`div` 8) . F.from . B.numberOfBits $ (0 :: a)
|
||||||
|
|
||||||
|
|
||||||
|
toWordsList :: forall a. (Integral a, B.FiniteBitsOps a, B.BitOps a) => BS.ByteString -> [a]
|
||||||
|
toWordsList bs = case finiteBitUncons bs of
|
||||||
|
Nothing -> []
|
||||||
|
(Just (x, xs)) -> x : toWordsList xs
|
||||||
|
|
||||||
|
toByteString :: forall a. (Integral a, B.FiniteBitsOps a) => a -> BS.ByteString
|
||||||
|
toByteString n = BS.pack . take numBytes . map (fromIntegral . (n `B.rotateL`)) $ [8, 16 ..]
|
||||||
|
where
|
||||||
|
numBytes = numBytesIn (D.Proxy :: D.Proxy a)
|
||||||
|
|
||||||
|
finiteBitUncons
|
||||||
|
:: forall a
|
||||||
|
. (Integral a, B.FiniteBitsOps a, B.BitOps a)
|
||||||
|
=> BS.ByteString
|
||||||
|
-> Maybe (a, BS.ByteString)
|
||||||
|
finiteBitUncons [] = Nothing
|
||||||
|
finiteBitUncons bs =
|
||||||
|
Just
|
||||||
|
. (,rest)
|
||||||
|
. F.foldl' (.|.) 0
|
||||||
|
. zipWith (flip B.rotateR) [8, 16 ..]
|
||||||
|
. map (fromIntegral :: Word8 -> a)
|
||||||
|
. BS.unpack
|
||||||
|
$ takenBytes
|
||||||
|
where
|
||||||
|
takenBytes :: BS.ByteString
|
||||||
|
rest :: BS.ByteString
|
||||||
|
(takenBytes, rest) = BS.splitAt (numBytesIn (D.Proxy :: D.Proxy a)) bs
|
||||||
67
src/Data/HuffmanTree.hs
Normal file
67
src/Data/HuffmanTree.hs
Normal file
|
|
@ -0,0 +1,67 @@
|
||||||
|
module Data.HuffmanTree where
|
||||||
|
|
||||||
|
import Data.Bifunctor qualified as Bi
|
||||||
|
import Data.Map.Strict qualified as M
|
||||||
|
import Data.PQueue qualified as PQ
|
||||||
|
import Data.Serialize qualified as C
|
||||||
|
import GHC.Generics (Generic)
|
||||||
|
import Data.Vector.Unboxed qualified as V
|
||||||
|
import Data.Bit qualified as B
|
||||||
|
import Data.Bit qualified as BV
|
||||||
|
import Data.ByteString qualified as BS
|
||||||
|
|
||||||
|
data HuffmanTree a
|
||||||
|
= Leaf a
|
||||||
|
| Node
|
||||||
|
{ left :: HuffmanTree a,
|
||||||
|
right :: HuffmanTree a
|
||||||
|
}
|
||||||
|
deriving (Eq, Ord, Show, Generic, C.Serialize, Functor)
|
||||||
|
|
||||||
|
|
||||||
|
-- here so we can define our own Serialize instance
|
||||||
|
newtype TreeDirs = TreeDirs {
|
||||||
|
inner :: [TreeDir]
|
||||||
|
} deriving (Eq, Ord, Show)
|
||||||
|
|
||||||
|
data TreeDir = L | R deriving (Eq, Ord, Show)
|
||||||
|
|
||||||
|
instance C.Serialize TreeDirs where
|
||||||
|
put :: C.Putter TreeDirs
|
||||||
|
put = C.put . Bi.first BV.cloneToByteString . (\x -> (x, V.length x)) . V.fromList . map (BV.Bit . (== R)) . (inner :: TreeDirs -> [TreeDir])
|
||||||
|
|
||||||
|
get :: C.Get TreeDirs
|
||||||
|
get = do
|
||||||
|
(bs, len) <- C.get
|
||||||
|
pure . TreeDirs . map (\x -> if BV.unBit x then R else L) . V.toList . V.take len . BV.cloneFromByteString $ bs
|
||||||
|
|
||||||
|
lookup ::
|
||||||
|
forall a.
|
||||||
|
(Ord a) =>
|
||||||
|
HuffmanTree a ->
|
||||||
|
[TreeDir] ->
|
||||||
|
Maybe (a, [TreeDir])
|
||||||
|
lookup (Node {..}) (L : xs) = Data.HuffmanTree.lookup left xs
|
||||||
|
lookup (Node {..}) (R : xs) = Data.HuffmanTree.lookup right xs
|
||||||
|
lookup (Leaf a) xs = Just (a, xs)
|
||||||
|
lookup _ [] = Nothing
|
||||||
|
|
||||||
|
findTreeDirections ::
|
||||||
|
forall a.
|
||||||
|
(Ord a) =>
|
||||||
|
HuffmanTree a ->
|
||||||
|
M.Map a [TreeDir]
|
||||||
|
findTreeDirections (Leaf a) = M.singleton a []
|
||||||
|
findTreeDirections (Node {..}) = M.union (rec' L left) (rec' R right)
|
||||||
|
where
|
||||||
|
rec' :: TreeDir -> HuffmanTree a -> M.Map a [TreeDir]
|
||||||
|
rec' dir = M.map (dir :) . findTreeDirections
|
||||||
|
|
||||||
|
mergeHuffmanTrees :: PQ.PQueue (HuffmanTree a) -> Maybe (HuffmanTree a)
|
||||||
|
mergeHuffmanTrees queue = case (Bi.second . Bi.second $ PQ.minView) <$> PQ.minView queue of
|
||||||
|
Nothing -> Nothing
|
||||||
|
Just (size, (x, Nothing)) -> Just x
|
||||||
|
Just (size, (x, Just (size1, (x', rest)))) -> mergeHuffmanTrees $ PQ.insert (size + size1) (Node x x') rest
|
||||||
|
|
||||||
|
fromList :: [(Int, a)] -> Maybe (HuffmanTree a)
|
||||||
|
fromList = mergeHuffmanTrees . PQ.fromList . map (Bi.second Leaf)
|
||||||
|
|
@ -16,7 +16,10 @@ fromList = PQueue . IM.fromListWith NE.append . map (Bi.second NE.singleton)
|
||||||
singleton :: Int -> a -> PQueue a
|
singleton :: Int -> a -> PQueue a
|
||||||
singleton key a = PQueue $ IM.singleton key [a]
|
singleton key a = PQueue $ IM.singleton key [a]
|
||||||
|
|
||||||
abstractView :: (IM.IntMap (NE.NonEmpty a) -> Maybe (IM.Key, NE.NonEmpty a)) -> PQueue a -> Maybe (IM.Key, (a, PQueue a))
|
abstractView
|
||||||
|
:: (IM.IntMap (NE.NonEmpty a) -> Maybe (IM.Key, NE.NonEmpty a))
|
||||||
|
-> PQueue a
|
||||||
|
-> Maybe (IM.Key, (a, PQueue a))
|
||||||
abstractView f (PQueue m) = case f m of
|
abstractView f (PQueue m) = case f m of
|
||||||
Nothing -> Nothing
|
Nothing -> Nothing
|
||||||
(Just (key, x NE.:| (x' : xs))) -> Just (key, (x, PQueue $ IM.insert key (x' NE.:| xs) m))
|
(Just (key, x NE.:| (x' : xs))) -> Just (key, (x, PQueue $ IM.insert key (x' NE.:| xs) m))
|
||||||
|
|
|
||||||
209
src/Main.hs
209
src/Main.hs
|
|
@ -1,182 +1,67 @@
|
||||||
module Main where
|
module Main where
|
||||||
|
|
||||||
|
import Basement.Bits qualified as B
|
||||||
|
import Compress.Huffman
|
||||||
|
import Compress.Huffman qualified as HT
|
||||||
|
import Compress.PrefixTree (HuffmanPrefixTree (HuffmanPrefixTree))
|
||||||
|
import Compress.PrefixTree qualified as PT
|
||||||
|
import Compress.Arithmetic qualified as A
|
||||||
import Data.Bifunctor qualified as Bi
|
import Data.Bifunctor qualified as Bi
|
||||||
import Data.Bit (cloneToByteString)
|
|
||||||
import Data.Bit qualified as B
|
import Data.Bit qualified as B
|
||||||
import Data.Bit qualified as BV
|
|
||||||
import Data.Bits (Bits ((.|.)))
|
import Data.Bits (Bits ((.|.)))
|
||||||
import Data.Bits qualified as B
|
import Data.Bits qualified as B
|
||||||
import Data.ByteString (fromFilePath)
|
|
||||||
import Data.ByteString qualified as BS
|
import Data.ByteString qualified as BS
|
||||||
import Data.Data qualified as D
|
import Data.HuffmanTree
|
||||||
import Data.Foldable qualified as F
|
import Data.Proxy qualified as P
|
||||||
import Data.IntMap.Strict qualified as IM
|
|
||||||
import Data.Map.Strict qualified as M
|
|
||||||
import Data.Maybe (fromMaybe)
|
|
||||||
import Data.Maybe qualified as My
|
|
||||||
import Data.PQueue qualified as PQ
|
|
||||||
import Data.Serialize qualified as C
|
import Data.Serialize qualified as C
|
||||||
import Data.Text qualified as T
|
|
||||||
import Data.Text.IO qualified as TIO
|
|
||||||
import Data.Vector.Unboxed qualified as V
|
|
||||||
import Data.Word
|
import Data.Word
|
||||||
import Debug.Trace qualified as D
|
|
||||||
import GHC.Generics (Generic)
|
import GHC.Generics (Generic)
|
||||||
import Options.Generic qualified as O
|
import Options.Generic qualified as O
|
||||||
import System.Environment qualified as SE
|
import qualified Data.FiniteBit as FB
|
||||||
|
|
||||||
data CompressOrDecompress = Compress | Decompress deriving (Show, Generic)
|
data CompressOrDecompress = Compress | Decompress deriving (Show, Generic, O.ParseField, O.ParseFields, O.ParseRecord, Read)
|
||||||
|
|
||||||
instance O.ParseRecord CompressOrDecompress
|
data CompressionStrategy = Huffman | MarkovHuffman deriving (Show, Generic, O.ParseField, O.ParseFields, O.ParseRecord, Read)
|
||||||
|
|
||||||
data HuffmanTree a
|
data CLIOpts = CLIOpts
|
||||||
= Leaf a
|
{ task :: CompressOrDecompress,
|
||||||
| Node
|
strategy :: CompressionStrategy
|
||||||
{left :: HuffmanTree a, right :: HuffmanTree a}
|
}
|
||||||
deriving (Eq, Ord, Show, Generic, C.Serialize)
|
deriving (Show, Generic, O.ParseRecord)
|
||||||
|
|
||||||
data TreeDir = L | R deriving (Eq, Ord, Show, Generic, C.Serialize)
|
applyCompressionOptions ::
|
||||||
|
forall a.
|
||||||
|
(Integral a, B.BitOps a, B.FiniteBitsOps a, Ord a, C.Serialize a) =>
|
||||||
|
P.Proxy a ->
|
||||||
|
CLIOpts ->
|
||||||
|
BS.ByteString ->
|
||||||
|
BS.ByteString
|
||||||
|
applyCompressionOptions _ (CLIOpts Compress Huffman) f =
|
||||||
|
C.encode . (compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanTree a)) $ f
|
||||||
|
applyCompressionOptions _ (CLIOpts Compress MarkovHuffman) f =
|
||||||
|
C.encode . (PT.compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanPrefixTree a a, a)) $ f
|
||||||
|
applyCompressionOptions _ (CLIOpts Decompress Huffman) f =
|
||||||
|
handleError $ Bi.second decompress . (C.decode :: BS.ByteString -> Either String (TreeDirs, HuffmanTree a)) $ f
|
||||||
|
applyCompressionOptions _ (CLIOpts Decompress MarkovHuffman) f =
|
||||||
|
handleError $ Bi.second PT.decompress . (C.decode :: BS.ByteString -> Either String (TreeDirs, HuffmanPrefixTree a a, a)) $ f
|
||||||
|
|
||||||
findTreeDirections :: forall a. (Ord a) => HuffmanTree a -> M.Map a [TreeDir]
|
handleError (Right (Just bs)) = bs
|
||||||
findTreeDirections (Leaf a) = M.singleton a []
|
handleError _ = []
|
||||||
findTreeDirections (Node {..}) = M.union (rec' L left) (rec' R right)
|
|
||||||
where
|
|
||||||
rec' :: TreeDir -> HuffmanTree a -> M.Map a [TreeDir]
|
|
||||||
rec' dir = M.map (dir :) . findTreeDirections
|
|
||||||
|
|
||||||
decompress :: forall a. (Ord a, Integral a, B.FiniteBits a) => Maybe ([TreeDir], HuffmanTree a) -> Maybe BS.ByteString
|
|
||||||
decompress Nothing = Just []
|
|
||||||
decompress (Just (treeDirs, tree)) = BS.concat . map toByteString <$> decompress' treeDirs
|
|
||||||
where
|
|
||||||
decompress' :: [TreeDir] -> Maybe [a]
|
|
||||||
decompress' [] = Just []
|
|
||||||
decompress' xs = case nextLeaf xs tree of
|
|
||||||
Nothing -> Nothing
|
|
||||||
Just (x, remainingDirs) -> (x :) <$> decompress' remainingDirs
|
|
||||||
|
|
||||||
nextLeaf :: [TreeDir] -> HuffmanTree a -> Maybe (a, [TreeDir])
|
|
||||||
nextLeaf xs (Leaf a) = Just (a, xs)
|
|
||||||
nextLeaf [] _ = Nothing
|
|
||||||
nextLeaf (L : xs) (Node {..}) = nextLeaf xs left
|
|
||||||
nextLeaf (R : xs) (Node {..}) = nextLeaf xs right
|
|
||||||
|
|
||||||
compress :: forall a. (Ord a, Integral a, B.FiniteBits a) => BS.ByteString -> Maybe ([TreeDir], HuffmanTree a)
|
|
||||||
compress bs =
|
|
||||||
liftA2 (,) treeDirections mergedHuffmanTrees
|
|
||||||
where
|
|
||||||
treeDirections = concat <$> mapM (treeDirMap M.!?) dividedByteString
|
|
||||||
|
|
||||||
mergedHuffmanTrees =
|
|
||||||
mergeHuffmanTrees
|
|
||||||
. PQ.fromList
|
|
||||||
. map (uncurry (flip (,)) . Bi.first Leaf)
|
|
||||||
. counts
|
|
||||||
$ dividedByteString
|
|
||||||
|
|
||||||
treeDirMap :: M.Map a [TreeDir]
|
|
||||||
treeDirMap = My.maybe M.empty findTreeDirections mergedHuffmanTrees
|
|
||||||
|
|
||||||
dividedByteString = toBitsList bs
|
|
||||||
|
|
||||||
testCompression :: forall a. (Ord a, Eq a, Integral a, B.FiniteBits a, C.Serialize a) => D.Proxy a -> BS.ByteString -> Bool
|
|
||||||
testCompression _ bs =
|
|
||||||
((Right . Just $ bs) ==)
|
|
||||||
. Bi.second (decompress :: Maybe ([TreeDir], HuffmanTree a) -> Maybe BS.ByteString)
|
|
||||||
-- . D.traceShowWith (Bi.second (fmap fst))
|
|
||||||
. (decodeCompressed :: BS.ByteString -> Either String (Maybe ([TreeDir], HuffmanTree a)))
|
|
||||||
. encodeCompressed
|
|
||||||
-- . D.traceShowWith (fmap fst)
|
|
||||||
. (compress :: BS.ByteString -> Maybe ([TreeDir], HuffmanTree a))
|
|
||||||
$ bs
|
|
||||||
|
|
||||||
encodeCompressed :: (C.Serialize a) => Maybe ([TreeDir], HuffmanTree a) -> BS.ByteString
|
|
||||||
encodeCompressed = C.encode . fmap (Bi.first encodeTreeDirs)
|
|
||||||
where
|
|
||||||
encodeTreeDirs = cloneToByteStringWithLen . V.fromList . map (BV.Bit . (== R))
|
|
||||||
|
|
||||||
cloneToByteStringWithLen :: V.Vector BV.Bit -> (BS.ByteString, Int)
|
|
||||||
cloneToByteStringWithLen vec = (BV.cloneToByteString vec, V.length vec)
|
|
||||||
|
|
||||||
cloneFromByteStringWithLen :: (BS.ByteString, Int) -> V.Vector BV.Bit
|
|
||||||
cloneFromByteStringWithLen (bs, len) = V.take len . BV.cloneFromByteString $ bs
|
|
||||||
|
|
||||||
decodeTreeDirs :: (BS.ByteString, Int) -> [TreeDir]
|
|
||||||
decodeTreeDirs = map (\x -> if BV.unBit x then R else L) . V.toList . cloneFromByteStringWithLen
|
|
||||||
|
|
||||||
decodeCompressed :: forall a. (Ord a, Integral a, B.FiniteBits a, C.Serialize a) => BS.ByteString -> Either String (Maybe ([TreeDir], HuffmanTree a))
|
|
||||||
decodeCompressed = Bi.second (fmap (Bi.first decodeTreeDirs)) . C.decode
|
|
||||||
|
|
||||||
mergeHuffmanTrees :: PQ.PQueue (HuffmanTree a) -> Maybe (HuffmanTree a)
|
|
||||||
mergeHuffmanTrees queue = case (Bi.second . Bi.second $ PQ.minView) <$> PQ.minView queue of
|
|
||||||
Nothing -> Nothing
|
|
||||||
Just (size, (x, Nothing)) -> Just x
|
|
||||||
Just (size, (x, Just (size1, (x', rest)))) -> mergeHuffmanTrees $ PQ.insert (size + size1) (Node x x') rest
|
|
||||||
|
|
||||||
counts :: (Ord a) => [a] -> [(a, Int)]
|
|
||||||
counts = M.toList . F.foldl' combiningInsert M.empty
|
|
||||||
where
|
|
||||||
combiningInsert m key = M.insertWith (+) key 1 m
|
|
||||||
|
|
||||||
divideByteString :: Int -> BS.ByteString -> [BS.ByteString]
|
|
||||||
divideByteString n [] = []
|
|
||||||
divideByteString n bs = x : divideByteString n xs
|
|
||||||
where
|
|
||||||
(x, xs) = BS.splitAt n bs
|
|
||||||
|
|
||||||
toBitsList :: forall a. (Integral a, B.FiniteBits a) => BS.ByteString -> [a]
|
|
||||||
toBitsList bs = case finiteBitUncons bs of
|
|
||||||
Nothing -> []
|
|
||||||
(Just (x, xs)) -> x : (toBitsList xs)
|
|
||||||
|
|
||||||
toByteString :: forall a. (Integral a, B.FiniteBits a) => a -> BS.ByteString
|
|
||||||
toByteString n = BS.pack . take numBytes . map (fromIntegral . (n `B.rotateL`)) $ [8, 16 ..]
|
|
||||||
where
|
|
||||||
numBytes = numBytesIn (D.Proxy :: D.Proxy a)
|
|
||||||
|
|
||||||
finiteBitUncons :: forall a. (Integral a, B.FiniteBits a) => BS.ByteString -> Maybe (a, BS.ByteString)
|
|
||||||
finiteBitUncons [] = Nothing
|
|
||||||
finiteBitUncons bs =
|
|
||||||
Just
|
|
||||||
. (,rest)
|
|
||||||
. F.foldl' (.|.) 0
|
|
||||||
. zipWith (flip B.rotateR) [8, 16 ..]
|
|
||||||
. map (fromIntegral :: Word8 -> a)
|
|
||||||
. BS.unpack
|
|
||||||
$ takenBytes
|
|
||||||
where
|
|
||||||
takenBytes :: BS.ByteString
|
|
||||||
rest :: BS.ByteString
|
|
||||||
(takenBytes, rest) = BS.splitAt (numBytesIn (D.Proxy :: D.Proxy a)) bs
|
|
||||||
|
|
||||||
numBytesIn :: forall a. (B.FiniteBits a) => D.Proxy a -> Int
|
|
||||||
numBytesIn _ = (`div` 8) . B.finiteBitSize $ (B.zeroBits :: a)
|
|
||||||
|
|
||||||
compressionRatioFor
|
|
||||||
:: forall a
|
|
||||||
. (Integral a, B.FiniteBits a, Ord a, C.Serialize a)
|
|
||||||
=> D.Proxy a
|
|
||||||
-> BS.ByteString
|
|
||||||
-> Double
|
|
||||||
compressionRatioFor proxy bs =
|
|
||||||
(/ (fromIntegral . BS.length $ bs))
|
|
||||||
. fromIntegral
|
|
||||||
. BS.length
|
|
||||||
. encodeCompressed
|
|
||||||
. (compress :: BS.ByteString -> Maybe ([TreeDir], HuffmanTree a))
|
|
||||||
$ bs
|
|
||||||
|
|
||||||
applyCompressionOptions :: CompressOrDecompress -> BS.ByteString -> BS.ByteString
|
|
||||||
applyCompressionOptions Compress f = encodeCompressed . (compress :: BS.ByteString -> Maybe ([TreeDir], HuffmanTree Word8)) $ f
|
|
||||||
applyCompressionOptions Decompress f = case Bi.second decompress
|
|
||||||
. (decodeCompressed :: BS.ByteString -> Either String (Maybe ([TreeDir], HuffmanTree Word8)))
|
|
||||||
$ f of
|
|
||||||
-- TODO: write errors to stderr
|
|
||||||
(Left _) -> []
|
|
||||||
(Right Nothing) -> []
|
|
||||||
(Right (Just bs)) -> bs
|
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
main = do
|
main = do
|
||||||
compresionOrDecompression :: CompressOrDecompress <- O.getRecord "compression/decompression"
|
-- cliOpts :: CLIOpts <- O.getRecord "compress/decompress & strategy"
|
||||||
f <- BS.getContents
|
f <- BS.getContents
|
||||||
BS.putStr . applyCompressionOptions compresionOrDecompression $ f
|
BS.putStr . C.encode . A.compress . (FB.toWordsList :: BS.ByteString -> [ Word8 ]) $ f
|
||||||
|
-- let f = "hello tehre"
|
||||||
|
-- f <- BS.readFile "pg64317.txt"
|
||||||
|
-- let (compressed :: Maybe (TreeDirs, PT.HuffmanPrefixTree Word8 Word8, Word8)) = PT.compress f
|
||||||
|
-- print $ BS.length . C.encode $ compressed
|
||||||
|
-- print $ BS.length . C.encode . (compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanTree Word8)) $ f
|
||||||
|
-- print $ BS.length . C.encode . (compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanTree Word16)) $ f
|
||||||
|
-- BS.writeFile "outin.txt" decompressed
|
||||||
|
-- print (decompressed, f)
|
||||||
|
-- print $ BS.length decompressed
|
||||||
|
-- print $ BS.length f
|
||||||
|
-- print (decompressed == f)
|
||||||
|
-- BS.putStr . applyCompressionOptions (P.Proxy :: P.Proxy Word16) cliOpts $ f
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue