188 lines
5.8 KiB
Haskell
188 lines
5.8 KiB
Haskell
module Compress.PrefixTree where
|
|
|
|
import Basement.Bits qualified as B
|
|
import qualified Basement.From as F
|
|
import Compress.Huffman qualified as H
|
|
import Control.Applicative qualified as A
|
|
import Data.Bifunctor qualified as Bi
|
|
import Data.ByteString qualified as BS
|
|
import Data.Foldable qualified as F
|
|
import Data.HuffmanTree as HT
|
|
import Data.List qualified as L
|
|
import Data.List.NonEmpty qualified as NE
|
|
import Data.Map.Strict qualified as M
|
|
import Data.Maybe qualified as My
|
|
import Data.Ord qualified as O
|
|
import Data.PQueue qualified as PQ
|
|
import Debug.Trace qualified as D
|
|
import Debug.Trace qualified as T
|
|
import Basement.Bits (FiniteBitsOps(numberOfBits))
|
|
import GHC.Generics
|
|
import qualified Data.Serialize as C
|
|
import Data.FiniteBit
|
|
|
|
data Tree a = (Ord a) =>
|
|
Tree
|
|
{ children :: M.Map a (Tree a)
|
|
}
|
|
|
|
newtype HuffmanPrefixTree a b = HuffmanPrefixTree
|
|
{ inner :: M.Map a (HuffmanTree b)
|
|
} deriving (Eq, Ord, Show, Generic, C.Serialize)
|
|
|
|
finiteBitTupleUncons ::
|
|
forall a b.
|
|
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
|
|
BS.ByteString ->
|
|
Maybe ((a, b), BS.ByteString)
|
|
finiteBitTupleUncons bs = case finiteBitUncons bs of
|
|
Just (a, bs') -> case finiteBitUncons bs' of
|
|
Just (b, _) -> Just ((a, b), bs')
|
|
_ -> Nothing
|
|
_ -> Nothing
|
|
|
|
fromByteString ::
|
|
forall a b.
|
|
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
|
|
BS.ByteString ->
|
|
[(a, b)]
|
|
fromByteString bs = case finiteBitTupleUncons bs of
|
|
Just ((a, b), bs') -> (a, b) : fromByteString bs'
|
|
Nothing -> []
|
|
|
|
toHuffmanTree ::
|
|
forall a b.
|
|
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
|
|
M.Map (a, b) Word ->
|
|
HuffmanPrefixTree a b
|
|
toHuffmanTree =
|
|
HuffmanPrefixTree
|
|
. M.mapMaybe HT.fromList
|
|
. M.fromListWith (++)
|
|
. map (\((a, b), count) -> (a, [(fromIntegral count, b)]))
|
|
. M.assocs
|
|
|
|
decompress ::
|
|
forall a .
|
|
(Integral a, B.FiniteBitsOps a, B.BitOps a) =>
|
|
(TreeDirs, HuffmanPrefixTree a a, a)
|
|
-> Maybe BS.ByteString
|
|
decompress (TreeDirs treeDirs'', HuffmanPrefixTree prefixTree, initial') = BS.concat . map toByteString . (initial' :) <$> decompress' treeDirs'' initial'
|
|
where
|
|
decompress' :: [TreeDir] -> a -> Maybe [a]
|
|
decompress' treeDirs initial = case HT.lookup (prefixTree M.! initial) treeDirs of
|
|
Nothing -> Nothing
|
|
Just (ans, []) -> Just [ans]
|
|
Just (ans, treeDirs') -> (ans :) <$> decompress' treeDirs' ans
|
|
|
|
compress ::
|
|
forall a b.
|
|
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
|
|
BS.ByteString -> Maybe (TreeDirs, HuffmanPrefixTree a b, a)
|
|
compress bs = (TreeDirs $ concatMap treeDirsFor asFiniteBitPairs, tree, ) <$> initial
|
|
where
|
|
tree :: HuffmanPrefixTree a b
|
|
tree = toHuffmanTree . nGramCounts $ bs
|
|
|
|
treeDirMap :: M.Map a (M.Map b [TreeDir])
|
|
treeDirMap = M.map HT.findTreeDirections . Compress.PrefixTree.inner $ tree
|
|
|
|
initial :: Maybe a
|
|
initial = fst <$> finiteBitUncons bs
|
|
|
|
asFiniteBitPairs :: [(a,b)]
|
|
asFiniteBitPairs = fromByteString bs
|
|
|
|
treeDirsFor :: (a, b) -> [TreeDir]
|
|
treeDirsFor (a, b) = (treeDirMap M.! a) M.! b
|
|
|
|
|
|
|
|
-- | all (M.null . children) . M.elems . children $ tree =
|
|
-- fmap End
|
|
-- . HT.fromList
|
|
-- . map (\x -> (prefixCounts x, x))
|
|
-- . M.keys
|
|
-- . children
|
|
-- $ tree
|
|
-- | otherwise =
|
|
-- Just
|
|
-- . Layer
|
|
-- . M.mapMaybeWithKey (\key val -> toHuffmanTree' (key : soFar) val)
|
|
-- . children
|
|
-- $ tree
|
|
-- where
|
|
-- prefixCounts :: a -> Int
|
|
-- prefixCounts x =
|
|
-- fromIntegral
|
|
-- . sum
|
|
-- . M.elems
|
|
-- . M.filterWithKey (\key val -> L.isPrefixOf (reverse . (x :) $ soFar) key)
|
|
-- $ nGrams
|
|
|
|
-- toHuffmanTree :: Tree a -> p1 -> HuffmanTree a
|
|
-- toHuffmanTree :: forall a . Tree a -> M.Map [a] Word -> HuffmanTree [a]
|
|
-- toHuffmanTree (Tree {..}) nGrams soFar | M.size children == 1 = Leaf . map (reverse . (: soFar)) . M.keys $ children
|
|
-- toHuffmanTree (Tree {..}) nGrams soFar = Leaf . map (reverse . (: soFar)) . M.keys $ children
|
|
-- where
|
|
-- sorted = L.sortBy (prefixCounts . fst) . M.toList $ children
|
|
|
|
nGramCounts ::
|
|
forall a b.
|
|
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
|
|
BS.ByteString ->
|
|
M.Map (a, b) Word
|
|
nGramCounts =
|
|
M.fromListWith (+)
|
|
. map (,1)
|
|
. My.mapMaybe (My.listToMaybe . fromByteString)
|
|
. takeWhile ((== len) . BS.length)
|
|
. map (BS.take len)
|
|
. BS.tails
|
|
where
|
|
len = (`div` 8) . F.from $ numberOfBits (0 :: a) + numberOfBits (0 :: b)
|
|
|
|
empty :: (Ord a) => Tree a
|
|
empty = Tree M.empty
|
|
|
|
singleton :: (Ord a) => a -> Tree a
|
|
singleton x = Tree $ M.singleton x empty
|
|
|
|
fromSingleList :: (Ord a) => [a] -> Tree a
|
|
fromSingleList [] = empty
|
|
fromSingleList (x : xs) = Tree . M.singleton x . fromSingleList $ xs
|
|
|
|
fromList :: (Ord a) => [[a]] -> Tree a
|
|
fromList = F.foldl' merge empty . map fromSingleList
|
|
|
|
-- insert :: Ord a => Tree a -> [a] -> Tree a
|
|
-- insert (Tree {..}) (x:xs) =
|
|
|
|
merge :: Tree a -> Tree a -> Tree a
|
|
merge (Tree children0) (Tree children1) = Tree $ M.unionWith merge children0 children1
|
|
|
|
-- deriving instance Eq (Tree a)
|
|
|
|
-- deriving instance Ord (Tree a)
|
|
|
|
-- deriving instance (Show a) => Show (Tree a)
|
|
|
|
-- empty :: (Ord a) => Tree a
|
|
-- empty = Tree M.empty
|
|
|
|
-- fromList :: (Ord a, F.Foldable t) => t [a] -> Tree a
|
|
-- fromList = F.foldl' insert empty
|
|
|
|
-- insert :: Tree a -> [a] -> Tree a
|
|
-- insert (Tree {..}) [] = Tree M.empty
|
|
-- insert (Tree {..}) (x : xs) =
|
|
-- Tree
|
|
-- . flip (M.insert x) children
|
|
-- . flip insert xs
|
|
-- . My.fromMaybe empty
|
|
-- . M.lookup x
|
|
-- $ children
|
|
|
|
-- lookup :: (Ord a) => [a] -> Tree a -> Bool
|
|
-- lookup [] = const True
|
|
-- lookup (x : xs) = maybe False (Compress.PrefixTree.lookup xs) . M.lookup x . children
|