compress/src/Compress/PrefixTree.hs
2024-12-21 12:28:00 -05:00

188 lines
5.8 KiB
Haskell

module Compress.PrefixTree where
import Basement.Bits qualified as B
import qualified Basement.From as F
import Compress.Huffman qualified as H
import Control.Applicative qualified as A
import Data.Bifunctor qualified as Bi
import Data.ByteString qualified as BS
import Data.Foldable qualified as F
import Data.HuffmanTree as HT
import Data.List qualified as L
import Data.List.NonEmpty qualified as NE
import Data.Map.Strict qualified as M
import Data.Maybe qualified as My
import Data.Ord qualified as O
import Data.PQueue qualified as PQ
import Debug.Trace qualified as D
import Debug.Trace qualified as T
import Basement.Bits (FiniteBitsOps(numberOfBits))
import GHC.Generics
import qualified Data.Serialize as C
import Data.FiniteBit
data Tree a = (Ord a) =>
Tree
{ children :: M.Map a (Tree a)
}
newtype HuffmanPrefixTree a b = HuffmanPrefixTree
{ inner :: M.Map a (HuffmanTree b)
} deriving (Eq, Ord, Show, Generic, C.Serialize)
finiteBitTupleUncons ::
forall a b.
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
BS.ByteString ->
Maybe ((a, b), BS.ByteString)
finiteBitTupleUncons bs = case finiteBitUncons bs of
Just (a, bs') -> case finiteBitUncons bs' of
Just (b, _) -> Just ((a, b), bs')
_ -> Nothing
_ -> Nothing
fromByteString ::
forall a b.
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
BS.ByteString ->
[(a, b)]
fromByteString bs = case finiteBitTupleUncons bs of
Just ((a, b), bs') -> (a, b) : fromByteString bs'
Nothing -> []
toHuffmanTree ::
forall a b.
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
M.Map (a, b) Word ->
HuffmanPrefixTree a b
toHuffmanTree =
HuffmanPrefixTree
. M.mapMaybe HT.fromList
. M.fromListWith (++)
. map (\((a, b), count) -> (a, [(fromIntegral count, b)]))
. M.assocs
decompress ::
forall a .
(Integral a, B.FiniteBitsOps a, B.BitOps a) =>
(TreeDirs, HuffmanPrefixTree a a, a)
-> Maybe BS.ByteString
decompress (TreeDirs treeDirs'', HuffmanPrefixTree prefixTree, initial') = BS.concat . map toByteString . (initial' :) <$> decompress' treeDirs'' initial'
where
decompress' :: [TreeDir] -> a -> Maybe [a]
decompress' treeDirs initial = case HT.lookup (prefixTree M.! initial) treeDirs of
Nothing -> Nothing
Just (ans, []) -> Just [ans]
Just (ans, treeDirs') -> (ans :) <$> decompress' treeDirs' ans
compress ::
forall a b.
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
BS.ByteString -> Maybe (TreeDirs, HuffmanPrefixTree a b, a)
compress bs = (TreeDirs $ concatMap treeDirsFor asFiniteBitPairs, tree, ) <$> initial
where
tree :: HuffmanPrefixTree a b
tree = toHuffmanTree . nGramCounts $ bs
treeDirMap :: M.Map a (M.Map b [TreeDir])
treeDirMap = M.map HT.findTreeDirections . Compress.PrefixTree.inner $ tree
initial :: Maybe a
initial = fst <$> finiteBitUncons bs
asFiniteBitPairs :: [(a,b)]
asFiniteBitPairs = fromByteString bs
treeDirsFor :: (a, b) -> [TreeDir]
treeDirsFor (a, b) = (treeDirMap M.! a) M.! b
-- | all (M.null . children) . M.elems . children $ tree =
-- fmap End
-- . HT.fromList
-- . map (\x -> (prefixCounts x, x))
-- . M.keys
-- . children
-- $ tree
-- | otherwise =
-- Just
-- . Layer
-- . M.mapMaybeWithKey (\key val -> toHuffmanTree' (key : soFar) val)
-- . children
-- $ tree
-- where
-- prefixCounts :: a -> Int
-- prefixCounts x =
-- fromIntegral
-- . sum
-- . M.elems
-- . M.filterWithKey (\key val -> L.isPrefixOf (reverse . (x :) $ soFar) key)
-- $ nGrams
-- toHuffmanTree :: Tree a -> p1 -> HuffmanTree a
-- toHuffmanTree :: forall a . Tree a -> M.Map [a] Word -> HuffmanTree [a]
-- toHuffmanTree (Tree {..}) nGrams soFar | M.size children == 1 = Leaf . map (reverse . (: soFar)) . M.keys $ children
-- toHuffmanTree (Tree {..}) nGrams soFar = Leaf . map (reverse . (: soFar)) . M.keys $ children
-- where
-- sorted = L.sortBy (prefixCounts . fst) . M.toList $ children
nGramCounts ::
forall a b.
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
BS.ByteString ->
M.Map (a, b) Word
nGramCounts =
M.fromListWith (+)
. map (,1)
. My.mapMaybe (My.listToMaybe . fromByteString)
. takeWhile ((== len) . BS.length)
. map (BS.take len)
. BS.tails
where
len = (`div` 8) . F.from $ numberOfBits (0 :: a) + numberOfBits (0 :: b)
empty :: (Ord a) => Tree a
empty = Tree M.empty
singleton :: (Ord a) => a -> Tree a
singleton x = Tree $ M.singleton x empty
fromSingleList :: (Ord a) => [a] -> Tree a
fromSingleList [] = empty
fromSingleList (x : xs) = Tree . M.singleton x . fromSingleList $ xs
fromList :: (Ord a) => [[a]] -> Tree a
fromList = F.foldl' merge empty . map fromSingleList
-- insert :: Ord a => Tree a -> [a] -> Tree a
-- insert (Tree {..}) (x:xs) =
merge :: Tree a -> Tree a -> Tree a
merge (Tree children0) (Tree children1) = Tree $ M.unionWith merge children0 children1
-- deriving instance Eq (Tree a)
-- deriving instance Ord (Tree a)
-- deriving instance (Show a) => Show (Tree a)
-- empty :: (Ord a) => Tree a
-- empty = Tree M.empty
-- fromList :: (Ord a, F.Foldable t) => t [a] -> Tree a
-- fromList = F.foldl' insert empty
-- insert :: Tree a -> [a] -> Tree a
-- insert (Tree {..}) [] = Tree M.empty
-- insert (Tree {..}) (x : xs) =
-- Tree
-- . flip (M.insert x) children
-- . flip insert xs
-- . My.fromMaybe empty
-- . M.lookup x
-- $ children
-- lookup :: (Ord a) => [a] -> Tree a -> Bool
-- lookup [] = const True
-- lookup (x : xs) = maybe False (Compress.PrefixTree.lookup xs) . M.lookup x . children