the start of a rewrite using what was learned from bzip2

This commit is contained in:
Jack Wines 2025-11-28 17:47:27 -08:00
parent da83f9a5d0
commit 4a963eb383
Signed by: Jack
SSH key fingerprint: SHA256:AaP2Hr/e3mEjeY+s9XJmQqAesqEms8ENRhwRkpO0WUk
4 changed files with 93 additions and 18 deletions

View file

@ -41,6 +41,7 @@ common deps
TypeOperators, TypeOperators,
TypeSynonymInstances, TypeSynonymInstances,
UndecidableInstances, UndecidableInstances,
ViewPatterns,
mixins: mixins:
base hiding (Prelude), base hiding (Prelude),
relude (Relude as Prelude), relude (Relude as Prelude),
@ -76,6 +77,7 @@ common deps
Compress.PrefixTree Compress.PrefixTree
Compress.WordMarkovStats Compress.WordMarkovStats
Compress.LengthDistancePairs Compress.LengthDistancePairs
Compress.MoveToFrontTransform
Data.Dirs Data.Dirs
Data.FiniteBit Data.FiniteBit
Data.HuffmanTree Data.HuffmanTree

View file

@ -3,25 +3,50 @@
module Compress.BurrowsWheeler where module Compress.BurrowsWheeler where
-- import qualified Data.CircularList as CL -- import qualified Data.CircularList as CL
import qualified Relude.Unsafe as U import qualified Relude.Unsafe as U
-- import Compress.BurrowsWheeler (toBurrowsWheeler)
import Data.ByteString.Lazy qualified as BSL
import Relude.Unsafe qualified as U
toBurrowsWheeler (x:xs) = map U.head . sortOn (U.tail) . take (pred . length $ xs) . tails $ xs' data BurrowsWheelerMetadata a = BurrowsWheelerMetadata
{ originPtr :: Word,
transformed :: a
}
deriving (Functor, Show, Generic)
type BurrowsWheelerOutput = BurrowsWheelerMetadata LByteString
toBurrowsWheeler :: LByteString -> BurrowsWheelerOutput
toBurrowsWheeler bs =
BurrowsWheelerMetadata originPtr
. BSL.pack
. map (`BSL.index` (bsLen - 1))
$ sortedCycles
where where
xs' = toList $ (x :| xs) <> (x :| []) sortedCycles =
sort
. take bsLen
. BSL.tails
$ (bs <> bs)
toCounts :: forall a b . (Eq a, Eq b, Num b, Enum b, Bounded a, Bounded b) => [a] -> [(a, b)] originPtr =
toCounts = reverse . foldl' f [] succ
. fst
. U.fromJust
. find (BSL.isPrefixOf bs . snd)
. zip [0 ..]
$ sortedCycles
bsLen :: (Integral a) => a
bsLen = fromIntegral . BSL.length $ bs
unBurrowsWheeler :: BurrowsWheelerOutput -> LByteString
unBurrowsWheeler (BurrowsWheelerMetadata {..}) =
(U.!! (fromIntegral originPtr))
. sort
. (U.!! (pred . fromIntegral . BSL.length $ transformed))
. iterate (zipWith BSL.cons unpacked . sort)
. map BSL.singleton
. BSL.unpack
$ transformed
where where
f [] x = [(x, 0)] unpacked = BSL.unpack transformed
f ((x, count) : xs) newVal
| x == newVal && count == maxBound = (x, 0) : (x, count) : xs
| x == newVal = (x, succ count) : xs
| otherwise = (newVal, 0) : (x, count) : xs
-- toBurrowsWheeler xs = map last . sort . map (findLyndonWord) . take (length xs) . tails $ xs'
-- where
-- xs' = xs ++ xs
-- findLyndonWord (x : xs) = (x :|) . map snd . takeWhile (uncurry (<=)) . zip (x : xs) $ xs

View file

@ -20,7 +20,6 @@ import Data.Bits as B
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import Data.Maybe qualified as My import Data.Maybe qualified as My
import Data.PQueue qualified as PQ import Data.PQueue qualified as PQ
import Codec.Winery qualified as C
import Data.Text qualified as T import Data.Text qualified as T
import Data.Text.IO qualified as TIO import Data.Text.IO qualified as TIO
import Data.Vector.Unboxed qualified as V import Data.Vector.Unboxed qualified as V

View file

@ -0,0 +1,49 @@
module Compress.MoveToFrontTransform where
import Data.ByteString.Lazy qualified as BSL
import qualified Data.Containers.ListUtils as LU
import qualified Relude.Unsafe as U
import qualified Data.ByteString as S
data MoveToFrontMetadata a = MoveToFrontMetadata {
startingStack :: [Word8],
transformed :: a
} deriving (Functor, Show, Generic)
type MoveToFrontOutput = MoveToFrontMetadata LByteString
moveToFront :: LByteString -> MoveToFrontOutput
moveToFront bs = MoveToFrontMetadata startingStack . moveToFront' startingStack $ bs
where
startingStack = LU.nubIntOn fromEnum . BSL.unpack $ bs
moveToFront' :: [Word8] -> LByteString -> LByteString
moveToFront' s "" = BSL.empty
moveToFront' s (BSL.uncons -> Just (x, xs)) = BSL.cons stackIndex recurse
where
stackIndex :: Word8
stackIndex =
fromIntegral
. fromMaybe (length s)
. fmap fst
. find ((==) x . snd)
. zip [0 ..]
$ s
recurse = moveToFront' s' xs
s' :: [Word8]
s' = removeAndPrepend x s
removeAndPrepend x = (x :) . filter (/= x)
unMoveToFront :: MoveToFrontOutput -> LByteString
unMoveToFront (MoveToFrontMetadata {..}) = unMoveToFront' startingStack transformed
where
unMoveToFront' s bs | BSL.null bs = BSL.empty
unMoveToFront' s (BSL.uncons -> Just (loc, xs)) = BSL.cons byte $ unMoveToFront' s' xs
where
byte = s U.!! (fromEnum loc)
s' = removeAndPrepend byte s