the start of a rewrite using what was learned from bzip2
This commit is contained in:
parent
da83f9a5d0
commit
4a963eb383
4 changed files with 93 additions and 18 deletions
|
|
@ -41,6 +41,7 @@ common deps
|
|||
TypeOperators,
|
||||
TypeSynonymInstances,
|
||||
UndecidableInstances,
|
||||
ViewPatterns,
|
||||
mixins:
|
||||
base hiding (Prelude),
|
||||
relude (Relude as Prelude),
|
||||
|
|
@ -76,6 +77,7 @@ common deps
|
|||
Compress.PrefixTree
|
||||
Compress.WordMarkovStats
|
||||
Compress.LengthDistancePairs
|
||||
Compress.MoveToFrontTransform
|
||||
Data.Dirs
|
||||
Data.FiniteBit
|
||||
Data.HuffmanTree
|
||||
|
|
|
|||
|
|
@ -3,25 +3,50 @@
|
|||
module Compress.BurrowsWheeler where
|
||||
-- import qualified Data.CircularList as CL
|
||||
import qualified Relude.Unsafe as U
|
||||
-- import Compress.BurrowsWheeler (toBurrowsWheeler)
|
||||
|
||||
import Data.ByteString.Lazy qualified as BSL
|
||||
import Relude.Unsafe qualified as U
|
||||
|
||||
toBurrowsWheeler (x:xs) = map U.head . sortOn (U.tail) . take (pred . length $ xs) . tails $ xs'
|
||||
data BurrowsWheelerMetadata a = BurrowsWheelerMetadata
|
||||
{ originPtr :: Word,
|
||||
transformed :: a
|
||||
}
|
||||
deriving (Functor, Show, Generic)
|
||||
|
||||
type BurrowsWheelerOutput = BurrowsWheelerMetadata LByteString
|
||||
|
||||
toBurrowsWheeler :: LByteString -> BurrowsWheelerOutput
|
||||
toBurrowsWheeler bs =
|
||||
BurrowsWheelerMetadata originPtr
|
||||
. BSL.pack
|
||||
. map (`BSL.index` (bsLen - 1))
|
||||
$ sortedCycles
|
||||
where
|
||||
xs' = toList $ (x :| xs) <> (x :| [])
|
||||
sortedCycles =
|
||||
sort
|
||||
. take bsLen
|
||||
. BSL.tails
|
||||
$ (bs <> bs)
|
||||
|
||||
toCounts :: forall a b . (Eq a, Eq b, Num b, Enum b, Bounded a, Bounded b) => [a] -> [(a, b)]
|
||||
toCounts = reverse . foldl' f []
|
||||
originPtr =
|
||||
succ
|
||||
. fst
|
||||
. U.fromJust
|
||||
. find (BSL.isPrefixOf bs . snd)
|
||||
. zip [0 ..]
|
||||
$ sortedCycles
|
||||
|
||||
bsLen :: (Integral a) => a
|
||||
bsLen = fromIntegral . BSL.length $ bs
|
||||
|
||||
unBurrowsWheeler :: BurrowsWheelerOutput -> LByteString
|
||||
unBurrowsWheeler (BurrowsWheelerMetadata {..}) =
|
||||
(U.!! (fromIntegral originPtr))
|
||||
. sort
|
||||
. (U.!! (pred . fromIntegral . BSL.length $ transformed))
|
||||
. iterate (zipWith BSL.cons unpacked . sort)
|
||||
. map BSL.singleton
|
||||
. BSL.unpack
|
||||
$ transformed
|
||||
where
|
||||
f [] x = [(x, 0)]
|
||||
f ((x, count) : xs) newVal
|
||||
| x == newVal && count == maxBound = (x, 0) : (x, count) : xs
|
||||
| x == newVal = (x, succ count) : xs
|
||||
| otherwise = (newVal, 0) : (x, count) : xs
|
||||
|
||||
|
||||
-- toBurrowsWheeler xs = map last . sort . map (findLyndonWord) . take (length xs) . tails $ xs'
|
||||
-- where
|
||||
-- xs' = xs ++ xs
|
||||
|
||||
-- findLyndonWord (x : xs) = (x :|) . map snd . takeWhile (uncurry (<=)) . zip (x : xs) $ xs
|
||||
unpacked = BSL.unpack transformed
|
||||
|
|
|
|||
|
|
@ -20,7 +20,6 @@ import Data.Bits as B
|
|||
import Data.Maybe (fromMaybe)
|
||||
import Data.Maybe qualified as My
|
||||
import Data.PQueue qualified as PQ
|
||||
import Codec.Winery qualified as C
|
||||
import Data.Text qualified as T
|
||||
import Data.Text.IO qualified as TIO
|
||||
import Data.Vector.Unboxed qualified as V
|
||||
|
|
|
|||
49
src/Compress/MoveToFrontTransform.hs
Normal file
49
src/Compress/MoveToFrontTransform.hs
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
module Compress.MoveToFrontTransform where
|
||||
|
||||
import Data.ByteString.Lazy qualified as BSL
|
||||
import qualified Data.Containers.ListUtils as LU
|
||||
import qualified Relude.Unsafe as U
|
||||
import qualified Data.ByteString as S
|
||||
|
||||
|
||||
data MoveToFrontMetadata a = MoveToFrontMetadata {
|
||||
startingStack :: [Word8],
|
||||
transformed :: a
|
||||
} deriving (Functor, Show, Generic)
|
||||
|
||||
type MoveToFrontOutput = MoveToFrontMetadata LByteString
|
||||
|
||||
moveToFront :: LByteString -> MoveToFrontOutput
|
||||
moveToFront bs = MoveToFrontMetadata startingStack . moveToFront' startingStack $ bs
|
||||
where
|
||||
startingStack = LU.nubIntOn fromEnum . BSL.unpack $ bs
|
||||
|
||||
moveToFront' :: [Word8] -> LByteString -> LByteString
|
||||
moveToFront' s "" = BSL.empty
|
||||
moveToFront' s (BSL.uncons -> Just (x, xs)) = BSL.cons stackIndex recurse
|
||||
where
|
||||
stackIndex :: Word8
|
||||
stackIndex =
|
||||
fromIntegral
|
||||
. fromMaybe (length s)
|
||||
. fmap fst
|
||||
. find ((==) x . snd)
|
||||
. zip [0 ..]
|
||||
$ s
|
||||
|
||||
recurse = moveToFront' s' xs
|
||||
|
||||
s' :: [Word8]
|
||||
s' = removeAndPrepend x s
|
||||
|
||||
removeAndPrepend x = (x :) . filter (/= x)
|
||||
|
||||
unMoveToFront :: MoveToFrontOutput -> LByteString
|
||||
unMoveToFront (MoveToFrontMetadata {..}) = unMoveToFront' startingStack transformed
|
||||
where
|
||||
unMoveToFront' s bs | BSL.null bs = BSL.empty
|
||||
unMoveToFront' s (BSL.uncons -> Just (loc, xs)) = BSL.cons byte $ unMoveToFront' s' xs
|
||||
where
|
||||
byte = s U.!! (fromEnum loc)
|
||||
|
||||
s' = removeAndPrepend byte s
|
||||
Loading…
Add table
Add a link
Reference in a new issue