the start of a rewrite using what was learned from bzip2
This commit is contained in:
parent
da83f9a5d0
commit
4a963eb383
4 changed files with 93 additions and 18 deletions
|
|
@ -41,6 +41,7 @@ common deps
|
||||||
TypeOperators,
|
TypeOperators,
|
||||||
TypeSynonymInstances,
|
TypeSynonymInstances,
|
||||||
UndecidableInstances,
|
UndecidableInstances,
|
||||||
|
ViewPatterns,
|
||||||
mixins:
|
mixins:
|
||||||
base hiding (Prelude),
|
base hiding (Prelude),
|
||||||
relude (Relude as Prelude),
|
relude (Relude as Prelude),
|
||||||
|
|
@ -76,6 +77,7 @@ common deps
|
||||||
Compress.PrefixTree
|
Compress.PrefixTree
|
||||||
Compress.WordMarkovStats
|
Compress.WordMarkovStats
|
||||||
Compress.LengthDistancePairs
|
Compress.LengthDistancePairs
|
||||||
|
Compress.MoveToFrontTransform
|
||||||
Data.Dirs
|
Data.Dirs
|
||||||
Data.FiniteBit
|
Data.FiniteBit
|
||||||
Data.HuffmanTree
|
Data.HuffmanTree
|
||||||
|
|
|
||||||
|
|
@ -3,25 +3,50 @@
|
||||||
module Compress.BurrowsWheeler where
|
module Compress.BurrowsWheeler where
|
||||||
-- import qualified Data.CircularList as CL
|
-- import qualified Data.CircularList as CL
|
||||||
import qualified Relude.Unsafe as U
|
import qualified Relude.Unsafe as U
|
||||||
-- import Compress.BurrowsWheeler (toBurrowsWheeler)
|
|
||||||
|
|
||||||
|
import Data.ByteString.Lazy qualified as BSL
|
||||||
|
import Relude.Unsafe qualified as U
|
||||||
|
|
||||||
toBurrowsWheeler (x:xs) = map U.head . sortOn (U.tail) . take (pred . length $ xs) . tails $ xs'
|
data BurrowsWheelerMetadata a = BurrowsWheelerMetadata
|
||||||
|
{ originPtr :: Word,
|
||||||
|
transformed :: a
|
||||||
|
}
|
||||||
|
deriving (Functor, Show, Generic)
|
||||||
|
|
||||||
|
type BurrowsWheelerOutput = BurrowsWheelerMetadata LByteString
|
||||||
|
|
||||||
|
toBurrowsWheeler :: LByteString -> BurrowsWheelerOutput
|
||||||
|
toBurrowsWheeler bs =
|
||||||
|
BurrowsWheelerMetadata originPtr
|
||||||
|
. BSL.pack
|
||||||
|
. map (`BSL.index` (bsLen - 1))
|
||||||
|
$ sortedCycles
|
||||||
where
|
where
|
||||||
xs' = toList $ (x :| xs) <> (x :| [])
|
sortedCycles =
|
||||||
|
sort
|
||||||
|
. take bsLen
|
||||||
|
. BSL.tails
|
||||||
|
$ (bs <> bs)
|
||||||
|
|
||||||
toCounts :: forall a b . (Eq a, Eq b, Num b, Enum b, Bounded a, Bounded b) => [a] -> [(a, b)]
|
originPtr =
|
||||||
toCounts = reverse . foldl' f []
|
succ
|
||||||
|
. fst
|
||||||
|
. U.fromJust
|
||||||
|
. find (BSL.isPrefixOf bs . snd)
|
||||||
|
. zip [0 ..]
|
||||||
|
$ sortedCycles
|
||||||
|
|
||||||
|
bsLen :: (Integral a) => a
|
||||||
|
bsLen = fromIntegral . BSL.length $ bs
|
||||||
|
|
||||||
|
unBurrowsWheeler :: BurrowsWheelerOutput -> LByteString
|
||||||
|
unBurrowsWheeler (BurrowsWheelerMetadata {..}) =
|
||||||
|
(U.!! (fromIntegral originPtr))
|
||||||
|
. sort
|
||||||
|
. (U.!! (pred . fromIntegral . BSL.length $ transformed))
|
||||||
|
. iterate (zipWith BSL.cons unpacked . sort)
|
||||||
|
. map BSL.singleton
|
||||||
|
. BSL.unpack
|
||||||
|
$ transformed
|
||||||
where
|
where
|
||||||
f [] x = [(x, 0)]
|
unpacked = BSL.unpack transformed
|
||||||
f ((x, count) : xs) newVal
|
|
||||||
| x == newVal && count == maxBound = (x, 0) : (x, count) : xs
|
|
||||||
| x == newVal = (x, succ count) : xs
|
|
||||||
| otherwise = (newVal, 0) : (x, count) : xs
|
|
||||||
|
|
||||||
|
|
||||||
-- toBurrowsWheeler xs = map last . sort . map (findLyndonWord) . take (length xs) . tails $ xs'
|
|
||||||
-- where
|
|
||||||
-- xs' = xs ++ xs
|
|
||||||
|
|
||||||
-- findLyndonWord (x : xs) = (x :|) . map snd . takeWhile (uncurry (<=)) . zip (x : xs) $ xs
|
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,6 @@ import Data.Bits as B
|
||||||
import Data.Maybe (fromMaybe)
|
import Data.Maybe (fromMaybe)
|
||||||
import Data.Maybe qualified as My
|
import Data.Maybe qualified as My
|
||||||
import Data.PQueue qualified as PQ
|
import Data.PQueue qualified as PQ
|
||||||
import Codec.Winery qualified as C
|
|
||||||
import Data.Text qualified as T
|
import Data.Text qualified as T
|
||||||
import Data.Text.IO qualified as TIO
|
import Data.Text.IO qualified as TIO
|
||||||
import Data.Vector.Unboxed qualified as V
|
import Data.Vector.Unboxed qualified as V
|
||||||
|
|
|
||||||
49
src/Compress/MoveToFrontTransform.hs
Normal file
49
src/Compress/MoveToFrontTransform.hs
Normal file
|
|
@ -0,0 +1,49 @@
|
||||||
|
module Compress.MoveToFrontTransform where
|
||||||
|
|
||||||
|
import Data.ByteString.Lazy qualified as BSL
|
||||||
|
import qualified Data.Containers.ListUtils as LU
|
||||||
|
import qualified Relude.Unsafe as U
|
||||||
|
import qualified Data.ByteString as S
|
||||||
|
|
||||||
|
|
||||||
|
data MoveToFrontMetadata a = MoveToFrontMetadata {
|
||||||
|
startingStack :: [Word8],
|
||||||
|
transformed :: a
|
||||||
|
} deriving (Functor, Show, Generic)
|
||||||
|
|
||||||
|
type MoveToFrontOutput = MoveToFrontMetadata LByteString
|
||||||
|
|
||||||
|
moveToFront :: LByteString -> MoveToFrontOutput
|
||||||
|
moveToFront bs = MoveToFrontMetadata startingStack . moveToFront' startingStack $ bs
|
||||||
|
where
|
||||||
|
startingStack = LU.nubIntOn fromEnum . BSL.unpack $ bs
|
||||||
|
|
||||||
|
moveToFront' :: [Word8] -> LByteString -> LByteString
|
||||||
|
moveToFront' s "" = BSL.empty
|
||||||
|
moveToFront' s (BSL.uncons -> Just (x, xs)) = BSL.cons stackIndex recurse
|
||||||
|
where
|
||||||
|
stackIndex :: Word8
|
||||||
|
stackIndex =
|
||||||
|
fromIntegral
|
||||||
|
. fromMaybe (length s)
|
||||||
|
. fmap fst
|
||||||
|
. find ((==) x . snd)
|
||||||
|
. zip [0 ..]
|
||||||
|
$ s
|
||||||
|
|
||||||
|
recurse = moveToFront' s' xs
|
||||||
|
|
||||||
|
s' :: [Word8]
|
||||||
|
s' = removeAndPrepend x s
|
||||||
|
|
||||||
|
removeAndPrepend x = (x :) . filter (/= x)
|
||||||
|
|
||||||
|
unMoveToFront :: MoveToFrontOutput -> LByteString
|
||||||
|
unMoveToFront (MoveToFrontMetadata {..}) = unMoveToFront' startingStack transformed
|
||||||
|
where
|
||||||
|
unMoveToFront' s bs | BSL.null bs = BSL.empty
|
||||||
|
unMoveToFront' s (BSL.uncons -> Just (loc, xs)) = BSL.cons byte $ unMoveToFront' s' xs
|
||||||
|
where
|
||||||
|
byte = s U.!! (fromEnum loc)
|
||||||
|
|
||||||
|
s' = removeAndPrepend byte s
|
||||||
Loading…
Add table
Add a link
Reference in a new issue