remove markov chain from arithmetic compression in preparition for move to front transform
This commit is contained in:
parent
4a963eb383
commit
9f3c1d0168
6 changed files with 146 additions and 179 deletions
|
|
@ -75,7 +75,6 @@ common deps
|
|||
Compress.BurrowsWheeler
|
||||
Compress.Huffman
|
||||
Compress.PrefixTree
|
||||
Compress.WordMarkovStats
|
||||
Compress.LengthDistancePairs
|
||||
Compress.MoveToFrontTransform
|
||||
Data.Dirs
|
||||
|
|
|
|||
30
flake.lock
generated
30
flake.lock
generated
|
|
@ -37,11 +37,11 @@
|
|||
"nixpkgs-lib": "nixpkgs-lib"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1756770412,
|
||||
"narHash": "sha256-+uWLQZccFHwqpGqr2Yt5VsW/PbeJVTn9Dk6SHWhNRPw=",
|
||||
"lastModified": 1763759067,
|
||||
"narHash": "sha256-LlLt2Jo/gMNYAwOgdRQBrsRoOz7BPRkzvNaI/fzXi2Q=",
|
||||
"owner": "hercules-ci",
|
||||
"repo": "flake-parts",
|
||||
"rev": "4524271976b625a4a605beefd893f270620fd751",
|
||||
"rev": "2cccadc7357c0ba201788ae99c4dfa90728ef5e0",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
|
@ -67,11 +67,11 @@
|
|||
},
|
||||
"haskell-flake": {
|
||||
"locked": {
|
||||
"lastModified": 1756607542,
|
||||
"narHash": "sha256-+99fEAk0HwjYgIW2tEOs7ayBDxnU9NAM5E29ZxgyX40=",
|
||||
"lastModified": 1763904090,
|
||||
"narHash": "sha256-Mq99c4O6Ovx/JQgi6B6fnI9xR77nQ9+GcFTM2113nMI=",
|
||||
"owner": "srid",
|
||||
"repo": "haskell-flake",
|
||||
"rev": "73e3891fb135c679a1c30fae4b101e5b41b8ca61",
|
||||
"rev": "73c006f8e6531acab28bbde362ae8f13ee0ac72b",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
|
@ -97,11 +97,11 @@
|
|||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1756542300,
|
||||
"narHash": "sha256-tlOn88coG5fzdyqz6R93SQL5Gpq+m/DsWpekNFhqPQk=",
|
||||
"lastModified": 1764517877,
|
||||
"narHash": "sha256-pp3uT4hHijIC8JUK5MEqeAWmParJrgBVzHLNfJDZxg4=",
|
||||
"owner": "nixos",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "d7600c775f877cd87b4f5a831c28aa94137377aa",
|
||||
"rev": "2d293cbfa5a793b4c50d17c05ef9e385b90edf6c",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
|
@ -113,11 +113,11 @@
|
|||
},
|
||||
"nixpkgs-lib": {
|
||||
"locked": {
|
||||
"lastModified": 1754788789,
|
||||
"narHash": "sha256-x2rJ+Ovzq0sCMpgfgGaaqgBSwY+LST+WbZ6TytnT9Rk=",
|
||||
"lastModified": 1761765539,
|
||||
"narHash": "sha256-b0yj6kfvO8ApcSE+QmA6mUfu8IYG6/uU28OFn4PaC8M=",
|
||||
"owner": "nix-community",
|
||||
"repo": "nixpkgs.lib",
|
||||
"rev": "a73b9c743612e4244d865a2fdee11865283c04e6",
|
||||
"rev": "719359f4562934ae99f5443f20aa06c2ffff91fc",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
|
@ -162,11 +162,11 @@
|
|||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1756662192,
|
||||
"narHash": "sha256-F1oFfV51AE259I85av+MAia221XwMHCOtZCMcZLK2Jk=",
|
||||
"lastModified": 1762938485,
|
||||
"narHash": "sha256-AlEObg0syDl+Spi4LsZIBrjw+snSVU4T8MOeuZJUJjM=",
|
||||
"owner": "numtide",
|
||||
"repo": "treefmt-nix",
|
||||
"rev": "1aabc6c05ccbcbf4a635fb7a90400e44282f61c4",
|
||||
"rev": "5b4ee75aeefd1e2d5a1cc43cf6ba65eba75e83e4",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
|
|
|||
|
|
@ -2,130 +2,115 @@
|
|||
-- module Compress.Arithmetic (Compressed, compress, decompress, Word4MapSerialized, Word4Map) where
|
||||
module Compress.Arithmetic where
|
||||
|
||||
import Data.Bits qualified as B
|
||||
import Data.Word
|
||||
import Data.Bifunctor qualified as Bi
|
||||
import Codec.Winery (WineryRecord)
|
||||
import Codec.Winery qualified as C
|
||||
import Control.Arrow qualified as Ar
|
||||
import Control.Monad.Par (NFData)
|
||||
import Control.Monad.Par qualified as P
|
||||
import Data.Bifunctor qualified as Bi
|
||||
import Data.Bit qualified as BV
|
||||
import Data.Bit qualified as VB
|
||||
import Data.Bits qualified as B
|
||||
import Data.ByteString qualified as BS
|
||||
import Data.ByteString qualified as By
|
||||
import Data.Dirs
|
||||
import Data.FiniteBit qualified as Fi
|
||||
import qualified Data.Foldable as F
|
||||
import Data.Foldable qualified as F
|
||||
import Data.List (genericLength)
|
||||
import Data.List qualified as L
|
||||
import Data.Map.Strict qualified as M
|
||||
import Data.Maybe qualified as My
|
||||
import Data.Ord
|
||||
-- import qualified Debug.Trace as D
|
||||
import Data.Proxy qualified as P
|
||||
import Data.Ratio
|
||||
import Codec.Winery qualified as C
|
||||
import Data.Vector qualified as V
|
||||
import Data.Vector.Unboxed qualified as VU
|
||||
import Data.Word
|
||||
import Data.Word4 (CompressWord4List, Word4, fromCompressedWord4List, toCompressedWord4List)
|
||||
import GHC.Generics (Generic)
|
||||
import GHC.Natural (Natural)
|
||||
import qualified Relude.Unsafe as US
|
||||
import Data.Word4 (Word4, CompressWord4List, toCompressedWord4List, fromCompressedWord4List)
|
||||
import qualified Data.ByteString as BS
|
||||
import qualified Relude.Unsafe as U
|
||||
import Codec.Winery (WineryRecord)
|
||||
import Compress.BurrowsWheeler (toBurrowsWheeler)
|
||||
import qualified Data.Bit as BV
|
||||
import qualified Data.Vector as V
|
||||
import qualified Data.Bit as VB
|
||||
import qualified Data.Vector.Unboxed as VU
|
||||
import Data.Dirs
|
||||
-- import qualified Debug.Trace as D
|
||||
import qualified Data.Proxy as P
|
||||
import Compress.WordMarkovStats as WM
|
||||
import Relude.Unsafe qualified as U
|
||||
import Relude.Unsafe qualified as US
|
||||
|
||||
-- withPrimeDenums = concatMap (\x -> map (% x) [1 .. (pred x)]) primes
|
||||
|
||||
-- shortestNumberBetween :: Ratio Integer -> Ratio Integer -> Maybe (Ratio Integer)
|
||||
-- shortestNumberBetween xMin xMax = find (\candidate -> candidate >= xMin && candidate < xMax) withPrimeDenums
|
||||
|
||||
|
||||
toRing :: forall a k. (Show a, Show k, Ord k, Integral a, Bounded a) => [(k, a)] -> M.Map k WordMarkovStats
|
||||
toRing xs = M.fromList . zip (map fst xs) $ wordMarkovStats
|
||||
where
|
||||
|
||||
wordMarkovStats = zipWith WordMarkovStats (L.scanl' (+) 0 withBumpedZeroSized) withBumpedZeroSized
|
||||
|
||||
maxBound' :: Integer
|
||||
maxBound' = fromIntegral (maxBound :: a)
|
||||
|
||||
resize xs = map ( / sum' ) xs
|
||||
resize xs = map (/ sum') xs
|
||||
where
|
||||
sum' = sum xs
|
||||
|
||||
withBumpedZeroSized :: [Rational]
|
||||
withBumpedZeroSized
|
||||
= resize
|
||||
withBumpedZeroSized =
|
||||
resize
|
||||
. map (max minimumSize)
|
||||
. resize
|
||||
. map (fromIntegral . snd)
|
||||
$ xs
|
||||
where
|
||||
-- | we have to round the 0s up to something
|
||||
-- | we somewhat arbitrarily choose half of the smallest
|
||||
-- | normally representable amount
|
||||
-- \| we have to round the 0s up to something
|
||||
-- \| we somewhat arbitrarily choose half of the smallest
|
||||
-- \| normally representable amount
|
||||
minimumSize :: Rational
|
||||
minimumSize = 1 % (maxBound' * 2)
|
||||
|
||||
|
||||
|
||||
f (prevFrac, runningSum) currFrac = (newFrac, newFrac + runningSum)
|
||||
where
|
||||
newFrac = currFrac * (1 - runningSum)
|
||||
|
||||
relativeCounts :: forall k . Ord k => [(k, Integer)] -> [(k, Rational)]
|
||||
relativeCounts :: forall k. (Ord k) => [(k, Integer)] -> [(k, Rational)]
|
||||
relativeCounts m = map (Bi.second asFractionOfSum) m
|
||||
where
|
||||
asFractionOfSum = ( % sum') . fromIntegral
|
||||
asFractionOfSum = (% sum') . fromIntegral
|
||||
|
||||
sum' = fromIntegral . F.sum . map snd $ m
|
||||
|
||||
|
||||
|
||||
maxBound' :: forall a b . (Num b, Bounded a, Integral a) => P.Proxy a -> b
|
||||
maxBound' :: forall a b. (Num b, Bounded a, Integral a) => P.Proxy a -> b
|
||||
maxBound' p = fromIntegral (maxBound :: a)
|
||||
|
||||
twoByteMarkov :: forall k . (Ord k) => [k] -> M.Map k [(k, Rational)]
|
||||
twoByteMarkov xs =
|
||||
M.map sizeAsFraction
|
||||
. M.fromListWith (M.unionWith (+))
|
||||
. zip xs
|
||||
. map (`M.singleton` (1 :: Integer))
|
||||
. US.tail
|
||||
listToRelativeCounts :: forall k. (Ord k) => [k] -> [(k, Rational)]
|
||||
listToRelativeCounts xs =
|
||||
relativeCounts
|
||||
. M.assocs
|
||||
. M.fromListWith (+)
|
||||
. map (,1)
|
||||
$ xs
|
||||
where
|
||||
|
||||
sizeAsFraction :: M.Map k Integer -> [(k, Rational)]
|
||||
sizeAsFraction = relativeCounts . M.assocs
|
||||
|
||||
data Chunk a = Chunk
|
||||
{ location :: SerializedDirs,
|
||||
start :: a,
|
||||
length :: Word64
|
||||
}
|
||||
deriving (Eq, Ord, Show, Generic, P.NFData)
|
||||
deriving C.Serialise via C.WineryVariant (Chunk a)
|
||||
deriving (C.Serialise) via C.WineryVariant (Chunk a)
|
||||
|
||||
-- maxBound = maxBound' (P.Proxy :: Proxy b)
|
||||
|
||||
discretizeFraction :: forall b . (Bounded b, Integral b) => Rational -> b
|
||||
discretizeFraction :: forall b. (Bounded b, Integral b) => Rational -> b
|
||||
discretizeFraction = floor . min maxBound'' . fromRational . (* (succ $ maxBound' (P.Proxy :: P.Proxy b)))
|
||||
where
|
||||
maxBound'' = maxBound' (P.Proxy :: P.Proxy b)
|
||||
|
||||
|
||||
type Word4MapSerialized a = M.Map a ([a], CompressWord4List)
|
||||
type Word4Map a = M.Map a [(a, Word8)]
|
||||
|
||||
type Word4Map a = M.Map a [(a, Word8)]
|
||||
|
||||
-- talkAboutCompressed (Compressed {..}) = (
|
||||
-- ("markovs (serealized)", BS.length . C.serialiseOnly $ markovs),
|
||||
-- ("markovs (unserealized)", BS.length . C.serialiseOnly . unserealizeWord4Map $ markovs),
|
||||
-- ("chunks", BS.length . C.serialiseOnly $ chunks))
|
||||
-- ("chunks", BS.length . C.serialiseOnly $ chunks))
|
||||
|
||||
newtype Prediction = Prediction {
|
||||
chances :: [(Word8, Rational)]
|
||||
newtype Prediction = Prediction
|
||||
{ chances :: [(Word8, Rational)]
|
||||
}
|
||||
|
||||
class CompressionModel a where
|
||||
|
|
@ -136,7 +121,7 @@ class CompressionModel a where
|
|||
|
||||
-- at some point write
|
||||
-- instance IsoSerialisable a b => C.Serialise b
|
||||
class C.Serialise b => IsoSerialisable a b where
|
||||
class (C.Serialise b) => IsoSerialisable a b where
|
||||
toSerialisable :: a -> b
|
||||
|
||||
fromSerialisable :: b -> a
|
||||
|
|
@ -145,14 +130,14 @@ class C.Serialise b => IsoSerialisable a b where
|
|||
-- deriving (Eq, Ord, Show, Generic) -- , C.Serialise, P.NFData)
|
||||
-- deriving C.Serialise via C.WineryRecord BetterCompressed
|
||||
|
||||
data Compressed a mapImplementation = Compressed
|
||||
{ markovs :: mapImplementation,
|
||||
data Compressed a = Compressed
|
||||
{
|
||||
-- needs a better name than markovs
|
||||
frequencies :: [(a, Word8)], -- we'll paramaterize this at some point::168
|
||||
chunks :: [Chunk a]
|
||||
}
|
||||
deriving (Eq, Ord, Show, Generic, NFData) -- , C.Serialise, P.NFData)
|
||||
deriving C.Serialise via C.WineryVariant (Compressed a mapImplementation)
|
||||
|
||||
|
||||
deriving (C.Serialise) via C.WineryVariant (Compressed a)
|
||||
|
||||
-- unserealizeWord4Map :: forall a . Show a => Word4MapSerialized a -> Word4Map a
|
||||
-- unserealizeWord4Map = M.map unserializedIndividualMap
|
||||
|
|
@ -162,6 +147,11 @@ data Compressed a mapImplementation = Compressed
|
|||
|
||||
-- -- unserealizeWord4Map = M.map (uncurry zip . map (\(bytes, sizes) -> (bytes,) . (++ [maxBound]) . fromCompressedWord4List (Prelude.length bytes) $ sizes))
|
||||
|
||||
data WordMarkovStats = WordMarkovStats
|
||||
{ location :: Ratio Integer,
|
||||
size :: Ratio Integer
|
||||
}
|
||||
deriving (Show, Eq, Ord, Generic)
|
||||
|
||||
-- serealizeWord4Map :: Word4Map a -> Word4MapSerialized a
|
||||
-- serealizeWord4Map = M.map (Bi.second toCompressedWord4List . L.unzip)
|
||||
|
|
@ -169,32 +159,35 @@ data Compressed a mapImplementation = Compressed
|
|||
decompress ::
|
||||
forall a.
|
||||
(Integral a, B.FiniteBits a, B.Bits a, Show a, NFData a) =>
|
||||
Compressed a (Word4Map a) ->
|
||||
Compressed a ->
|
||||
[a]
|
||||
decompress (Compressed {..}) = concat . P.runPar . P.parMap decompressChunk
|
||||
$ chunks
|
||||
decompress (Compressed {..}) =
|
||||
concat . P.runPar . P.parMap decompressChunk $
|
||||
chunks
|
||||
where
|
||||
rings = M.map (M.fromList . map toDecompressionRing . M.toList . toRing) markovs'
|
||||
|
||||
markovs' = -- unserealizeWord4Map
|
||||
markovs
|
||||
|
||||
rings :: M.Map Rational (a, Rational)
|
||||
rings = M.fromList . map toDecompressionRing . M.assocs . toRing $ frequencies
|
||||
|
||||
toDecompressionRing (key, (WordMarkovStats {..})) = (location, (key, size))
|
||||
|
||||
decompressChunk :: Chunk a -> [a]
|
||||
decompressChunk (Chunk {..}) = map snd . take (fromIntegral length) . L.iterate' decompress' $ (location', start) -- (fromBS rawLocation, start)
|
||||
decompressChunk (Chunk {..}) =
|
||||
map snd
|
||||
. take (fromIntegral length)
|
||||
. US.tail
|
||||
. L.iterate' (decompress' . fst)
|
||||
$ (location', 0) -- (fromBS rawLocation, start)
|
||||
where
|
||||
|
||||
location' = deserialize location
|
||||
-- decompress' :: (Ratio Integer, a) -> (Ratio Integer, a)
|
||||
decompress' (!loc, !prevWord) = -- traceShow (newSize) $
|
||||
decompress' !loc =
|
||||
-- traceShow (newSize) $
|
||||
((loc - ansLoc) / newSize, newVal)
|
||||
where
|
||||
ansLoc :: Ratio Integer
|
||||
newVal :: a
|
||||
newSize :: Ratio Integer
|
||||
(ansLoc, (newVal, newSize)) = My.fromJust . M.lookupLE loc $ (rings M.! prevWord)
|
||||
(ansLoc, (newVal, newSize)) = My.fromJust . M.lookupLE loc $ rings
|
||||
|
||||
sanityCheck :: forall k. (Ord k) => M.Map k (M.Map k Word8) -> [Word8]
|
||||
sanityCheck = map (sum . M.elems) . M.elems
|
||||
|
|
@ -210,7 +203,6 @@ chunk chunkSize = chunk'
|
|||
chunkLength :: Int
|
||||
chunkLength = 4096
|
||||
|
||||
|
||||
-- runPar = id
|
||||
-- parMap = map
|
||||
|
||||
|
|
@ -219,48 +211,35 @@ chunkLength = 4096
|
|||
-- (Integral a, B.FiniteBits a, B.Bits a, Show a, NFData a) =>
|
||||
-- [a] ->
|
||||
-- Compressed a (Word4MapSerialized a)
|
||||
compress :: (NFData a, Show a, Ord a) => [a] -> Compressed a (Word4Map a)
|
||||
compress toCompress' = Compressed twoByteMarkovs . P.runPar . P.parMap
|
||||
(compressChunk)
|
||||
compress :: (NFData a, Show a, Ord a) => [a] -> Compressed a
|
||||
compress toCompress =
|
||||
Compressed relativeFrequencies
|
||||
. P.runPar
|
||||
. P.parMap compressChunk
|
||||
$ unCompressedChunks
|
||||
where
|
||||
|
||||
toCompress = toCompress' -- toBurrowsWheeler toCompress'
|
||||
-- toTraceShow' = (unserealizeWord4Map . serealizeWord4Map $ twoByteMarkovs) == twoByteMarkovs
|
||||
|
||||
-- toTraceShow = (fromIntegral :: Int -> Double) (BS.length . C.encode $ (twoByteMarkovs :: M.Map Word8 [(Word8, Word4)])) / (fromIntegral . BS.length . C.encode . serealizeWord4Map $ (twoByteMarkovs :: M.Map Word8 [(Word8, Word4)]))
|
||||
|
||||
unCompressedChunks = chunk chunkLength toCompress
|
||||
|
||||
twoByteMarkovs = M.map (map (Bi.second discretizeFraction)) . twoByteMarkov $ toCompress
|
||||
relativeFrequencies = map (Bi.second discretizeFraction) . listToRelativeCounts $ toCompress
|
||||
|
||||
rings = M.map toRing twoByteMarkovs
|
||||
rings = toRing relativeFrequencies
|
||||
|
||||
compressChunk toCompress =
|
||||
Chunk
|
||||
{ location = shortestLocation endStats,
|
||||
start = US.head toCompress,
|
||||
length = fromIntegral . L.length $ toCompress
|
||||
}
|
||||
where
|
||||
pairs = zip toCompress . US.tail $ toCompress
|
||||
|
||||
shortestLocation (WordMarkovStats {..}) = serialize $ binarySearch location (location + size)
|
||||
-- shortestLocation (WordMarkovStats {..})
|
||||
-- -- | simplestBetweenAnswer /= upperBound = simplestBetweenAnswer
|
||||
-- | otherwise = simplestBetween location $ upperBound - epsilon
|
||||
-- where
|
||||
-- simplestBetweenAnswer = simplestBetween location upperBound
|
||||
|
||||
-- upperBound = location + size
|
||||
endStats = pyramidFold addWordMarkovStats . map statsFor $ toCompress
|
||||
|
||||
-- -- | almost entirely arbitrary, picked because it's gaurenteed to be smaller than size
|
||||
-- -- | and should make for a fairly quick subtraction
|
||||
-- epsilon = 1 % (denominator size)
|
||||
|
||||
endStats = pyramidFold addWordMarkovStats . map statsFor $ pairs
|
||||
|
||||
statsFor (x0, x1) = (rings M.! x0) M.! x1
|
||||
statsFor = (rings M.!)
|
||||
|
||||
addWordMarkovStats
|
||||
(WordMarkovStats {location = prevLoc, size = prevSize})
|
||||
|
|
@ -270,7 +249,6 @@ compress toCompress' = Compressed twoByteMarkovs . P.runPar . P.parMap
|
|||
size = prevSize * nextSize
|
||||
}
|
||||
|
||||
|
||||
pyramidFold :: (a -> a -> a) -> [a] -> a
|
||||
pyramidFold f = pyramid
|
||||
where
|
||||
|
|
@ -286,7 +264,7 @@ simplestBetween :: Rational -> Rational -> Rational
|
|||
simplestBetween x y
|
||||
| x == y = x
|
||||
| x > 0 = simplestBetween' n d n' d'
|
||||
-- | x > 0 = simplestBetween' n d n' d'
|
||||
-- \| x > 0 = simplestBetween' n d n' d'
|
||||
| otherwise = 0 % 1
|
||||
where
|
||||
n = numerator x
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ import Options.Generic qualified as O
|
|||
import System.Environment qualified as SE
|
||||
import Data.HuffmanTree
|
||||
import Data.Word4 (CompressWord4List(xs))
|
||||
import Compress.BurrowsWheeler (toBurrowsWheeler, toCounts)
|
||||
import Codec.Winery qualified as C
|
||||
|
||||
decompress
|
||||
:: forall a
|
||||
|
|
@ -85,13 +85,13 @@ compress dividedByteString =
|
|||
treeDirMap :: M.Map a [TreeDir]
|
||||
treeDirMap = My.maybe M.empty findTreeDirections mergedHuffmanTrees
|
||||
|
||||
compressWithBurrowsWheeler :: (Integral b, Integral a, FiniteBits a, Bounded b, Bounded a) => ByteString -> Maybe (BurrowsWheelerCompressed a b)
|
||||
compressWithBurrowsWheeler bs
|
||||
= liftA2 BurrowsWheelerCompressed
|
||||
(compress . map fst $ withCounts)
|
||||
(compress . map snd $ withCounts)
|
||||
where
|
||||
withCounts = toCounts . toBurrowsWheeler . toWordsList $ bs
|
||||
-- compressWithBurrowsWheeler :: (Integral b, Integral a, FiniteBits a, Bounded b, Bounded a) => ByteString -> Maybe (BurrowsWheelerCompressed a b)
|
||||
-- compressWithBurrowsWheeler bs
|
||||
-- = liftA2 BurrowsWheelerCompressed
|
||||
-- (compress . map fst $ withCounts)
|
||||
-- (compress . map snd $ withCounts)
|
||||
-- where
|
||||
-- withCounts = toCounts . toBurrowsWheeler . toWordsList $ bs
|
||||
|
||||
-- testCompression
|
||||
-- :: forall a
|
||||
|
|
@ -136,18 +136,18 @@ divideByteString n bs = x : divideByteString n xs
|
|||
(x, xs) = BS.splitAt n bs
|
||||
|
||||
|
||||
compressionRatioFor
|
||||
:: forall a b
|
||||
. (Integral a, B.FiniteBits a, B.Bits a, Ord a, C.Serialise a, Bounded a,
|
||||
Integral b, Ord b, C.Serialise b, Bounded b)
|
||||
=> D.Proxy a
|
||||
-> D.Proxy b
|
||||
-> BS.ByteString
|
||||
-> Double
|
||||
compressionRatioFor _ _ bs =
|
||||
(/ (fromIntegral . BS.length $ bs))
|
||||
. fromIntegral
|
||||
. BS.length
|
||||
. C.serialise
|
||||
. (compressWithBurrowsWheeler :: ByteString -> Maybe (BurrowsWheelerCompressed a b))
|
||||
$ bs
|
||||
-- compressionRatioFor
|
||||
-- :: forall a b
|
||||
-- . (Integral a, B.FiniteBits a, B.Bits a, Ord a, C.Serialise a, Bounded a,
|
||||
-- Integral b, Ord b, C.Serialise b, Bounded b)
|
||||
-- => D.Proxy a
|
||||
-- -> D.Proxy b
|
||||
-- -> BS.ByteString
|
||||
-- -> Double
|
||||
-- compressionRatioFor _ _ bs =
|
||||
-- (/ (fromIntegral . BS.length $ bs))
|
||||
-- . fromIntegral
|
||||
-- . BS.length
|
||||
-- . C.serialise
|
||||
-- . (compressWithBurrowsWheeler :: ByteString -> Maybe (BurrowsWheelerCompressed a b))
|
||||
-- $ bs
|
||||
|
|
|
|||
|
|
@ -1,8 +0,0 @@
|
|||
module Compress.WordMarkovStats where
|
||||
|
||||
|
||||
data WordMarkovStats = WordMarkovStats
|
||||
{ location :: Ratio Integer,
|
||||
size :: Ratio Integer
|
||||
}
|
||||
deriving (Show, Eq, Ord, Generic)
|
||||
50
test/Test.hs
50
test/Test.hs
|
|
@ -1,7 +1,5 @@
|
|||
import Compress.Arithmetic (twoByteMarkov)
|
||||
import Compress.Arithmetic qualified as A
|
||||
import Compress.LengthDistancePairs as LDP
|
||||
import Compress.WordMarkovStats as WM
|
||||
import Data.Bifunctor qualified as Bi
|
||||
import Data.ByteString qualified as BS
|
||||
import Data.Dirs as D
|
||||
|
|
@ -28,24 +26,24 @@ tests toCompress =
|
|||
"falsify"
|
||||
[ testGroup
|
||||
"Arithmetic"
|
||||
[ testCase "works" $
|
||||
A.twoByteMarkov ([0, 1, 0, 1, 0, 1, 0, 1, 0, 9] :: [Word8])
|
||||
@?= M.fromList [(0, [(1, 0.8), (9, 0.2)]), (1, [(0, 1)])],
|
||||
testCase "relativeCounts works as expected with one param" $
|
||||
A.relativeCounts [(0, 30)]
|
||||
@?= [(0, 1)],
|
||||
testCase "relativeCounts works as expected with lots of params" $
|
||||
A.relativeCounts [(0, 30), (1, 20)]
|
||||
@?= [(0, 30 % 50), (1, 20 % 50)],
|
||||
testCase "toRing locs all less than 1" $
|
||||
assertBool "larger than one" $
|
||||
all (all ((<= 1) . (.location))) toCompressRing,
|
||||
testCase "toRing sizes all add up to 1" $
|
||||
assertBool "larger than 1" $
|
||||
all ((== 1) . sum . map (.size)) toCompressRing,
|
||||
testCase "toRing gives no zero sizes" $
|
||||
assertBool "== 0" $
|
||||
all (all ((/= 0) . (.size))) toCompressRing,
|
||||
[ -- testCase "works" $
|
||||
-- A.twoByteMarkov ([0, 1, 0, 1, 0, 1, 0, 1, 0, 9] :: [Word8])
|
||||
-- @?= M.fromList [(0, [(1, 0.8), (9, 0.2)]), (1, [(0, 1)])],
|
||||
-- testCase "relativeCounts works as expected with one param" $
|
||||
-- A.relativeCounts [(0, 30)]
|
||||
-- @?= [(0, 1)],
|
||||
-- testCase "relativeCounts works as expected with lots of params" $
|
||||
-- A.relativeCounts [(0, 30), (1, 20)]
|
||||
-- @?= [(0, 30 % 50), (1, 20 % 50)],
|
||||
-- testCase "toRing locs all less than 1" $
|
||||
-- assertBool "larger than one" $
|
||||
-- all (all ((<= 1) . (.location))) toCompressRing,
|
||||
-- testCase "toRing sizes all add up to 1" $
|
||||
-- assertBool "larger than 1" $
|
||||
-- all ((== 1) . sum . map (.size)) toCompressRing,
|
||||
-- testCase "toRing gives no zero sizes" $
|
||||
-- assertBool "== 0" $
|
||||
-- all (all ((/= 0) . (.size))) toCompressRing,
|
||||
F.testProperty "binary search" propBinarySearchWithinBounds,
|
||||
F.testProperty "compress and decompress isomorphism" (propCompressDecompressIsomorphism (A.decompress . A.compress))
|
||||
]
|
||||
|
|
@ -55,12 +53,12 @@ tests toCompress =
|
|||
-- ]
|
||||
]
|
||||
where
|
||||
toCompressRing =
|
||||
map (M.elems . A.toRing . map (Bi.second (A.discretizeFraction :: Rational -> Word8)))
|
||||
. M.elems
|
||||
. A.twoByteMarkov
|
||||
. BS.unpack
|
||||
$ toCompress
|
||||
-- toCompressRing =
|
||||
-- map (M.elems . A.toRing . map (Bi.second (A.discretizeFraction :: Rational -> Word8)))
|
||||
-- . M.elems
|
||||
-- . A.twoByteMarkov
|
||||
-- . BS.unpack
|
||||
-- $ toCompress
|
||||
|
||||
wordMaxBound :: Integer
|
||||
wordMaxBound = fromIntegral (maxBound :: Word)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue