swap to word4 instead of word8 sizes

This isn't beter yet, but I have a hunch it's to to the Maybe in
the compressed word4 list type, which I can get rid of
This commit is contained in:
Jack Wines 2025-01-08 19:45:34 -08:00
parent 46749895bc
commit 2123636291
Signed by: Jack
SSH key fingerprint: SHA256:AaP2Hr/e3mEjeY+s9XJmQqAesqEms8ENRhwRkpO0WUk
9 changed files with 400 additions and 148 deletions

View file

@ -1,4 +1,4 @@
cabal-version: 3.0
cabal-version: 3.4
name: compress
version: 0.1.0.0
category: Web
@ -41,21 +41,28 @@ executable compress
UndecidableInstances,
hs-source-dirs:
src
mixins:
base hiding (Prelude),
relude (Relude as Prelude),
relude
build-depends:
base,
basement,
bitvec,
bytestring,
cereal,
containers,
-- accelerate,
-- containers-accelerate,
leancheck,
monad-par,
monad-par-extras,
nonempty-containers,
optparse-generic,
relude,
text,
uuid,
optparse-generic,
vector,
nonempty-containers,
witch,
monad-par,
monad-par-extras
witch
default-language:
GHC2021
other-modules:
@ -65,7 +72,7 @@ executable compress
Compress.PrefixTree
Data.HuffmanTree
Compress.Arithmetic
Data.WordyMap
Data.Word4
ghc-options:
-threaded
-fprof-auto

105
flake.lock generated
View file

@ -1,5 +1,21 @@
{
"nodes": {
"accelerate": {
"flake": false,
"locked": {
"lastModified": 1732969010,
"narHash": "sha256-Qrmtrgij2GbklBXUK42Pt6Db8WiGijA5sz5oC5AR72c=",
"owner": "AccelerateHS",
"repo": "accelerate",
"rev": "02da6161ef143a9886c8bce542cd96029c4f527a",
"type": "github"
},
"original": {
"owner": "AccelerateHS",
"repo": "accelerate",
"type": "github"
}
},
"flake-parts": {
"inputs": {
"nixpkgs-lib": "nixpkgs-lib"
@ -18,13 +34,28 @@
"type": "github"
}
},
"flake-root": {
"locked": {
"lastModified": 1723604017,
"narHash": "sha256-rBtQ8gg+Dn4Sx/s+pvjdq3CB2wQNzx9XGFq/JVGCB6k=",
"owner": "srid",
"repo": "flake-root",
"rev": "b759a56851e10cb13f6b8e5698af7b59c44be26e",
"type": "github"
},
"original": {
"owner": "srid",
"repo": "flake-root",
"type": "github"
}
},
"haskell-flake": {
"locked": {
"lastModified": 1734464164,
"narHash": "sha256-5JCCyrgy7IMnipyYMQzIAXncGt2XVlW1aK71A+FTXDs=",
"lastModified": 1734984991,
"narHash": "sha256-oUYtRBD3Yhw2jvKYo0lfd82fgEQQbFoiJcHO923gmOc=",
"owner": "srid",
"repo": "haskell-flake",
"rev": "e280b39efdd72b6a5bdaa982b67f150c819be642",
"rev": "daf00052906bdd977e57a07f7048437214232e87",
"type": "github"
},
"original": {
@ -33,13 +64,64 @@
"type": "github"
}
},
"llvm-hs": {
"flake": false,
"locked": {
"lastModified": 1665495332,
"narHash": "sha256-JKrpUmHJ1nsNiCoHhV5FCcdQGlNFfD37Oiu5kSmghfM=",
"owner": "llvm-hs",
"repo": "llvm-hs",
"rev": "423220bffac4990d019fc088c46c5f25310d5a33",
"type": "github"
},
"original": {
"owner": "llvm-hs",
"ref": "llvm-12",
"repo": "llvm-hs",
"type": "github"
}
},
"llvm-hs-pure": {
"flake": false,
"locked": {
"dir": "llvm-hs-pure",
"lastModified": 1665495332,
"narHash": "sha256-JKrpUmHJ1nsNiCoHhV5FCcdQGlNFfD37Oiu5kSmghfM=",
"owner": "llvm-hs",
"repo": "llvm-hs",
"rev": "423220bffac4990d019fc088c46c5f25310d5a33",
"type": "github"
},
"original": {
"dir": "llvm-hs-pure",
"owner": "llvm-hs",
"ref": "llvm-12",
"repo": "llvm-hs",
"type": "github"
}
},
"mission-control": {
"locked": {
"lastModified": 1733438716,
"narHash": "sha256-1tt43rwHk0N5fwEhbpsHWO4nBVFCQN0w1KM427DNycM=",
"owner": "Platonic-Systems",
"repo": "mission-control",
"rev": "65d04c4ab9db076eff09824d2936a5c215c21f36",
"type": "github"
},
"original": {
"owner": "Platonic-Systems",
"repo": "mission-control",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1734424634,
"narHash": "sha256-cHar1vqHOOyC7f1+tVycPoWTfKIaqkoe1Q6TnKzuti4=",
"lastModified": 1734649271,
"narHash": "sha256-4EVBRhOjMDuGtMaofAIqzJbg4Ql7Ai0PSeuVZTHjyKQ=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "d3c42f187194c26d9f0309a8ecc469d6c878ce33",
"rev": "d70bd19e0a38ad4790d3913bf08fcbfc9eeca507",
"type": "github"
},
"original": {
@ -63,8 +145,13 @@
},
"root": {
"inputs": {
"accelerate": "accelerate",
"flake-parts": "flake-parts",
"flake-root": "flake-root",
"haskell-flake": "haskell-flake",
"llvm-hs": "llvm-hs",
"llvm-hs-pure": "llvm-hs-pure",
"mission-control": "mission-control",
"nixpkgs": "nixpkgs",
"systems": "systems",
"treefmt-nix": "treefmt-nix"
@ -92,11 +179,11 @@
]
},
"locked": {
"lastModified": 1733761991,
"narHash": "sha256-s4DalCDepD22jtKL5Nw6f4LP5UwoMcPzPZgHWjAfqbQ=",
"lastModified": 1734982074,
"narHash": "sha256-N7M37KP7cHWoXicuE536GrVvU8nMDT/gpI1kja2hkdg=",
"owner": "numtide",
"repo": "treefmt-nix",
"rev": "0ce9d149d99bc383d1f2d85f31f6ebd146e46085",
"rev": "e41e948cf097cbf96ba4dff47a30ea6891af9f33",
"type": "github"
},
"original": {

View file

@ -5,16 +5,32 @@
systems.url = "github:nix-systems/default";
flake-parts.url = "github:hercules-ci/flake-parts";
haskell-flake.url = "github:srid/haskell-flake";
flake-root.url = "github:srid/flake-root";
treefmt-nix.url = "github:numtide/treefmt-nix";
treefmt-nix.inputs.nixpkgs.follows = "nixpkgs";
accelerate = {
url = "github:AccelerateHS/accelerate?submodules=1";
flake = false;
};
llvm-hs = {
url = "github:llvm-hs/llvm-hs?ref=llvm-12";
flake = false;
};
llvm-hs-pure = {
url = "github:llvm-hs/llvm-hs?dir=llvm-hs-pure&ref=llvm-12";
flake = false;
};
mission-control.url = "github:Platonic-Systems/mission-control";
};
outputs = inputs:
inputs.flake-parts.lib.mkFlake { inherit inputs; } {
systems = import inputs.systems;
imports = [
inputs.flake-root.flakeModule
inputs.haskell-flake.flakeModule
inputs.treefmt-nix.flakeModule
inputs.mission-control.flakeModule
];
perSystem = { self', system, lib, config, pkgs, ... }: {
# Our only Haskell project. You can have multiple projects, but this template
@ -30,14 +46,31 @@
# (Local packages are added automatically)
# https://github.com/lehins/hip.git
# hip.source = inputs.hip + "/hip";
accelerate.source = inputs.accelerate;
# llvm-hs.source = inputs.llvm-hs;
llvm-hs-pure.source = inputs.llvm-hs-pure;
};
# Add your package overrides here
settings = {
uuid ={
uuid.jailbreak = true;
accelerate =
{
jailbreak = true;
broken = false;
};
llvm-hs =
{
jailbreak = true;
broken = false;
};
llvm-hs-pure =
{
jailbreak = true;
broken = false;
};
};
# hlint = {
# jailbreak = true;
# };
@ -45,7 +78,6 @@
# broken = false;
# jailbreak = true;
# };
};
# Development shell configuration
devShell = {
@ -74,12 +106,35 @@
packages.default = self'.packages.compress;
apps.default = self'.apps.compress;
mission-control.scripts = {
hoogle = {
description = "Start Hoogle server for project dependencies";
exec = ''
echo http://127.0.0.1:8888;
hoogle serve -p 8888 --local;
'';
category = "Dev Tools";
};
haddocks = {
description = "make docs & serve them";
exec = ''
echo http://127.0.0.1:8887;
cabal haddock-project --executables --internal --hoogle || true;
python3 -m http.server -d haddocks 8887;
'';
category = "Dev Tools";
};
};
# Default shell.
devShells.default = pkgs.mkShell {
name = "haskell-template";
meta.description = "Haskell development environment";
# See https://zero-to-flakes.com/haskell-flake/devshell#composing-devshells
inputsFrom = [
config.mission-control.devShell
config.haskellProjects.default.outputs.devShell
config.treefmt.build.devShell
];

View file

@ -1,13 +1,15 @@
-- | https://en.wikipedia.org/wiki/Arithmetic_coding
module Compress.Arithmetic where
module Compress.Arithmetic (Compressed, compress, decompress, Word4MapSerialized) where
import Basement.Bits qualified as B
import Basement.Compat.Base (Word16, Word32, Word64, Word8)
import Basement.Compat.Bifunctor qualified as Bi
import Data.Bits qualified as B
import Data.Word
import Data.Bifunctor qualified as Bi
import Control.Arrow qualified as Ar
import Control.Monad.Par (NFData)
import Control.Monad.Par qualified as P
import Data.ByteString qualified as By
import Data.FiniteBit qualified as Fi
import Data.Foldable as F
import qualified Data.Foldable as F
import Data.List (genericLength)
import Data.List qualified as L
import Data.Map.Strict qualified as M
@ -17,29 +19,34 @@ import Data.Ratio
import Data.Serialize qualified as C
import GHC.Generics (Generic)
import GHC.Natural (Natural)
import qualified Control.Monad.Par as P
import Control.Monad.Par (NFData)
import qualified Relude.Unsafe as US
import Data.Word4 (Word4, CompressWord4List, toCompressedWord4List, fromCompressedWord4List)
import qualified Data.ByteString as BS
-- withPrimeDenums = concatMap (\x -> map (% x) [1 .. (pred x)]) primes
-- shortestNumberBetween :: Ratio Integer -> Ratio Integer -> Maybe (Ratio Integer)
-- shortestNumberBetween xMin xMax = find (\candidate -> candidate >= xMin && candidate < xMax) withPrimeDenums
data WordMarkovStats = WordMarkovStats
{ location :: Ratio Integer,
size :: Ratio Integer
}
deriving (Show, Eq, Ord, Generic, C.Serialize)
loc' (WordMarkovStats {..}) = location
toRing :: forall a k. (Ord k, Integral a, Bounded a) => [(k, a)] -> M.Map k WordMarkovStats
toRing xs = M.fromList . zip (map fst xs) $ wordMarkovStats
where
sizes = map ((% maxBound') . fromIntegral . snd) xs
wordMarkovStats = zipWith WordMarkovStats (L.scanl' (+) 0 withBumpedZeroSized) withBumpedZeroSized
asFracsOfTotalSum = map fst . tail . L.scanl' f (0, 0) $ sizes
asFracsOfTotalSum = map fst . US.tail . L.scanl' f (0, 0) $ sizes
maxBound' :: Integer
maxBound' = fromIntegral (maxBound :: a)
@ -62,7 +69,7 @@ twoByteMarkov xs =
. M.fromListWith (M.unionWith (+))
. zip xs
. map (`M.singleton` (1 :: Integer))
. tail
. US.tail
$ xs
where
-- optimization, changes fractions so they represent the fraction
@ -88,34 +95,48 @@ twoByteMarkov xs =
maxBound' :: Ratio Integer
maxBound' = fromIntegral (maxBound :: b)
data Compressed a = Compressed
{ markovs :: M.Map a [(a, Word8)],
type Word4MapSerialized a = M.Map a ([a], CompressWord4List)
type Word4Map a = M.Map a [(a, Word4)]
data Compressed a mapImplementation = Compressed
{ markovs :: mapImplementation,
chunks :: [Chunk a]
}
deriving (Eq, Ord, Show, Generic) -- , C.Serialize, P.NFData)
deriving instance (Ord a, C.Serialize a) => C.Serialize (Compressed a (Word4MapSerialized a))
data Chunk a = Chunk
{ location :: Ratio Integer,
start :: a,
length :: Int
}
deriving (Eq, Ord, Show, Generic, C.Serialize, P.NFData)
data Chunk a = Chunk {
location :: Ratio Integer,
start :: a,
length :: Int
} deriving (Eq, Ord, Show, Generic, C.Serialize, P.NFData)
unserealizeWord4Map :: Word4MapSerialized a -> Word4Map a
unserealizeWord4Map = M.map (uncurry zip . Bi.second ((++ [maxBound]) . fromCompressedWord4List))
serealizeWord4Map :: Word4Map a -> Word4MapSerialized a
serealizeWord4Map = M.map (Bi.second (toCompressedWord4List . L.init) . L.unzip)
decompress ::
forall a.
(Integral a, B.FiniteBitsOps a, B.BitOps a, Show a, NFData a) =>
Compressed a ->
(Integral a, B.FiniteBits a, B.Bits a, Show a, NFData a) =>
Compressed a (Word4MapSerialized a) ->
[a]
decompress (Compressed {..}) = concat . P.runPar . P.parMap decompressChunk $ chunks
where
rings = M.map (M.fromList . map toDecompressionRing . M.toList . toRing) markovs'
rings = M.map (M.fromList . map toDecompressionRing . M.toList . toRing) markovs
markovs' = unserealizeWord4Map markovs
toDecompressionRing (key, (WordMarkovStats {..})) = (location, (key, size))
decompressChunk :: Chunk a -> [a]
decompressChunk (Chunk {..}) = map snd . take length . L.iterate' decompress' $ (location, start)
where
decompress' :: (Ratio Integer, a) -> (Ratio Integer, a)
-- decompress' :: (Ratio Integer, a) -> (Ratio Integer, a)
decompress' (!loc, !prev) = ((loc - ansLoc) / newSize, newVal)
where
ansLoc :: Ratio Integer
@ -126,6 +147,7 @@ decompress (Compressed {..}) = concat . P.runPar . P.parMap decompressChunk $ ch
sanityCheck :: forall k. (Ord k) => M.Map k (M.Map k Word8) -> [Word8]
sanityCheck = map (sum . M.elems) . M.elems
chunk :: Int -> [a] -> [[a]]
chunk chunkSize = chunk'
where
chunk' [] = []
@ -133,30 +155,30 @@ chunk chunkSize = chunk'
where
(xs', xs'') = splitAt chunkSize xs
chunkLength :: Int
chunkLength = 8000
compress ::
forall a.
(Integral a, B.FiniteBitsOps a, B.BitOps a, Show a, NFData a) =>
[a] ->
Compressed a
compress toCompress = Compressed twoByteMarkovs . P.runPar . P.parMap compressChunk $ unCompressedChunks
-- compress ::
-- forall a.
-- (Integral a, B.FiniteBits a, B.Bits a, Show a, NFData a) =>
-- [a] ->
-- Compressed a (Word4MapSerialized a)
compress toCompress = traceShow ((fromIntegral :: Int -> Double) (BS.length . C.encode $ (twoByteMarkovs :: M.Map Word8 [(Word8, Word4)])) / (fromIntegral . BS.length . C.encode . serealizeWord4Map $ (twoByteMarkovs :: M.Map Word8 [(Word8, Word4)]))) $ Compressed (serealizeWord4Map twoByteMarkovs) . P.runPar . P.parMap compressChunk $ unCompressedChunks
where
unCompressedChunks = chunk chunkLength toCompress
twoByteMarkovs = twoByteMarkov toCompress
rings = M.map toRing twoByteMarkovs
compressChunk toCompress = Chunk {
location = shortestLocation endStats,
start = head toCompress,
compressChunk toCompress =
Chunk
{ location = shortestLocation endStats,
start = US.head toCompress,
length = L.length toCompress
}
where
pairs = zip toCompress . tail $ toCompress
pairs = zip toCompress . US.tail $ toCompress
shortestLocation (WordMarkovStats {..}) = simplestBetween location (location + size)
@ -172,7 +194,6 @@ compress toCompress = Compressed twoByteMarkovs . P.runPar . P.parMap compressCh
size = prevSize * nextSize
}
pyramidFold :: (a -> a -> a) -> [a] -> a
pyramidFold f = pyramid
where

View file

@ -1,5 +1,7 @@
module Compress.Huffman where
--- TODO: ?? accelarate
import Data.Bifunctor qualified as Bi
import Data.FiniteBit
@ -12,9 +14,9 @@ import Data.Data qualified as D
import Data.Foldable qualified as F
import Data.IntMap.Strict qualified as IM
import qualified Data.Proxy as D
import qualified Basement.From as F
import qualified Witch as F
import Data.Map.Strict qualified as M
import Basement.Bits as B
import Data.Bits as B
import Data.Maybe (fromMaybe)
import Data.Maybe qualified as My
import Data.PQueue qualified as PQ
@ -31,7 +33,7 @@ import Data.HuffmanTree
decompress
:: forall a
. (Ord a, Integral a, B.FiniteBitsOps a)
. (Ord a, Integral a, B.FiniteBits a)
=> (TreeDirs, HuffmanTree a)
-> Maybe BS.ByteString
decompress (TreeDirs treeDirs, tree) = BS.concat . map toByteString <$> decompress' treeDirs
@ -50,7 +52,7 @@ decompress (TreeDirs treeDirs, tree) = BS.concat . map toByteString <$> decompre
compress
:: forall a
. (Ord a, Integral a, B.FiniteBitsOps a, B.BitOps a)
. (Ord a, Integral a, B.FiniteBits a, B.Bits a)
=> BS.ByteString
-> Maybe (TreeDirs, HuffmanTree a)
compress bs =
@ -112,7 +114,7 @@ divideByteString n bs = x : divideByteString n xs
compressionRatioFor
:: forall a
. (Integral a, B.FiniteBitsOps a, B.BitOps a, Ord a, C.Serialize a)
. (Integral a, B.FiniteBits a, B.Bits a, Ord a, C.Serialize a)
=> D.Proxy a
-> BS.ByteString
-> Double

View file

@ -1,7 +1,7 @@
module Compress.PrefixTree where
import Basement.Bits qualified as B
import qualified Basement.From as F
import Data.Bits qualified as B
import qualified Witch as F
import Compress.Huffman qualified as H
import Control.Applicative qualified as A
import Data.Bifunctor qualified as Bi
@ -16,10 +16,10 @@ import Data.Ord qualified as O
import Data.PQueue qualified as PQ
import Debug.Trace qualified as D
import Debug.Trace qualified as T
import Basement.Bits (FiniteBitsOps(numberOfBits))
import GHC.Generics
import qualified Data.Serialize as C
import Data.FiniteBit
import qualified Data.FiniteBit as B
data Tree a = (Ord a) =>
Tree
@ -32,7 +32,7 @@ newtype HuffmanPrefixTree a b = HuffmanPrefixTree
finiteBitTupleUncons ::
forall a b.
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
(Integral a, B.FiniteBits a, B.Bits a, Integral b, B.FiniteBits b, B.Bits b) =>
BS.ByteString ->
Maybe ((a, b), BS.ByteString)
finiteBitTupleUncons bs = case finiteBitUncons bs of
@ -43,7 +43,7 @@ finiteBitTupleUncons bs = case finiteBitUncons bs of
fromByteString ::
forall a b.
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
(Integral a, B.FiniteBits a, B.Bits a, Integral b, B.FiniteBits b, B.Bits b) =>
BS.ByteString ->
[(a, b)]
fromByteString bs = case finiteBitTupleUncons bs of
@ -52,7 +52,7 @@ fromByteString bs = case finiteBitTupleUncons bs of
toHuffmanTree ::
forall a b.
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
(Integral a, B.FiniteBits a, B.Bits a, Integral b, B.FiniteBits b, B.Bits b) =>
M.Map (a, b) Word ->
HuffmanPrefixTree a b
toHuffmanTree =
@ -64,7 +64,7 @@ toHuffmanTree =
decompress ::
forall a .
(Integral a, B.FiniteBitsOps a, B.BitOps a) =>
(Integral a, B.FiniteBits a, B.Bits a) =>
(TreeDirs, HuffmanPrefixTree a a, a)
-> Maybe BS.ByteString
decompress (TreeDirs treeDirs'', HuffmanPrefixTree prefixTree, initial') = BS.concat . map toByteString . (initial' :) <$> decompress' treeDirs'' initial'
@ -77,7 +77,7 @@ decompress (TreeDirs treeDirs'', HuffmanPrefixTree prefixTree, initial') = BS.co
compress ::
forall a b.
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
(Integral a, B.FiniteBits a, B.Bits a, Integral b, B.FiniteBits b, B.Bits b) =>
BS.ByteString -> Maybe (TreeDirs, HuffmanPrefixTree a b, a)
compress bs = (TreeDirs $ concatMap treeDirsFor asFiniteBitPairs, tree, ) <$> initial
where
@ -129,7 +129,7 @@ compress bs = (TreeDirs $ concatMap treeDirsFor asFiniteBitPairs, tree, ) <$> in
nGramCounts ::
forall a b.
(Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) =>
(Integral a, B.FiniteBits a, B.Bits a, Integral b, B.FiniteBits b, B.Bits b) =>
BS.ByteString ->
M.Map (a, b) Word
nGramCounts =
@ -140,20 +140,20 @@ nGramCounts =
. map (BS.take len)
. BS.tails
where
len = (`div` 8) . F.from $ numberOfBits (0 :: a) + numberOfBits (0 :: b)
len = (`div` 8) . F.from $ B.finiteBitSize (0 :: a) + B.finiteBitSize (0 :: b)
empty :: (Ord a) => Tree a
empty = Tree M.empty
singleton :: (Ord a) => a -> Tree a
singleton x = Tree $ M.singleton x empty
singleton x = Tree $ M.singleton x Compress.PrefixTree.empty
fromSingleList :: (Ord a) => [a] -> Tree a
fromSingleList [] = empty
fromSingleList [] = Compress.PrefixTree.empty
fromSingleList (x : xs) = Tree . M.singleton x . fromSingleList $ xs
fromList :: (Ord a) => [[a]] -> Tree a
fromList = F.foldl' merge empty . map fromSingleList
fromList = F.foldl' merge Compress.PrefixTree.empty . map fromSingleList
-- insert :: Ord a => Tree a -> [a] -> Tree a
-- insert (Tree {..}) (x:xs) =

View file

@ -1,40 +1,41 @@
module Data.FiniteBit where
import Data.Bit (cloneToByteString)
import Data.Bit qualified as B
import Data.Bit qualified as BV
import Data.Bits qualified as B
import Data.Bits qualified as BV
import Data.ByteString qualified as BS
import qualified Data.Proxy as D
import qualified Basement.From as F
import qualified Witch as F
import Data.Foldable qualified as F
import Data.Word
import Basement.Bits as B
import Data.Proxy (Proxy)
class SizedBits a where
size :: Proxy a -> Int
numBytesIn :: forall a. (B.FiniteBitsOps a, Integral a) => D.Proxy a -> Int
numBytesIn _ = (`div` 8) . F.from . B.numberOfBits $ (0 :: a)
numBytesIn :: forall a. (B.FiniteBits a, Integral a) => D.Proxy a -> Int
numBytesIn _ = (`div` 8) . F.from . B.finiteBitSize $ (0 :: a)
toWordsList :: forall a. (Integral a, B.FiniteBitsOps a, B.BitOps a) => BS.ByteString -> [a]
toWordsList :: forall a. (Integral a, B.FiniteBits a) => BS.ByteString -> [a]
toWordsList bs = case finiteBitUncons bs of
Nothing -> []
(Just (x, xs)) -> x : toWordsList xs
toByteString :: forall a. (Integral a, B.FiniteBitsOps a) => a -> BS.ByteString
toByteString :: forall a. (Integral a, B.FiniteBits a) => a -> BS.ByteString
toByteString n = BS.pack . take numBytes . map (fromIntegral . (n `B.rotateL`)) $ [8, 16 ..]
where
numBytes = numBytesIn (D.Proxy :: D.Proxy a)
finiteBitUncons
:: forall a
. (Integral a, B.FiniteBitsOps a, B.BitOps a)
. (Integral a, B.FiniteBits a)
=> BS.ByteString
-> Maybe (a, BS.ByteString)
finiteBitUncons [] = Nothing
finiteBitUncons bs =
Just
. (,rest)
. F.foldl' (.|.) 0
. F.foldl' (B..|.) 0
. zipWith (flip B.rotateR) [8, 16 ..]
. map (fromIntegral :: Word8 -> a)
. BS.unpack

80
src/Data/Word4.hs Normal file
View file

@ -0,0 +1,80 @@
module Data.Word4 where
import Data.Bits
import Data.Word
import qualified Data.Serialize as C
import qualified Data.Map as M
-- only use lower 4 bits
newtype Word4 = Word4 {
inner :: Word8
} deriving (Eq, Ord, Show, Enum, Generic, C.Serialize)
map2 :: (Word8 -> Word8 -> Word8) -> Word4 -> Word4 -> Word4
map2 f (Word4 a) (Word4 b) = Word4 $ f a b
data CompressWord4List = CompressWord4List
{ xs :: [Word8],
last :: Maybe Word8 -- in lower 4 bits
} deriving (Eq, Ord, Generic, Show, C.Serialize, NFData)
bitwiseAnd a b = getAnd $ And a <> And b
bitwiseOr a b = getIor $ Ior a <> Ior b
toCompressedWord4List [] = CompressWord4List [] Nothing
toCompressedWord4List [Word4 x] = CompressWord4List [] (Just x)
toCompressedWord4List ((Word4 x) : (Word4 x') : xs) =
CompressWord4List
{ xs = headByte : xs',
last = last'
}
where
headByte = bitwiseOr (bitwiseAnd 0xf0 (x .<<. 4)) (bitwiseAnd 0x0f x')
(CompressWord4List xs' last') = toCompressedWord4List xs
fromCompressedWord4List :: CompressWord4List -> [Word4]
fromCompressedWord4List (CompressWord4List [] Nothing) = []
fromCompressedWord4List (CompressWord4List [] (Just a)) = [word4 a]
fromCompressedWord4List (CompressWord4List {xs = (x : xs), ..}) =
Word4 ((bitwiseAnd 0xf0 x) .>>. 4)
: word4 x
: fromCompressedWord4List (CompressWord4List xs last)
-- instance Show Word4 where
-- show :: Word4 -> String
-- show (Word4 a) = show . (.>>. 4) $ a
instance Bounded Word4 where
minBound = word4 0
maxBound = word4 0xf
instance Real Word4 where
toRational = toRational . inner
instance Integral Word4 where
quot = map2 quot `on` clean
rem = map2 rem `on` clean
div = map2 div `on` clean
mod = map2 mod `on` clean
quotRem :: Word4 -> Word4 -> (Word4, Word4)
quotRem (Word4 a) (Word4 b) = (word4 a', word4 b')
where
(a', b') = quotRem a b
divMod (Word4 a) (Word4 b) = (word4 a', word4 b')
where
(a', b') = divMod a b
toInteger = toInteger . inner . clean
clean (Word4 a) = Word4 . bitwiseAnd 0xf $ a
word4 = clean . Word4
instance Num Word4 where
(+) = map2 (+) `on` clean
(*) = map2 (*) `on` clean
abs = id
negate = id
signum = const 1
fromInteger = clean . Word4 . fromInteger

View file

@ -1,6 +1,5 @@
module Main where
import Basement.Bits qualified as B
import Compress.Huffman
import Compress.Huffman qualified as HT
import Compress.PrefixTree (HuffmanPrefixTree (HuffmanPrefixTree))
@ -31,7 +30,7 @@ data CLIOpts = CLIOpts
applyCompressionOptions ::
forall a.
(Integral a, B.BitOps a, B.FiniteBitsOps a, Ord a, C.Serialize a) =>
(Integral a, B.Bits a, B.FiniteBits a, Ord a, C.Serialize a) =>
P.Proxy a ->
CLIOpts ->
BS.ByteString ->
@ -53,7 +52,7 @@ main = do
-- cliOpts :: CLIOpts <- O.getRecord "compress/decompress & strategy"
f <- BS.getContents
let fAsWords = (FB.toWordsList :: BS.ByteString -> [ Word8 ]) $ f
let compressedUnencoded :: A.Compressed Word8 = (A.compress fAsWords)
let compressedUnencoded :: (A.Compressed Word8 (A.Word4MapSerialized Word8)) = (A.compress fAsWords)
let compressed = C.encode compressedUnencoded
print ("compression ratio", (fromIntegral . BS.length $ compressed) / (fromIntegral . BS.length $ f))
print ("works?", (A.decompress compressedUnencoded) == fAsWords)