From 212363629149cee2eed0bb32878d8cb9f4e1b9eb Mon Sep 17 00:00:00 2001 From: Jack Wines Date: Wed, 8 Jan 2025 19:45:34 -0800 Subject: [PATCH] swap to word4 instead of word8 sizes This isn't beter yet, but I have a hunch it's to to the Maybe in the compressed word4 list type, which I can get rid of --- compress.cabal | 23 +++-- flake.lock | 105 ++++++++++++++++++++-- flake.nix | 179 ++++++++++++++++++++++++------------- src/Compress/Arithmetic.hs | 95 ++++++++++++-------- src/Compress/Huffman.hs | 12 +-- src/Compress/PrefixTree.hs | 26 +++--- src/Data/FiniteBit.hs | 23 ++--- src/Data/Word4.hs | 80 +++++++++++++++++ src/Main.hs | 5 +- 9 files changed, 400 insertions(+), 148 deletions(-) create mode 100644 src/Data/Word4.hs diff --git a/compress.cabal b/compress.cabal index 2b7cb2c..8d8040b 100644 --- a/compress.cabal +++ b/compress.cabal @@ -1,4 +1,4 @@ -cabal-version: 3.0 +cabal-version: 3.4 name: compress version: 0.1.0.0 category: Web @@ -41,21 +41,28 @@ executable compress UndecidableInstances, hs-source-dirs: src + mixins: + base hiding (Prelude), + relude (Relude as Prelude), + relude build-depends: base, - basement, bitvec, bytestring, cereal, containers, + -- accelerate, + -- containers-accelerate, + leancheck, + monad-par, + monad-par-extras, + nonempty-containers, + optparse-generic, + relude, text, uuid, - optparse-generic, vector, - nonempty-containers, - witch, - monad-par, - monad-par-extras + witch default-language: GHC2021 other-modules: @@ -65,7 +72,7 @@ executable compress Compress.PrefixTree Data.HuffmanTree Compress.Arithmetic - Data.WordyMap + Data.Word4 ghc-options: -threaded -fprof-auto diff --git a/flake.lock b/flake.lock index ebcd77f..c8662ec 100644 --- a/flake.lock +++ b/flake.lock @@ -1,5 +1,21 @@ { "nodes": { + "accelerate": { + "flake": false, + "locked": { + "lastModified": 1732969010, + "narHash": "sha256-Qrmtrgij2GbklBXUK42Pt6Db8WiGijA5sz5oC5AR72c=", + "owner": "AccelerateHS", + "repo": "accelerate", + "rev": "02da6161ef143a9886c8bce542cd96029c4f527a", + "type": "github" + }, + "original": { + "owner": "AccelerateHS", + "repo": "accelerate", + "type": "github" + } + }, "flake-parts": { "inputs": { "nixpkgs-lib": "nixpkgs-lib" @@ -18,13 +34,28 @@ "type": "github" } }, + "flake-root": { + "locked": { + "lastModified": 1723604017, + "narHash": "sha256-rBtQ8gg+Dn4Sx/s+pvjdq3CB2wQNzx9XGFq/JVGCB6k=", + "owner": "srid", + "repo": "flake-root", + "rev": "b759a56851e10cb13f6b8e5698af7b59c44be26e", + "type": "github" + }, + "original": { + "owner": "srid", + "repo": "flake-root", + "type": "github" + } + }, "haskell-flake": { "locked": { - "lastModified": 1734464164, - "narHash": "sha256-5JCCyrgy7IMnipyYMQzIAXncGt2XVlW1aK71A+FTXDs=", + "lastModified": 1734984991, + "narHash": "sha256-oUYtRBD3Yhw2jvKYo0lfd82fgEQQbFoiJcHO923gmOc=", "owner": "srid", "repo": "haskell-flake", - "rev": "e280b39efdd72b6a5bdaa982b67f150c819be642", + "rev": "daf00052906bdd977e57a07f7048437214232e87", "type": "github" }, "original": { @@ -33,13 +64,64 @@ "type": "github" } }, + "llvm-hs": { + "flake": false, + "locked": { + "lastModified": 1665495332, + "narHash": "sha256-JKrpUmHJ1nsNiCoHhV5FCcdQGlNFfD37Oiu5kSmghfM=", + "owner": "llvm-hs", + "repo": "llvm-hs", + "rev": "423220bffac4990d019fc088c46c5f25310d5a33", + "type": "github" + }, + "original": { + "owner": "llvm-hs", + "ref": "llvm-12", + "repo": "llvm-hs", + "type": "github" + } + }, + "llvm-hs-pure": { + "flake": false, + "locked": { + "dir": "llvm-hs-pure", + "lastModified": 1665495332, + "narHash": "sha256-JKrpUmHJ1nsNiCoHhV5FCcdQGlNFfD37Oiu5kSmghfM=", + "owner": "llvm-hs", + "repo": "llvm-hs", + "rev": "423220bffac4990d019fc088c46c5f25310d5a33", + "type": "github" + }, + "original": { + "dir": "llvm-hs-pure", + "owner": "llvm-hs", + "ref": "llvm-12", + "repo": "llvm-hs", + "type": "github" + } + }, + "mission-control": { + "locked": { + "lastModified": 1733438716, + "narHash": "sha256-1tt43rwHk0N5fwEhbpsHWO4nBVFCQN0w1KM427DNycM=", + "owner": "Platonic-Systems", + "repo": "mission-control", + "rev": "65d04c4ab9db076eff09824d2936a5c215c21f36", + "type": "github" + }, + "original": { + "owner": "Platonic-Systems", + "repo": "mission-control", + "type": "github" + } + }, "nixpkgs": { "locked": { - "lastModified": 1734424634, - "narHash": "sha256-cHar1vqHOOyC7f1+tVycPoWTfKIaqkoe1Q6TnKzuti4=", + "lastModified": 1734649271, + "narHash": "sha256-4EVBRhOjMDuGtMaofAIqzJbg4Ql7Ai0PSeuVZTHjyKQ=", "owner": "nixos", "repo": "nixpkgs", - "rev": "d3c42f187194c26d9f0309a8ecc469d6c878ce33", + "rev": "d70bd19e0a38ad4790d3913bf08fcbfc9eeca507", "type": "github" }, "original": { @@ -63,8 +145,13 @@ }, "root": { "inputs": { + "accelerate": "accelerate", "flake-parts": "flake-parts", + "flake-root": "flake-root", "haskell-flake": "haskell-flake", + "llvm-hs": "llvm-hs", + "llvm-hs-pure": "llvm-hs-pure", + "mission-control": "mission-control", "nixpkgs": "nixpkgs", "systems": "systems", "treefmt-nix": "treefmt-nix" @@ -92,11 +179,11 @@ ] }, "locked": { - "lastModified": 1733761991, - "narHash": "sha256-s4DalCDepD22jtKL5Nw6f4LP5UwoMcPzPZgHWjAfqbQ=", + "lastModified": 1734982074, + "narHash": "sha256-N7M37KP7cHWoXicuE536GrVvU8nMDT/gpI1kja2hkdg=", "owner": "numtide", "repo": "treefmt-nix", - "rev": "0ce9d149d99bc383d1f2d85f31f6ebd146e46085", + "rev": "e41e948cf097cbf96ba4dff47a30ea6891af9f33", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index 859754e..221bbc8 100644 --- a/flake.nix +++ b/flake.nix @@ -5,87 +5,142 @@ systems.url = "github:nix-systems/default"; flake-parts.url = "github:hercules-ci/flake-parts"; haskell-flake.url = "github:srid/haskell-flake"; + flake-root.url = "github:srid/flake-root"; treefmt-nix.url = "github:numtide/treefmt-nix"; treefmt-nix.inputs.nixpkgs.follows = "nixpkgs"; + accelerate = { + url = "github:AccelerateHS/accelerate?submodules=1"; + flake = false; + }; + llvm-hs = { + url = "github:llvm-hs/llvm-hs?ref=llvm-12"; + flake = false; + }; + llvm-hs-pure = { + url = "github:llvm-hs/llvm-hs?dir=llvm-hs-pure&ref=llvm-12"; + flake = false; + }; + mission-control.url = "github:Platonic-Systems/mission-control"; }; outputs = inputs: - inputs.flake-parts.lib.mkFlake { inherit inputs; } { - systems = import inputs.systems; - imports = [ - inputs.haskell-flake.flakeModule - inputs.treefmt-nix.flakeModule - ]; - perSystem = { self', system, lib, config, pkgs, ... }: { - # Our only Haskell project. You can have multiple projects, but this template - # has only one. - # See https://github.com/srid/haskell-flake/blob/master/example/flake.nix - haskellProjects.default = { - # The base package set (this value is the default) - basePackages = pkgs.haskell.packages.ghc96; + inputs.flake-parts.lib.mkFlake { inherit inputs; } { + systems = import inputs.systems; + imports = [ + inputs.flake-root.flakeModule + inputs.haskell-flake.flakeModule + inputs.treefmt-nix.flakeModule + inputs.mission-control.flakeModule + ]; + perSystem = { self', system, lib, config, pkgs, ... }: { + # Our only Haskell project. You can have multiple projects, but this template + # has only one. + # See https://github.com/srid/haskell-flake/blob/master/example/flake.nix + haskellProjects.default = { + # The base package set (this value is the default) + basePackages = pkgs.haskell.packages.ghc96; - # Packages to add on top of `basePackages` - packages = { - # Add source or Hackage overrides here - # (Local packages are added automatically) - # https://github.com/lehins/hip.git - # hip.source = inputs.hip + "/hip"; - }; + # Packages to add on top of `basePackages` + packages = { + # Add source or Hackage overrides here + # (Local packages are added automatically) + # https://github.com/lehins/hip.git + # hip.source = inputs.hip + "/hip"; + accelerate.source = inputs.accelerate; + # llvm-hs.source = inputs.llvm-hs; + llvm-hs-pure.source = inputs.llvm-hs-pure; + }; - # Add your package overrides here - settings = { - uuid ={ + # Add your package overrides here + settings = { + uuid.jailbreak = true; + accelerate = + { jailbreak = true; + broken = false; + }; + llvm-hs = + { + jailbreak = true; + broken = false; + }; + llvm-hs-pure = + { + jailbreak = true; + broken = false; }; - # hlint = { - # jailbreak = true; - # }; - # barbies-th = { + }; + # hlint = { + # jailbreak = true; + # }; + # barbies-th = { # broken = false; # jailbreak = true; # }; + + # Development shell configuration + devShell = { + hlsCheck.enable = true; + }; + + # What should haskell-flake add to flake outputs? + autoWire = [ "packages" "apps" "checks" ]; # Wire all but the devShell + }; + + # Auto formatters. This also adds a flake check to ensure that the + # source tree was auto formatted. + treefmt.config = { + projectRootFile = "flake.nix"; + + programs.ormolu.enable = true; + programs.nixpkgs-fmt.enable = true; + programs.cabal-fmt.enable = true; + programs.hlint.enable = false; + + # We use fourmolu + programs.ormolu.package = pkgs.haskellPackages.fourmolu; + }; + + # Default package & app. + packages.default = self'.packages.compress; + apps.default = self'.apps.compress; + + mission-control.scripts = { + hoogle = { + description = "Start Hoogle server for project dependencies"; + exec = '' + echo http://127.0.0.1:8888; + hoogle serve -p 8888 --local; + ''; + category = "Dev Tools"; }; - # Development shell configuration - devShell = { - hlsCheck.enable = true; + haddocks = { + description = "make docs & serve them"; + exec = '' + echo http://127.0.0.1:8887; + cabal haddock-project --executables --internal --hoogle || true; + python3 -m http.server -d haddocks 8887; + ''; + category = "Dev Tools"; }; - # What should haskell-flake add to flake outputs? - autoWire = [ "packages" "apps" "checks" ]; # Wire all but the devShell }; - # Auto formatters. This also adds a flake check to ensure that the - # source tree was auto formatted. - treefmt.config = { - projectRootFile = "flake.nix"; - - programs.ormolu.enable = true; - programs.nixpkgs-fmt.enable = true; - programs.cabal-fmt.enable = true; - programs.hlint.enable = false; - - # We use fourmolu - programs.ormolu.package = pkgs.haskellPackages.fourmolu; - }; - - # Default package & app. - packages.default = self'.packages.compress; - apps.default = self'.apps.compress; - - # Default shell. - devShells.default = pkgs.mkShell { - name = "haskell-template"; - meta.description = "Haskell development environment"; - # See https://zero-to-flakes.com/haskell-flake/devshell#composing-devshells - inputsFrom = [ - config.haskellProjects.default.outputs.devShell - config.treefmt.build.devShell - ]; - nativeBuildInputs = with pkgs; [ - ]; - }; + # Default shell. + devShells.default = pkgs.mkShell { + name = "haskell-template"; + meta.description = "Haskell development environment"; + # See https://zero-to-flakes.com/haskell-flake/devshell#composing-devshells + inputsFrom = [ + config.mission-control.devShell + config.haskellProjects.default.outputs.devShell + config.treefmt.build.devShell + ]; + nativeBuildInputs = with pkgs; [ + ]; }; }; + }; } diff --git a/src/Compress/Arithmetic.hs b/src/Compress/Arithmetic.hs index 5daae19..467af2b 100644 --- a/src/Compress/Arithmetic.hs +++ b/src/Compress/Arithmetic.hs @@ -1,13 +1,15 @@ -- | https://en.wikipedia.org/wiki/Arithmetic_coding -module Compress.Arithmetic where +module Compress.Arithmetic (Compressed, compress, decompress, Word4MapSerialized) where -import Basement.Bits qualified as B -import Basement.Compat.Base (Word16, Word32, Word64, Word8) -import Basement.Compat.Bifunctor qualified as Bi +import Data.Bits qualified as B +import Data.Word +import Data.Bifunctor qualified as Bi import Control.Arrow qualified as Ar +import Control.Monad.Par (NFData) +import Control.Monad.Par qualified as P import Data.ByteString qualified as By import Data.FiniteBit qualified as Fi -import Data.Foldable as F +import qualified Data.Foldable as F import Data.List (genericLength) import Data.List qualified as L import Data.Map.Strict qualified as M @@ -17,29 +19,34 @@ import Data.Ratio import Data.Serialize qualified as C import GHC.Generics (Generic) import GHC.Natural (Natural) -import qualified Control.Monad.Par as P -import Control.Monad.Par (NFData) +import qualified Relude.Unsafe as US +import Data.Word4 (Word4, CompressWord4List, toCompressedWord4List, fromCompressedWord4List) +import qualified Data.ByteString as BS -- withPrimeDenums = concatMap (\x -> map (% x) [1 .. (pred x)]) primes -- shortestNumberBetween :: Ratio Integer -> Ratio Integer -> Maybe (Ratio Integer) -- shortestNumberBetween xMin xMax = find (\candidate -> candidate >= xMin && candidate < xMax) withPrimeDenums + data WordMarkovStats = WordMarkovStats { location :: Ratio Integer, size :: Ratio Integer + } deriving (Show, Eq, Ord, Generic, C.Serialize) + +loc' (WordMarkovStats {..}) = location + toRing :: forall a k. (Ord k, Integral a, Bounded a) => [(k, a)] -> M.Map k WordMarkovStats toRing xs = M.fromList . zip (map fst xs) $ wordMarkovStats where - sizes = map ((% maxBound') . fromIntegral . snd) xs wordMarkovStats = zipWith WordMarkovStats (L.scanl' (+) 0 withBumpedZeroSized) withBumpedZeroSized - asFracsOfTotalSum = map fst . tail . L.scanl' f (0, 0) $ sizes + asFracsOfTotalSum = map fst . US.tail . L.scanl' f (0, 0) $ sizes maxBound' :: Integer maxBound' = fromIntegral (maxBound :: a) @@ -62,14 +69,14 @@ twoByteMarkov xs = . M.fromListWith (M.unionWith (+)) . zip xs . map (`M.singleton` (1 :: Integer)) - . tail + . US.tail $ xs where -- optimization, changes fractions so they represent the fraction -- of remaining space in the list taken up instead of total space sizeAsFraction :: M.Map k Integer -> [(k, b)] sizeAsFraction m = - zip keys + zip keys . map discretizeFraction . fractionOfRemainingSums $ counts @@ -88,34 +95,48 @@ twoByteMarkov xs = maxBound' :: Ratio Integer maxBound' = fromIntegral (maxBound :: b) -data Compressed a = Compressed - { markovs :: M.Map a [(a, Word8)], +type Word4MapSerialized a = M.Map a ([a], CompressWord4List) +type Word4Map a = M.Map a [(a, Word4)] + +data Compressed a mapImplementation = Compressed + { markovs :: mapImplementation, chunks :: [Chunk a] } + deriving (Eq, Ord, Show, Generic) -- , C.Serialize, P.NFData) + + +deriving instance (Ord a, C.Serialize a) => C.Serialize (Compressed a (Word4MapSerialized a)) + +data Chunk a = Chunk + { location :: Ratio Integer, + start :: a, + length :: Int + } deriving (Eq, Ord, Show, Generic, C.Serialize, P.NFData) -data Chunk a = Chunk { - location :: Ratio Integer, - start :: a, - length :: Int - } deriving (Eq, Ord, Show, Generic, C.Serialize, P.NFData) +unserealizeWord4Map :: Word4MapSerialized a -> Word4Map a +unserealizeWord4Map = M.map (uncurry zip . Bi.second ((++ [maxBound]) . fromCompressedWord4List)) + +serealizeWord4Map :: Word4Map a -> Word4MapSerialized a +serealizeWord4Map = M.map (Bi.second (toCompressedWord4List . L.init) . L.unzip) decompress :: forall a. - (Integral a, B.FiniteBitsOps a, B.BitOps a, Show a, NFData a) => - Compressed a -> + (Integral a, B.FiniteBits a, B.Bits a, Show a, NFData a) => + Compressed a (Word4MapSerialized a) -> [a] decompress (Compressed {..}) = concat . P.runPar . P.parMap decompressChunk $ chunks where + rings = M.map (M.fromList . map toDecompressionRing . M.toList . toRing) markovs' - rings = M.map (M.fromList . map toDecompressionRing . M.toList . toRing) markovs + markovs' = unserealizeWord4Map markovs toDecompressionRing (key, (WordMarkovStats {..})) = (location, (key, size)) decompressChunk :: Chunk a -> [a] decompressChunk (Chunk {..}) = map snd . take length . L.iterate' decompress' $ (location, start) where - decompress' :: (Ratio Integer, a) -> (Ratio Integer, a) + -- decompress' :: (Ratio Integer, a) -> (Ratio Integer, a) decompress' (!loc, !prev) = ((loc - ansLoc) / newSize, newVal) where ansLoc :: Ratio Integer @@ -126,6 +147,7 @@ decompress (Compressed {..}) = concat . P.runPar . P.parMap decompressChunk $ ch sanityCheck :: forall k. (Ord k) => M.Map k (M.Map k Word8) -> [Word8] sanityCheck = map (sum . M.elems) . M.elems +chunk :: Int -> [a] -> [[a]] chunk chunkSize = chunk' where chunk' [] = [] @@ -133,30 +155,30 @@ chunk chunkSize = chunk' where (xs', xs'') = splitAt chunkSize xs +chunkLength :: Int chunkLength = 8000 -compress :: - forall a. - (Integral a, B.FiniteBitsOps a, B.BitOps a, Show a, NFData a) => - [a] -> - Compressed a -compress toCompress = Compressed twoByteMarkovs . P.runPar . P.parMap compressChunk $ unCompressedChunks +-- compress :: +-- forall a. +-- (Integral a, B.FiniteBits a, B.Bits a, Show a, NFData a) => +-- [a] -> +-- Compressed a (Word4MapSerialized a) +compress toCompress = traceShow ((fromIntegral :: Int -> Double) (BS.length . C.encode $ (twoByteMarkovs :: M.Map Word8 [(Word8, Word4)])) / (fromIntegral . BS.length . C.encode . serealizeWord4Map $ (twoByteMarkovs :: M.Map Word8 [(Word8, Word4)]))) $ Compressed (serealizeWord4Map twoByteMarkovs) . P.runPar . P.parMap compressChunk $ unCompressedChunks where - unCompressedChunks = chunk chunkLength toCompress twoByteMarkovs = twoByteMarkov toCompress rings = M.map toRing twoByteMarkovs - compressChunk toCompress = Chunk { - location = shortestLocation endStats, - start = head toCompress, - length = L.length toCompress - } + compressChunk toCompress = + Chunk + { location = shortestLocation endStats, + start = US.head toCompress, + length = L.length toCompress + } where - - pairs = zip toCompress . tail $ toCompress + pairs = zip toCompress . US.tail $ toCompress shortestLocation (WordMarkovStats {..}) = simplestBetween location (location + size) @@ -172,7 +194,6 @@ compress toCompress = Compressed twoByteMarkovs . P.runPar . P.parMap compressCh size = prevSize * nextSize } - pyramidFold :: (a -> a -> a) -> [a] -> a pyramidFold f = pyramid where diff --git a/src/Compress/Huffman.hs b/src/Compress/Huffman.hs index 582cd35..a34d3cc 100644 --- a/src/Compress/Huffman.hs +++ b/src/Compress/Huffman.hs @@ -1,5 +1,7 @@ module Compress.Huffman where +--- TODO: ?? accelarate + import Data.Bifunctor qualified as Bi import Data.FiniteBit @@ -12,9 +14,9 @@ import Data.Data qualified as D import Data.Foldable qualified as F import Data.IntMap.Strict qualified as IM import qualified Data.Proxy as D -import qualified Basement.From as F +import qualified Witch as F import Data.Map.Strict qualified as M -import Basement.Bits as B +import Data.Bits as B import Data.Maybe (fromMaybe) import Data.Maybe qualified as My import Data.PQueue qualified as PQ @@ -31,7 +33,7 @@ import Data.HuffmanTree decompress :: forall a - . (Ord a, Integral a, B.FiniteBitsOps a) + . (Ord a, Integral a, B.FiniteBits a) => (TreeDirs, HuffmanTree a) -> Maybe BS.ByteString decompress (TreeDirs treeDirs, tree) = BS.concat . map toByteString <$> decompress' treeDirs @@ -50,7 +52,7 @@ decompress (TreeDirs treeDirs, tree) = BS.concat . map toByteString <$> decompre compress :: forall a - . (Ord a, Integral a, B.FiniteBitsOps a, B.BitOps a) + . (Ord a, Integral a, B.FiniteBits a, B.Bits a) => BS.ByteString -> Maybe (TreeDirs, HuffmanTree a) compress bs = @@ -112,7 +114,7 @@ divideByteString n bs = x : divideByteString n xs compressionRatioFor :: forall a - . (Integral a, B.FiniteBitsOps a, B.BitOps a, Ord a, C.Serialize a) + . (Integral a, B.FiniteBits a, B.Bits a, Ord a, C.Serialize a) => D.Proxy a -> BS.ByteString -> Double diff --git a/src/Compress/PrefixTree.hs b/src/Compress/PrefixTree.hs index 8143615..c89be9c 100644 --- a/src/Compress/PrefixTree.hs +++ b/src/Compress/PrefixTree.hs @@ -1,7 +1,7 @@ module Compress.PrefixTree where -import Basement.Bits qualified as B -import qualified Basement.From as F +import Data.Bits qualified as B +import qualified Witch as F import Compress.Huffman qualified as H import Control.Applicative qualified as A import Data.Bifunctor qualified as Bi @@ -16,10 +16,10 @@ import Data.Ord qualified as O import Data.PQueue qualified as PQ import Debug.Trace qualified as D import Debug.Trace qualified as T -import Basement.Bits (FiniteBitsOps(numberOfBits)) import GHC.Generics import qualified Data.Serialize as C import Data.FiniteBit +import qualified Data.FiniteBit as B data Tree a = (Ord a) => Tree @@ -32,7 +32,7 @@ newtype HuffmanPrefixTree a b = HuffmanPrefixTree finiteBitTupleUncons :: forall a b. - (Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) => + (Integral a, B.FiniteBits a, B.Bits a, Integral b, B.FiniteBits b, B.Bits b) => BS.ByteString -> Maybe ((a, b), BS.ByteString) finiteBitTupleUncons bs = case finiteBitUncons bs of @@ -43,7 +43,7 @@ finiteBitTupleUncons bs = case finiteBitUncons bs of fromByteString :: forall a b. - (Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) => + (Integral a, B.FiniteBits a, B.Bits a, Integral b, B.FiniteBits b, B.Bits b) => BS.ByteString -> [(a, b)] fromByteString bs = case finiteBitTupleUncons bs of @@ -52,7 +52,7 @@ fromByteString bs = case finiteBitTupleUncons bs of toHuffmanTree :: forall a b. - (Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) => + (Integral a, B.FiniteBits a, B.Bits a, Integral b, B.FiniteBits b, B.Bits b) => M.Map (a, b) Word -> HuffmanPrefixTree a b toHuffmanTree = @@ -64,7 +64,7 @@ toHuffmanTree = decompress :: forall a . - (Integral a, B.FiniteBitsOps a, B.BitOps a) => + (Integral a, B.FiniteBits a, B.Bits a) => (TreeDirs, HuffmanPrefixTree a a, a) -> Maybe BS.ByteString decompress (TreeDirs treeDirs'', HuffmanPrefixTree prefixTree, initial') = BS.concat . map toByteString . (initial' :) <$> decompress' treeDirs'' initial' @@ -77,7 +77,7 @@ decompress (TreeDirs treeDirs'', HuffmanPrefixTree prefixTree, initial') = BS.co compress :: forall a b. - (Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) => + (Integral a, B.FiniteBits a, B.Bits a, Integral b, B.FiniteBits b, B.Bits b) => BS.ByteString -> Maybe (TreeDirs, HuffmanPrefixTree a b, a) compress bs = (TreeDirs $ concatMap treeDirsFor asFiniteBitPairs, tree, ) <$> initial where @@ -129,7 +129,7 @@ compress bs = (TreeDirs $ concatMap treeDirsFor asFiniteBitPairs, tree, ) <$> in nGramCounts :: forall a b. - (Integral a, B.FiniteBitsOps a, B.BitOps a, Integral b, B.FiniteBitsOps b, B.BitOps b) => + (Integral a, B.FiniteBits a, B.Bits a, Integral b, B.FiniteBits b, B.Bits b) => BS.ByteString -> M.Map (a, b) Word nGramCounts = @@ -140,20 +140,20 @@ nGramCounts = . map (BS.take len) . BS.tails where - len = (`div` 8) . F.from $ numberOfBits (0 :: a) + numberOfBits (0 :: b) + len = (`div` 8) . F.from $ B.finiteBitSize (0 :: a) + B.finiteBitSize (0 :: b) empty :: (Ord a) => Tree a empty = Tree M.empty singleton :: (Ord a) => a -> Tree a -singleton x = Tree $ M.singleton x empty +singleton x = Tree $ M.singleton x Compress.PrefixTree.empty fromSingleList :: (Ord a) => [a] -> Tree a -fromSingleList [] = empty +fromSingleList [] = Compress.PrefixTree.empty fromSingleList (x : xs) = Tree . M.singleton x . fromSingleList $ xs fromList :: (Ord a) => [[a]] -> Tree a -fromList = F.foldl' merge empty . map fromSingleList +fromList = F.foldl' merge Compress.PrefixTree.empty . map fromSingleList -- insert :: Ord a => Tree a -> [a] -> Tree a -- insert (Tree {..}) (x:xs) = diff --git a/src/Data/FiniteBit.hs b/src/Data/FiniteBit.hs index 253cb2b..03baacc 100644 --- a/src/Data/FiniteBit.hs +++ b/src/Data/FiniteBit.hs @@ -1,40 +1,41 @@ module Data.FiniteBit where import Data.Bit (cloneToByteString) -import Data.Bit qualified as B -import Data.Bit qualified as BV +import Data.Bits qualified as B +import Data.Bits qualified as BV import Data.ByteString qualified as BS import qualified Data.Proxy as D -import qualified Basement.From as F +import qualified Witch as F import Data.Foldable qualified as F import Data.Word -import Basement.Bits as B +import Data.Proxy (Proxy) +class SizedBits a where + size :: Proxy a -> Int -numBytesIn :: forall a. (B.FiniteBitsOps a, Integral a) => D.Proxy a -> Int -numBytesIn _ = (`div` 8) . F.from . B.numberOfBits $ (0 :: a) +numBytesIn :: forall a. (B.FiniteBits a, Integral a) => D.Proxy a -> Int +numBytesIn _ = (`div` 8) . F.from . B.finiteBitSize $ (0 :: a) - -toWordsList :: forall a. (Integral a, B.FiniteBitsOps a, B.BitOps a) => BS.ByteString -> [a] +toWordsList :: forall a. (Integral a, B.FiniteBits a) => BS.ByteString -> [a] toWordsList bs = case finiteBitUncons bs of Nothing -> [] (Just (x, xs)) -> x : toWordsList xs -toByteString :: forall a. (Integral a, B.FiniteBitsOps a) => a -> BS.ByteString +toByteString :: forall a. (Integral a, B.FiniteBits a) => a -> BS.ByteString toByteString n = BS.pack . take numBytes . map (fromIntegral . (n `B.rotateL`)) $ [8, 16 ..] where numBytes = numBytesIn (D.Proxy :: D.Proxy a) finiteBitUncons :: forall a - . (Integral a, B.FiniteBitsOps a, B.BitOps a) + . (Integral a, B.FiniteBits a) => BS.ByteString -> Maybe (a, BS.ByteString) finiteBitUncons [] = Nothing finiteBitUncons bs = Just . (,rest) - . F.foldl' (.|.) 0 + . F.foldl' (B..|.) 0 . zipWith (flip B.rotateR) [8, 16 ..] . map (fromIntegral :: Word8 -> a) . BS.unpack diff --git a/src/Data/Word4.hs b/src/Data/Word4.hs new file mode 100644 index 0000000..eddaa94 --- /dev/null +++ b/src/Data/Word4.hs @@ -0,0 +1,80 @@ +module Data.Word4 where + +import Data.Bits +import Data.Word +import qualified Data.Serialize as C +import qualified Data.Map as M + +-- only use lower 4 bits +newtype Word4 = Word4 { + inner :: Word8 + } deriving (Eq, Ord, Show, Enum, Generic, C.Serialize) + +map2 :: (Word8 -> Word8 -> Word8) -> Word4 -> Word4 -> Word4 +map2 f (Word4 a) (Word4 b) = Word4 $ f a b + +data CompressWord4List = CompressWord4List + { xs :: [Word8], + last :: Maybe Word8 -- in lower 4 bits + } deriving (Eq, Ord, Generic, Show, C.Serialize, NFData) + +bitwiseAnd a b = getAnd $ And a <> And b + +bitwiseOr a b = getIor $ Ior a <> Ior b + +toCompressedWord4List [] = CompressWord4List [] Nothing +toCompressedWord4List [Word4 x] = CompressWord4List [] (Just x) +toCompressedWord4List ((Word4 x) : (Word4 x') : xs) = + CompressWord4List + { xs = headByte : xs', + last = last' + } + where + headByte = bitwiseOr (bitwiseAnd 0xf0 (x .<<. 4)) (bitwiseAnd 0x0f x') + + (CompressWord4List xs' last') = toCompressedWord4List xs + +fromCompressedWord4List :: CompressWord4List -> [Word4] +fromCompressedWord4List (CompressWord4List [] Nothing) = [] +fromCompressedWord4List (CompressWord4List [] (Just a)) = [word4 a] +fromCompressedWord4List (CompressWord4List {xs = (x : xs), ..}) = + Word4 ((bitwiseAnd 0xf0 x) .>>. 4) + : word4 x + : fromCompressedWord4List (CompressWord4List xs last) + +-- instance Show Word4 where +-- show :: Word4 -> String +-- show (Word4 a) = show . (.>>. 4) $ a + +instance Bounded Word4 where + minBound = word4 0 + maxBound = word4 0xf + +instance Real Word4 where + toRational = toRational . inner + +instance Integral Word4 where + quot = map2 quot `on` clean + rem = map2 rem `on` clean + div = map2 div `on` clean + mod = map2 mod `on` clean + quotRem :: Word4 -> Word4 -> (Word4, Word4) + quotRem (Word4 a) (Word4 b) = (word4 a', word4 b') + where + (a', b') = quotRem a b + + divMod (Word4 a) (Word4 b) = (word4 a', word4 b') + where + (a', b') = divMod a b + toInteger = toInteger . inner . clean + +clean (Word4 a) = Word4 . bitwiseAnd 0xf $ a +word4 = clean . Word4 + +instance Num Word4 where + (+) = map2 (+) `on` clean + (*) = map2 (*) `on` clean + abs = id + negate = id + signum = const 1 + fromInteger = clean . Word4 . fromInteger diff --git a/src/Main.hs b/src/Main.hs index cbb5cf0..6bd0aa8 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -1,6 +1,5 @@ module Main where -import Basement.Bits qualified as B import Compress.Huffman import Compress.Huffman qualified as HT import Compress.PrefixTree (HuffmanPrefixTree (HuffmanPrefixTree)) @@ -31,7 +30,7 @@ data CLIOpts = CLIOpts applyCompressionOptions :: forall a. - (Integral a, B.BitOps a, B.FiniteBitsOps a, Ord a, C.Serialize a) => + (Integral a, B.Bits a, B.FiniteBits a, Ord a, C.Serialize a) => P.Proxy a -> CLIOpts -> BS.ByteString -> @@ -53,7 +52,7 @@ main = do -- cliOpts :: CLIOpts <- O.getRecord "compress/decompress & strategy" f <- BS.getContents let fAsWords = (FB.toWordsList :: BS.ByteString -> [ Word8 ]) $ f - let compressedUnencoded :: A.Compressed Word8 = (A.compress fAsWords) + let compressedUnencoded :: (A.Compressed Word8 (A.Word4MapSerialized Word8)) = (A.compress fAsWords) let compressed = C.encode compressedUnencoded print ("compression ratio", (fromIntegral . BS.length $ compressed) / (fromIntegral . BS.length $ f)) print ("works?", (A.decompress compressedUnencoded) == fAsWords)