benchmark, test suite, and run length encoding

This commit is contained in:
Jack Wines 2025-10-27 13:15:58 -07:00
parent 2123636291
commit da83f9a5d0
Signed by: Jack
SSH key fingerprint: SHA256:AaP2Hr/e3mEjeY+s9XJmQqAesqEms8ENRhwRkpO0WUk
19 changed files with 884 additions and 364 deletions

1
.gitignore vendored
View file

@ -6,3 +6,4 @@
/cabal.project.local /cabal.project.local
/compress.hp /compress.hp
/*.prof /*.prof
/calgarycorpus/

15
bench/Bench.hs Normal file
View file

@ -0,0 +1,15 @@
import Criterion.Main
import Compress.LengthDistancePairs
import qualified Data.ByteString as BS
import qualified Compress.Arithmetic as A
main = do
testData <- BS.unpack . BS.take 10000 <$> (BS.readFile "pg64317.txt")
defaultMain [
bgroup "encode" [
bench "length distance pair" $ nf (encode :: [Word8] -> [LengthDistancePair Word32]) testData
],
bgroup "arithmetic coding" [
bench "pg64317.txt" $ nf A.compress testData
]
]
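
The benchmark loads its input in main, so the file read already sits outside the timed region; criterion's env combinator is another way to express that setup, forcing the data to normal form before any timing starts. A minimal standalone sketch of that pattern (loadInput is an illustrative name, not from this repo):

import Criterion.Main
import qualified Data.ByteString as BS
import Data.Word (Word8)

-- Load and force the sample input outside the timed region.
loadInput :: IO [Word8]
loadInput = BS.unpack . BS.take 10000 <$> BS.readFile "pg64317.txt"

main :: IO ()
main = defaultMain
  [ env loadInput $ \input ->
      bgroup "baseline" [bench "length" $ nf length input]
  ]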

View file

@ -6,23 +6,26 @@ build-type: Simple
license: MIT license: MIT
license-file: LICENSE license-file: LICENSE
executable compress common deps
main-is:
Main.hs
default-extensions: default-extensions:
DataKinds, DataKinds,
DeriveAnyClass, DeriveAnyClass,
DeriveGeneric, DeriveGeneric,
DerivingStrategies,
DerivingVia,
DuplicateRecordFields, DuplicateRecordFields,
ExtendedDefaultRules, ExtendedDefaultRules,
FlexibleContexts, FlexibleContexts,
FlexibleInstances, FlexibleInstances,
ImpredicativeTypes, ImpredicativeTypes,
InstanceSigs, InstanceSigs,
LambdaCase,
MultiParamTypeClasses, MultiParamTypeClasses,
NamedFieldPuns, NamedFieldPuns,
NoFieldSelectors,
OverloadedLabels, OverloadedLabels,
OverloadedLists, OverloadedLists,
OverloadedRecordDot,
OverloadedStrings, OverloadedStrings,
PartialTypeSignatures, PartialTypeSignatures,
RankNTypes, RankNTypes,
@ -32,15 +35,12 @@ executable compress
StandaloneDeriving, StandaloneDeriving,
StrictData, StrictData,
TemplateHaskell, TemplateHaskell,
LambdaCase,
TupleSections, TupleSections,
TypeApplications, TypeApplications,
TypeFamilies, TypeFamilies,
TypeOperators, TypeOperators,
TypeSynonymInstances, TypeSynonymInstances,
UndecidableInstances, UndecidableInstances,
hs-source-dirs:
src
mixins: mixins:
base hiding (Prelude), base hiding (Prelude),
relude (Relude as Prelude), relude (Relude as Prelude),
@ -48,17 +48,21 @@ executable compress
build-depends: build-depends:
base, base,
bitvec, bitvec,
bytestring,
cereal, cereal,
bytestring,
containers, containers,
winery,
parsec,
-- accelerate, -- accelerate,
-- containers-accelerate, -- containers-accelerate,
parsec,
leancheck, leancheck,
monad-par, monad-par,
monad-par-extras, monad-par-extras,
nonempty-containers, nonempty-containers,
optparse-generic, optparse-generic,
relude, relude,
pointless-fun,
text, text,
uuid, uuid,
vector, vector,
@ -66,15 +70,63 @@ executable compress
default-language: default-language:
GHC2021 GHC2021
other-modules: other-modules:
Data.PQueue Compress.Arithmetic
Data.FiniteBit Compress.BurrowsWheeler
Compress.Huffman Compress.Huffman
Compress.PrefixTree Compress.PrefixTree
Compress.WordMarkovStats
Compress.LengthDistancePairs
Data.Dirs
Data.FiniteBit
Data.HuffmanTree Data.HuffmanTree
Compress.Arithmetic Data.PQueue
Data.Word4 Data.Word4
Data.ArbitraryPrecisionFloatingPoint
-- Data.CircularList
hs-source-dirs:
src
ghc-options: ghc-options:
-threaded -threaded
-fprof-auto "-with-rtsopts=-N"
-fprof-late
"-with-rtsopts=-p -hc -B -N -qa"
benchmark bench
import: deps
type:
exitcode-stdio-1.0
main-is:
Bench.hs
build-depends:
criterion
hs-source-dirs:
bench
ghc-options:
"-with-rtsopts=-T -p -hc -B -qa"
test-suite test
import: deps
type:
exitcode-stdio-1.0
main-is:
Test.hs
build-depends:
tasty,
tasty-hunit,
falsify
hs-source-dirs:
test
executable compress
import: deps
main-is:
Main.hs
hs-source-dirs:
src-exe
-- -fllvm

131
flake.lock generated
View file

@ -1,18 +1,34 @@
{ {
"nodes": { "nodes": {
"accelerate": { "barbies": {
"flake": false, "flake": false,
"locked": { "locked": {
"lastModified": 1732969010, "lastModified": 1712605099,
"narHash": "sha256-Qrmtrgij2GbklBXUK42Pt6Db8WiGijA5sz5oC5AR72c=", "narHash": "sha256-jDyIDPiGWAw4qLRoYA4p6njANOg4/EOCx0jmFl607IM=",
"owner": "AccelerateHS", "owner": "jcpetruzza",
"repo": "accelerate", "repo": "barbies",
"rev": "02da6161ef143a9886c8bce542cd96029c4f527a", "rev": "856bc3d3cc72a13e95ed495afd15683c45c7cc55",
"type": "github" "type": "github"
}, },
"original": { "original": {
"owner": "AccelerateHS", "owner": "jcpetruzza",
"repo": "accelerate", "repo": "barbies",
"type": "github"
}
},
"barbies-th": {
"flake": false,
"locked": {
"lastModified": 1730091166,
"narHash": "sha256-kc3ObxnhAJW6vgroFIax/qBOrp3HIWoLRtu0jsJUGIE=",
"owner": "fumieval",
"repo": "barbies-th",
"rev": "46c7b8c68634b219ff12e7966983f9b46a5976d4",
"type": "github"
},
"original": {
"owner": "fumieval",
"repo": "barbies-th",
"type": "github" "type": "github"
} }
}, },
@ -21,11 +37,11 @@
"nixpkgs-lib": "nixpkgs-lib" "nixpkgs-lib": "nixpkgs-lib"
}, },
"locked": { "locked": {
"lastModified": 1733312601, "lastModified": 1756770412,
"narHash": "sha256-4pDvzqnegAfRkPwO3wmwBhVi/Sye1mzps0zHWYnP88c=", "narHash": "sha256-+uWLQZccFHwqpGqr2Yt5VsW/PbeJVTn9Dk6SHWhNRPw=",
"owner": "hercules-ci", "owner": "hercules-ci",
"repo": "flake-parts", "repo": "flake-parts",
"rev": "205b12d8b7cd4802fbcb8e8ef6a0f1408781a4f9", "rev": "4524271976b625a4a605beefd893f270620fd751",
"type": "github" "type": "github"
}, },
"original": { "original": {
@ -51,11 +67,11 @@
}, },
"haskell-flake": { "haskell-flake": {
"locked": { "locked": {
"lastModified": 1734984991, "lastModified": 1756607542,
"narHash": "sha256-oUYtRBD3Yhw2jvKYo0lfd82fgEQQbFoiJcHO923gmOc=", "narHash": "sha256-+99fEAk0HwjYgIW2tEOs7ayBDxnU9NAM5E29ZxgyX40=",
"owner": "srid", "owner": "srid",
"repo": "haskell-flake", "repo": "haskell-flake",
"rev": "daf00052906bdd977e57a07f7048437214232e87", "rev": "73e3891fb135c679a1c30fae4b101e5b41b8ca61",
"type": "github" "type": "github"
}, },
"original": { "original": {
@ -64,42 +80,6 @@
"type": "github" "type": "github"
} }
}, },
"llvm-hs": {
"flake": false,
"locked": {
"lastModified": 1665495332,
"narHash": "sha256-JKrpUmHJ1nsNiCoHhV5FCcdQGlNFfD37Oiu5kSmghfM=",
"owner": "llvm-hs",
"repo": "llvm-hs",
"rev": "423220bffac4990d019fc088c46c5f25310d5a33",
"type": "github"
},
"original": {
"owner": "llvm-hs",
"ref": "llvm-12",
"repo": "llvm-hs",
"type": "github"
}
},
"llvm-hs-pure": {
"flake": false,
"locked": {
"dir": "llvm-hs-pure",
"lastModified": 1665495332,
"narHash": "sha256-JKrpUmHJ1nsNiCoHhV5FCcdQGlNFfD37Oiu5kSmghfM=",
"owner": "llvm-hs",
"repo": "llvm-hs",
"rev": "423220bffac4990d019fc088c46c5f25310d5a33",
"type": "github"
},
"original": {
"dir": "llvm-hs-pure",
"owner": "llvm-hs",
"ref": "llvm-12",
"repo": "llvm-hs",
"type": "github"
}
},
"mission-control": { "mission-control": {
"locked": { "locked": {
"lastModified": 1733438716, "lastModified": 1733438716,
@ -117,11 +97,11 @@
}, },
"nixpkgs": { "nixpkgs": {
"locked": { "locked": {
"lastModified": 1734649271, "lastModified": 1756542300,
"narHash": "sha256-4EVBRhOjMDuGtMaofAIqzJbg4Ql7Ai0PSeuVZTHjyKQ=", "narHash": "sha256-tlOn88coG5fzdyqz6R93SQL5Gpq+m/DsWpekNFhqPQk=",
"owner": "nixos", "owner": "nixos",
"repo": "nixpkgs", "repo": "nixpkgs",
"rev": "d70bd19e0a38ad4790d3913bf08fcbfc9eeca507", "rev": "d7600c775f877cd87b4f5a831c28aa94137377aa",
"type": "github" "type": "github"
}, },
"original": { "original": {
@ -133,28 +113,31 @@
}, },
"nixpkgs-lib": { "nixpkgs-lib": {
"locked": { "locked": {
"lastModified": 1733096140, "lastModified": 1754788789,
"narHash": "sha256-1qRH7uAUsyQI7R1Uwl4T+XvdNv778H0Nb5njNrqvylY=", "narHash": "sha256-x2rJ+Ovzq0sCMpgfgGaaqgBSwY+LST+WbZ6TytnT9Rk=",
"type": "tarball", "owner": "nix-community",
"url": "https://github.com/NixOS/nixpkgs/archive/5487e69da40cbd611ab2cadee0b4637225f7cfae.tar.gz" "repo": "nixpkgs.lib",
"rev": "a73b9c743612e4244d865a2fdee11865283c04e6",
"type": "github"
}, },
"original": { "original": {
"type": "tarball", "owner": "nix-community",
"url": "https://github.com/NixOS/nixpkgs/archive/5487e69da40cbd611ab2cadee0b4637225f7cfae.tar.gz" "repo": "nixpkgs.lib",
"type": "github"
} }
}, },
"root": { "root": {
"inputs": { "inputs": {
"accelerate": "accelerate", "barbies": "barbies",
"barbies-th": "barbies-th",
"flake-parts": "flake-parts", "flake-parts": "flake-parts",
"flake-root": "flake-root", "flake-root": "flake-root",
"haskell-flake": "haskell-flake", "haskell-flake": "haskell-flake",
"llvm-hs": "llvm-hs",
"llvm-hs-pure": "llvm-hs-pure",
"mission-control": "mission-control", "mission-control": "mission-control",
"nixpkgs": "nixpkgs", "nixpkgs": "nixpkgs",
"systems": "systems", "systems": "systems",
"treefmt-nix": "treefmt-nix" "treefmt-nix": "treefmt-nix",
"winery": "winery"
} }
}, },
"systems": { "systems": {
@ -179,11 +162,11 @@
] ]
}, },
"locked": { "locked": {
"lastModified": 1734982074, "lastModified": 1756662192,
"narHash": "sha256-N7M37KP7cHWoXicuE536GrVvU8nMDT/gpI1kja2hkdg=", "narHash": "sha256-F1oFfV51AE259I85av+MAia221XwMHCOtZCMcZLK2Jk=",
"owner": "numtide", "owner": "numtide",
"repo": "treefmt-nix", "repo": "treefmt-nix",
"rev": "e41e948cf097cbf96ba4dff47a30ea6891af9f33", "rev": "1aabc6c05ccbcbf4a635fb7a90400e44282f61c4",
"type": "github" "type": "github"
}, },
"original": { "original": {
@ -191,6 +174,22 @@
"repo": "treefmt-nix", "repo": "treefmt-nix",
"type": "github" "type": "github"
} }
},
"winery": {
"flake": false,
"locked": {
"lastModified": 1732329065,
"narHash": "sha256-CDFOD4B6cJgAe3ebTVF21/eCJtbHMEzmKF3b/XxMOw4=",
"owner": "fumieval",
"repo": "winery",
"rev": "2d6976bc822f6d2c2d590703b9470cb9e62191a5",
"type": "github"
},
"original": {
"owner": "fumieval",
"repo": "winery",
"type": "github"
}
} }
}, },
"root": "root", "root": "root",

View file

@ -8,19 +8,24 @@
flake-root.url = "github:srid/flake-root"; flake-root.url = "github:srid/flake-root";
treefmt-nix.url = "github:numtide/treefmt-nix"; treefmt-nix.url = "github:numtide/treefmt-nix";
treefmt-nix.inputs.nixpkgs.follows = "nixpkgs"; treefmt-nix.inputs.nixpkgs.follows = "nixpkgs";
accelerate = {
url = "github:AccelerateHS/accelerate?submodules=1";
flake = false;
};
llvm-hs = {
url = "github:llvm-hs/llvm-hs?ref=llvm-12";
flake = false;
};
llvm-hs-pure = {
url = "github:llvm-hs/llvm-hs?dir=llvm-hs-pure&ref=llvm-12";
flake = false;
};
mission-control.url = "github:Platonic-Systems/mission-control"; mission-control.url = "github:Platonic-Systems/mission-control";
winery = {
url = "github:fumieval/winery";
flake = false;
};
barbies-th = {
url = "github:fumieval/barbies-th";
flake = false;
};
barbies = {
url = "github:jcpetruzza/barbies";
flake = false;
};
}; };
outputs = inputs: outputs = inputs:
@ -38,7 +43,7 @@
# See https://github.com/srid/haskell-flake/blob/master/example/flake.nix # See https://github.com/srid/haskell-flake/blob/master/example/flake.nix
haskellProjects.default = { haskellProjects.default = {
# The base package set (this value is the default) # The base package set (this value is the default)
basePackages = pkgs.haskell.packages.ghc96; # basePackages = pkgs.haskell.packages.ghc910;
# Packages to add on top of `basePackages` # Packages to add on top of `basePackages`
packages = { packages = {
@ -46,30 +51,32 @@
# (Local packages are added automatically) # (Local packages are added automatically)
# https://github.com/lehins/hip.git # https://github.com/lehins/hip.git
# hip.source = inputs.hip + "/hip"; # hip.source = inputs.hip + "/hip";
accelerate.source = inputs.accelerate; winery.source = inputs.winery;
# llvm-hs.source = inputs.llvm-hs; barbies-th.source = inputs.barbies-th;
llvm-hs-pure.source = inputs.llvm-hs-pure; barbies.source = inputs.barbies;
}; };
# Add your package overrides here # Add your package overrides here
settings = { settings = {
uuid.jailbreak = true; uuid.jailbreak = true;
accelerate = # accelerate =
{ # {
jailbreak = true; # jailbreak = true;
broken = false; # broken = false;
}; # };
llvm-hs = # winery.broken = false;
{ winery.jailbreak = true;
jailbreak = true; winery.check = false;
broken = false; barbies-th.broken = false;
}; barbies-th.jailbreak = true;
llvm-hs-pure = barbies.broken = false;
{ barbies.jailbreak = true;
jailbreak = true; barbies.check = false;
broken = false; tree-diff.check = false;
}; # hlint.jailbreak = true;
# ghc-lib-parser.jailbreak = true;
# ghc-lib-parser-ex.jailbreak = true;
# gmp-with-cxx.jailbreak = true;
}; };
# hlint = { # hlint = {
# jailbreak = true; # jailbreak = true;
@ -96,7 +103,7 @@
programs.ormolu.enable = true; programs.ormolu.enable = true;
programs.nixpkgs-fmt.enable = true; programs.nixpkgs-fmt.enable = true;
programs.cabal-fmt.enable = true; programs.cabal-fmt.enable = true;
programs.hlint.enable = false; # programs.hlint.enable = false;
# We use fourmolu # We use fourmolu
programs.ormolu.package = pkgs.haskellPackages.fourmolu; programs.ormolu.package = pkgs.haskellPackages.fourmolu;
@ -110,7 +117,6 @@
hoogle = { hoogle = {
description = "Start Hoogle server for project dependencies"; description = "Start Hoogle server for project dependencies";
exec = '' exec = ''
echo http://127.0.0.1:8888;
hoogle serve -p 8888 --local; hoogle serve -p 8888 --local;
''; '';
category = "Dev Tools"; category = "Dev Tools";
@ -139,6 +145,7 @@
config.treefmt.build.devShell config.treefmt.build.devShell
]; ];
nativeBuildInputs = with pkgs; [ nativeBuildInputs = with pkgs; [
# llvmPackages_16.libllvm
]; ];
}; };
}; };

97
src-exe/Main.hs Normal file
View file

@ -0,0 +1,97 @@
module Main where
import Compress.Huffman
import Compress.Huffman qualified as HT
import Compress.PrefixTree qualified as PT
import Compress.Arithmetic qualified as A
import Data.Bifunctor qualified as Bi
import Data.Bit qualified as B
import Data.Bits (Bits ((.|.)))
import Data.Bits qualified as B
import Data.ByteString qualified as BS
import Data.HuffmanTree
import Data.Proxy qualified as P
import Codec.Winery qualified as C
import Data.Word
import GHC.Generics (Generic)
import Options.Generic qualified as O
import qualified Data.FiniteBit as FB
import qualified Relude.Unsafe as U
import qualified Data.ByteString.Lazy as BSL
-- import Data.Word4 (Word4(Word4))
import Compress.BurrowsWheeler
import qualified Control.Monad.Par as P
import qualified Compress.LengthDistancePairs as LDP
data CompressOrDecompress = Compress | Decompress deriving (Show, Generic, O.ParseField, O.ParseFields, O.ParseRecord, Read)
data CompressionStrategy = Huffman | MarkovHuffman deriving (Show, Generic, O.ParseField, O.ParseFields, O.ParseRecord, Read)
data CLIOpts = CLIOpts
{ task :: CompressOrDecompress,
strategy :: CompressionStrategy
}
deriving (Show, Generic, O.ParseRecord)
-- applyCompressionOptions ::
-- forall a.
-- (Integral a, B.Bits a, B.FiniteBits a, Ord a, C.Serialise a) =>
-- P.Proxy a ->
-- CLIOpts ->
-- BS.ByteString ->
-- BS.ByteString
-- applyCompressionOptions _ (CLIOpts Compress Huffman) f =
-- C.serialise . (compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanTree a)) $ f
-- applyCompressionOptions _ (CLIOpts Compress MarkovHuffman) f =
-- C.encode . (PT.compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanPrefixTree a a, a)) $ f
-- applyCompressionOptions _ (CLIOpts Decompress Huffman) f =
-- handleError $ Bi.second decompress . (C.decode :: BS.ByteString -> Either String (TreeDirs, HuffmanTree a)) $ f
-- applyCompressionOptions _ (CLIOpts Decompress MarkovHuffman) f =
-- handleError $ Bi.second PT.decompress . (C.decode :: BS.ByteString -> Either String (TreeDirs, HuffmanPrefixTree a a, a)) $ f
handleError (Right (Just bs)) = bs
handleError _ = []
main :: IO ()
main = do
-- cliOpts :: CLIOpts <- O.getRecord "compress/decompress & strategy"
f <- BS.getContents
let f' = BS.unpack f
print . BS.pack . A.decompress . A.compress $ f'
-- print =<< (P.runParIO $ do
-- let fCounts = (toCounts :: [Word8] -> [(Word8, Word64)]) . {- toBurrowsWheeler . -} (FB.toWordsList :: BS.ByteString -> [ Word8 ]) $ f
-- let fAsWords = map fst fCounts
-- -- let compressedUnencoded {- :: (A.Compressed Word8 (A.Word4MapSerialized Word8)) -} = (A.compress fAsWords)
-- -- print . A.talkAboutCompressed $ compressedUnencoded
-- -- let compressed = C.serialise (compressedUnencoded, A.compress . map snd $ fCounts)
-- -- huffmanCompressionRatio' <- P.spawnP $ (compressionRatioFor (Proxy :: Proxy Word8) (P.Proxy :: Proxy Word8) f)
-- -- arithmaticCodingCompressionRatio' <- P.spawnP $ ((fromIntegral . BS.length $ compressed) / (fromIntegral . BS.length $ f))
-- -- huffmanCompressionRatio <- P.get huffmanCompressionRatio'
-- -- arithmaticCodingCompressionRatio <- P.get arithmaticCodingCompressionRatio'
-- -- let lengthDistancePairsCompressedSize = fromIntegral . BS.length . C.serialise . (LDP.encode :: ByteString -> [LDP.LengthDistancePair Word16]) $ f
-- -- let lengthDistancePairsCompressionRatio :: Double = lengthDistancePairsCompressedSize / (fromIntegral $ BS.length f)
-- pure ()) -- (lengthDistancePairsCompressionRatio))
-- let decompressed = (A.decompress compressedUnencoded)
-- print ("huffman coding", huffmanCompressionRatio)
-- print ("compression ratio (arithmetic coding)", arithmaticCodingCompressionRatio)
-- print ("works?", decompressed == fAsWords)
-- print . take 10 . drop 70 . zip fAsWords $ decompressed
-- print . ("original length", ) . length $ fAsWords
-- let f = "hello tehre"
-- f <- BS.readFile "pg64317.txt"
-- let (compressed :: Maybe (TreeDirs, PT.HuffmanPrefixTree Word8 Word8, Word8)) = PT.compress f
-- print $ BS.length . C.encode $ compressed
-- print $ BS.length . C.encode . (compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanTree Word8)) $ f
-- print $ BS.length . C.encode . (compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanTree Word16)) $ f
-- BS.writeFile "outin.txt" decompressed
-- print (decompressed, f)
-- print $ BS.length decompressed
-- print $ BS.length f
-- print (decompressed == f)
-- BS.putStr . applyCompressionOptions (P.Proxy :: P.Proxy Word16) cliOpts $ f

View file

@ -1,5 +1,6 @@
-- | https://en.wikipedia.org/wiki/Arithmetic_coding -- | https://en.wikipedia.org/wiki/Arithmetic_coding
module Compress.Arithmetic (Compressed, compress, decompress, Word4MapSerialized) where -- module Compress.Arithmetic (Compressed, compress, decompress, Word4MapSerialized, Word4Map) where
module Compress.Arithmetic where
import Data.Bits qualified as B import Data.Bits qualified as B
import Data.Word import Data.Word
@ -16,12 +17,23 @@ import Data.Map.Strict qualified as M
import Data.Maybe qualified as My import Data.Maybe qualified as My
import Data.Ord import Data.Ord
import Data.Ratio import Data.Ratio
import Data.Serialize qualified as C import Codec.Winery qualified as C
import GHC.Generics (Generic) import GHC.Generics (Generic)
import GHC.Natural (Natural) import GHC.Natural (Natural)
import qualified Relude.Unsafe as US import qualified Relude.Unsafe as US
import Data.Word4 (Word4, CompressWord4List, toCompressedWord4List, fromCompressedWord4List) import Data.Word4 (Word4, CompressWord4List, toCompressedWord4List, fromCompressedWord4List)
import qualified Data.ByteString as BS import qualified Data.ByteString as BS
import qualified Relude.Unsafe as U
import Codec.Winery (WineryRecord)
import Compress.BurrowsWheeler (toBurrowsWheeler)
import qualified Data.Bit as BV
import qualified Data.Vector as V
import qualified Data.Bit as VB
import qualified Data.Vector.Unboxed as VU
import Data.Dirs
-- import qualified Debug.Trace as D
import qualified Data.Proxy as P
import Compress.WordMarkovStats as WM
-- withPrimeDenums = concatMap (\x -> map (% x) [1 .. (pred x)]) primes -- withPrimeDenums = concatMap (\x -> map (% x) [1 .. (pred x)]) primes
@ -29,41 +41,52 @@ import qualified Data.ByteString as BS
-- shortestNumberBetween xMin xMax = find (\candidate -> candidate >= xMin && candidate < xMax) withPrimeDenums -- shortestNumberBetween xMin xMax = find (\candidate -> candidate >= xMin && candidate < xMax) withPrimeDenums
data WordMarkovStats = WordMarkovStats toRing :: forall a k. (Show a, Show k, Ord k, Integral a, Bounded a) => [(k, a)] -> M.Map k WordMarkovStats
{ location :: Ratio Integer,
size :: Ratio Integer
}
deriving (Show, Eq, Ord, Generic, C.Serialize)
loc' (WordMarkovStats {..}) = location
toRing :: forall a k. (Ord k, Integral a, Bounded a) => [(k, a)] -> M.Map k WordMarkovStats
toRing xs = M.fromList . zip (map fst xs) $ wordMarkovStats toRing xs = M.fromList . zip (map fst xs) $ wordMarkovStats
where where
sizes = map ((% maxBound') . fromIntegral . snd) xs
wordMarkovStats = zipWith WordMarkovStats (L.scanl' (+) 0 withBumpedZeroSized) withBumpedZeroSized wordMarkovStats = zipWith WordMarkovStats (L.scanl' (+) 0 withBumpedZeroSized) withBumpedZeroSized
asFracsOfTotalSum = map fst . US.tail . L.scanl' f (0, 0) $ sizes
maxBound' :: Integer maxBound' :: Integer
maxBound' = fromIntegral (maxBound :: a) maxBound' = fromIntegral (maxBound :: a)
resize xs = map ( / sum' ) xs
where
sum' = sum xs
withBumpedZeroSized :: [Rational]
withBumpedZeroSized withBumpedZeroSized
| numZeroSized /= 0 = map (max (remainingSpace / numZeroSized)) asFracsOfTotalSum = resize
| otherwise = asFracsOfTotalSum . map (max minimumSize)
. resize
. map (fromIntegral . snd)
$ xs
where
-- | we have to round the 0s up to something
-- | we somewhat arbitrarily choose half of the smallest
-- | normally representable amount
minimumSize :: Rational
minimumSize = 1 % (maxBound' * 2)
remainingSpace = 1 - sum asFracsOfTotalSum
numZeroSized = L.genericLength . filter (== 0) $ asFracsOfTotalSum
f (prevFrac, runningSum) currFrac = (newFrac, newFrac + runningSum) f (prevFrac, runningSum) currFrac = (newFrac, newFrac + runningSum)
where where
newFrac = currFrac * (1 - runningSum) newFrac = currFrac * (1 - runningSum)
twoByteMarkov :: forall k b. (Num b, Integral b, Bounded b, Ord k) => [k] -> M.Map k [(k, b)] relativeCounts :: forall k . Ord k => [(k, Integer)] -> [(k, Rational)]
relativeCounts m = map (Bi.second asFractionOfSum) m
where
asFractionOfSum = ( % sum') . fromIntegral
sum' = fromIntegral . F.sum . map snd $ m
maxBound' :: forall a b . (Num b, Bounded a, Integral a) => P.Proxy a -> b
maxBound' p = fromIntegral (maxBound :: a)
twoByteMarkov :: forall k . (Ord k) => [k] -> M.Map k [(k, Rational)]
twoByteMarkov xs = twoByteMarkov xs =
M.map sizeAsFraction M.map sizeAsFraction
. M.fromListWith (M.unionWith (+)) . M.fromListWith (M.unionWith (+))
@ -72,77 +95,106 @@ twoByteMarkov xs =
. US.tail . US.tail
$ xs $ xs
where where
-- optimization, changes fractions so they represent the fraction
-- of remaining space in the list taken up instead of total space
sizeAsFraction :: M.Map k Integer -> [(k, b)]
sizeAsFraction m =
zip keys
. map discretizeFraction
. fractionOfRemainingSums
$ counts
where
fractionOfRemainingSums xs = zipWith (%) xs . scanr (+) 0 $ xs
asList = L.sortOn (negate . snd) . M.assocs $ m sizeAsFraction :: M.Map k Integer -> [(k, Rational)]
sizeAsFraction = relativeCounts . M.assocs
keys = map fst asList data Chunk a = Chunk
{ location :: SerializedDirs,
start :: a,
length :: Word64
}
deriving (Eq, Ord, Show, Generic, P.NFData)
deriving C.Serialise via C.WineryVariant (Chunk a)
counts = map snd asList -- maxBound = maxBound' (P.Proxy :: Proxy b)
discretizeFraction :: Rational -> b discretizeFraction :: forall b . (Bounded b, Integral b) => Rational -> b
discretizeFraction = floor . fromRational . (* maxBound') discretizeFraction = floor . min maxBound'' . fromRational . (* (succ $ maxBound' (P.Proxy :: P.Proxy b)))
where
maxBound'' = maxBound' (P.Proxy :: P.Proxy b)
maxBound' :: Ratio Integer
maxBound' = fromIntegral (maxBound :: b)
type Word4MapSerialized a = M.Map a ([a], CompressWord4List) type Word4MapSerialized a = M.Map a ([a], CompressWord4List)
type Word4Map a = M.Map a [(a, Word4)] type Word4Map a = M.Map a [(a, Word8)]
-- talkAboutCompressed (Compressed {..}) = (
-- ("markovs (serealized)", BS.length . C.serialiseOnly $ markovs),
-- ("markovs (unserealized)", BS.length . C.serialiseOnly . unserealizeWord4Map $ markovs),
-- ("chunks", BS.length . C.serialiseOnly $ chunks))
newtype Prediction = Prediction {
chances :: [(Word8, Rational)]
}
class CompressionModel a where
createModel :: [Word8] -> a
-- done in reverse
predict :: a -> [Word8] -> Prediction
-- at some point write
-- instance IsoSerialisable a b => C.Serialise b
class C.Serialise b => IsoSerialisable a b where
toSerialisable :: a -> b
fromSerialisable :: b -> a
-- newtype BetterCompressed = BetterCompressed (Compressed Word8 (A.Word4MapSerialized Word8))
-- deriving (Eq, Ord, Show, Generic) -- , C.Serialise, P.NFData)
-- deriving C.Serialise via C.WineryRecord BetterCompressed
data Compressed a mapImplementation = Compressed data Compressed a mapImplementation = Compressed
{ markovs :: mapImplementation, { markovs :: mapImplementation,
chunks :: [Chunk a] chunks :: [Chunk a]
} }
deriving (Eq, Ord, Show, Generic) -- , C.Serialize, P.NFData) deriving (Eq, Ord, Show, Generic, NFData) -- , C.Serialise, P.NFData)
deriving C.Serialise via C.WineryVariant (Compressed a mapImplementation)
deriving instance (Ord a, C.Serialize a) => C.Serialize (Compressed a (Word4MapSerialized a))
data Chunk a = Chunk -- unserealizeWord4Map :: forall a . Show a => Word4MapSerialized a -> Word4Map a
{ location :: Ratio Integer, -- unserealizeWord4Map = M.map unserializedIndividualMap
start :: a, -- where
length :: Int -- unserializedIndividualMap :: ([a], CompressWord4List) -> [(a, Word4)]
} -- unserializedIndividualMap (bytes, sizes) = zip bytes . fromCompressedWord4List $ sizes
deriving (Eq, Ord, Show, Generic, C.Serialize, P.NFData)
unserealizeWord4Map :: Word4MapSerialized a -> Word4Map a -- -- unserealizeWord4Map = M.map (uncurry zip . map (\(bytes, sizes) -> (bytes,) . (++ [maxBound]) . fromCompressedWord4List (Prelude.length bytes) $ sizes))
unserealizeWord4Map = M.map (uncurry zip . Bi.second ((++ [maxBound]) . fromCompressedWord4List))
serealizeWord4Map :: Word4Map a -> Word4MapSerialized a
serealizeWord4Map = M.map (Bi.second (toCompressedWord4List . L.init) . L.unzip) -- serealizeWord4Map :: Word4Map a -> Word4MapSerialized a
-- serealizeWord4Map = M.map (Bi.second toCompressedWord4List . L.unzip)
decompress :: decompress ::
forall a. forall a.
(Integral a, B.FiniteBits a, B.Bits a, Show a, NFData a) => (Integral a, B.FiniteBits a, B.Bits a, Show a, NFData a) =>
Compressed a (Word4MapSerialized a) -> Compressed a (Word4Map a) ->
[a] [a]
decompress (Compressed {..}) = concat . P.runPar . P.parMap decompressChunk $ chunks decompress (Compressed {..}) = concat . P.runPar . P.parMap decompressChunk
$ chunks
where where
rings = M.map (M.fromList . map toDecompressionRing . M.toList . toRing) markovs' rings = M.map (M.fromList . map toDecompressionRing . M.toList . toRing) markovs'
markovs' = unserealizeWord4Map markovs markovs' = -- unserealizeWord4Map
markovs
toDecompressionRing (key, (WordMarkovStats {..})) = (location, (key, size)) toDecompressionRing (key, (WordMarkovStats {..})) = (location, (key, size))
decompressChunk :: Chunk a -> [a] decompressChunk :: Chunk a -> [a]
decompressChunk (Chunk {..}) = map snd . take length . L.iterate' decompress' $ (location, start) decompressChunk (Chunk {..}) = map snd . take (fromIntegral length) . L.iterate' decompress' $ (location', start) -- (fromBS rawLocation, start)
where where
location' = deserialize location
-- decompress' :: (Ratio Integer, a) -> (Ratio Integer, a) -- decompress' :: (Ratio Integer, a) -> (Ratio Integer, a)
decompress' (!loc, !prev) = ((loc - ansLoc) / newSize, newVal) decompress' (!loc, !prevWord) = -- traceShow (newSize) $
((loc - ansLoc) / newSize, newVal)
where where
ansLoc :: Ratio Integer ansLoc :: Ratio Integer
newVal :: a newVal :: a
newSize :: Ratio Integer newSize :: Ratio Integer
(!ansLoc, (!newVal, !newSize)) = My.fromJust . M.lookupLE loc $ (rings M.! prev) (ansLoc, (newVal, newSize)) = My.fromJust . M.lookupLE loc $ (rings M.! prevWord)
sanityCheck :: forall k. (Ord k) => M.Map k (M.Map k Word8) -> [Word8] sanityCheck :: forall k. (Ord k) => M.Map k (M.Map k Word8) -> [Word8]
sanityCheck = map (sum . M.elems) . M.elems sanityCheck = map (sum . M.elems) . M.elems
@ -156,18 +208,31 @@ chunk chunkSize = chunk'
(xs', xs'') = splitAt chunkSize xs (xs', xs'') = splitAt chunkSize xs
chunkLength :: Int chunkLength :: Int
chunkLength = 8000 chunkLength = 4096
-- runPar = id
-- parMap = map
-- compress :: -- compress ::
-- forall a. -- forall a.
-- (Integral a, B.FiniteBits a, B.Bits a, Show a, NFData a) => -- (Integral a, B.FiniteBits a, B.Bits a, Show a, NFData a) =>
-- [a] -> -- [a] ->
-- Compressed a (Word4MapSerialized a) -- Compressed a (Word4MapSerialized a)
compress toCompress = traceShow ((fromIntegral :: Int -> Double) (BS.length . C.encode $ (twoByteMarkovs :: M.Map Word8 [(Word8, Word4)])) / (fromIntegral . BS.length . C.encode . serealizeWord4Map $ (twoByteMarkovs :: M.Map Word8 [(Word8, Word4)]))) $ Compressed (serealizeWord4Map twoByteMarkovs) . P.runPar . P.parMap compressChunk $ unCompressedChunks compress :: (NFData a, Show a, Ord a) => [a] -> Compressed a (Word4Map a)
compress toCompress' = Compressed twoByteMarkovs . P.runPar . P.parMap
(compressChunk)
$ unCompressedChunks
where where
toCompress = toCompress' -- toBurrowsWheeler toCompress'
-- toTraceShow' = (unserealizeWord4Map . serealizeWord4Map $ twoByteMarkovs) == twoByteMarkovs
-- toTraceShow = (fromIntegral :: Int -> Double) (BS.length . C.encode $ (twoByteMarkovs :: M.Map Word8 [(Word8, Word4)])) / (fromIntegral . BS.length . C.encode . serealizeWord4Map $ (twoByteMarkovs :: M.Map Word8 [(Word8, Word4)]))
unCompressedChunks = chunk chunkLength toCompress unCompressedChunks = chunk chunkLength toCompress
twoByteMarkovs = twoByteMarkov toCompress twoByteMarkovs = M.map (map (Bi.second discretizeFraction)) . twoByteMarkov $ toCompress
rings = M.map toRing twoByteMarkovs rings = M.map toRing twoByteMarkovs
@ -175,12 +240,23 @@ compress toCompress = traceShow ((fromIntegral :: Int -> Double) (BS.length . C.
Chunk Chunk
{ location = shortestLocation endStats, { location = shortestLocation endStats,
start = US.head toCompress, start = US.head toCompress,
length = L.length toCompress length = fromIntegral . L.length $ toCompress
} }
where where
pairs = zip toCompress . US.tail $ toCompress pairs = zip toCompress . US.tail $ toCompress
shortestLocation (WordMarkovStats {..}) = simplestBetween location (location + size) shortestLocation (WordMarkovStats {..}) = serialize $ binarySearch location (location + size)
-- shortestLocation (WordMarkovStats {..})
-- -- | simplestBetweenAnswer /= upperBound = simplestBetweenAnswer
-- | otherwise = simplestBetween location $ upperBound - epsilon
-- where
-- simplestBetweenAnswer = simplestBetween location upperBound
-- upperBound = location + size
-- -- | almost entirely arbitrary, picked because it's guaranteed to be smaller than size
-- -- | and should make for a fairly quick subtraction
-- epsilon = 1 % (denominator size)
endStats = pyramidFold addWordMarkovStats . map statsFor $ pairs endStats = pyramidFold addWordMarkovStats . map statsFor $ pairs
@ -194,6 +270,7 @@ compress toCompress = traceShow ((fromIntegral :: Int -> Double) (BS.length . C.
size = prevSize * nextSize size = prevSize * nextSize
} }
pyramidFold :: (a -> a -> a) -> [a] -> a pyramidFold :: (a -> a -> a) -> [a] -> a
pyramidFold f = pyramid pyramidFold f = pyramid
where where
@ -209,6 +286,7 @@ simplestBetween :: Rational -> Rational -> Rational
simplestBetween x y simplestBetween x y
| x == y = x | x == y = x
| x > 0 = simplestBetween' n d n' d' | x > 0 = simplestBetween' n d n' d'
-- | x > 0 = simplestBetween' n d n' d'
| otherwise = 0 % 1 | otherwise = 0 % 1
where where
n = numerator x n = numerator x
@ -226,3 +304,15 @@ simplestBetween x y
nd'' = simplestBetween' d' r' d r nd'' = simplestBetween' d' r' d r
n'' = numerator nd'' n'' = numerator nd''
d'' = denominator nd'' d'' = denominator nd''
binarySearch x y = binarySearch' 0 1
where
acceptLow = min x y
acceptHigh = max x y
binarySearch' searchSpaceLow searchSpaceHigh
| mid < acceptLow = Higher : binarySearch' mid searchSpaceHigh
| mid > acceptHigh = Lower : binarySearch' searchSpaceLow mid
| otherwise = []
where
mid = (searchSpaceLow + searchSpaceHigh) / 2
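
For intuition about binarySearch above: it bisects [0, 1), emitting a Higher or Lower step until the midpoint lands inside the target interval, and the direction list is later decoded back into a rational (see Data.Dirs below). A self-contained sketch of the same idea, with a worked example in the comments:

import Data.Ratio ((%))

data Dir = Higher | Lower deriving Show

-- Bisect [0,1) until the midpoint falls inside [min x y, max x y].
dirsBetween :: Rational -> Rational -> [Dir]
dirsBetween x y = go 0 1
  where
    lo = min x y
    hi = max x y
    go a b
      | mid < lo  = Higher : go mid b
      | mid > hi  = Lower  : go a mid
      | otherwise = []
      where
        mid = (a + b) / 2

-- Decode the directions back into a rational inside the interval.
fromDirs :: [Dir] -> Rational
fromDirs = sum . (1 % 2 :) . zipWith step (iterate (/ 2) (1 % 4))
  where
    step f Higher = f
    step f Lower  = negate f

-- dirsBetween (1 % 5) (1 % 4) == [Lower]
-- fromDirs [Lower]            == 1 % 4, which lies inside [1/5, 1/4]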

View file

@ -0,0 +1,27 @@
-- |
module Compress.BurrowsWheeler where
-- import qualified Data.CircularList as CL
import qualified Relude.Unsafe as U
-- import Compress.BurrowsWheeler (toBurrowsWheeler)
toBurrowsWheeler (x:xs) = map U.head . sortOn (U.tail) . take (pred . length $ xs) . tails $ xs'
where
xs' = toList $ (x :| xs) <> (x :| [])
toCounts :: forall a b . (Eq a, Eq b, Num b, Enum b, Bounded a, Bounded b) => [a] -> [(a, b)]
toCounts = reverse . foldl' f []
where
f [] x = [(x, 0)]
f ((x, count) : xs) newVal
| x == newVal && count == maxBound = (x, 0) : (x, count) : xs
| x == newVal = (x, succ count) : xs
| otherwise = (newVal, 0) : (x, count) : xs
-- toBurrowsWheeler xs = map last . sort . map (findLyndonWord) . take (length xs) . tails $ xs'
-- where
-- xs' = xs ++ xs
-- findLyndonWord (x : xs) = (x :|) . map snd . takeWhile (uncurry (<=)) . zip (x : xs) $ xs
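
The run-length half of this commit is toCounts: the counter records extra repeats, so a count of 0 means the symbol occurred once, and a run longer than maxBound spills into a fresh pair. A standalone sketch with a worked example:

import Data.List (foldl')
import Data.Word (Word8)

-- Run-length counting: (symbol, number of *additional* repeats).
runLengths :: Eq a => [a] -> [(a, Word8)]
runLengths = reverse . foldl' step []
  where
    step [] x = [(x, 0)]
    step ((y, n) : rest) x
      | y == x && n == maxBound = (x, 0) : (y, n) : rest
      | y == x                  = (y, succ n) : rest
      | otherwise               = (x, 0) : (y, n) : rest

-- runLengths "aaabcc" == [('a',2),('b',0),('c',1)]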

View file

@ -20,7 +20,7 @@ import Data.Bits as B
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import Data.Maybe qualified as My import Data.Maybe qualified as My
import Data.PQueue qualified as PQ import Data.PQueue qualified as PQ
import Data.Serialize qualified as C import Codec.Winery qualified as C
import Data.Text qualified as T import Data.Text qualified as T
import Data.Text.IO qualified as TIO import Data.Text.IO qualified as TIO
import Data.Vector.Unboxed qualified as V import Data.Vector.Unboxed qualified as V
@ -30,6 +30,8 @@ import GHC.Generics (Generic)
import Options.Generic qualified as O import Options.Generic qualified as O
import System.Environment qualified as SE import System.Environment qualified as SE
import Data.HuffmanTree import Data.HuffmanTree
import Data.Word4 (CompressWord4List(xs))
import Compress.BurrowsWheeler (toBurrowsWheeler, toCounts)
decompress decompress
:: forall a :: forall a
@ -50,13 +52,27 @@ decompress (TreeDirs treeDirs, tree) = BS.concat . map toByteString <$> decompre
nextLeaf (L : xs) (Node {..}) = nextLeaf xs left nextLeaf (L : xs) (Node {..}) = nextLeaf xs left
nextLeaf (R : xs) (Node {..}) = nextLeaf xs right nextLeaf (R : xs) (Node {..}) = nextLeaf xs right
data BurrowsWheelerCompressed a b = BurrowsWheelerCompressed {
elements :: Compressed SerializedTreeDirs a,
counts :: Compressed SerializedTreeDirs b
}
deriving (Eq, Ord, Generic) -- , C.Serialise, P.NFData)
deriving C.Serialise via C.WineryRecord (BurrowsWheelerCompressed a b)
data Compressed treeDir a = Compressed {
treeDirs :: treeDir,
huffman :: HuffmanTree a
}
deriving (Eq, Ord, Show, Generic) -- , C.Serialise, P.NFData)
deriving C.Serialise via C.WineryRecord (Compressed treeDir a)
compress compress
:: forall a :: forall a
. (Ord a, Integral a, B.FiniteBits a, B.Bits a) . (Ord a, Integral a)
=> BS.ByteString => [a]
-> Maybe (TreeDirs, HuffmanTree a) -> Maybe (Compressed SerializedTreeDirs a)
compress bs = compress dividedByteString =
liftA2 (,) (TreeDirs <$> treeDirections) mergedHuffmanTrees liftA2 Compressed (serializeTreeDirs . TreeDirs <$> treeDirections) mergedHuffmanTrees
where where
treeDirections = concat <$> mapM (treeDirMap M.!?) dividedByteString treeDirections = concat <$> mapM (treeDirMap M.!?) dividedByteString
@ -64,13 +80,19 @@ compress bs =
mergeHuffmanTrees mergeHuffmanTrees
. PQ.fromList . PQ.fromList
. map (uncurry (flip (,)) . Bi.first Leaf) . map (uncurry (flip (,)) . Bi.first Leaf)
. counts . countOccurances
$ dividedByteString $ dividedByteString
treeDirMap :: M.Map a [TreeDir] treeDirMap :: M.Map a [TreeDir]
treeDirMap = My.maybe M.empty findTreeDirections mergedHuffmanTrees treeDirMap = My.maybe M.empty findTreeDirections mergedHuffmanTrees
dividedByteString = toWordsList bs compressWithBurrowsWheeler :: (Integral b, Integral a, FiniteBits a, Bounded b, Bounded a) => ByteString -> Maybe (BurrowsWheelerCompressed a b)
compressWithBurrowsWheeler bs
= liftA2 BurrowsWheelerCompressed
(compress . map fst $ withCounts)
(compress . map snd $ withCounts)
where
withCounts = toCounts . toBurrowsWheeler . toWordsList $ bs
-- testCompression -- testCompression
-- :: forall a -- :: forall a
@ -100,10 +122,13 @@ compress bs =
-- decodeCompressed :: forall a. (Ord a, Integral a, B.FiniteBitsOps a, C.Serialize a) => BS.ByteString -> Either String (Maybe (TreeDirs, HuffmanTree a)) -- decodeCompressed :: forall a. (Ord a, Integral a, B.FiniteBitsOps a, C.Serialize a) => BS.ByteString -> Either String (Maybe (TreeDirs, HuffmanTree a))
-- decodeCompressed = Bi.second (fmap (Bi.first decodeTreeDirs)) . C.decode -- decodeCompressed = Bi.second (fmap (Bi.first decodeTreeDirs)) . C.decode
counts :: (Ord a) => [a] -> [(a, Int)]
counts = M.toList . F.foldl' combiningInsert M.empty -- I've replaced this with a simpler version but it might be slower
where countOccurances :: (Ord a) => [a] -> [(a, Int)]
combiningInsert m key = M.insertWith (+) key 1 m countOccurances = M.toList . M.fromListWith (+) . map (, 1)
-- countOccurances = M.toList . F.foldl' combiningInsert M.empty
-- where
-- combiningInsert m key = M.insertWith (+) key 1 m
divideByteString :: Int -> BS.ByteString -> [BS.ByteString] divideByteString :: Int -> BS.ByteString -> [BS.ByteString]
divideByteString n [] = [] divideByteString n [] = []
@ -113,15 +138,17 @@ divideByteString n bs = x : divideByteString n xs
compressionRatioFor compressionRatioFor
:: forall a :: forall a b
. (Integral a, B.FiniteBits a, B.Bits a, Ord a, C.Serialize a) . (Integral a, B.FiniteBits a, B.Bits a, Ord a, C.Serialise a, Bounded a,
Integral b, Ord b, C.Serialise b, Bounded b)
=> D.Proxy a => D.Proxy a
-> D.Proxy b
-> BS.ByteString -> BS.ByteString
-> Double -> Double
compressionRatioFor proxy bs = compressionRatioFor _ _ bs =
(/ (fromIntegral . BS.length $ bs)) (/ (fromIntegral . BS.length $ bs))
. fromIntegral . fromIntegral
. BS.length . BS.length
. C.encode . C.serialise
. (compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanTree a)) . (compressWithBurrowsWheeler :: ByteString -> Maybe (BurrowsWheelerCompressed a b))
$ bs $ bs
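
A quick standalone check of what the new counting helper returns (Data.Map hands the keys back in ascending order):

import qualified Data.Map.Strict as M

-- Count occurrences of each element; the result is sorted by key.
occurrences :: Ord a => [a] -> [(a, Int)]
occurrences = M.toList . M.fromListWith (+) . map (\x -> (x, 1))

-- occurrences "abracadabra" == [('a',5),('b',2),('c',1),('d',1),('r',2)]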

View file

@ -0,0 +1,97 @@
module Compress.LengthDistancePairs where
import qualified Data.ByteString as BS
import Data.Word
import qualified Data.Foldable as F
import qualified Relude.Unsafe as US
import qualified Debug.Trace as D
import Compress.Arithmetic (maxBound')
import qualified Data.Proxy as P
import qualified Codec.Winery as C
import qualified Relude.Unsafe as U
data LengthDistancePair a = Literal Word8 | Repeat {
distance :: a,
matchLength :: Word8
}
deriving (Eq, Ord, Show, Generic, NFData)
deriving C.Serialise via (C.WineryVariant (LengthDistancePair a))
ldpLength :: LengthDistancePair a -> Int
ldpLength (Literal _) = 1
ldpLength (Repeat {..}) = fromIntegral matchLength
-- | normally, we'd order `previous` in reverse, but finding
-- | the longest-matching-subsequence starting at `next`
-- | doesn't let us.
-- | The linked list doesn't really lend itself to the problem
data Cursor = Cursor {
previous :: [Word8],
next :: [Word8]
} deriving (Eq, Ord, Show, Generic)
toCursor :: BS.ByteString -> Cursor
toCursor bs= Cursor {
previous = [],
next = BS.unpack bs
}
word8Max :: Int
word8Max = fromIntegral (maxBound :: Word8)
cutTo :: Int -> [a] -> [a]
cutTo newLength xs = drop (max 0 . flip (-) newLength . length $ xs) xs
decode :: forall a . Integral a => [LengthDistancePair a] -> [Word8]
decode = foldl' decodeIndividual []
where
decodeIndividual xs (Literal x) = xs ++ [x]
decodeIndividual xs (Repeat {..})
= (++) xs
. take (fromIntegral matchLength)
. drop (F.length xs - fromIntegral distance)
$ xs
encode :: forall a . (Integral a, Bounded a) => [Word8] -> [LengthDistancePair a]
encode xs = encode' xs xs 0
where
encode' :: [Word8] -> [Word8] -> Int -> [LengthDistancePair a]
encode' predictFrom [] n = []
encode' predictFrom next 0 = []
encode' predictFrom next distance = match : encode' (drop (maxLength - newDistance) predictFrom) (drop matchLength next) newDistance
where
(match, matchLength)
= F.maximumBy (comparing snd)
. map (\x -> (x, ldpLength x))
. ( Literal (U.head next) : )
. zipWith Repeat (reverse [1 .. (fromIntegral distance)])
. map lengthMatchWithRepeat . tails
. take distance
$ predictFrom
maxLength = fromIntegral (minBound :: a)
newDistance = max maxLength . (+ matchLength) $ distance
lengthMatchWithRepeat cyclexs' = zipWithCycle' cyclexs' next
where
zipWithCycle' [] ys = zipWithCycle' cyclexs' ys
zipWithCycle' (x:xs) (y:ys) | x == y = 1 + zipWithCycle' xs ys
zipWithCycle' _ _ = 0
-- lengthMatchWithRepeat candidate
-- = genericLength
-- . takeWhile (uncurry (==)) . zip (cycle candidate)
-- $ next
-- fasterEncode bs = encode' start start 0
-- where
-- encode' toPredict predictFrom maxDistance = (take diff previous)
-- start = BS.unpack bs take
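
To make the Repeat semantics concrete: distance counts back from the end of the output produced so far, and matchLength bytes are copied forward from that point. A standalone sketch of the decoding step, assuming matchLength <= distance (no overlapping copy):

import Data.List (foldl')
import Data.Word (Word8)

data Token = Literal Word8 | Repeat { distance :: Int, matchLength :: Int }

-- Replay tokens left to right; each Repeat copies from the already-decoded output.
decodeTokens :: [Token] -> [Word8]
decodeTokens = foldl' step []
  where
    step out (Literal x)  = out ++ [x]
    step out (Repeat d n) = out ++ (take n . drop (length out - d) $ out)

-- decodeTokens [Literal 1, Literal 2, Literal 3, Repeat 3 3] == [1,2,3,1,2,3]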

View file

@ -17,9 +17,9 @@ import Data.PQueue qualified as PQ
import Debug.Trace qualified as D import Debug.Trace qualified as D
import Debug.Trace qualified as T import Debug.Trace qualified as T
import GHC.Generics import GHC.Generics
import qualified Data.Serialize as C
import Data.FiniteBit import Data.FiniteBit
import qualified Data.FiniteBit as B import qualified Data.FiniteBit as B
import qualified Codec.Winery as C
data Tree a = (Ord a) => data Tree a = (Ord a) =>
Tree Tree
@ -28,7 +28,8 @@ data Tree a = (Ord a) =>
newtype HuffmanPrefixTree a b = HuffmanPrefixTree newtype HuffmanPrefixTree a b = HuffmanPrefixTree
{ inner :: M.Map a (HuffmanTree b) { inner :: M.Map a (HuffmanTree b)
} deriving (Eq, Ord, Show, Generic, C.Serialize) } deriving (Eq, Ord, Show, Generic)
deriving C.Serialise via C.WineryRecord (HuffmanPrefixTree a b)
finiteBitTupleUncons :: finiteBitTupleUncons ::
forall a b. forall a b.
@ -85,7 +86,7 @@ compress bs = (TreeDirs $ concatMap treeDirsFor asFiniteBitPairs, tree, ) <$> in
tree = toHuffmanTree . nGramCounts $ bs tree = toHuffmanTree . nGramCounts $ bs
treeDirMap :: M.Map a (M.Map b [TreeDir]) treeDirMap :: M.Map a (M.Map b [TreeDir])
treeDirMap = M.map HT.findTreeDirections . Compress.PrefixTree.inner $ tree treeDirMap = M.map HT.findTreeDirections tree.inner
initial :: Maybe a initial :: Maybe a
initial = fst <$> finiteBitUncons bs initial = fst <$> finiteBitUncons bs
@ -96,37 +97,6 @@ compress bs = (TreeDirs $ concatMap treeDirsFor asFiniteBitPairs, tree, ) <$> in
treeDirsFor :: (a, b) -> [TreeDir] treeDirsFor :: (a, b) -> [TreeDir]
treeDirsFor (a, b) = (treeDirMap M.! a) M.! b treeDirsFor (a, b) = (treeDirMap M.! a) M.! b
-- | all (M.null . children) . M.elems . children $ tree =
-- fmap End
-- . HT.fromList
-- . map (\x -> (prefixCounts x, x))
-- . M.keys
-- . children
-- $ tree
-- | otherwise =
-- Just
-- . Layer
-- . M.mapMaybeWithKey (\key val -> toHuffmanTree' (key : soFar) val)
-- . children
-- $ tree
-- where
-- prefixCounts :: a -> Int
-- prefixCounts x =
-- fromIntegral
-- . sum
-- . M.elems
-- . M.filterWithKey (\key val -> L.isPrefixOf (reverse . (x :) $ soFar) key)
-- $ nGrams
-- toHuffmanTree :: Tree a -> p1 -> HuffmanTree a
-- toHuffmanTree :: forall a . Tree a -> M.Map [a] Word -> HuffmanTree [a]
-- toHuffmanTree (Tree {..}) nGrams soFar | M.size children == 1 = Leaf . map (reverse . (: soFar)) . M.keys $ children
-- toHuffmanTree (Tree {..}) nGrams soFar = Leaf . map (reverse . (: soFar)) . M.keys $ children
-- where
-- sorted = L.sortBy (prefixCounts . fst) . M.toList $ children
nGramCounts :: nGramCounts ::
forall a b. forall a b.
(Integral a, B.FiniteBits a, B.Bits a, Integral b, B.FiniteBits b, B.Bits b) => (Integral a, B.FiniteBits a, B.Bits a, Integral b, B.FiniteBits b, B.Bits b) =>
@ -155,34 +125,5 @@ fromSingleList (x : xs) = Tree . M.singleton x . fromSingleList $ xs
fromList :: (Ord a) => [[a]] -> Tree a fromList :: (Ord a) => [[a]] -> Tree a
fromList = F.foldl' merge Compress.PrefixTree.empty . map fromSingleList fromList = F.foldl' merge Compress.PrefixTree.empty . map fromSingleList
-- insert :: Ord a => Tree a -> [a] -> Tree a
-- insert (Tree {..}) (x:xs) =
merge :: Tree a -> Tree a -> Tree a merge :: Tree a -> Tree a -> Tree a
merge (Tree children0) (Tree children1) = Tree $ M.unionWith merge children0 children1 merge (Tree children0) (Tree children1) = Tree $ M.unionWith merge children0 children1
-- deriving instance Eq (Tree a)
-- deriving instance Ord (Tree a)
-- deriving instance (Show a) => Show (Tree a)
-- empty :: (Ord a) => Tree a
-- empty = Tree M.empty
-- fromList :: (Ord a, F.Foldable t) => t [a] -> Tree a
-- fromList = F.foldl' insert empty
-- insert :: Tree a -> [a] -> Tree a
-- insert (Tree {..}) [] = Tree M.empty
-- insert (Tree {..}) (x : xs) =
-- Tree
-- . flip (M.insert x) children
-- . flip insert xs
-- . My.fromMaybe empty
-- . M.lookup x
-- $ children
-- lookup :: (Ord a) => [a] -> Tree a -> Bool
-- lookup [] = const True
-- lookup (x : xs) = maybe False (Compress.PrefixTree.lookup xs) . M.lookup x . children

View file

@ -0,0 +1,8 @@
module Compress.WordMarkovStats where
data WordMarkovStats = WordMarkovStats
{ location :: Ratio Integer,
size :: Ratio Integer
}
deriving (Show, Eq, Ord, Generic)
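
WordMarkovStats is a sub-interval of [0, 1): size is a symbol's relative frequency and location is the running sum of the sizes before it, which is what toRing builds in Compress.Arithmetic. A standalone sketch of that layout (ignoring the zero-count bump toRing also applies):

import Data.List (scanl')
import Data.Ratio ((%))

-- Assign each symbol a (location, size) sub-interval of [0,1) from raw counts.
toIntervals :: [(k, Integer)] -> [(k, (Rational, Rational))]
toIntervals xs = zip (map fst xs) (zip (scanl' (+) 0 sizes) sizes)
  where
    total = sum (map snd xs)
    sizes = map ((% total) . snd) xs

-- toIntervals [('a',3),('b',1)] == [('a',(0 % 1,3 % 4)),('b',(3 % 4,1 % 4))]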

View file

@ -0,0 +1,62 @@
module Data.ArbitraryPrecisionFloatingPoint where
import Compress.Arithmetic (binarySearch)
import Data.Function.Pointless
data FloatingPoint = FloatingPoint
{ exponent :: Integer,
significand :: Integer
}
deriving (Eq)
instance Num FloatingPoint where
(+) :: FloatingPoint -> FloatingPoint -> FloatingPoint
(+) = onEqualExponent (+)
(*) :: FloatingPoint -> FloatingPoint -> FloatingPoint
(*) n0 n1 = correctExponent $ FloatingPoint (n0.exponent + n1.exponent) (n0.significand * n1.significand)
abs :: FloatingPoint -> FloatingPoint
abs (FloatingPoint {..}) = FloatingPoint exponent (abs significand)
negate :: FloatingPoint -> FloatingPoint
negate (FloatingPoint {..}) = FloatingPoint exponent (negate significand)
fromInteger :: Integer -> FloatingPoint
fromInteger = correctExponent . FloatingPoint 0
signum :: FloatingPoint -> FloatingPoint
signum = FloatingPoint 0 . signum . (.significand)
onEqualExponent :: (Integer -> Integer -> Integer) -> FloatingPoint -> FloatingPoint -> FloatingPoint
onEqualExponent f n0 n1 = correctExponent . FloatingPoint n0'.exponent $ on f (.significand) n0' n1'
where
(n0', n1') = equalizeExponents n0 n1
-- | significand shouldn't have trailing 0s in binary
-- | exponent should be one larger instead
-- | this function corrects until the significand isn't divisible by 2
correctExponent :: FloatingPoint -> FloatingPoint
correctExponent (FloatingPoint {..})
| significand /= 0 && remainder == 0 = correctExponent $ FloatingPoint (succ exponent) quotient
| otherwise = (FloatingPoint {..})
where
(quotient, remainder) = divMod significand 2
equalizeExponents :: FloatingPoint -> FloatingPoint -> (FloatingPoint, FloatingPoint)
equalizeExponents n0 n1 =
( withDiff n0 exponent0Diff,
withDiff n1 exponent1Diff
)
where
withDiff n exponentDiff = FloatingPoint (n.exponent + exponentDiff) (n.significand * (2 ^ exponentDiff))
signedExponentDiff = n0.exponent - n1.exponent
(exponent0Diff, exponent1Diff)
| signedExponentDiff < 0 = (0, abs signedExponentDiff)
| otherwise = (signedExponentDiff, 0)
instance Ord FloatingPoint where
compare :: FloatingPoint -> FloatingPoint -> Ordering
compare = uncurry (on compare (.significand)) .: equalizeExponents

52
src/Data/Dirs.hs Normal file
View file

@ -0,0 +1,52 @@
-- |
module Data.Dirs where
import qualified Data.ByteString as BS
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector as V
import qualified Data.Bit as VB
import qualified Data.Bit as BV
import Data.Ratio
import Control.Monad.Par qualified as P
import Codec.Winery qualified as C
import qualified Relude as R
data Dir = Higher | Lower deriving Show
data SerializedDirs = SerializedDirs {
bs :: BS.ByteString,
length :: Int
} deriving (Generic, Show, Ord, Eq, P.NFData, C.Serialise)
-- toBS :: [Dir] -> SerializedDirs
serialize dirs = SerializedDirs {..}
where
bs = VB.cloneToByteString . VU.fromList . map dirToBit $ dirs
length = R.length dirs
deserialize :: SerializedDirs -> Rational
deserialize (SerializedDirs {..}) = fromDirs . map bitToDir . VU.toList $ asVec
where
asVec = VU.take length . VB.cloneFromByteString $ bs
addNewBit x Higher = (x * 2) + 1
addNewBit x Lower = x * 2
fromDirs = sum . (0.5 :) . zipWith (*) fractions . map dirToNum
where
addNewBit x Higher = (x * 2) + 1
addNewBit x Lower = x * 2
dirToNum Higher = 1
dirToNum Lower = -1
bitToDir (BV.Bit True ) = Higher
bitToDir (BV.Bit False) = Lower
dirToBit Higher = BV.Bit True
dirToBit Lower = BV.Bit False
fractions :: [Rational]
fractions = iterate ( / 2) (1 % 4)
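
serialize packs one bit per direction with bitvec and keeps the element count, because cloneToByteString pads up to whole bytes. A round-trip sketch of that packing (standalone, using the bitvec package's Data.Bit):

import qualified Data.Bit as B
import qualified Data.ByteString as BS
import qualified Data.Vector.Unboxed as VU

data Dir = Higher | Lower deriving (Eq, Show)

-- One bit per direction, plus the true length (the ByteString is padded to bytes).
pack :: [Dir] -> (BS.ByteString, Int)
pack dirs = (B.cloneToByteString (VU.fromList (map (B.Bit . (== Higher)) dirs)), length dirs)

unpack :: (BS.ByteString, Int) -> [Dir]
unpack (bs, n) =
  map (\b -> if B.unBit b then Higher else Lower)
    . VU.toList . VU.take n . B.cloneFromByteString $ bs

-- unpack (pack [Higher, Lower, Lower, Higher]) == [Higher, Lower, Lower, Higher]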

View file

@ -9,6 +9,9 @@ import Data.Vector.Unboxed qualified as V
import Data.Bit qualified as B import Data.Bit qualified as B
import Data.Bit qualified as BV import Data.Bit qualified as BV
import Data.ByteString qualified as BS import Data.ByteString qualified as BS
import qualified Codec.Winery as C
import Data.FiniteBit (toByteString)
import Codec.Winery (serialise)
data HuffmanTree a data HuffmanTree a
= Leaf a = Leaf a
@ -16,24 +19,29 @@ data HuffmanTree a
{ left :: HuffmanTree a, { left :: HuffmanTree a,
right :: HuffmanTree a right :: HuffmanTree a
} }
deriving (Eq, Ord, Show, Generic, C.Serialize, Functor) deriving (Eq, Ord, Show, Generic, Functor)
deriving C.Serialise via (C.WineryVariant (HuffmanTree a))
-- here so we can define our own Serialize instance -- here so we can define our own Serialize instance
newtype TreeDirs = TreeDirs { newtype TreeDirs = TreeDirs {
inner :: [TreeDir] inner :: [TreeDir]
} deriving (Eq, Ord, Show) } deriving (Eq, Ord, Show)
-- deriving C.Serialize via C.WineryVariant TreeDirs
data TreeDir = L | R deriving (Eq, Ord, Show) data TreeDir = L | R deriving (Eq, Ord, Show)
instance C.Serialize TreeDirs where newtype SerializedTreeDirs = SerializedTreeDirs
put :: C.Putter TreeDirs {
put = C.put . Bi.first BV.cloneToByteString . (\x -> (x, V.length x)) . V.fromList . map (BV.Bit . (== R)) . (inner :: TreeDirs -> [TreeDir]) inner :: BS.ByteString
} deriving (Generic, Eq, Ord)
deriving C.Serialise via (C.WineryRecord SerializedTreeDirs)
get :: C.Get TreeDirs serializeTreeDirs (TreeDirs inner) = SerializedTreeDirs . fst . Bi.first BV.cloneToByteString . (\x -> (x, V.length x)) . V.fromList . map (BV.Bit . (== R)) $ inner
get = do
(bs, len) <- C.get unserializeTreeDirs (SerializedTreeDirs bs) = TreeDirs . map (\x -> if BV.unBit x then R else L) . V.toList . V.take len . BV.cloneFromByteString $ bs
pure . TreeDirs . map (\x -> if BV.unBit x then R else L) . V.toList . V.take len . BV.cloneFromByteString $ bs where
len = BS.length bs
lookup :: lookup ::
forall a. forall a.

View file

@ -32,10 +32,10 @@ maxView :: PQueue a -> Maybe (Int, (a, PQueue a))
maxView = abstractView IM.lookupMax maxView = abstractView IM.lookupMax
insert :: Int -> p -> PQueue p -> PQueue p insert :: Int -> p -> PQueue p -> PQueue p
insert key a = PQueue . IM.insertWith NE.append key (NE.singleton a) . toMap insert key a = PQueue . IM.insertWith NE.append key (NE.singleton a) . (.toMap)
elems :: PQueue a -> [a] elems :: PQueue a -> [a]
elems = concatMap NE.toList . IM.elems . toMap elems = concatMap NE.toList . IM.elems . (.toMap)
keys :: PQueue a -> [IM.Key] keys :: PQueue a -> [IM.Key]
keys = IM.keys . toMap keys = IM.keys . (.toMap)

View file

@ -1,46 +1,62 @@
-- | note to myself. Something's wrong here.
-- | it doesn't work properly when fed into Compress.Arithmetic
module Data.Word4 where module Data.Word4 where
import Data.Bits import Data.Bits
import Data.Word import Data.Word
import qualified Data.Serialize as C import qualified Codec.Winery as C
import qualified Data.Map as M import qualified Data.Map as M
import qualified Text.Show as S
import qualified Relude.Unsafe as U
import Codec.Winery (WineryRecord)
-- only use lower 4 bits -- only use lower 4 bits
newtype Word4 = Word4 { newtype Word4 = Word4 {
inner :: Word8 inner :: Word8
} deriving (Eq, Ord, Show, Enum, Generic, C.Serialize) } deriving (Eq, Ord, Enum, Generic)
deriving C.Serialise via C.WineryRecord Word4
instance S.Show Word4 where
show = show . (.inner)
map2 :: (Word8 -> Word8 -> Word8) -> Word4 -> Word4 -> Word4 map2 :: (Word8 -> Word8 -> Word8) -> Word4 -> Word4 -> Word4
map2 f (Word4 a) (Word4 b) = Word4 $ f a b map2 f (Word4 a) (Word4 b) = Word4 $ f a b
data CompressWord4List = CompressWord4List newtype CompressWord4List = CompressWord4List
{ xs :: [Word8], { xs :: [Word8]
last :: Maybe Word8 -- in lower 4 bits } deriving (Eq, Ord, Show, Generic)
} deriving (Eq, Ord, Generic, Show, C.Serialize, NFData) deriving C.Serialise via C.WineryVariant CompressWord4List
bitwiseAnd a b = getAnd $ And a <> And b bitwiseAnd a b = getAnd $ And a <> And b
bitwiseOr a b = getIor $ Ior a <> Ior b bitwiseOr a b = getIor $ Ior a <> Ior b
toCompressedWord4List [] = CompressWord4List [] Nothing
toCompressedWord4List [Word4 x] = CompressWord4List [] (Just x)
toCompressedWord4List ((Word4 x) : (Word4 x') : xs) =
CompressWord4List
{ xs = headByte : xs',
last = last'
}
where
headByte = bitwiseOr (bitwiseAnd 0xf0 (x .<<. 4)) (bitwiseAnd 0x0f x')
(CompressWord4List xs' last') = toCompressedWord4List xs toCompressedWord4List = toCompressedWord4List' . U.init
where
toCompressedWord4List' [] = CompressWord4List []
toCompressedWord4List' [Word4 x] = CompressWord4List [x .<<. 4]
toCompressedWord4List' ((Word4 x) : (Word4 x') : xs) =
CompressWord4List
{ xs = headByte : xs'
}
where
headByte = bitwiseOr (bitwiseAnd 0xf0 (x .<<. 4)) (bitwiseAnd 0x0f x')
(CompressWord4List xs') = toCompressedWord4List' xs
fromCompressedWord4List :: CompressWord4List -> [Word4] fromCompressedWord4List :: CompressWord4List -> [Word4]
fromCompressedWord4List (CompressWord4List [] Nothing) = [] fromCompressedWord4List = fromCompressedWord4List' . (.xs)
fromCompressedWord4List (CompressWord4List [] (Just a)) = [word4 a] where
fromCompressedWord4List (CompressWord4List {xs = (x : xs), ..}) = fromCompressedWord4List' :: [Word8] -> [Word4]
Word4 ((bitwiseAnd 0xf0 x) .>>. 4) fromCompressedWord4List' [] = []
: word4 x fromCompressedWord4List' (x : xs) =
: fromCompressedWord4List (CompressWord4List xs last) word4 (x .>>. 4)
: word4 x
: fromCompressedWord4List' xs
-- instance Show Word4 where -- instance Show Word4 where
-- show :: Word4 -> String -- show :: Word4 -> String
@ -48,10 +64,10 @@ fromCompressedWord4List (CompressWord4List {xs = (x : xs), ..}) =
instance Bounded Word4 where instance Bounded Word4 where
minBound = word4 0 minBound = word4 0
maxBound = word4 0xf maxBound = word4 15
instance Real Word4 where instance Real Word4 where
toRational = toRational . inner toRational = toRational . (.inner)
instance Integral Word4 where instance Integral Word4 where
quot = map2 quot `on` clean quot = map2 quot `on` clean
@ -66,9 +82,9 @@ instance Integral Word4 where
divMod (Word4 a) (Word4 b) = (word4 a', word4 b') divMod (Word4 a) (Word4 b) = (word4 a', word4 b')
where where
(a', b') = divMod a b (a', b') = divMod a b
toInteger = toInteger . inner . clean toInteger = toInteger . (.inner) . clean
clean (Word4 a) = Word4 . bitwiseAnd 0xf $ a clean (Word4 a) = Word4 . bitwiseAnd 0x0f $ a
word4 = clean . Word4 word4 = clean . Word4
instance Num Word4 where instance Num Word4 where
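
The Word4 list compression is nibble packing: two 4-bit values share one byte, high nibble first, and an odd trailing value keeps the high nibble with a zero pad, so the round trip can come back one nibble longer. A standalone sketch:

import Data.Bits (shiftL, shiftR, (.&.), (.|.))
import Data.Word (Word8)

-- Pack values < 16 two-per-byte, high nibble first.
packNibbles :: [Word8] -> [Word8]
packNibbles (a : b : rest) = (((a .&. 0x0f) `shiftL` 4) .|. (b .&. 0x0f)) : packNibbles rest
packNibbles [a]            = [(a .&. 0x0f) `shiftL` 4]
packNibbles []             = []

unpackNibbles :: [Word8] -> [Word8]
unpackNibbles = concatMap (\w -> [w `shiftR` 4, w .&. 0x0f])

-- packNibbles [1, 2, 3, 4]              == [0x12, 0x34]
-- unpackNibbles (packNibbles [1, 2, 3]) == [1, 2, 3, 0]   (note the pad nibble)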

View file

@ -1,71 +0,0 @@
module Main where
import Compress.Huffman
import Compress.Huffman qualified as HT
import Compress.PrefixTree (HuffmanPrefixTree (HuffmanPrefixTree))
import Compress.PrefixTree qualified as PT
import Compress.Arithmetic qualified as A
import Data.Bifunctor qualified as Bi
import Data.Bit qualified as B
import Data.Bits (Bits ((.|.)))
import Data.Bits qualified as B
import Data.ByteString qualified as BS
import Data.HuffmanTree
import Data.Proxy qualified as P
import Data.Serialize qualified as C
import Data.Word
import GHC.Generics (Generic)
import Options.Generic qualified as O
import qualified Data.FiniteBit as FB
data CompressOrDecompress = Compress | Decompress deriving (Show, Generic, O.ParseField, O.ParseFields, O.ParseRecord, Read)
data CompressionStrategy = Huffman | MarkovHuffman deriving (Show, Generic, O.ParseField, O.ParseFields, O.ParseRecord, Read)
data CLIOpts = CLIOpts
{ task :: CompressOrDecompress,
strategy :: CompressionStrategy
}
deriving (Show, Generic, O.ParseRecord)
applyCompressionOptions ::
forall a.
(Integral a, B.Bits a, B.FiniteBits a, Ord a, C.Serialize a) =>
P.Proxy a ->
CLIOpts ->
BS.ByteString ->
BS.ByteString
applyCompressionOptions _ (CLIOpts Compress Huffman) f =
C.encode . (compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanTree a)) $ f
applyCompressionOptions _ (CLIOpts Compress MarkovHuffman) f =
C.encode . (PT.compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanPrefixTree a a, a)) $ f
applyCompressionOptions _ (CLIOpts Decompress Huffman) f =
handleError $ Bi.second decompress . (C.decode :: BS.ByteString -> Either String (TreeDirs, HuffmanTree a)) $ f
applyCompressionOptions _ (CLIOpts Decompress MarkovHuffman) f =
handleError $ Bi.second PT.decompress . (C.decode :: BS.ByteString -> Either String (TreeDirs, HuffmanPrefixTree a a, a)) $ f
handleError (Right (Just bs)) = bs
handleError _ = []
main :: IO ()
main = do
-- cliOpts :: CLIOpts <- O.getRecord "compress/decompress & strategy"
f <- BS.getContents
let fAsWords = (FB.toWordsList :: BS.ByteString -> [ Word8 ]) $ f
let compressedUnencoded :: (A.Compressed Word8 (A.Word4MapSerialized Word8)) = (A.compress fAsWords)
let compressed = C.encode compressedUnencoded
print ("compression ratio", (fromIntegral . BS.length $ compressed) / (fromIntegral . BS.length $ f))
print ("works?", (A.decompress compressedUnencoded) == fAsWords)
-- let f = "hello tehre"
-- f <- BS.readFile "pg64317.txt"
-- let (compressed :: Maybe (TreeDirs, PT.HuffmanPrefixTree Word8 Word8, Word8)) = PT.compress f
-- print $ BS.length . C.encode $ compressed
-- print $ BS.length . C.encode . (compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanTree Word8)) $ f
-- print $ BS.length . C.encode . (compress :: BS.ByteString -> Maybe (TreeDirs, HuffmanTree Word16)) $ f
-- BS.writeFile "outin.txt" decompressed
-- print (decompressed, f)
-- print $ BS.length decompressed
-- print $ BS.length f
-- print (decompressed == f)
-- BS.putStr . applyCompressionOptions (P.Proxy :: P.Proxy Word16) cliOpts $ f

92
test/Test.hs Normal file
View file

@ -0,0 +1,92 @@
import Compress.Arithmetic (twoByteMarkov)
import Compress.Arithmetic qualified as A
import Compress.LengthDistancePairs as LDP
import Compress.WordMarkovStats as WM
import Data.Bifunctor qualified as Bi
import Data.ByteString qualified as BS
import Data.Dirs as D
import Data.Map.Strict qualified as M
import Data.Ratio
import Test.Falsify.Generator qualified as Gen
import Test.Falsify.Predicate as P
import Test.Falsify.Property qualified as P
import Test.Falsify.Range qualified as Range
import Test.Tasty
import Test.Tasty.Falsify as F
import Test.Tasty.HUnit
-- import qualified GHC.Enum as B
main :: IO ()
main = do
toCompress <- -- BS.take 12000 <$>
BS.readFile "pg64317.txt"
defaultMain (tests toCompress)
tests toCompress =
testGroup
"falsify"
[ testGroup
"Arithmetic"
[ testCase "works" $
A.twoByteMarkov ([0, 1, 0, 1, 0, 1, 0, 1, 0, 9] :: [Word8])
@?= M.fromList [(0, [(1, 0.8), (9, 0.2)]), (1, [(0, 1)])],
testCase "relativeCounts works as expected with one param" $
A.relativeCounts [(0, 30)]
@?= [(0, 1)],
testCase "relativeCounts works as expected with lots of params" $
A.relativeCounts [(0, 30), (1, 20)]
@?= [(0, 30 % 50), (1, 20 % 50)],
testCase "toRing locs all less than 1" $
assertBool "larger than one" $
all (all ((<= 1) . (.location))) toCompressRing,
testCase "toRing sizes all add up to 1" $
assertBool "larger than 1" $
all ((== 1) . sum . map (.size)) toCompressRing,
testCase "toRing gives no zero sizes" $
assertBool "== 0" $
all (all ((/= 0) . (.size))) toCompressRing,
F.testProperty "binary search" propBinarySearchWithinBounds,
F.testProperty "compress and decompress isomorphism" (propCompressDecompressIsomorphism (A.decompress . A.compress))
]
-- testGroup
-- "LengthDistancePair"
-- [ F.testProperty "compress and decompress isomorphism" $ (propCompressDecompressIsomorphism (LDP.decode . (LDP.encode :: [Word8] -> [LDP.LengthDistancePair Word32])))
-- ]
]
where
toCompressRing =
map (M.elems . A.toRing . map (Bi.second (A.discretizeFraction :: Rational -> Word8)))
. M.elems
. A.twoByteMarkov
. BS.unpack
$ toCompress
wordMaxBound :: Integer
wordMaxBound = fromIntegral (maxBound :: Word)
genProperFraction = F.gen . fmap ((% wordMaxBound) . fromIntegral) . Gen.inRange $ (Range.between (0, maxBound :: Word))
-- propBinarySearchWithinBounds :: Property' String ()
propBinarySearchWithinBounds = do
bound1 <- genProperFraction
epsilon' <- genProperFraction
let epsilon = max (1 % (fromIntegral (maxBound :: Word))) . (* 0.1) $ bound1 * epsilon'
let bound2 = if bound1 + epsilon < 1 then bound1 + epsilon else bound1 - epsilon
-- let bound2 = min 1 $ bound1 + epsilon
let iso = D.deserialize . D.serialize $ A.binarySearch bound1 bound2
P.assert $ P.le .$ ("minimum", min bound1 bound2) .$ ("iso", iso)
P.assert $ P.ge .$ ("maximum", max bound1 bound2) .$ ("iso", iso)
-- P.assert $ Range.between (min bound1 bound2) (max bound1 bound2) .$ (A.fromDirs $ A.binarySearch bound1 bound2)
-- propCompressDecompressIsomorphism :: F.Property ()
-- propCompressDecompressIsomorphism :: Property' String ()
propCompressDecompressIsomorphism :: ([Word8] -> [Word8]) -> Property' String ()
propCompressDecompressIsomorphism iso = do
(xs :: [Word8]) <-
F.gen $ Gen.list (Range.between (4, 5000)) intGen
P.assert $ eq .$ ("xs", xs) .$ ("changed", iso $ xs)
where
intGen :: F.Gen Word8
intGen = Gen.inRange $ Range.between (0, fromIntegral (maxBound :: Word8))