module Data.Compression.Huffman
  ( HuffmanTree(..)
  , Bit(..)
  , Code

  , huffman
  , huffmanSorted
  , codewords
  , ppCode
  ) where

import Data.List (intercalate)
import Control.Arrow (first,second)
import qualified Data.PriorityQueue.FingerTree as PQ
import Data.Sequence as S hiding (Empty)

data Bit = Zero | One

instance Show Bit where
  show :: Bit -> String
show Bit
Zero = String
"0"
  show Bit
One  = String
"1"

data HuffmanTree a = Empty
                   | Node (HuffmanTree a) (HuffmanTree a)
                   | Leaf a
  deriving Int -> HuffmanTree a -> ShowS
[HuffmanTree a] -> ShowS
HuffmanTree a -> String
(Int -> HuffmanTree a -> ShowS)
-> (HuffmanTree a -> String)
-> ([HuffmanTree a] -> ShowS)
-> Show (HuffmanTree a)
forall a. Show a => Int -> HuffmanTree a -> ShowS
forall a. Show a => [HuffmanTree a] -> ShowS
forall a. Show a => HuffmanTree a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall a. Show a => Int -> HuffmanTree a -> ShowS
showsPrec :: Int -> HuffmanTree a -> ShowS
$cshow :: forall a. Show a => HuffmanTree a -> String
show :: HuffmanTree a -> String
$cshowList :: forall a. Show a => [HuffmanTree a] -> ShowS
showList :: [HuffmanTree a] -> ShowS
Show

type Code a = [(a,[Bit])]

-- Simple implementation, O(n log n).
huffman :: (Ord w, Num w) => [(a,w)] -> HuffmanTree a
huffman :: forall w a. (Ord w, Num w) => [(a, w)] -> HuffmanTree a
huffman = PQueue w (HuffmanTree a) -> HuffmanTree a
forall {k} {a}.
(Ord k, Num k) =>
PQueue k (HuffmanTree a) -> HuffmanTree a
build (PQueue w (HuffmanTree a) -> HuffmanTree a)
-> ([(a, w)] -> PQueue w (HuffmanTree a))
-> [(a, w)]
-> HuffmanTree a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(a, w)] -> PQueue w (HuffmanTree a)
forall {a}. [(a, w)] -> PQueue w (HuffmanTree a)
prepare
  where
   prepare :: [(a, w)] -> PQueue w (HuffmanTree a)
prepare  = [(w, HuffmanTree a)] -> PQueue w (HuffmanTree a)
forall k v. Ord k => [(k, v)] -> PQueue k v
PQ.fromList ([(w, HuffmanTree a)] -> PQueue w (HuffmanTree a))
-> ([(a, w)] -> [(w, HuffmanTree a)])
-> [(a, w)]
-> PQueue w (HuffmanTree a)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((a, w) -> (w, HuffmanTree a)) -> [(a, w)] -> [(w, HuffmanTree a)]
forall a b. (a -> b) -> [a] -> [b]
map (\(a
x,w
w) -> (w
w, a -> HuffmanTree a
forall a. a -> HuffmanTree a
Leaf a
x))
   build :: PQueue k (HuffmanTree a) -> HuffmanTree a
build PQueue k (HuffmanTree a)
pq =
     case PQueue k (HuffmanTree a)
-> Maybe ((k, HuffmanTree a), PQueue k (HuffmanTree a))
forall k v. Ord k => PQueue k v -> Maybe ((k, v), PQueue k v)
PQ.minViewWithKey PQueue k (HuffmanTree a)
pq of
       Maybe ((k, HuffmanTree a), PQueue k (HuffmanTree a))
Nothing -> HuffmanTree a
forall a. HuffmanTree a
Empty
       Just ((k
w,HuffmanTree a
x), PQueue k (HuffmanTree a)
pq') ->
         case PQueue k (HuffmanTree a)
-> Maybe ((k, HuffmanTree a), PQueue k (HuffmanTree a))
forall k v. Ord k => PQueue k v -> Maybe ((k, v), PQueue k v)
PQ.minViewWithKey PQueue k (HuffmanTree a)
pq' of
           Maybe ((k, HuffmanTree a), PQueue k (HuffmanTree a))
Nothing -> HuffmanTree a
x
           Just ((k
w',HuffmanTree a
y), PQueue k (HuffmanTree a)
pq'') -> PQueue k (HuffmanTree a) -> HuffmanTree a
build (PQueue k (HuffmanTree a) -> HuffmanTree a)
-> PQueue k (HuffmanTree a) -> HuffmanTree a
forall a b. (a -> b) -> a -> b
$ k
-> HuffmanTree a
-> PQueue k (HuffmanTree a)
-> PQueue k (HuffmanTree a)
forall k v. Ord k => k -> v -> PQueue k v -> PQueue k v
PQ.insert (k
wk -> k -> k
forall a. Num a => a -> a -> a
+k
w') (HuffmanTree a -> HuffmanTree a -> HuffmanTree a
forall a. HuffmanTree a -> HuffmanTree a -> HuffmanTree a
Node HuffmanTree a
x HuffmanTree a
y) PQueue k (HuffmanTree a)
pq''

-- More efficient implementation, O(n).  Requires that the input
-- list of symbols and weight is sorted by increasing weight.
huffmanSorted :: (Ord w, Num w) => [(a,w)] -> HuffmanTree a
huffmanSorted :: forall w a. (Ord w, Num w) => [(a, w)] -> HuffmanTree a
huffmanSorted = Seq (HuffmanTree a, w) -> Seq (HuffmanTree a, w) -> HuffmanTree a
forall {a} {a}.
(Ord a, Num a) =>
Seq (HuffmanTree a, a) -> Seq (HuffmanTree a, a) -> HuffmanTree a
build Seq (HuffmanTree a, w)
forall a. Seq a
S.empty (Seq (HuffmanTree a, w) -> HuffmanTree a)
-> ([(a, w)] -> Seq (HuffmanTree a, w))
-> [(a, w)]
-> HuffmanTree a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(a, w)] -> Seq (HuffmanTree a, w)
forall {b} {d}. [(b, d)] -> Seq (HuffmanTree b, d)
prepare
  where
   prepare :: [(b, d)] -> Seq (HuffmanTree b, d)
prepare = [(HuffmanTree b, d)] -> Seq (HuffmanTree b, d)
forall a. [a] -> Seq a
S.fromList ([(HuffmanTree b, d)] -> Seq (HuffmanTree b, d))
-> ([(b, d)] -> [(HuffmanTree b, d)])
-> [(b, d)]
-> Seq (HuffmanTree b, d)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((b, d) -> (HuffmanTree b, d)) -> [(b, d)] -> [(HuffmanTree b, d)]
forall a b. (a -> b) -> [a] -> [b]
map ((b -> HuffmanTree b) -> (b, d) -> (HuffmanTree b, d)
forall b c d. (b -> c) -> (b, d) -> (c, d)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first b -> HuffmanTree b
forall a. a -> HuffmanTree a
Leaf)
   dequeue :: Seq (a, a) -> Seq (a, a) -> Maybe ((a, a), Seq (a, a), Seq (a, a))
dequeue Seq (a, a)
s Seq (a, a)
t =
     case (Seq (a, a) -> ViewL (a, a)
forall a. Seq a -> ViewL a
viewl Seq (a, a)
s, Seq (a, a) -> ViewL (a, a)
forall a. Seq a -> ViewL a
viewl Seq (a, a)
t) of
       (ViewL (a, a)
EmptyL, ViewL (a, a)
EmptyL)    -> Maybe ((a, a), Seq (a, a), Seq (a, a))
forall a. Maybe a
Nothing
       (ViewL (a, a)
EmptyL, ((a, a)
x :< Seq (a, a)
ts)) -> ((a, a), Seq (a, a), Seq (a, a))
-> Maybe ((a, a), Seq (a, a), Seq (a, a))
forall a. a -> Maybe a
Just ((a, a)
x,Seq (a, a)
s,Seq (a, a)
ts)
       (((a, a)
x :< Seq (a, a)
ss), ViewL (a, a)
EmptyL) -> ((a, a), Seq (a, a), Seq (a, a))
-> Maybe ((a, a), Seq (a, a), Seq (a, a))
forall a. a -> Maybe a
Just ((a, a)
x,Seq (a, a)
ss,Seq (a, a)
t)
       (((a
x,a
w) :< Seq (a, a)
ss), ((a
y,a
w') :< Seq (a, a)
ts))
         | a
w a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< a
w'    -> ((a, a), Seq (a, a), Seq (a, a))
-> Maybe ((a, a), Seq (a, a), Seq (a, a))
forall a. a -> Maybe a
Just ((a
x,a
w),Seq (a, a)
ss,Seq (a, a)
t)
         | Bool
otherwise -> ((a, a), Seq (a, a), Seq (a, a))
-> Maybe ((a, a), Seq (a, a), Seq (a, a))
forall a. a -> Maybe a
Just ((a
y,a
w'),Seq (a, a)
s,Seq (a, a)
ts)
   build :: Seq (HuffmanTree a, a) -> Seq (HuffmanTree a, a) -> HuffmanTree a
build Seq (HuffmanTree a, a)
s Seq (HuffmanTree a, a)
t =
     case Seq (HuffmanTree a, a)
-> Seq (HuffmanTree a, a)
-> Maybe
     ((HuffmanTree a, a), Seq (HuffmanTree a, a),
      Seq (HuffmanTree a, a))
forall {a} {a}.
Ord a =>
Seq (a, a) -> Seq (a, a) -> Maybe ((a, a), Seq (a, a), Seq (a, a))
dequeue Seq (HuffmanTree a, a)
s Seq (HuffmanTree a, a)
t of
       Maybe
  ((HuffmanTree a, a), Seq (HuffmanTree a, a),
   Seq (HuffmanTree a, a))
Nothing -> HuffmanTree a
forall a. HuffmanTree a
Empty
       Just ((HuffmanTree a
x,a
w),Seq (HuffmanTree a, a)
s',Seq (HuffmanTree a, a)
t') ->
         case Seq (HuffmanTree a, a)
-> Seq (HuffmanTree a, a)
-> Maybe
     ((HuffmanTree a, a), Seq (HuffmanTree a, a),
      Seq (HuffmanTree a, a))
forall {a} {a}.
Ord a =>
Seq (a, a) -> Seq (a, a) -> Maybe ((a, a), Seq (a, a), Seq (a, a))
dequeue Seq (HuffmanTree a, a)
s' Seq (HuffmanTree a, a)
t' of
           Maybe
  ((HuffmanTree a, a), Seq (HuffmanTree a, a),
   Seq (HuffmanTree a, a))
Nothing -> HuffmanTree a
x
           Just ((HuffmanTree a
y,a
w'),Seq (HuffmanTree a, a)
s'',Seq (HuffmanTree a, a)
t'') -> Seq (HuffmanTree a, a) -> Seq (HuffmanTree a, a) -> HuffmanTree a
build (Seq (HuffmanTree a, a)
s'' Seq (HuffmanTree a, a)
-> (HuffmanTree a, a) -> Seq (HuffmanTree a, a)
forall a. Seq a -> a -> Seq a
|> (HuffmanTree a -> HuffmanTree a -> HuffmanTree a
forall a. HuffmanTree a -> HuffmanTree a -> HuffmanTree a
Node HuffmanTree a
x HuffmanTree a
y, a
wa -> a -> a
forall a. Num a => a -> a -> a
+a
w')) Seq (HuffmanTree a, a)
t''

-- Derive the prefix-free binary code from a huffman tree.
codewords :: HuffmanTree a -> Code a
codewords :: forall a. HuffmanTree a -> Code a
codewords = [Bit] -> HuffmanTree a -> [(a, [Bit])]
forall {a}. [Bit] -> HuffmanTree a -> [(a, [Bit])]
code' []
  where code' :: [Bit] -> HuffmanTree a -> [(a, [Bit])]
code' [Bit]
_    HuffmanTree a
Empty      = []
        code' [Bit]
bits (Leaf a
x)   = [(a
x,[Bit]
bits)]
        code' [Bit]
bits (Node HuffmanTree a
l HuffmanTree a
r) = ((a, [Bit]) -> (a, [Bit])) -> [(a, [Bit])] -> [(a, [Bit])]
forall a b. (a -> b) -> [a] -> [b]
map (([Bit] -> [Bit]) -> (a, [Bit]) -> (a, [Bit])
forall b c d. (b -> c) -> (d, b) -> (d, c)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (d, b) (d, c)
second (Bit
ZeroBit -> [Bit] -> [Bit]
forall a. a -> [a] -> [a]
:)) ([Bit] -> HuffmanTree a -> [(a, [Bit])]
code' [Bit]
bits HuffmanTree a
l) [(a, [Bit])] -> [(a, [Bit])] -> [(a, [Bit])]
forall a. [a] -> [a] -> [a]
++
                                ((a, [Bit]) -> (a, [Bit])) -> [(a, [Bit])] -> [(a, [Bit])]
forall a b. (a -> b) -> [a] -> [b]
map (([Bit] -> [Bit]) -> (a, [Bit]) -> (a, [Bit])
forall b c d. (b -> c) -> (d, b) -> (d, c)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (d, b) (d, c)
second (Bit
OneBit -> [Bit] -> [Bit]
forall a. a -> [a] -> [a]
:)) ([Bit] -> HuffmanTree a -> [(a, [Bit])]
code' [Bit]
bits HuffmanTree a
r)

-- Pretty-print a binary code.  Mostly useful for debugging.
ppCode :: Show a => Code a -> String
ppCode :: forall a. Show a => Code a -> String
ppCode = String -> [String] -> String
forall a. [a] -> [[a]] -> [a]
intercalate String
"\n" ([String] -> String) -> (Code a -> [String]) -> Code a -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
           ((a, [Bit]) -> String) -> Code a -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (\(a
x,[Bit]
bits) -> a -> String
forall a. Show a => a -> String
show a
x String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
": " String -> ShowS
forall a. [a] -> [a] -> [a]
++ [String] -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ((Bit -> String) -> [Bit] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map Bit -> String
forall a. Show a => a -> String
show [Bit]
bits))