{-# LANGUAGE CPP #-}

-----------------------------------------------------------------------------
-- |
-- Module      :  Text.Hyphenation.Pattern
-- Copyright   :  (C) 2012-2019 Edward Kmett
-- License     :  BSD-style (see the file LICENSE)
--
-- Maintainer  :  Edward Kmett <ekmett@gmail.com>
-- Stability   :  provisional
-- Portability :  portable
--
----------------------------------------------------------------------------
module Text.Hyphenation.Pattern
  (
  -- * Pattern file support
    Patterns
  , insertPattern
  , lookupPattern
  , scorePattern
  , parsePatterns
  ) where

import qualified Data.IntMap as IM
import Prelude hiding (lookup)
import Data.Char (digitToInt, isDigit)

#if !(MIN_VERSION_base(4,11,0))
import Data.Semigroup (Semigroup(..))
#endif

-- | Hyphenation patterns
data Patterns = Patterns [Int] (IM.IntMap Patterns)
  deriving Int -> Patterns -> ShowS
[Patterns] -> ShowS
Patterns -> String
(Int -> Patterns -> ShowS)
-> (Patterns -> String) -> ([Patterns] -> ShowS) -> Show Patterns
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Patterns -> ShowS
showsPrec :: Int -> Patterns -> ShowS
$cshow :: Patterns -> String
show :: Patterns -> String
$cshowList :: [Patterns] -> ShowS
showList :: [Patterns] -> ShowS
Show

instance Semigroup Patterns where
  Patterns [Int]
ps IntMap Patterns
m <> :: Patterns -> Patterns -> Patterns
<> Patterns [Int]
qs IntMap Patterns
n = [Int] -> IntMap Patterns -> Patterns
Patterns ([Int] -> [Int] -> [Int]
zipMax [Int]
ps [Int]
qs) ((Patterns -> Patterns -> Patterns)
-> IntMap Patterns -> IntMap Patterns -> IntMap Patterns
forall a. (a -> a -> a) -> IntMap a -> IntMap a -> IntMap a
IM.unionWith Patterns -> Patterns -> Patterns
forall a. Monoid a => a -> a -> a
mappend IntMap Patterns
m IntMap Patterns
n)

instance Monoid Patterns where
  mempty :: Patterns
mempty = [Int] -> IntMap Patterns -> Patterns
Patterns [] IntMap Patterns
forall a. IntMap a
IM.empty
#if !(MIN_VERSION_base(4,11,0))
  mappend = (<>)
#endif

-- | Tallies the hyphenation scores for a word considering all tails.
lookupPattern :: String -> Patterns -> [Int]
lookupPattern :: String -> Patterns -> [Int]
lookupPattern String
xs0 Patterns
p = [Int]
scores1 where
  scores0 :: [Int]
scores0 =
    case String -> Patterns -> [Int]
forall {a}. Enum a => [a] -> Patterns -> [Int]
go (Char
'.' Char -> ShowS
forall a. a -> [a] -> [a]
: String
xs0 String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
".") Patterns
p of
      Int
_:[Int]
ys -> [Int]
ys
      [] -> String -> [Int]
forall a. HasCallStack => String -> a
error (String -> [Int]) -> String -> [Int]
forall a b. (a -> b) -> a -> b
$ String
"lookupPattern.scores0: Impossible (" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
xs0 String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
")"

  scores1 :: [Int]
scores1 =
    case [Int] -> Maybe ([Int], Int)
forall a. [a] -> Maybe ([a], a)
unsnoc [Int]
scores0 of
      Just ([Int]
ys, Int
_) -> [Int]
ys
      Maybe ([Int], Int)
Nothing -> String -> [Int]
forall a. HasCallStack => String -> a
error (String -> [Int]) -> String -> [Int]
forall a b. (a -> b) -> a -> b
$ String
"lookupPattern.scores1: Impossible (" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
xs0 String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
")"

  go :: [a] -> Patterns -> [Int]
go [] (Patterns [Int]
ys IntMap Patterns
_) = [Int]
ys
  go xxs :: [a]
xxs@(a
_:[a]
xs) Patterns
t = [Int] -> [Int] -> [Int]
zipMax ([a] -> Patterns -> [Int]
forall {a}. Enum a => [a] -> Patterns -> [Int]
go1 [a]
xxs Patterns
t) (Int
0Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:[a] -> Patterns -> [Int]
go [a]
xs Patterns
t)
  go1 :: [a] -> Patterns -> [Int]
go1 [] (Patterns [Int]
ys IntMap Patterns
_) = [Int]
ys
  go1 (a
x:[a]
xs) (Patterns [Int]
ys IntMap Patterns
m) = case Int -> IntMap Patterns -> Maybe Patterns
forall a. Int -> IntMap a -> Maybe a
IM.lookup (a -> Int
forall a. Enum a => a -> Int
fromEnum a
x) IntMap Patterns
m of
    Just Patterns
t' -> [Int] -> [Int] -> [Int]
zipMax [Int]
ys ([a] -> Patterns -> [Int]
go1 [a]
xs Patterns
t')
    Maybe Patterns
Nothing -> [Int]
ys

-- | Insert a Knuth-Liang hyphenation pattern into the trie
--
-- 1. @.@ denotes the start or end of the input
--
-- 2. @0-9@ are used to denote hyphenation or dehyphenation depending on whether or not they are even (no hyphen) or odd (hyphen allowed).
--
-- Patterns are overlaid and the maximum value at each location is used.
-- this allows you to implement a finite number of precedences between hyphenation rules
--
-- (e.g. @3foo.@ indicates that the suffix '-foo' should be hyphenated with precedence 3.)
insertPattern :: String -> Patterns -> Patterns
insertPattern :: String -> Patterns -> Patterns
insertPattern String
s0 = String -> Patterns -> Patterns
forall {a}. Enum a => [a] -> Patterns -> Patterns
go (ShowS
chars String
s0) where
  pts :: [Int]
pts = String -> [Int]
scorePattern String
s0
  go :: [a] -> Patterns -> Patterns
go [] (Patterns [Int]
_ IntMap Patterns
m) = [Int] -> IntMap Patterns -> Patterns
Patterns [Int]
pts IntMap Patterns
m
  go (a
x:[a]
xs) (Patterns [Int]
n IntMap Patterns
m) = [Int] -> IntMap Patterns -> Patterns
Patterns [Int]
n ((Patterns -> Patterns -> Patterns)
-> Int -> Patterns -> IntMap Patterns -> IntMap Patterns
forall a. (a -> a -> a) -> Int -> a -> IntMap a -> IntMap a
IM.insertWith (\Patterns
_ -> [a] -> Patterns -> Patterns
go [a]
xs) (a -> Int
forall a. Enum a => a -> Int
fromEnum a
x) ([a] -> Patterns
forall {a}. Enum a => [a] -> Patterns
mk [a]
xs) IntMap Patterns
m)
  mk :: [a] -> Patterns
mk []     = [Int] -> IntMap Patterns -> Patterns
Patterns [Int]
pts IntMap Patterns
forall a. IntMap a
IM.empty
  mk (a
x:[a]
xs) = [Int] -> IntMap Patterns -> Patterns
Patterns [] (Int -> Patterns -> IntMap Patterns
forall a. Int -> a -> IntMap a
IM.singleton (a -> Int
forall a. Enum a => a -> Int
fromEnum a
x) ([a] -> Patterns
mk [a]
xs))

-- | Parse one pattern per line from an input string
--
-- @hyph-utf8@ supplies these files UTF-8 encoded in the @txt@ folder with a @.pat.txt@ extension
parsePatterns :: String -> Patterns
parsePatterns :: String -> Patterns
parsePatterns = (String -> Patterns -> Patterns)
-> Patterns -> [String] -> Patterns
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr String -> Patterns -> Patterns
insertPattern Patterns
forall a. Monoid a => a
mempty ([String] -> Patterns)
-> (String -> [String]) -> String -> Patterns
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
lines

chars :: String -> String
chars :: ShowS
chars = (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
filter (\Char
x -> Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Char
'0' Bool -> Bool -> Bool
|| Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
'9')

-- | Convert a Pattern string to a Score
scorePattern :: String -> [Int]
scorePattern :: String -> [Int]
scorePattern [] = [Int
0]
scorePattern (Char
x:String
ys)
  | Char -> Bool
isDigit Char
x = Char -> Int
digitToInt Char
x Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: case String
ys of
                                 []    -> []
                                 Char
_:String
ys' -> String -> [Int]
scorePattern String
ys'
  | Bool
otherwise = Int
0 Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: String -> [Int]
scorePattern String
ys

-- | Zip two scores.
zipMax :: [Int] -> [Int] -> [Int]
zipMax :: [Int] -> [Int] -> [Int]
zipMax (Int
x:[Int]
xs) (Int
y:[Int]
ys) = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
x Int
y Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: [Int] -> [Int] -> [Int]
zipMax [Int]
xs [Int]
ys
zipMax [] [Int]
ys = [Int]
ys
zipMax [Int]
xs [] = [Int]
xs

-- | Decompose a list into 'init' and 'last'.
unsnoc :: [a] -> Maybe ([a], a)
unsnoc :: forall a. [a] -> Maybe ([a], a)
unsnoc []     = Maybe ([a], a)
forall a. Maybe a
Nothing
unsnoc (a
x:[a]
xs) = case [a] -> Maybe ([a], a)
forall a. [a] -> Maybe ([a], a)
unsnoc [a]
xs of
                  Maybe ([a], a)
Nothing    -> ([a], a) -> Maybe ([a], a)
forall a. a -> Maybe a
Just ([], a
x)
                  Just ([a]
a,a
b) -> ([a], a) -> Maybe ([a], a)
forall a. a -> Maybe a
Just (a
xa -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
a, a
b)