{-# LANGUAGE OverloadedStrings
           , ScopedTypeVariables
           , StandaloneDeriving  #-}
{-| The CLTokens module describes non-overlapping classes of strings that are
    useful for disambiguating arguments to command line programs. Many common
    string formats -- environment variable assignments, URLs, option strings --
    are recognized by this module's utilities.
 -}
module System.Posix.ARX.CLI.CLTokens where

import Prelude hiding (takeWhile)
import Control.Applicative
import Data.ByteString (ByteString)
import Data.ByteString.Char8 ()
import Data.Either
import Data.Map (Map)
import qualified Data.Map as Map

import Data.Attoparsec.ByteString.Char8 ( char8, choice, decimal, endOfInput,
                                          inClass, isDigit, parseOnly, Parser,
                                          satisfy, string, takeWhile,
                                          takeWhile1, try )


{-| Non-overlapping classes of command line argument strings.
 -}
data Class = EnvBinding    -- ^ An 'EnvBinding' has the form
                           --   @<shell var name>=<string>@. For example,
                           --   @SENDIN=the_clowns@.
           | QualifiedPath -- ^ A 'QualifiedPath' is a file path starting with
                           --   @/@, @./@, or @../@.
           | DashDash      -- ^ A 'DashDash' is a string of two dashes, @--@,
                           --   commonly used to indicate the end of options
                           --   processing.
           | LongOption    -- ^ A 'LongOption' is a string beginning with two
                           --   dashes and then at least one non-dash.
           | Dash          -- ^ A 'Dash' is a single dash, @-@, commonly used
                           --   to indicate input from @stdin@ or output to
                           --   @stdout@.
           | ShortOption   -- ^ A 'ShortOption' is a beginning with a dash and
                           --   then at least one non-dash.
           | URL           -- ^ A 'URL' is a scheme, separated from the
                           --   resource, represented as an arbitrary string,
                           --   by @://@. The scheme consists of ASCII,
                           --   lower-case letters and digits, and may be
                           --   multi-part, with each part separated by a @+@
                           --   or @/@ (for example, @git+ssh@). An example
                           --   URL: @http://example.com/?q=special@.
           | HexNum        -- ^ A 'HexNum' is a sequence of hexadecimal
                           --   digits, upper or lower case, beginning with
                           --   @0x@; for example: @0x01a3@.
           | DecimalNum    -- ^ A 'DecimalNum' is a string of decimal digits:
                           --   @123123@.
           | Size          -- ^ A 'Size' is a decimal number followed by a
                           --   multiplicative suffix, in the manner of @dd@
                           --   or @head@. Note that counts in terms of bytes
                           --   require @B@ (unlike @dd@ or @head@). For a
                           --   full list of suffixes, see 'sizes' below.
deriving instance Eq Class
deriving instance Ord Class
deriving instance Show Class


{-| Determine if a particular 'ByteString' matches the given 'Class' of token.
 -}
match                       ::  Class -> ByteString -> Bool
match :: Class -> ByteString -> Bool
match                        =  (Either String () -> Bool
forall {a} {b}. Either a b -> Bool
e2b (Either String () -> Bool)
-> (ByteString -> Either String ()) -> ByteString -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
.) ((ByteString -> Either String ()) -> ByteString -> Bool)
-> (Class -> ByteString -> Either String ())
-> Class
-> ByteString
-> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parser () -> ByteString -> Either String ()
forall a. Parser a -> ByteString -> Either String a
parseOnly (Parser () -> ByteString -> Either String ())
-> (Class -> Parser ()) -> Class -> ByteString -> Either String ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Class -> Parser ()
recognizer
 where
  e2b :: Either a b -> Bool
e2b (Left a
_)               =  Bool
False
  e2b (Right b
_)              =  Bool
True


{-| Determine if a particular 'ByteString' matches any 'Class' of token.
 -}
recognize                   ::  ByteString -> Maybe Class
recognize :: ByteString -> Maybe Class
recognize                    =  Either String Class -> Maybe Class
forall {a} {a}. Either a a -> Maybe a
e2m (Either String Class -> Maybe Class)
-> (ByteString -> Either String Class) -> ByteString -> Maybe Class
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parser Class -> ByteString -> Either String Class
forall a. Parser a -> ByteString -> Either String a
parseOnly ([Parser Class] -> Parser Class
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice [Parser Class]
recognizers)
 where
  e2m :: Either a a -> Maybe a
e2m (Left a
_)               =  Maybe a
forall a. Maybe a
Nothing
  e2m (Right a
x)              =  a -> Maybe a
forall a. a -> Maybe a
Just a
x
  recognizeIt :: Class -> Parser Class
recognizeIt Class
x              =  Class
x Class -> Parser () -> Parser Class
forall a b. a -> Parser ByteString b -> Parser ByteString a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Class -> Parser ()
recognizer Class
x
  recognizers :: [Parser Class]
recognizers                =  Class -> Parser Class
recognizeIt (Class -> Parser Class) -> [Class] -> [Parser Class]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [ Class
EnvBinding,
                                                  Class
QualifiedPath,
                                                  Class
DashDash,
                                                  Class
LongOption,
                                                  Class
Dash,
                                                  Class
ShortOption,
                                                  Class
URL,
                                                  Class
HexNum,
                                                  Class
DecimalNum ]


{-| A ByteString stand-in that demoes each token class.
 -}
exemplar                    ::  Class -> ByteString
exemplar :: Class -> ByteString
exemplar Class
cls                 =  case Class
cls of
  Class
EnvBinding                ->  ByteString
"VAR=value"
  Class
QualifiedPath             ->  ByteString
"./qualified/path"
  Class
DashDash                  ->  ByteString
"--"
  Class
LongOption                ->  ByteString
"--long-option"
  Class
Dash                      ->  ByteString
"-"
  Class
ShortOption               ->  ByteString
"-shortopt"
  Class
URL                       ->  ByteString
"scheme://url-to-resource"
  Class
HexNum                    ->  ByteString
"0xA12FE"
  Class
DecimalNum                ->  ByteString
"0123456789"
  Class
Size                      ->  ByteString
"4MiB"


{-| The recognizer appropriate to each token class. Parses successfully if a
    the token class is recognized, returning '()'. Most token types are
    defined in terms of a prefix of the input -- for example, 'QualifiedPath'
    -- and the parsers for these tokens naturally return as soon as the prefix
    is recognized.
 -}
recognizer                  ::  Class -> Parser ()
recognizer :: Class -> Parser ()
recognizer Class
cls               =  case Class
cls of
  Class
EnvBinding                ->  () () -> Parser ByteString Word8 -> Parser ()
forall a b. a -> Parser ByteString b -> Parser ByteString a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ do (Char -> Bool) -> Parser Char
satisfy Char -> Bool
varFirst
                                         (Char -> Bool) -> Parser ByteString
takeWhile Char -> Bool
varBody
                                         Char -> Parser ByteString Word8
char8 Char
'='
  Class
QualifiedPath             ->  () () -> Parser ByteString -> Parser ()
forall a b. a -> Parser ByteString b -> Parser ByteString a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ do ByteString -> Parser ByteString
string ByteString
"/" Parser ByteString -> Parser ByteString -> Parser ByteString
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ByteString -> Parser ByteString
string ByteString
"./"
                                                    Parser ByteString -> Parser ByteString -> Parser ByteString
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ByteString -> Parser ByteString
string ByteString
"../"
  Class
DashDash                  ->  ByteString -> Parser ByteString
string ByteString
"--" Parser ByteString -> Parser () -> Parser ()
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
forall t. Chunk t => Parser t ()
endOfInput
  Class
LongOption                ->  () () -> Parser Char -> Parser ()
forall a b. a -> Parser ByteString b -> Parser ByteString a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (ByteString -> Parser ByteString
string ByteString
"--" Parser ByteString -> Parser Char -> Parser Char
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser Char
satisfy (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'-'))
  Class
Dash                      ->  Char -> Parser ByteString Word8
char8 Char
'-' Parser ByteString Word8 -> Parser () -> Parser ()
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
forall t. Chunk t => Parser t ()
endOfInput
  Class
ShortOption               ->  () () -> Parser Char -> Parser ()
forall a b. a -> Parser ByteString b -> Parser ByteString a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Char -> Parser ByteString Word8
char8 Char
'-' Parser ByteString Word8 -> Parser Char -> Parser Char
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser Char
satisfy (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'-'))
  Class
URL                       ->  () () -> Parser ByteString -> Parser ()
forall a b. a -> Parser ByteString b -> Parser ByteString a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ do (Char -> Bool) -> Parser ByteString
takeWhile1 Char -> Bool
isURLSchemeChar
                                         Parser ByteString -> Parser ByteString [ByteString]
forall a. Parser ByteString a -> Parser ByteString [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (Parser ByteString -> Parser ByteString [ByteString])
-> Parser ByteString -> Parser ByteString [ByteString]
forall a b. (a -> b) -> a -> b
$ do Char -> Parser ByteString Word8
char8 Char
'+' Parser ByteString Word8
-> Parser ByteString Word8 -> Parser ByteString Word8
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Parser ByteString Word8
char8 Char
'/'
                                                   (Char -> Bool) -> Parser ByteString
takeWhile1 Char -> Bool
isURLSchemeChar
                                         ByteString -> Parser ByteString
string ByteString
"://"
  Class
HexNum                    ->  ByteString -> Parser ByteString
string ByteString
"0x" Parser ByteString -> Parser () -> Parser ()
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser ByteString
takeWhile1 Char -> Bool
isHexDigit
                                            Parser ByteString -> Parser () -> Parser ()
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
forall t. Chunk t => Parser t ()
endOfInput
  Class
DecimalNum                ->  (Char -> Bool) -> Parser ByteString
takeWhile1 Char -> Bool
isDigit Parser ByteString -> Parser () -> Parser ()
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
forall t. Chunk t => Parser t ()
endOfInput
  Class
Size                      ->  () () -> Parser ByteString Integer -> Parser ()
forall a b. a -> Parser ByteString b -> Parser ByteString a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Parser ByteString Integer
size

schemeSeparator :: Parser ByteString Word8
schemeSeparator              =  Char -> Parser ByteString Word8
char8 Char
'+' Parser ByteString Word8
-> Parser ByteString Word8 -> Parser ByteString Word8
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Parser ByteString Word8
char8 Char
'/'

varFirst :: Char -> Bool
varFirst                     =  String -> Char -> Bool
inClass String
"a-zA-Z_"

varBody :: Char -> Bool
varBody                      =  String -> Char -> Bool
inClass String
"a-zA-Z_0-9"

isLongOptionChar :: Char -> Bool
isLongOptionChar             =  String -> Char -> Bool
inClass String
"a-zA-Z0-9-"

isShortOptionChar :: Char -> Bool
isShortOptionChar            =  String -> Char -> Bool
inClass String
"a-zA-Z0-9!?"

isSchemeChar :: Char -> Bool
isSchemeChar                 =  String -> Char -> Bool
inClass String
"a-z0-9"

isHexDigit :: Char -> Bool
isHexDigit                   =  String -> Char -> Bool
inClass String
"0-9a-fA-F"

isURLSchemeChar :: Char -> Bool
isURLSchemeChar              =  String -> Char -> Bool
inClass String
"a-z0-9"


{-| A map from suffixes to sizes, following the conventions of command line
    tools (GNU @dd@ or @head@ and many others) as well as the standard for
    binary sizes established by the IEC.
@
  B       =    1
  K = KiB = 1024B   kB = 1000B
  M = MiB = 1024K   MB = 1000kB
  G = GiB = 1024M   GB = 1000MB
  T = TiB = 1024G   TB = 1000GB
  P = PiB = 1024T   PB = 1000TB
  E = EiB = 1024P   EB = 1000PB
  Z = ZiB = 1024E   ZB = 1000EB
  Y = YiB = 1024Z   YB = 1000ZB
@
 -}
sizes                       ::  Map ByteString Integer
sizes :: Map ByteString Integer
sizes                        =  [(ByteString, Integer)] -> Map ByteString Integer
forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList
                                 [ (ByteString
"B", Integer
1),
                                   (ByteString
"K", Integer
2Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
10), (ByteString
"KiB", Integer
2Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
10), (ByteString
"kB", Integer
10Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
03),
                                   (ByteString
"M", Integer
2Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
20), (ByteString
"MiB", Integer
2Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
20), (ByteString
"MB", Integer
10Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
06),
                                   (ByteString
"G", Integer
2Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
30), (ByteString
"GiB", Integer
2Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
30), (ByteString
"GB", Integer
10Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
09),
                                   (ByteString
"T", Integer
2Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
40), (ByteString
"TiB", Integer
2Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
40), (ByteString
"TB", Integer
10Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
12),
                                   (ByteString
"P", Integer
2Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
50), (ByteString
"PiB", Integer
2Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
50), (ByteString
"PB", Integer
10Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
15),
                                   (ByteString
"E", Integer
2Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
60), (ByteString
"EiB", Integer
2Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
60), (ByteString
"EB", Integer
10Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
18),
                                   (ByteString
"Z", Integer
2Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
70), (ByteString
"ZiB", Integer
2Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
70), (ByteString
"ZB", Integer
10Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
21),
                                   (ByteString
"Y", Integer
2Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
80), (ByteString
"YiB", Integer
2Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
80), (ByteString
"YB", Integer
10Integer -> Integer -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
24) ]

{-| Parse a size, consuming the entire input string.
 -}
size                        ::  Parser Integer
size :: Parser ByteString Integer
size                         =  Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
(*) (Integer -> Integer -> Integer)
-> Parser ByteString Integer
-> Parser ByteString (Integer -> Integer)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString Integer
forall a. Integral a => Parser a
decimal Parser ByteString (Integer -> Integer)
-> Parser ByteString Integer -> Parser ByteString Integer
forall a b.
Parser ByteString (a -> b)
-> Parser ByteString a -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString Integer
suffix
 where
  asSuffix :: (ByteString, a) -> Parser ByteString a
asSuffix (ByteString
k, a
v)            =  a
v a -> Parser ByteString -> Parser ByteString a
forall a b. a -> Parser ByteString b -> Parser ByteString a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Parser ByteString -> Parser ByteString
forall i a. Parser i a -> Parser i a
try (ByteString -> Parser ByteString
string ByteString
k Parser ByteString -> Parser () -> Parser ByteString
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
forall t. Chunk t => Parser t ()
endOfInput)
  suffix :: Parser ByteString Integer
suffix                     =  [Parser ByteString Integer] -> Parser ByteString Integer
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice ((ByteString, Integer) -> Parser ByteString Integer
forall {a}. (ByteString, a) -> Parser ByteString a
asSuffix ((ByteString, Integer) -> Parser ByteString Integer)
-> [(ByteString, Integer)] -> [Parser ByteString Integer]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Map ByteString Integer -> [(ByteString, Integer)]
forall k a. Map k a -> [(k, a)]
Map.toList Map ByteString Integer
sizes)

{-| Parse a size, consuming the entire input string, with the final result
    bounded by the maximum of a 'Bounded' type.
 -}
sizeBounded :: forall b . (Bounded b, Integral b) => Parser b
sizeBounded :: forall b. (Bounded b, Integral b) => Parser b
sizeBounded = Integer -> b
forall a. Num a => Integer -> a
fromInteger (Integer -> b) -> (Integer -> Integer) -> Integer -> b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Integer -> Integer -> Integer
forall a. Ord a => a -> a -> a
min (b -> Integer
forall a. Integral a => a -> Integer
toInteger (b
forall a. Bounded a => a
maxBound :: b)) (Integer -> b) -> Parser ByteString Integer -> Parser ByteString b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString Integer
size