{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PartialTypeSignatures #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE FlexibleContexts #-}
module Token.Haskell(tokenizer) where
import Text.Pandoc.JSON ()
import Text.Pandoc.Definition ()
import Data.String (IsString)
import Data.Text (Text)
import qualified Data.Text as T
import Prelude hiding(getLine)
import Optics.Core
import GHC.SyntaxHighlighter
( tokenizeHaskell
, Loc(..)
, Token(..) )
import Token ( MyLoc(MyLoc), MyTok(..), unTikzMark, mark )
tokenizer :: Text
-> Maybe [(MyTok, MyLoc, Text)]
tokenizer :: Text -> Maybe [(MyTok, MyLoc, Text)]
tokenizer = ([(Token, Text)] -> [(MyTok, MyLoc, Text)])
-> Maybe [(Token, Text)] -> Maybe [(MyTok, MyLoc, Text)]
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ( [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
forall c.
(Eq c, IsString c, Semigroup c) =>
[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
joinEscapedOperators
([(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)])
-> ([(Token, Text)] -> [(MyTok, MyLoc, Text)])
-> [(Token, Text)]
-> [(MyTok, MyLoc, Text)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
splitTokens
([(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)])
-> ([(Token, Text)] -> [(MyTok, MyLoc, Text)])
-> [(Token, Text)]
-> [(MyTok, MyLoc, Text)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(MyTok, Text)] -> [(MyTok, MyLoc, Text)]
restoreLocations
([(MyTok, Text)] -> [(MyTok, MyLoc, Text)])
-> ([(Token, Text)] -> [(MyTok, Text)])
-> [(Token, Text)]
-> [(MyTok, MyLoc, Text)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(MyTok, Text)] -> [(MyTok, Text)]
splitUnicodeLambda
([(MyTok, Text)] -> [(MyTok, Text)])
-> ([(Token, Text)] -> [(MyTok, Text)])
-> [(Token, Text)]
-> [(MyTok, Text)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Token, Text) -> (MyTok, Text))
-> [(Token, Text)] -> [(MyTok, Text)]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Token, Text) -> (MyTok, Text)
recognizeToken)
(Maybe [(Token, Text)] -> Maybe [(MyTok, MyLoc, Text)])
-> (Text -> Maybe [(Token, Text)])
-> Text
-> Maybe [(MyTok, MyLoc, Text)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Maybe [(Token, Text)]
tokenizeHaskell
splitUnicodeLambda :: [(MyTok, Text)] -> [(MyTok, Text)]
splitUnicodeLambda :: [(MyTok, Text)] -> [(MyTok, Text)]
splitUnicodeLambda [] = []
splitUnicodeLambda ((MyTok
tok, Text
txt):[(MyTok, Text)]
rest)
| Text
"λ" Text -> Text -> Bool
`T.isPrefixOf` Text
txt Bool -> Bool -> Bool
&& Text -> Int
T.length Text
txt Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
1 =
(MyTok
TOther, Text
"λ") (MyTok, Text) -> [(MyTok, Text)] -> [(MyTok, Text)]
forall a. a -> [a] -> [a]
: [(MyTok, Text)] -> [(MyTok, Text)]
splitUnicodeLambda ((MyTok
tok, Int -> Text -> Text
T.drop Int
1 Text
txt)(MyTok, Text) -> [(MyTok, Text)] -> [(MyTok, Text)]
forall a. a -> [a] -> [a]
:[(MyTok, Text)]
rest)
| Bool
otherwise = (MyTok
tok, Text
txt) (MyTok, Text) -> [(MyTok, Text)] -> [(MyTok, Text)]
forall a. a -> [a] -> [a]
: [(MyTok, Text)] -> [(MyTok, Text)]
splitUnicodeLambda [(MyTok, Text)]
rest
recognizeToken :: (Token, Text) -> (MyTok, Text)
recognizeToken :: (Token, Text) -> (MyTok, Text)
recognizeToken (Token
SymbolTok, Text
"\\") =
(MyTok
TOther, Text
"λ" )
recognizeToken (Token
SymbolTok, Text
"λ") =
(MyTok
TOther, Text
"λ" )
recognizeToken (Token
CommentTok, tokText :: Text
tokText@(Text -> Maybe Text
unTikzMark -> Just Text
mark)) =
(Text -> MyTok
TTikz Text
mark, Text
tokText)
recognizeToken (Token
tokType, Text
tokText) =
(Token -> MyTok
haskellTok Token
tokType, Text
tokText)
haskellTok :: Token -> MyTok
haskellTok :: Token -> MyTok
haskellTok Token
SpaceTok = MyTok
TBlank
haskellTok Token
CommentTok = MyTok
TBlank
haskellTok Token
PragmaTok = MyTok
TBlank
haskellTok Token
KeywordTok = MyTok
TKeyword
haskellTok Token
ConstructorTok = MyTok
TCons
haskellTok Token
VariableTok = MyTok
TVar
haskellTok Token
OperatorTok = MyTok
TOperator
haskellTok Token
RationalTok = MyTok
TNum
haskellTok Token
IntegerTok = MyTok
TNum
haskellTok Token
StringTok = MyTok
TString
haskellTok Token
CharTok = MyTok
TString
haskellTok Token
t = MyTok
TOther
locLine :: Loc -> Int
locLine :: Loc -> Int
locLine (Loc Int
startLineNo Int
startColNo Int
_ Int
_) = Int
startLineNo
locCol :: Loc -> Int
locCol :: Loc -> Int
locCol (Loc Int
startLineNo Int
startColNo Int
_ Int
_) = Int
startColNo
restoreLocations :: [(MyTok, Text)] -> [(MyTok, MyLoc, Text)]
restoreLocations :: [(MyTok, Text)] -> [(MyTok, MyLoc, Text)]
restoreLocations = Int -> Int -> [(MyTok, Text)] -> [(MyTok, MyLoc, Text)]
go Int
1 Int
1
where
go :: Int -> Int -> [(MyTok, Text)] -> [(MyTok, MyLoc, Text)]
go Int
line Int
col [] = []
go Int
line Int
col ((MyTok
tok, Text
txt):[(MyTok, Text)]
ls) =
(MyTok
tok, Int -> Int -> Bool -> MyLoc
MyLoc Int
line Int
col (MyTok -> Bool
isMark MyTok
tok), Text
txt)(MyTok, MyLoc, Text)
-> [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
forall a. a -> [a] -> [a]
:Int -> Int -> [(MyTok, Text)] -> [(MyTok, MyLoc, Text)]
go Int
newLine Int
newCol [(MyTok, Text)]
ls
where
isMark :: MyTok -> Bool
isMark MyTok
TBlank = Bool
False
isMark MyTok
_ = Bool
True
newLine :: Int
newLine = Int
line Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
lineIncr
lineIncr :: Int
lineIncr = Text -> Int
T.length (Text -> Int) -> Text -> Int
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Text -> Text
T.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'\n') Text
txt
newCol :: Int
newCol | Int
lineIncr Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Int
col Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Text -> Int
T.length Text
txt
| Bool
otherwise = (Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
(Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ Text -> Int
T.length
(Text -> Int) -> Text -> Int
forall a b. (a -> b) -> a -> b
$ (Text, Text) -> Text
forall a b. (a, b) -> a
fst
((Text, Text) -> Text) -> (Text, Text) -> Text
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Text -> (Text, Text)
T.break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'\n')
(Text -> (Text, Text)) -> Text -> (Text, Text)
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.reverse Text
txt
splitTokens :: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
splitTokens :: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
splitTokens = [[(MyTok, MyLoc, Text)]] -> [(MyTok, MyLoc, Text)]
forall a. Monoid a => [a] -> a
mconcat
([[(MyTok, MyLoc, Text)]] -> [(MyTok, MyLoc, Text)])
-> ([(MyTok, MyLoc, Text)] -> [[(MyTok, MyLoc, Text)]])
-> [(MyTok, MyLoc, Text)]
-> [(MyTok, MyLoc, Text)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((MyTok, MyLoc, Text) -> [(MyTok, MyLoc, Text)])
-> [(MyTok, MyLoc, Text)] -> [[(MyTok, MyLoc, Text)]]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (MyTok, MyLoc, Text) -> [(MyTok, MyLoc, Text)]
splitter
where
splitter :: (MyTok, MyLoc, Text) -> [(MyTok, MyLoc, Text)]
splitter :: (MyTok, MyLoc, Text) -> [(MyTok, MyLoc, Text)]
splitter (MyTok
TBlank, loc :: MyLoc
loc@(MyLoc Int
line Int
_ Bool
_), Text
txt) | (Char -> Bool) -> Text -> Text
T.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'\n') Text
txt Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
/= Text
"" =
[Text] -> [(MyTok, MyLoc, Text)]
withLocs [Text]
withNewLines
where
split, withNewLines :: [Text]
split :: [Text]
split = Text -> [Text]
T.lines Text
txt
withNewLines :: [Text]
withNewLines = (Text -> Text) -> [Text] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<>Text
"\n") ([Text] -> [Text]
forall a. HasCallStack => [a] -> [a]
init [Text]
split)
[Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> [[Text] -> Text
forall a. HasCallStack => [a] -> a
last [Text]
split]
withLocs :: [Text] -> [(MyTok, MyLoc, Text)]
withLocs :: [Text] -> [(MyTok, MyLoc, Text)]
withLocs (Text
l:[Text]
ls) = (MyTok
TBlank, Optic A_Lens NoIx MyLoc MyLoc Bool Bool -> Bool -> MyLoc -> MyLoc
forall k (is :: IxList) s t a b.
Is k A_Setter =>
Optic k is s t a b -> b -> s -> t
set Optic A_Lens NoIx MyLoc MyLoc Bool Bool
mark Bool
True MyLoc
loc, Text
l)
(MyTok, MyLoc, Text)
-> [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
forall a. a -> [a] -> [a]
: (Int -> Text -> (MyTok, MyLoc, Text))
-> [Int] -> [Text] -> [(MyTok, MyLoc, Text)]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Int -> Text -> (MyTok, MyLoc, Text)
mkEntry [Int
lineInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1..] [Text]
ls
mkEntry :: Int -> Text -> (MyTok, MyLoc, Text)
mkEntry :: Int -> Text -> (MyTok, MyLoc, Text)
mkEntry Int
i Text
t = (MyTok
TBlank, Int -> Int -> Bool -> MyLoc
MyLoc Int
i Int
1 Bool
True, Text
t)
splitter other :: (MyTok, MyLoc, Text)
other@(MyTok
_, loc :: MyLoc
loc@(MyLoc Int
line Int
1 Bool
x), Text
txt) = [Optic
A_Lens NoIx (MyTok, MyLoc, Text) (MyTok, MyLoc, Text) Bool Bool
-> Bool -> (MyTok, MyLoc, Text) -> (MyTok, MyLoc, Text)
forall k (is :: IxList) s t a b.
Is k A_Setter =>
Optic k is s t a b -> b -> s -> t
set (Lens (MyTok, MyLoc, Text) (MyTok, MyLoc, Text) MyLoc MyLoc
forall s t a b. Field2 s t a b => Lens s t a b
_2 Lens (MyTok, MyLoc, Text) (MyTok, MyLoc, Text) MyLoc MyLoc
-> Optic A_Lens NoIx MyLoc MyLoc Bool Bool
-> Optic
A_Lens NoIx (MyTok, MyLoc, Text) (MyTok, MyLoc, Text) Bool Bool
forall k l m (is :: IxList) (js :: IxList) (ks :: IxList) s t u v a
b.
(JoinKinds k l m, AppendIndices is js ks) =>
Optic k is s t u v -> Optic l js u v a b -> Optic m ks s t a b
% Optic A_Lens NoIx MyLoc MyLoc Bool Bool
mark) Bool
True (MyTok, MyLoc, Text)
other]
splitter (MyTok, MyLoc, Text)
other = [(MyTok, MyLoc, Text)
other]
unmark :: Field2 a a MyLoc MyLoc => a -> a
unmark :: forall a. Field2 a a MyLoc MyLoc => a -> a
unmark = Optic A_Lens NoIx a a Bool Bool -> Bool -> a -> a
forall k (is :: IxList) s t a b.
Is k A_Setter =>
Optic k is s t a b -> b -> s -> t
set (Lens a a MyLoc MyLoc
forall s t a b. Field2 s t a b => Lens s t a b
_2 Lens a a MyLoc MyLoc
-> Optic A_Lens NoIx MyLoc MyLoc Bool Bool
-> Optic A_Lens NoIx a a Bool Bool
forall k l m (is :: IxList) (js :: IxList) (ks :: IxList) s t u v a
b.
(JoinKinds k l m, AppendIndices is js ks) =>
Optic k is s t u v -> Optic l js u v a b -> Optic m ks s t a b
% Optic A_Lens NoIx MyLoc MyLoc Bool Bool
mark) Bool
False
joinEscapedOperators :: (Eq c, IsString c, Semigroup c) => [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
joinEscapedOperators :: forall c.
(Eq c, IsString c, Semigroup c) =>
[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
joinEscapedOperators (a :: (MyTok, MyLoc, c)
a@(MyTok
_, MyLoc
_, c
"("):b :: (MyTok, MyLoc, c)
b@(MyTok
_, MyLoc
_, c
_):c :: (MyTok, MyLoc, c)
c@(MyTok
_, MyLoc
_, c
")"):[(MyTok, MyLoc, c)]
rest) =
(MyTok, MyLoc, c)
a(MyTok, MyLoc, c) -> [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall a. a -> [a] -> [a]
:(MyTok, MyLoc, c) -> (MyTok, MyLoc, c)
forall a. Field2 a a MyLoc MyLoc => a -> a
unmark (MyTok, MyLoc, c)
b(MyTok, MyLoc, c) -> [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall a. a -> [a] -> [a]
:(MyTok, MyLoc, c) -> (MyTok, MyLoc, c)
forall a. Field2 a a MyLoc MyLoc => a -> a
unmark (MyTok, MyLoc, c)
c(MyTok, MyLoc, c) -> [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall a. a -> [a] -> [a]
:[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall c.
(Eq c, IsString c, Semigroup c) =>
[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
joinEscapedOperators [(MyTok, MyLoc, c)]
rest
joinEscapedOperators (a :: (MyTok, MyLoc, c)
a@(MyTok
_, MyLoc
loc, c
"`"):b :: (MyTok, MyLoc, c)
b@(MyTok
_, MyLoc
_, c
_):c :: (MyTok, MyLoc, c)
c@(MyTok
_, MyLoc
_, c
"`"):[(MyTok, MyLoc, c)]
rest) =
(MyTok, MyLoc, c)
a(MyTok, MyLoc, c) -> [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall a. a -> [a] -> [a]
:(MyTok, MyLoc, c) -> (MyTok, MyLoc, c)
forall a. Field2 a a MyLoc MyLoc => a -> a
unmark (MyTok, MyLoc, c)
b(MyTok, MyLoc, c) -> [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall a. a -> [a] -> [a]
:(MyTok, MyLoc, c) -> (MyTok, MyLoc, c)
forall a. Field2 a a MyLoc MyLoc => a -> a
unmark (MyTok, MyLoc, c)
c(MyTok, MyLoc, c) -> [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall a. a -> [a] -> [a]
:[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall c.
(Eq c, IsString c, Semigroup c) =>
[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
joinEscapedOperators [(MyTok, MyLoc, c)]
rest
joinEscapedOperators (a :: (MyTok, MyLoc, c)
a@(MyTok
_, MyLoc
_, c
"("):b :: (MyTok, MyLoc, c)
b@(MyTok
TOperator, MyLoc
_, c
_):[(MyTok, MyLoc, c)]
rest) =
(MyTok, MyLoc, c)
a(MyTok, MyLoc, c) -> [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall a. a -> [a] -> [a]
:(MyTok, MyLoc, c) -> (MyTok, MyLoc, c)
forall a. Field2 a a MyLoc MyLoc => a -> a
unmark (MyTok, MyLoc, c)
b(MyTok, MyLoc, c) -> [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall a. a -> [a] -> [a]
:[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall c.
(Eq c, IsString c, Semigroup c) =>
[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
joinEscapedOperators [(MyTok, MyLoc, c)]
rest
joinEscapedOperators (a :: (MyTok, MyLoc, c)
a@(MyTok
_, MyLoc
_, c
_):b :: (MyTok, MyLoc, c)
b@(MyTok
_, MyLoc
_, c
")"):[(MyTok, MyLoc, c)]
rest) =
(MyTok, MyLoc, c)
a(MyTok, MyLoc, c) -> [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall a. a -> [a] -> [a]
:(MyTok, MyLoc, c) -> (MyTok, MyLoc, c)
forall a. Field2 a a MyLoc MyLoc => a -> a
unmark (MyTok, MyLoc, c)
b(MyTok, MyLoc, c) -> [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall a. a -> [a] -> [a]
:[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall c.
(Eq c, IsString c, Semigroup c) =>
[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
joinEscapedOperators [(MyTok, MyLoc, c)]
rest
joinEscapedOperators ((MyTok, MyLoc, c)
tok:[(MyTok, MyLoc, c)]
rest) = (MyTok, MyLoc, c)
tok(MyTok, MyLoc, c) -> [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall a. a -> [a] -> [a]
:[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall c.
(Eq c, IsString c, Semigroup c) =>
[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
joinEscapedOperators [(MyTok, MyLoc, c)]
rest
joinEscapedOperators [] = []