{-# LANGUAGE OverloadedStrings     #-}
{-# LANGUAGE PartialTypeSignatures #-}
{-# LANGUAGE FlexibleContexts      #-}
-- | Skylighting code tokenizer
module Token.Skylighting(lookupTokenizer, tokenizer, fixBackslashOperators) where

import Control.Arrow(first)
import Text.Pandoc.JSON ()
import Text.Pandoc.Definition ()
import Data.Maybe(listToMaybe, mapMaybe)
import Data.String (IsString)
import Data.Char(isAsciiLower, isAsciiUpper, isDigit)
import Data.Text (Text)
import qualified Data.Text as T
import Prelude hiding(getLine)
import Optics.Core

import qualified Skylighting.Types     as Sky
import           Skylighting.Types           (TokenType(..), Syntax, SourceLine, Token)
import qualified Skylighting.Syntax    as Sky(defaultSyntaxMap)
import qualified Skylighting.Tokenizer as Sky(tokenize, TokenizerConfig(..))
import qualified Skylighting.Core      as Sky(lookupSyntax, syntaxByShortName)

import Token ( MyLoc(MyLoc), MyTok(..), unTikzMark, mark )

rightToMaybe :: Either a b -> Maybe b
rightToMaybe :: forall a b. Either a b -> Maybe b
rightToMaybe (Left  a
err   ) = Maybe b
forall a. Maybe a
Nothing
rightToMaybe (Right b
result) = b -> Maybe b
forall a. a -> Maybe a
Just b
result

-- | Looks up the tokenizer from Skylighting preset library by the short name of the language.
--   Picks the first match.
lookupTokenizer :: [Text] -> Maybe Syntax
lookupTokenizer :: [Text] -> Maybe Syntax
lookupTokenizer  = [Syntax] -> Maybe Syntax
forall a. [a] -> Maybe a
listToMaybe
                 ([Syntax] -> Maybe Syntax)
-> ([Text] -> [Syntax]) -> [Text] -> Maybe Syntax
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Maybe Syntax) -> [Text] -> [Syntax]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe (SyntaxMap -> Text -> Maybe Syntax
Sky.syntaxByShortName SyntaxMap
Sky.defaultSyntaxMap)

-- * Haskell tokenizer frontend
-- | Attempt to tokenize input,
--   returns `Nothing` if unsuccessful,
--   so the processor can just pass input
--   further when tokenizer fails.
tokenizer :: Syntax -- Skylighting syntax description
          -> Text -- ^ Input text of code block
          -> Maybe [(MyTok, MyLoc, Text)]
tokenizer :: Syntax -> Text -> Maybe [(MyTok, MyLoc, Text)]
tokenizer Syntax
syntax =
    ([SourceLine] -> [(MyTok, MyLoc, Text)])
-> Maybe [SourceLine] -> Maybe [(MyTok, MyLoc, Text)]
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ( [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
forall c.
(Eq c, IsString c, Semigroup c) =>
[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
joinEscapedOperators
         ([(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)])
-> ([SourceLine] -> [(MyTok, MyLoc, Text)])
-> [SourceLine]
-> [(MyTok, MyLoc, Text)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
fixBackslashOperators  -- Fix Skylighting's incorrect splitting of backslash operators
         ([(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)])
-> ([SourceLine] -> [(MyTok, MyLoc, Text)])
-> [SourceLine]
-> [(MyTok, MyLoc, Text)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
fixBracketOperators    -- Fix Skylighting's splitting of bracket operators like (|, |), [|, |]
         ([(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)])
-> ([SourceLine] -> [(MyTok, MyLoc, Text)])
-> [SourceLine]
-> [(MyTok, MyLoc, Text)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
splitTokens
         ([(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)])
-> ([SourceLine] -> [(MyTok, MyLoc, Text)])
-> [SourceLine]
-> [(MyTok, MyLoc, Text)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [[(MyTok, Text)]] -> [(MyTok, MyLoc, Text)]
restoreLocations
         ([[(MyTok, Text)]] -> [(MyTok, MyLoc, Text)])
-> ([SourceLine] -> [[(MyTok, Text)]])
-> [SourceLine]
-> [(MyTok, MyLoc, Text)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [[(MyTok, Text)]] -> [[(MyTok, Text)]]
splitUnicodeLambda     -- Split unicode lambda from following characters
         ([[(MyTok, Text)]] -> [[(MyTok, Text)]])
-> ([SourceLine] -> [[(MyTok, Text)]])
-> [SourceLine]
-> [[(MyTok, Text)]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [SourceLine] -> [[(MyTok, Text)]]
recognizeTokens )
    (Maybe [SourceLine] -> Maybe [(MyTok, MyLoc, Text)])
-> (Text -> Maybe [SourceLine])
-> Text
-> Maybe [(MyTok, MyLoc, Text)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Either String [SourceLine] -> Maybe [SourceLine]
forall a b. Either a b -> Maybe b
rightToMaybe
    (Either String [SourceLine] -> Maybe [SourceLine])
-> (Text -> Either String [SourceLine])
-> Text
-> Maybe [SourceLine]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TokenizerConfig -> Syntax -> Text -> Either String [SourceLine]
Sky.tokenize TokenizerConfig
tokenizerOpts Syntax
syntax
  where
    tokenizerOpts :: TokenizerConfig
tokenizerOpts = SyntaxMap -> Bool -> TokenizerConfig
Sky.TokenizerConfig SyntaxMap
Sky.defaultSyntaxMap Bool
False

-- | Split tokens that start with unicode lambda (λ) into separate tokens.
--   E.g., "λx" becomes ["λ", "x"]
--   Note: λ is not officially part of GHC's UnicodeSyntax extension,
--   but is widely used as a de facto standard for backslash in lambdas.
splitUnicodeLambda :: [[(MyTok, Text)]] -> [[(MyTok, Text)]]
splitUnicodeLambda :: [[(MyTok, Text)]] -> [[(MyTok, Text)]]
splitUnicodeLambda = ([(MyTok, Text)] -> [(MyTok, Text)])
-> [[(MyTok, Text)]] -> [[(MyTok, Text)]]
forall a b. (a -> b) -> [a] -> [b]
map (((MyTok, Text) -> [(MyTok, Text)])
-> [(MyTok, Text)] -> [(MyTok, Text)]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (MyTok, Text) -> [(MyTok, Text)]
splitToken)
  where
    splitToken :: (MyTok, Text) -> [(MyTok, Text)]
splitToken (MyTok
tok, Text
txt)
      | Text
"λ" Text -> Text -> Bool
`T.isPrefixOf` Text
txt Bool -> Bool -> Bool
&& Text -> Int
T.length Text
txt Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
1 =
          [(MyTok
TOther, Text
"λ"), (MyTok
tok, Int -> Text -> Text
T.drop Int
1 Text
txt)]
      | Bool
otherwise = [(MyTok
tok, Text
txt)]

-- | Recognize tokens from all source lines.
recognizeTokens :: [SourceLine] -> [[(MyTok, Text)]]
recognizeTokens :: [SourceLine] -> [[(MyTok, Text)]]
recognizeTokens  = (SourceLine -> [(MyTok, Text)])
-> [SourceLine] -> [[(MyTok, Text)]]
forall a b. (a -> b) -> [a] -> [b]
map ((SourceLine -> [(MyTok, Text)])
 -> [SourceLine] -> [[(MyTok, Text)]])
-> (SourceLine -> [(MyTok, Text)])
-> [SourceLine]
-> [[(MyTok, Text)]]
forall a b. (a -> b) -> a -> b
$ (Token -> (MyTok, Text)) -> SourceLine -> [(MyTok, Text)]
forall a b. (a -> b) -> [a] -> [b]
map ((Token -> (MyTok, Text)) -> SourceLine -> [(MyTok, Text)])
-> (Token -> (MyTok, Text)) -> SourceLine -> [(MyTok, Text)]
forall a b. (a -> b) -> a -> b
$ (TokenType -> MyTok) -> Token -> (MyTok, Text)
forall b c d. (b -> c) -> (b, d) -> (c, d)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first TokenType -> MyTok
skyTok

-- | Convert token type of `ghc-lib` into tokens recognized by the filter.
skyTok :: TokenType -> MyTok
skyTok :: TokenType -> MyTok
skyTok TokenType
FloatTok          = MyTok
TNum
skyTok TokenType
DecValTok         = MyTok
TNum
skyTok TokenType
BaseNTok          = MyTok
TNum
skyTok TokenType
StringTok         = MyTok
TString
skyTok TokenType
CharTok           = MyTok
TString
skyTok TokenType
FunctionTok       = MyTok
TVar
skyTok TokenType
AttributeTok      = MyTok
TBlank
skyTok TokenType
VerbatimStringTok = MyTok
TString
skyTok TokenType
SpecialStringTok  = MyTok
TCons
skyTok TokenType
ConstantTok       = MyTok
TCons
skyTok TokenType
KeywordTok        = MyTok
TKeyword
skyTok TokenType
BuiltInTok        = MyTok
TKeyword
skyTok TokenType
PreprocessorTok   = MyTok
TBlank
skyTok TokenType
CommentTok        = MyTok
TBlank
skyTok TokenType
DocumentationTok  = MyTok
TBlank
skyTok TokenType
OperatorTok       = MyTok
TOperator
skyTok TokenType
SpecialCharTok    = MyTok
TOperator
skyTok TokenType
RegionMarkerTok   = MyTok
TOperator
skyTok TokenType
AnnotationTok     = MyTok
TBlank
skyTok TokenType
ControlFlowTok    = MyTok
TKeyword
skyTok TokenType
VariableTok       = MyTok
TVar
skyTok TokenType
DataTypeTok       = MyTok
TCons
skyTok TokenType
other             = MyTok
TOther

-- FIXME: generalize for GHC tokenizer and Skylighting
-- | Restore locations
-- TESTME: test
-- 1. Without newlines should return a list of indices up to length
-- 2. Of the same length as number of tokens
-- 3. With newlines should return line indices up to number of lines.
-- 4. Same for a list of lists of words without newlines joined as lines
restoreLocations :: [[(MyTok, Text)]] -> [(MyTok, MyLoc, Text)]
restoreLocations :: [[(MyTok, Text)]] -> [(MyTok, MyLoc, Text)]
restoreLocations [[(MyTok, Text)]]
srcLines = [[(MyTok, MyLoc, Text)]] -> [(MyTok, MyLoc, Text)]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
                          ([[(MyTok, MyLoc, Text)]] -> [(MyTok, MyLoc, Text)])
-> [[(MyTok, MyLoc, Text)]] -> [(MyTok, MyLoc, Text)]
forall a b. (a -> b) -> a -> b
$ (Int -> [(MyTok, Text)] -> [(MyTok, MyLoc, Text)])
-> [Int] -> [[(MyTok, Text)]] -> [[(MyTok, MyLoc, Text)]]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (Int -> Int -> [(MyTok, Text)] -> [(MyTok, MyLoc, Text)]
`go` Int
1) [Int
1..] [[(MyTok, Text)]]
srcLines
  where
    go :: Int -> Int -> [(MyTok, Text)] -> [(MyTok, MyLoc, Text)]
go Int
line Int
col []              = []
    go Int
line Int
col ((MyTok
tok, Text
txt):[(MyTok, Text)]
ls) =
        (MyTok
tok, Int -> Int -> Bool -> MyLoc
MyLoc Int
line Int
col (MyTok -> Bool
isMark MyTok
tok), Text
txt)(MyTok, MyLoc, Text)
-> [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
forall a. a -> [a] -> [a]
:Int -> Int -> [(MyTok, Text)] -> [(MyTok, MyLoc, Text)]
go Int
newLine Int
newCol [(MyTok, Text)]
ls
      where
        isMark :: MyTok -> Bool
isMark MyTok
TBlank = Bool
False
        isMark MyTok
_      = Bool
True
        newLine :: Int
newLine  = Int
line Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
lineIncr
        lineIncr :: Int
lineIncr = Text -> Int
T.length (Text -> Int) -> Text -> Int
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Text -> Text
T.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'\n') Text
txt
        newCol :: Int
newCol  | Int
lineIncr Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Int
col Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Text -> Int
T.length Text
txt
                | Bool
otherwise     = (Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
                                (Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ Text -> Int
T.length
                                (Text -> Int) -> Text -> Int
forall a b. (a -> b) -> a -> b
$ (Text, Text) -> Text
forall a b. (a, b) -> a
fst
                                ((Text, Text) -> Text) -> (Text, Text) -> Text
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Text -> (Text, Text)
T.break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'\n')
                                (Text -> (Text, Text)) -> Text -> (Text, Text)
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.reverse Text
txt

-- * Likely common with other tokenizers
-- | Split tokens into one blank per line.
-- TESTME: assures that no token has '\n' before the end of text.
splitTokens :: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
splitTokens :: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
splitTokens = [[(MyTok, MyLoc, Text)]] -> [(MyTok, MyLoc, Text)]
forall a. Monoid a => [a] -> a
mconcat
            ([[(MyTok, MyLoc, Text)]] -> [(MyTok, MyLoc, Text)])
-> ([(MyTok, MyLoc, Text)] -> [[(MyTok, MyLoc, Text)]])
-> [(MyTok, MyLoc, Text)]
-> [(MyTok, MyLoc, Text)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((MyTok, MyLoc, Text) -> [(MyTok, MyLoc, Text)])
-> [(MyTok, MyLoc, Text)] -> [[(MyTok, MyLoc, Text)]]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (MyTok, MyLoc, Text) -> [(MyTok, MyLoc, Text)]
splitter
  where
    splitter :: (MyTok, MyLoc, Text) -> [(MyTok, MyLoc, Text)]
    splitter :: (MyTok, MyLoc, Text) -> [(MyTok, MyLoc, Text)]
splitter (MyTok
TBlank, loc :: MyLoc
loc@(MyLoc Int
line Int
_ Bool
_), Text
txt) | (Char -> Bool) -> Text -> Text
T.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'\n') Text
txt Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
/= Text
"" =
        [Text] -> [(MyTok, MyLoc, Text)]
withLocs [Text]
withNewLines
      where
        split, withNewLines :: [Text]
        split :: [Text]
split = Text -> [Text]
T.lines Text
txt
        withNewLines :: [Text]
withNewLines = (Text -> Text) -> [Text] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<>Text
"\n") ([Text] -> [Text]
forall a. HasCallStack => [a] -> [a]
init [Text]
split)
                    [Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> [[Text] -> Text
forall a. HasCallStack => [a] -> a
last [Text]
split]
        withLocs :: [Text] -> [(MyTok, MyLoc, Text)]
        withLocs :: [Text] -> [(MyTok, MyLoc, Text)]
withLocs (Text
l:[Text]
ls) = (MyTok
TBlank, Optic A_Lens NoIx MyLoc MyLoc Bool Bool -> Bool -> MyLoc -> MyLoc
forall k (is :: IxList) s t a b.
Is k A_Setter =>
Optic k is s t a b -> b -> s -> t
set Optic A_Lens NoIx MyLoc MyLoc Bool Bool
mark Bool
True MyLoc
loc, Text
l)
                        (MyTok, MyLoc, Text)
-> [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
forall a. a -> [a] -> [a]
: (Int -> Text -> (MyTok, MyLoc, Text))
-> [Int] -> [Text] -> [(MyTok, MyLoc, Text)]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Int -> Text -> (MyTok, MyLoc, Text)
mkEntry [Int
lineInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1..] [Text]
ls
        mkEntry :: Int -> Text -> (MyTok, MyLoc, Text)
        mkEntry :: Int -> Text -> (MyTok, MyLoc, Text)
mkEntry Int
i Text
t = (MyTok
TBlank, Int -> Int -> Bool -> MyLoc
MyLoc Int
i Int
1 Bool
True, Text
t)
    splitter other :: (MyTok, MyLoc, Text)
other@(MyTok
_, loc :: MyLoc
loc@(MyLoc Int
line Int
1 Bool
x), Text
txt) = [Optic
  A_Lens NoIx (MyTok, MyLoc, Text) (MyTok, MyLoc, Text) Bool Bool
-> Bool -> (MyTok, MyLoc, Text) -> (MyTok, MyLoc, Text)
forall k (is :: IxList) s t a b.
Is k A_Setter =>
Optic k is s t a b -> b -> s -> t
set (Lens (MyTok, MyLoc, Text) (MyTok, MyLoc, Text) MyLoc MyLoc
forall s t a b. Field2 s t a b => Lens s t a b
_2 Lens (MyTok, MyLoc, Text) (MyTok, MyLoc, Text) MyLoc MyLoc
-> Optic A_Lens NoIx MyLoc MyLoc Bool Bool
-> Optic
     A_Lens NoIx (MyTok, MyLoc, Text) (MyTok, MyLoc, Text) Bool Bool
forall k l m (is :: IxList) (js :: IxList) (ks :: IxList) s t u v a
       b.
(JoinKinds k l m, AppendIndices is js ks) =>
Optic k is s t u v -> Optic l js u v a b -> Optic m ks s t a b
% Optic A_Lens NoIx MyLoc MyLoc Bool Bool
mark) Bool
True (MyTok, MyLoc, Text)
other]
    splitter (MyTok, MyLoc, Text)
other                                = [(MyTok, MyLoc, Text)
other]


unmark :: Field2 a a MyLoc MyLoc => a -> a
unmark :: forall a. Field2 a a MyLoc MyLoc => a -> a
unmark = Optic A_Lens NoIx a a Bool Bool -> Bool -> a -> a
forall k (is :: IxList) s t a b.
Is k A_Setter =>
Optic k is s t a b -> b -> s -> t
set (Lens a a MyLoc MyLoc
forall s t a b. Field2 s t a b => Lens s t a b
_2 Lens a a MyLoc MyLoc
-> Optic A_Lens NoIx MyLoc MyLoc Bool Bool
-> Optic A_Lens NoIx a a Bool Bool
forall k l m (is :: IxList) (js :: IxList) (ks :: IxList) s t u v a
       b.
(JoinKinds k l m, AppendIndices is js ks) =>
Optic k is s t u v -> Optic l js u v a b -> Optic m ks s t a b
% Optic A_Lens NoIx MyLoc MyLoc Bool Bool
mark) Bool
False

-- | Fix Skylighting's tokenization of bracket operators used in GHC extensions.
--   Skylighting splits operators like (|, |), [|, |] which should be single tokens.
--   This function merges them back together.
fixBracketOperators :: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
fixBracketOperators :: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
fixBracketOperators [] = []
-- (| parallel array bracket
fixBracketOperators ((MyTok
TOther, MyLoc
loc, Text
"("):(MyTok
TOperator, MyLoc
_, Text
"|"):[(MyTok, MyLoc, Text)]
remaining) =
  (MyTok
TOperator, MyLoc
loc, Text
"(|") (MyTok, MyLoc, Text)
-> [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
forall a. a -> [a] -> [a]
: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
fixBracketOperators [(MyTok, MyLoc, Text)]
remaining
-- |) parallel array bracket
fixBracketOperators ((MyTok
TOperator, MyLoc
loc, Text
"|"):(MyTok
TOther, MyLoc
_, Text
")"):[(MyTok, MyLoc, Text)]
remaining) =
  (MyTok
TOperator, MyLoc
loc, Text
"|)") (MyTok, MyLoc, Text)
-> [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
forall a. a -> [a] -> [a]
: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
fixBracketOperators [(MyTok, MyLoc, Text)]
remaining
-- [| quasiquote bracket
fixBracketOperators ((MyTok
TOther, MyLoc
loc, Text
"["):(MyTok
TOperator, MyLoc
_, Text
"|"):[(MyTok, MyLoc, Text)]
remaining) =
  (MyTok
TOperator, MyLoc
loc, Text
"[|") (MyTok, MyLoc, Text)
-> [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
forall a. a -> [a] -> [a]
: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
fixBracketOperators [(MyTok, MyLoc, Text)]
remaining
-- |] quasiquote bracket
fixBracketOperators ((MyTok
TOperator, MyLoc
loc, Text
"|"):(MyTok
TOther, MyLoc
_, Text
"]"):[(MyTok, MyLoc, Text)]
remaining) =
  (MyTok
TOperator, MyLoc
loc, Text
"|]") (MyTok, MyLoc, Text)
-> [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
forall a. a -> [a] -> [a]
: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
fixBracketOperators [(MyTok, MyLoc, Text)]
remaining
-- Default case
fixBracketOperators ((MyTok, MyLoc, Text)
tok:[(MyTok, MyLoc, Text)]
rest) =
  (MyTok, MyLoc, Text)
tok (MyTok, MyLoc, Text)
-> [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
forall a. a -> [a] -> [a]
: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
fixBracketOperators [(MyTok, MyLoc, Text)]
rest

-- | Fix Skylighting's incorrect tokenization of backslash operators.
--   Skylighting incorrectly splits operators like \+, \>, \/, etc.
--   This function merges them back together to match Haskell tokenizer behavior.
fixBackslashOperators :: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
fixBackslashOperators :: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
fixBackslashOperators [] = []
-- Standalone backslash followed by backslash -> \\ set difference operator
fixBackslashOperators ((MyTok
TOther, MyLoc
loc, Text
"\\"):(MyTok
TOther, MyLoc
_, Text
"\\"):[(MyTok, MyLoc, Text)]
remaining) =
  (MyTok
TOperator, MyLoc
loc, Text
"\\\\") (MyTok, MyLoc, Text)
-> [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
forall a. a -> [a] -> [a]
: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
fixBackslashOperators [(MyTok, MyLoc, Text)]
remaining
-- Standalone backslash followed by operator symbol -> merge as operator (\+, \>, etc.)
fixBackslashOperators ((MyTok
TOther, MyLoc
loc, Text
"\\"):(MyTok
TOperator, MyLoc
_, Text
op):[(MyTok, MyLoc, Text)]
remaining) =
  (MyTok
TOperator, MyLoc
loc, Text
"\\" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
op) (MyTok, MyLoc, Text)
-> [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
forall a. a -> [a] -> [a]
: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
fixBackslashOperators [(MyTok, MyLoc, Text)]
remaining
-- Standalone backslash followed by variable -> lambda, keep separate
fixBackslashOperators (tok1 :: (MyTok, MyLoc, Text)
tok1@(MyTok
TOther, MyLoc
_, Text
"\\"):rest :: [(MyTok, MyLoc, Text)]
rest@((MyTok
TVar, MyLoc
_, Text
_):[(MyTok, MyLoc, Text)]
_)) =
  (MyTok, MyLoc, Text)
tok1 (MyTok, MyLoc, Text)
-> [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
forall a. a -> [a] -> [a]
: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
fixBackslashOperators [(MyTok, MyLoc, Text)]
rest
-- Standalone backslash followed by alphanumeric TOther -> lambda, keep separate
fixBackslashOperators (tok1 :: (MyTok, MyLoc, Text)
tok1@(MyTok
TOther, MyLoc
loc1, Text
"\\"):tok2 :: (MyTok, MyLoc, Text)
tok2@(MyTok
TOther, MyLoc
loc2, Text
txt):[(MyTok, MyLoc, Text)]
rest)
  | Bool -> Bool
not (Text -> Bool
T.null Text
txt) Bool -> Bool -> Bool
&& Text -> Bool
isAlphaStart Text
txt =
  (MyTok, MyLoc, Text)
tok1 (MyTok, MyLoc, Text)
-> [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
forall a. a -> [a] -> [a]
: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
fixBackslashOperators ((MyTok, MyLoc, Text)
tok2(MyTok, MyLoc, Text)
-> [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
forall a. a -> [a] -> [a]
:[(MyTok, MyLoc, Text)]
rest)
-- Token ending with backslash followed by / -> merge as \/ operator
fixBackslashOperators ((MyTok
TOther, MyLoc
loc, Text
txt):(MyTok
TOperator, MyLoc
_, Text
"/"):[(MyTok, MyLoc, Text)]
remaining)
  | Text
"\\" Text -> Text -> Bool
`T.isSuffixOf` Text
txt =
  (MyTok
TOperator, MyLoc
loc, HasCallStack => Text -> Text
Text -> Text
T.init Text
txt Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"\\/") (MyTok, MyLoc, Text)
-> [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
forall a. a -> [a] -> [a]
: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
fixBackslashOperators [(MyTok, MyLoc, Text)]
remaining
-- Operator / followed by token starting with backslash -> merge as /\ operator
fixBackslashOperators ((MyTok
TOperator, MyLoc
loc, Text
"/"):(MyTok
TOther, MyLoc
_, Text
txt):[(MyTok, MyLoc, Text)]
remaining)
  | Text
"\\" Text -> Text -> Bool
`T.isPrefixOf` Text
txt =
  (MyTok
TOperator, MyLoc
loc, Text
"/\\") (MyTok, MyLoc, Text)
-> [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
forall a. a -> [a] -> [a]
: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
fixBackslashOperators ((MyTok
TOther, MyLoc
loc, HasCallStack => Text -> Text
Text -> Text
T.tail Text
txt)(MyTok, MyLoc, Text)
-> [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
forall a. a -> [a] -> [a]
:[(MyTok, MyLoc, Text)]
remaining)
-- Token starting with backslash followed by dot (lambda notation like \f.)
fixBackslashOperators (tok1 :: (MyTok, MyLoc, Text)
tok1@(MyTok
TOther, MyLoc
_, Text
txt):tok2 :: (MyTok, MyLoc, Text)
tok2@(MyTok
TOperator, MyLoc
_, Text
"."):[(MyTok, MyLoc, Text)]
rest)
  | Text
"\\" Text -> Text -> Bool
`T.isPrefixOf` Text
txt Bool -> Bool -> Bool
&& Text -> Int
T.length Text
txt Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
1 Bool -> Bool -> Bool
&& Text -> Bool
isAlphaRest (HasCallStack => Text -> Text
Text -> Text
T.tail Text
txt) =
  (MyTok, MyLoc, Text)
tok1 (MyTok, MyLoc, Text)
-> [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
forall a. a -> [a] -> [a]
: (MyTok, MyLoc, Text)
tok2 (MyTok, MyLoc, Text)
-> [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
forall a. a -> [a] -> [a]
: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
fixBackslashOperators [(MyTok, MyLoc, Text)]
rest
-- Default case - keep token as is
fixBackslashOperators ((MyTok, MyLoc, Text)
tok:[(MyTok, MyLoc, Text)]
rest) =
  (MyTok, MyLoc, Text)
tok (MyTok, MyLoc, Text)
-> [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
forall a. a -> [a] -> [a]
: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
fixBackslashOperators [(MyTok, MyLoc, Text)]
rest

-- Helper functions for fixBackslashOperators
isAlphaStart :: Text -> Bool
isAlphaStart :: Text -> Bool
isAlphaStart Text
txt = case Text -> Maybe (Char, Text)
T.uncons Text
txt of
  Just (Char
c, Text
_) -> Char -> Bool
isAsciiLower Char
c Bool -> Bool -> Bool
|| Char -> Bool
isAsciiUpper Char
c
  Maybe (Char, Text)
Nothing -> Bool
False

isAlphaRest :: Text -> Bool
isAlphaRest :: Text -> Bool
isAlphaRest = (Char -> Bool) -> Text -> Bool
T.all (\Char
c -> Char -> Bool
isAsciiLower Char
c Bool -> Bool -> Bool
|| Char -> Bool
isAsciiUpper Char
c Bool -> Bool -> Bool
|| Char -> Bool
isDigit Char
c)

-- FIXME: use no-indent-mark instead.
joinEscapedOperators :: (Eq c, IsString c, Semigroup c) => [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
joinEscapedOperators :: forall c.
(Eq c, IsString c, Semigroup c) =>
[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
joinEscapedOperators (a :: (MyTok, MyLoc, c)
a@(MyTok
_, MyLoc
_, c
"("):b :: (MyTok, MyLoc, c)
b@(MyTok
_, MyLoc
_, c
_):c :: (MyTok, MyLoc, c)
c@(MyTok
_, MyLoc
_, c
")"):[(MyTok, MyLoc, c)]
rest) =
   (MyTok, MyLoc, c)
a(MyTok, MyLoc, c) -> [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall a. a -> [a] -> [a]
:(MyTok, MyLoc, c) -> (MyTok, MyLoc, c)
forall a. Field2 a a MyLoc MyLoc => a -> a
unmark (MyTok, MyLoc, c)
b(MyTok, MyLoc, c) -> [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall a. a -> [a] -> [a]
:(MyTok, MyLoc, c) -> (MyTok, MyLoc, c)
forall a. Field2 a a MyLoc MyLoc => a -> a
unmark (MyTok, MyLoc, c)
c(MyTok, MyLoc, c) -> [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall a. a -> [a] -> [a]
:[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall c.
(Eq c, IsString c, Semigroup c) =>
[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
joinEscapedOperators [(MyTok, MyLoc, c)]
rest
joinEscapedOperators (a :: (MyTok, MyLoc, c)
a@(MyTok
_,    MyLoc
loc, c
"`"):b :: (MyTok, MyLoc, c)
b@(MyTok
_, MyLoc
_, c
_):c :: (MyTok, MyLoc, c)
c@(MyTok
_, MyLoc
_, c
"`"):[(MyTok, MyLoc, c)]
rest) =
   (MyTok, MyLoc, c)
a(MyTok, MyLoc, c) -> [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall a. a -> [a] -> [a]
:(MyTok, MyLoc, c) -> (MyTok, MyLoc, c)
forall a. Field2 a a MyLoc MyLoc => a -> a
unmark (MyTok, MyLoc, c)
b(MyTok, MyLoc, c) -> [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall a. a -> [a] -> [a]
:(MyTok, MyLoc, c) -> (MyTok, MyLoc, c)
forall a. Field2 a a MyLoc MyLoc => a -> a
unmark (MyTok, MyLoc, c)
c(MyTok, MyLoc, c) -> [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall a. a -> [a] -> [a]
:[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall c.
(Eq c, IsString c, Semigroup c) =>
[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
joinEscapedOperators [(MyTok, MyLoc, c)]
rest
joinEscapedOperators (a :: (MyTok, MyLoc, c)
a@(MyTok
_, MyLoc
_, c
"("):b :: (MyTok, MyLoc, c)
b@(MyTok
TOperator, MyLoc
_, c
_):[(MyTok, MyLoc, c)]
rest) =
   (MyTok, MyLoc, c)
a(MyTok, MyLoc, c) -> [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall a. a -> [a] -> [a]
:(MyTok, MyLoc, c) -> (MyTok, MyLoc, c)
forall a. Field2 a a MyLoc MyLoc => a -> a
unmark (MyTok, MyLoc, c)
b(MyTok, MyLoc, c) -> [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall a. a -> [a] -> [a]
:[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall c.
(Eq c, IsString c, Semigroup c) =>
[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
joinEscapedOperators [(MyTok, MyLoc, c)]
rest
joinEscapedOperators (a :: (MyTok, MyLoc, c)
a@(MyTok
_, MyLoc
_, c
_):b :: (MyTok, MyLoc, c)
b@(MyTok
_, MyLoc
_, c
")"):[(MyTok, MyLoc, c)]
rest) =
   (MyTok, MyLoc, c)
a(MyTok, MyLoc, c) -> [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall a. a -> [a] -> [a]
:(MyTok, MyLoc, c) -> (MyTok, MyLoc, c)
forall a. Field2 a a MyLoc MyLoc => a -> a
unmark (MyTok, MyLoc, c)
b(MyTok, MyLoc, c) -> [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall a. a -> [a] -> [a]
:[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall c.
(Eq c, IsString c, Semigroup c) =>
[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
joinEscapedOperators [(MyTok, MyLoc, c)]
rest
joinEscapedOperators ((MyTok, MyLoc, c)
tok:[(MyTok, MyLoc, c)]
rest) = (MyTok, MyLoc, c)
tok(MyTok, MyLoc, c) -> [(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall a. a -> [a] -> [a]
:[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
forall c.
(Eq c, IsString c, Semigroup c) =>
[(MyTok, MyLoc, c)] -> [(MyTok, MyLoc, c)]
joinEscapedOperators [(MyTok, MyLoc, c)]
rest
joinEscapedOperators []         = []

{-

-- | Recognize token using both token type from `ghc-lib`,
--   and text content.
--   Only TikZ marks are recognized by looking up text content.
recognizeToken :: [Token] -> (MyTok, Text)
recognizeToken (CommentTok, tokText@(unTikzMark -> Just mark)) =
  (TTikz mark,           tokText)
recognizeToken (tokType, tokText) =
  (skyTok       tokType, tokText)
 -}