module Text.Highlighting.Kate.Syntax.Html
(highlight, parseExpression, syntaxName, syntaxExtensions)
where
import Text.Highlighting.Kate.Types
import Text.Highlighting.Kate.Common
import qualified Text.Highlighting.Kate.Syntax.Alert
import qualified Text.Highlighting.Kate.Syntax.Css
import qualified Text.Highlighting.Kate.Syntax.Javascript
import Text.ParserCombinators.Parsec hiding (State)
import Control.Monad.State
import Data.Char (isSpace)
syntaxName :: String
syntaxName = "HTML"
syntaxExtensions :: String
syntaxExtensions = "*.htm;*.html;*.shtml;*.shtm"
highlight :: String -> [SourceLine]
highlight input = evalState (mapM parseSourceLine $ lines input) startingState
parseSourceLine :: String -> State SyntaxState SourceLine
parseSourceLine = mkParseSourceLine (parseExpression Nothing)
parseExpression :: Maybe (String,String)
-> KateParser Token
parseExpression mbcontext = do
(lang,cont) <- maybe currentContext return mbcontext
result <- parseRules (lang,cont)
optional $ do eof
updateState $ \st -> st{ synStPrevChar = '\n' }
pEndLine
return result
startingState = SyntaxState {synStContexts = [("HTML","Start")], synStLineNumber = 0, synStPrevChar = '\n', synStPrevNonspace = False, synStCaseSensitive = True, synStKeywordCaseSensitive = True, synStCaptures = []}
pEndLine = do
updateState $ \st -> st{ synStPrevNonspace = False }
context <- currentContext
contexts <- synStContexts `fmap` getState
if length contexts >= 2
then case context of
("HTML","Start") -> return ()
("HTML","FindHTML") -> return ()
("HTML","FindEntityRefs") -> return ()
("HTML","FindPEntityRefs") -> return ()
("HTML","FindAttributes") -> return ()
("HTML","FindDTDRules") -> return ()
("HTML","Comment") -> return ()
("HTML","CDATA") -> return ()
("HTML","PI") -> return ()
("HTML","Doctype") -> return ()
("HTML","Doctype Internal Subset") -> return ()
("HTML","Doctype Markupdecl") -> return ()
("HTML","Doctype Markupdecl DQ") -> return ()
("HTML","Doctype Markupdecl SQ") -> return ()
("HTML","El Open") -> return ()
("HTML","El Close") -> return ()
("HTML","El Close 2") -> return ()
("HTML","El Close 3") -> return ()
("HTML","CSS") -> return ()
("HTML","CSS content") -> return ()
("HTML","JS") -> return ()
("HTML","JS content") -> return ()
("HTML","JS comment close") -> (popContext) >> pEndLine
("HTML","Value") -> return ()
("HTML","Value NQ") -> (popContext >> popContext) >> pEndLine
("HTML","Value DQ") -> return ()
("HTML","Value SQ") -> return ()
_ -> return ()
else return ()
withAttribute attr txt = do
when (null txt) $ fail "Parser matched no text"
updateState $ \st -> st { synStPrevChar = last txt
, synStPrevNonspace = synStPrevNonspace st || not (all isSpace txt) }
return (attr, txt)
regex_'3c'21DOCTYPE'5cs'2b = compileRegex "<!DOCTYPE\\s+"
regex_'3c'5c'3f'5b'5cw'3a'2d'5d'2a = compileRegex "<\\?[\\w:-]*"
regex_'3cstyle'5cb = compileRegex "<style\\b"
regex_'3cscript'5cb = compileRegex "<script\\b"
regex_'3cpre'5cb = compileRegex "<pre\\b"
regex_'3cdiv'5cb = compileRegex "<div\\b"
regex_'3ctable'5cb = compileRegex "<table\\b"
regex_'3cul'5cb = compileRegex "<ul\\b"
regex_'3col'5cb = compileRegex "<ol\\b"
regex_'3cdl'5cb = compileRegex "<dl\\b"
regex_'3c'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a = compileRegex "<[A-Za-z_:][\\w.:_-]*"
regex_'3c'2fpre'5cb = compileRegex "</pre\\b"
regex_'3c'2fdiv'5cb = compileRegex "</div\\b"
regex_'3c'2ftable'5cb = compileRegex "</table\\b"
regex_'3c'2ful'5cb = compileRegex "</ul\\b"
regex_'3c'2fol'5cb = compileRegex "</ol\\b"
regex_'3c'2fdl'5cb = compileRegex "</dl\\b"
regex_'3c'2f'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a = compileRegex "</[A-Za-z_:][\\w.:_-]*"
regex_'26'28'23'5b0'2d9'5d'2b'7c'23'5bxX'5d'5b0'2d9A'2dFa'2df'5d'2b'7c'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a'29'3b = compileRegex "&(#[0-9]+|#[xX][0-9A-Fa-f]+|[A-Za-z_:][\\w.:_-]*);"
regex_'25'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a'3b = compileRegex "%[A-Za-z_:][\\w.:_-]*;"
regex_'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a = compileRegex "[A-Za-z_:][\\w.:_-]*"
regex_'5cs'2b'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a = compileRegex "\\s+[A-Za-z_:][\\w.:_-]*"
regex_'3c'21'28ELEMENT'7cENTITY'7cATTLIST'7cNOTATION'29'5cb = compileRegex "<!(ELEMENT|ENTITY|ATTLIST|NOTATION)\\b"
regex_'2d'28'2d'28'3f'21'2d'3e'29'29'2b = compileRegex "-(-(?!->))+"
regex_'5cS = compileRegex "\\S"
regex_'3c'2fstyle'5cb = compileRegex "</style\\b"
regex_'3c'2fscript'5cb = compileRegex "</script\\b"
regex_'2f'2f'28'3f'3d'2e'2a'3c'2fscript'5cb'29 = compileRegex "//(?=.*</script\\b)"
regex_'2f'28'3f'21'3e'29 = compileRegex "/(?!>)"
regex_'5b'5e'2f'3e'3c'22'27'5cs'5d = compileRegex "[^/><\"'\\s]"
parseRules ("HTML","Start") =
(((parseRules ("HTML","FindHTML")))
<|>
(currentContext >>= \x -> guard (x == ("HTML","Start")) >> pDefault >>= withAttribute NormalTok))
parseRules ("HTML","FindHTML") =
(((pDetectSpaces >>= withAttribute NormalTok))
<|>
((pDetectIdentifier >>= withAttribute NormalTok))
<|>
((pString False "<!--" >>= withAttribute CommentTok) >>~ pushContext ("HTML","Comment"))
<|>
((pString False "<![CDATA[" >>= withAttribute BaseNTok) >>~ pushContext ("HTML","CDATA"))
<|>
((pRegExpr regex_'3c'21DOCTYPE'5cs'2b >>= withAttribute DataTypeTok) >>~ pushContext ("HTML","Doctype"))
<|>
((pRegExpr regex_'3c'5c'3f'5b'5cw'3a'2d'5d'2a >>= withAttribute KeywordTok) >>~ pushContext ("HTML","PI"))
<|>
((pRegExpr regex_'3cstyle'5cb >>= withAttribute KeywordTok) >>~ pushContext ("HTML","CSS"))
<|>
((pRegExpr regex_'3cscript'5cb >>= withAttribute KeywordTok) >>~ pushContext ("HTML","JS"))
<|>
((pRegExpr regex_'3cpre'5cb >>= withAttribute KeywordTok) >>~ pushContext ("HTML","El Open"))
<|>
((pRegExpr regex_'3cdiv'5cb >>= withAttribute KeywordTok) >>~ pushContext ("HTML","El Open"))
<|>
((pRegExpr regex_'3ctable'5cb >>= withAttribute KeywordTok) >>~ pushContext ("HTML","El Open"))
<|>
((pRegExpr regex_'3cul'5cb >>= withAttribute KeywordTok) >>~ pushContext ("HTML","El Open"))
<|>
((pRegExpr regex_'3col'5cb >>= withAttribute KeywordTok) >>~ pushContext ("HTML","El Open"))
<|>
((pRegExpr regex_'3cdl'5cb >>= withAttribute KeywordTok) >>~ pushContext ("HTML","El Open"))
<|>
((pRegExpr regex_'3c'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a >>= withAttribute KeywordTok) >>~ pushContext ("HTML","El Open"))
<|>
((pRegExpr regex_'3c'2fpre'5cb >>= withAttribute KeywordTok) >>~ pushContext ("HTML","El Close"))
<|>
((pRegExpr regex_'3c'2fdiv'5cb >>= withAttribute KeywordTok) >>~ pushContext ("HTML","El Close"))
<|>
((pRegExpr regex_'3c'2ftable'5cb >>= withAttribute KeywordTok) >>~ pushContext ("HTML","El Close"))
<|>
((pRegExpr regex_'3c'2ful'5cb >>= withAttribute KeywordTok) >>~ pushContext ("HTML","El Close"))
<|>
((pRegExpr regex_'3c'2fol'5cb >>= withAttribute KeywordTok) >>~ pushContext ("HTML","El Close"))
<|>
((pRegExpr regex_'3c'2fdl'5cb >>= withAttribute KeywordTok) >>~ pushContext ("HTML","El Close"))
<|>
((pRegExpr regex_'3c'2f'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a >>= withAttribute KeywordTok) >>~ pushContext ("HTML","El Close"))
<|>
((parseRules ("HTML","FindDTDRules")))
<|>
((parseRules ("HTML","FindEntityRefs")))
<|>
(currentContext >>= \x -> guard (x == ("HTML","FindHTML")) >> pDefault >>= withAttribute NormalTok))
parseRules ("HTML","FindEntityRefs") =
(((pRegExpr regex_'26'28'23'5b0'2d9'5d'2b'7c'23'5bxX'5d'5b0'2d9A'2dFa'2df'5d'2b'7c'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a'29'3b >>= withAttribute DecValTok))
<|>
((pAnyChar "&<" >>= withAttribute ErrorTok))
<|>
(currentContext >>= \x -> guard (x == ("HTML","FindEntityRefs")) >> pDefault >>= withAttribute NormalTok))
parseRules ("HTML","FindPEntityRefs") =
(((pRegExpr regex_'26'28'23'5b0'2d9'5d'2b'7c'23'5bxX'5d'5b0'2d9A'2dFa'2df'5d'2b'7c'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a'29'3b >>= withAttribute DecValTok))
<|>
((pRegExpr regex_'25'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a'3b >>= withAttribute DecValTok))
<|>
((pAnyChar "&%" >>= withAttribute ErrorTok))
<|>
(currentContext >>= \x -> guard (x == ("HTML","FindPEntityRefs")) >> pDefault >>= withAttribute NormalTok))
parseRules ("HTML","FindAttributes") =
(((pColumn 0 >> pRegExpr regex_'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a >>= withAttribute OtherTok))
<|>
((pRegExpr regex_'5cs'2b'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a >>= withAttribute OtherTok))
<|>
((pDetectChar False '=' >>= withAttribute OtherTok) >>~ pushContext ("HTML","Value"))
<|>
(currentContext >>= \x -> guard (x == ("HTML","FindAttributes")) >> pDefault >>= withAttribute NormalTok))
parseRules ("HTML","FindDTDRules") =
(((pRegExpr regex_'3c'21'28ELEMENT'7cENTITY'7cATTLIST'7cNOTATION'29'5cb >>= withAttribute DataTypeTok) >>~ pushContext ("HTML","Doctype Markupdecl"))
<|>
(currentContext >>= \x -> guard (x == ("HTML","FindDTDRules")) >> pDefault >>= withAttribute NormalTok))
parseRules ("HTML","Comment") =
(((pDetectSpaces >>= withAttribute CommentTok))
<|>
((Text.Highlighting.Kate.Syntax.Alert.parseExpression (Just ("Alerts","")) >>= ((withAttribute CommentTok) . snd)))
<|>
((pDetectIdentifier >>= withAttribute CommentTok))
<|>
((pString False "-->" >>= withAttribute CommentTok) >>~ (popContext))
<|>
((pRegExpr regex_'2d'28'2d'28'3f'21'2d'3e'29'29'2b >>= withAttribute ErrorTok))
<|>
(currentContext >>= \x -> guard (x == ("HTML","Comment")) >> pDefault >>= withAttribute CommentTok))
parseRules ("HTML","CDATA") =
(((pDetectSpaces >>= withAttribute NormalTok))
<|>
((pDetectIdentifier >>= withAttribute NormalTok))
<|>
((pString False "]]>" >>= withAttribute BaseNTok) >>~ (popContext))
<|>
((pString False "]]>" >>= withAttribute DecValTok))
<|>
(currentContext >>= \x -> guard (x == ("HTML","CDATA")) >> pDefault >>= withAttribute NormalTok))
parseRules ("HTML","PI") =
(((pDetect2Chars False '?' '>' >>= withAttribute KeywordTok) >>~ (popContext))
<|>
(currentContext >>= \x -> guard (x == ("HTML","PI")) >> pDefault >>= withAttribute NormalTok))
parseRules ("HTML","Doctype") =
(((pDetectChar False '>' >>= withAttribute DataTypeTok) >>~ (popContext))
<|>
((pDetectChar False '[' >>= withAttribute DataTypeTok) >>~ pushContext ("HTML","Doctype Internal Subset"))
<|>
(currentContext >>= \x -> guard (x == ("HTML","Doctype")) >> pDefault >>= withAttribute NormalTok))
parseRules ("HTML","Doctype Internal Subset") =
(((pDetectChar False ']' >>= withAttribute DataTypeTok) >>~ (popContext))
<|>
((parseRules ("HTML","FindDTDRules")))
<|>
((pString False "<!--" >>= withAttribute CommentTok) >>~ pushContext ("HTML","Comment"))
<|>
((pRegExpr regex_'3c'5c'3f'5b'5cw'3a'2d'5d'2a >>= withAttribute KeywordTok) >>~ pushContext ("HTML","PI"))
<|>
((parseRules ("HTML","FindPEntityRefs")))
<|>
(currentContext >>= \x -> guard (x == ("HTML","Doctype Internal Subset")) >> pDefault >>= withAttribute NormalTok))
parseRules ("HTML","Doctype Markupdecl") =
(((pDetectChar False '>' >>= withAttribute DataTypeTok) >>~ (popContext))
<|>
((pDetectChar False '"' >>= withAttribute StringTok) >>~ pushContext ("HTML","Doctype Markupdecl DQ"))
<|>
((pDetectChar False '\'' >>= withAttribute StringTok) >>~ pushContext ("HTML","Doctype Markupdecl SQ"))
<|>
(currentContext >>= \x -> guard (x == ("HTML","Doctype Markupdecl")) >> pDefault >>= withAttribute NormalTok))
parseRules ("HTML","Doctype Markupdecl DQ") =
(((pDetectChar False '"' >>= withAttribute StringTok) >>~ (popContext))
<|>
((parseRules ("HTML","FindPEntityRefs")))
<|>
(currentContext >>= \x -> guard (x == ("HTML","Doctype Markupdecl DQ")) >> pDefault >>= withAttribute StringTok))
parseRules ("HTML","Doctype Markupdecl SQ") =
(((pDetectChar False '\'' >>= withAttribute StringTok) >>~ (popContext))
<|>
((parseRules ("HTML","FindPEntityRefs")))
<|>
(currentContext >>= \x -> guard (x == ("HTML","Doctype Markupdecl SQ")) >> pDefault >>= withAttribute StringTok))
parseRules ("HTML","El Open") =
(((pDetect2Chars False '/' '>' >>= withAttribute KeywordTok) >>~ (popContext))
<|>
((pDetectChar False '>' >>= withAttribute KeywordTok) >>~ (popContext))
<|>
((parseRules ("HTML","FindAttributes")))
<|>
((pRegExpr regex_'5cS >>= withAttribute ErrorTok))
<|>
(currentContext >>= \x -> guard (x == ("HTML","El Open")) >> pDefault >>= withAttribute NormalTok))
parseRules ("HTML","El Close") =
(((pDetectChar False '>' >>= withAttribute KeywordTok) >>~ (popContext))
<|>
((pRegExpr regex_'5cS >>= withAttribute ErrorTok))
<|>
(currentContext >>= \x -> guard (x == ("HTML","El Close")) >> pDefault >>= withAttribute NormalTok))
parseRules ("HTML","El Close 2") =
(((pDetectChar False '>' >>= withAttribute KeywordTok) >>~ (popContext >> popContext >> popContext))
<|>
((pRegExpr regex_'5cS >>= withAttribute ErrorTok))
<|>
(currentContext >>= \x -> guard (x == ("HTML","El Close 2")) >> pDefault >>= withAttribute NormalTok))
parseRules ("HTML","El Close 3") =
(((pDetectChar False '>' >>= withAttribute KeywordTok) >>~ (popContext >> popContext >> popContext >> popContext))
<|>
((pRegExpr regex_'5cS >>= withAttribute ErrorTok))
<|>
(currentContext >>= \x -> guard (x == ("HTML","El Close 3")) >> pDefault >>= withAttribute NormalTok))
parseRules ("HTML","CSS") =
(((pDetect2Chars False '/' '>' >>= withAttribute KeywordTok) >>~ (popContext))
<|>
((pDetectChar False '>' >>= withAttribute KeywordTok) >>~ pushContext ("HTML","CSS content"))
<|>
((parseRules ("HTML","FindAttributes")))
<|>
((pRegExpr regex_'5cS >>= withAttribute ErrorTok))
<|>
(currentContext >>= \x -> guard (x == ("HTML","CSS")) >> pDefault >>= withAttribute NormalTok))
parseRules ("HTML","CSS content") =
(((pRegExpr regex_'3c'2fstyle'5cb >>= withAttribute KeywordTok) >>~ pushContext ("HTML","El Close 2"))
<|>
((Text.Highlighting.Kate.Syntax.Css.parseExpression (Just ("CSS",""))))
<|>
(currentContext >>= \x -> guard (x == ("HTML","CSS content")) >> pDefault >>= withAttribute NormalTok))
parseRules ("HTML","JS") =
(((pDetect2Chars False '/' '>' >>= withAttribute KeywordTok) >>~ (popContext))
<|>
((pDetectChar False '>' >>= withAttribute KeywordTok) >>~ pushContext ("HTML","JS content"))
<|>
((parseRules ("HTML","FindAttributes")))
<|>
((pRegExpr regex_'5cS >>= withAttribute ErrorTok))
<|>
(currentContext >>= \x -> guard (x == ("HTML","JS")) >> pDefault >>= withAttribute NormalTok))
parseRules ("HTML","JS content") =
(((pRegExpr regex_'3c'2fscript'5cb >>= withAttribute KeywordTok) >>~ pushContext ("HTML","El Close 2"))
<|>
((pRegExpr regex_'2f'2f'28'3f'3d'2e'2a'3c'2fscript'5cb'29 >>= withAttribute CommentTok) >>~ pushContext ("HTML","JS comment close"))
<|>
((Text.Highlighting.Kate.Syntax.Javascript.parseExpression (Just ("JavaScript",""))))
<|>
(currentContext >>= \x -> guard (x == ("HTML","JS content")) >> pDefault >>= withAttribute NormalTok))
parseRules ("HTML","JS comment close") =
(((pRegExpr regex_'3c'2fscript'5cb >>= withAttribute KeywordTok) >>~ pushContext ("HTML","El Close 3"))
<|>
((Text.Highlighting.Kate.Syntax.Alert.parseExpression (Just ("Alerts","")) >>= ((withAttribute CommentTok) . snd)))
<|>
(currentContext >>= \x -> guard (x == ("HTML","JS comment close")) >> pDefault >>= withAttribute CommentTok))
parseRules ("HTML","Value") =
(((pDetectChar False '"' >>= withAttribute StringTok) >>~ pushContext ("HTML","Value DQ"))
<|>
((pDetectChar False '\'' >>= withAttribute StringTok) >>~ pushContext ("HTML","Value SQ"))
<|>
((pDetectSpaces >>= withAttribute NormalTok))
<|>
(pushContext ("HTML","Value NQ") >> currentContext >>= parseRules))
parseRules ("HTML","Value NQ") =
(((parseRules ("HTML","FindEntityRefs")))
<|>
((pRegExpr regex_'2f'28'3f'21'3e'29 >>= withAttribute StringTok))
<|>
((pRegExpr regex_'5b'5e'2f'3e'3c'22'27'5cs'5d >>= withAttribute StringTok))
<|>
((popContext >> popContext) >> currentContext >>= parseRules))
parseRules ("HTML","Value DQ") =
(((pDetectChar False '"' >>= withAttribute StringTok) >>~ (popContext >> popContext))
<|>
((parseRules ("HTML","FindEntityRefs")))
<|>
(currentContext >>= \x -> guard (x == ("HTML","Value DQ")) >> pDefault >>= withAttribute StringTok))
parseRules ("HTML","Value SQ") =
(((pDetectChar False '\'' >>= withAttribute StringTok) >>~ (popContext >> popContext))
<|>
((parseRules ("HTML","FindEntityRefs")))
<|>
(currentContext >>= \x -> guard (x == ("HTML","Value SQ")) >> pDefault >>= withAttribute StringTok))
parseRules ("Alerts", _) = Text.Highlighting.Kate.Syntax.Alert.parseExpression Nothing
parseRules ("CSS", _) = Text.Highlighting.Kate.Syntax.Css.parseExpression Nothing
parseRules ("JavaScript", _) = Text.Highlighting.Kate.Syntax.Javascript.parseExpression Nothing
parseRules x = parseRules ("HTML","Start") <|> fail ("Unknown context" ++ show x)