haskell.py 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871
  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers.haskell
  4. ~~~~~~~~~~~~~~~~~~~~~~~
  5. Lexers for Haskell and related languages.
  6. :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. import re
  10. from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
  11. default, include, inherit
  12. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  13. Number, Punctuation, Generic
  14. from pygments import unistring as uni
# Lexer classes exported by this module.
__all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
           'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
           'LiterateCryptolLexer', 'KokaLexer']

# Matches a single source line including its trailing newline; used by the
# literate lexers to walk the input line by line.
line_re = re.compile('.*?\n')
class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 0.8
    """
    name = 'Haskell'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']
    flags = re.MULTILINE | re.UNICODE

    # Reserved words; note that some entries are regex alternations
    # ('infix[lr]?' also covers infixl/infixr) — the tuple is joined with
    # '|' into a single pattern below.
    reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'family', 'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_')
    # ASCII control-character mnemonics accepted in escape sequences; some
    # entries are alternations too (e.g. '[SE]TX' covers STX and ETX).
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r"'[^\\]'", String.Char),  # this has to come before the TH quote
            (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
            (r"'?[_" + uni.Ll + r"][\w']*", Name),
            (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')\[[^\]]*\]", Keyword.Type),  # tuples and lists get special treatment in GHC
            (r"(')\([^)]*\)", Keyword.Type),  # ..
            (r"(')[:!#$%&*+.\\/<=>?@^|~-]+", Keyword.Type),  # promoted type operators
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            # The '_*' parts allow '_' digit separators throughout.
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*_*[pP][+-]?\d(_*\d)*', Number.Float),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*\.[\da-fA-F](_*[\da-fA-F])*'
             r'(_*[pP][+-]?\d(_*\d)*)?', Number.Float),
            (r'\d(_*\d)*_*[eE][+-]?\d(_*\d)*', Number.Float),
            (r'\d(_*\d)*\.\d(_*\d)*(_*[eE][+-]?\d(_*\d)*)?', Number.Float),
            (r'0[bB]_*[01](_*[01])*', Number.Bin),
            (r'0[oO]_*[0-7](_*[0-7])*', Number.Oct),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*', Number.Hex),
            (r'\d(_*\d)*', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[' + uni.Lu + r']\w*', Keyword.Type),
            (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }
  147. class HspecLexer(HaskellLexer):
  148. """
  149. A Haskell lexer with support for Hspec constructs.
  150. .. versionadded:: 2.4.0
  151. """
  152. name = 'Hspec'
  153. aliases = ['hspec']
  154. filenames = []
  155. mimetypes = []
  156. tokens = {
  157. 'root': [
  158. (r'(it\s*)("[^"]*")', bygroups(Text, String.Doc)),
  159. (r'(describe\s*)("[^"]*")', bygroups(Text, String.Doc)),
  160. (r'(context\s*)("[^"]*")', bygroups(Text, String.Doc)),
  161. inherit,
  162. ],
  163. }
class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.

    .. versionadded:: 2.0
    """
    name = 'Idris'
    aliases = ['idris', 'idr']
    filenames = ['*.idr']
    mimetypes = ['text/x-idris']

    # Reserved words; 'infix[lr]?' is a regex alternation covering
    # infix/infixl/infixr.  Joined with '|' into a single pattern below.
    reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
                'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
                'total', 'partial',
                'interface', 'implementation', 'export', 'covering', 'constructor',
                'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
                'pattern', 'term', 'syntax', 'prefix',
                'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
                'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')
    # ASCII control-character mnemonics accepted in escape sequences.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # Compiler directives, written as '%directive' at the start of a line
    # (the '%%' below is a literal '%' after %-formatting).
    directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
                  'default', 'logging', 'dynamic', 'name', 'error_handlers',
                  'language')

    tokens = {
        'root': [
            # Comments
            (r'^(\s*)(%%(%s))' % '|'.join(directives),
             bygroups(Text, Keyword.Reserved)),
            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Text, Comment.Single)),
            (r'(\s*)(\|{3}.*?)$', bygroups(Text, Comment.Single)),
            (r'(\s*)(\{-)', bygroups(Text, Comment.Multiline), 'comment'),
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            (r'[a-z][\w\']*', Text),
            #  Special Symbols
            (r'(<-|::|->|=>|=)', Operator.Word),  # specials
            (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'module': [
            (r'\s+', Text),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'\s+\\', String.Escape, '#pop')
        ],
    }
class AgdaLexer(RegexLexer):
    """
    For the `Agda <http://wiki.portal.chalmers.se/agda/pmwiki.php>`_
    dependently typed functional programming language and proof assistant.

    .. versionadded:: 2.0
    """

    name = 'Agda'
    aliases = ['agda']
    filenames = ['*.agda']
    mimetypes = ['text/x-agda']

    # Agda reserved words, joined with '|' into a single pattern below.
    reserved = ['abstract', 'codata', 'coinductive', 'constructor', 'data',
                'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
                'infixl', 'infixr', 'instance', 'let', 'mutual', 'open',
                'pattern', 'postulate', 'primitive', 'private',
                'quote', 'quoteGoal', 'quoteTerm',
                'record', 'renaming', 'rewrite', 'syntax', 'tactic',
                'unquote', 'unquoteDecl', 'using', 'where', 'with']

    tokens = {
        'root': [
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            # Comments
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Holes ({! ... !} interactive goals)
            (r'\{!', Comment.Directive, 'hole'),
            # Lexemes:
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            # Set/Prop with optional Unicode subscript digits.
            (u'\\b(Set|Prop)[\u2080-\u2089]*\\b', Keyword.Type),
            #  Special Symbols
            (r'(\(|\)|\{|\})', Operator),
            # ellipses, pipe, lambda, forall, arrow, colon, equals
            (u'(\\.{1,3}|\\||\u03BB|\u2200|\u2192|:|=|->)', Operator.Word),
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'hole': [
            # Holes nest like comments.
            (r'[^!{}]+', Comment.Directive),
            (r'\{!', Comment.Directive, '#push'),
            (r'!\}', Comment.Directive, '#pop'),
            (r'[!{}]', Comment.Directive),
        ],
        'module': [
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[a-zA-Z][\w.]*', Name, '#pop'),
            (r'[\W0-9_]+', Text)
        ],
        # These four states are shared with HaskellLexer (see the NOTE there).
        'comment': HaskellLexer.tokens['comment'],
        'character': HaskellLexer.tokens['character'],
        'string': HaskellLexer.tokens['string'],
        'escape': HaskellLexer.tokens['escape']
    }
class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 2.0
    """
    name = 'Cryptol'
    aliases = ['cryptol', 'cry']
    filenames = ['*.cry']
    mimetypes = ['text/x-cryptol']

    # Cryptol reserved words, joined with '|' into a single pattern below.
    reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
                'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
                'max', 'min', 'module', 'newtype', 'pragma', 'property',
                'then', 'type', 'where', 'width')
    # ASCII control-character mnemonics accepted in escape sequences.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            (r'//.*$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            # (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            # (r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }

    # Builtin function names; plain Name tokens with one of these values
    # are re-tagged as Name.Builtin in get_tokens_unprocessed().
    EXTRA_KEYWORDS = {'join', 'split', 'reverse', 'transpose', 'width',
                      'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
                      'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
                      'trace'}

    def get_tokens_unprocessed(self, text):
        """Post-process the token stream, promoting plain ``Name`` tokens
        that are Cryptol builtins (``EXTRA_KEYWORDS``) to ``Name.Builtin``."""
        stack = ['root']
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value
  460. class LiterateLexer(Lexer):
  461. """
  462. Base class for lexers of literate file formats based on LaTeX or Bird-style
  463. (prefixing each code line with ">").
  464. Additional options accepted:
  465. `litstyle`
  466. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  467. is autodetected: if the first non-whitespace character in the source
  468. is a backslash or percent character, LaTeX is assumed, else Bird.
  469. """
  470. bird_re = re.compile(r'(>[ \t]*)(.*\n)')
  471. def __init__(self, baselexer, **options):
  472. self.baselexer = baselexer
  473. Lexer.__init__(self, **options)
  474. def get_tokens_unprocessed(self, text):
  475. style = self.options.get('litstyle')
  476. if style is None:
  477. style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird'
  478. code = ''
  479. insertions = []
  480. if style == 'bird':
  481. # bird-style
  482. for match in line_re.finditer(text):
  483. line = match.group()
  484. m = self.bird_re.match(line)
  485. if m:
  486. insertions.append((len(code),
  487. [(0, Comment.Special, m.group(1))]))
  488. code += m.group(2)
  489. else:
  490. insertions.append((len(code), [(0, Text, line)]))
  491. else:
  492. # latex-style
  493. from pygments.lexers.markup import TexLexer
  494. lxlexer = TexLexer(**self.options)
  495. codelines = 0
  496. latex = ''
  497. for match in line_re.finditer(text):
  498. line = match.group()
  499. if codelines:
  500. if line.lstrip().startswith('\\end{code}'):
  501. codelines = 0
  502. latex += line
  503. else:
  504. code += line
  505. elif line.lstrip().startswith('\\begin{code}'):
  506. codelines = 1
  507. latex += line
  508. insertions.append((len(code),
  509. list(lxlexer.get_tokens_unprocessed(latex))))
  510. latex = ''
  511. else:
  512. latex += line
  513. insertions.append((len(code),
  514. list(lxlexer.get_tokens_unprocessed(latex))))
  515. for item in do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code)):
  516. yield item
  517. class LiterateHaskellLexer(LiterateLexer):
  518. """
  519. For Literate Haskell (Bird-style or LaTeX) source.
  520. Additional options accepted:
  521. `litstyle`
  522. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  523. is autodetected: if the first non-whitespace character in the source
  524. is a backslash or percent character, LaTeX is assumed, else Bird.
  525. .. versionadded:: 0.9
  526. """
  527. name = 'Literate Haskell'
  528. aliases = ['lhs', 'literate-haskell', 'lhaskell']
  529. filenames = ['*.lhs']
  530. mimetypes = ['text/x-literate-haskell']
  531. def __init__(self, **options):
  532. hslexer = HaskellLexer(**options)
  533. LiterateLexer.__init__(self, hslexer, **options)
  534. class LiterateIdrisLexer(LiterateLexer):
  535. """
  536. For Literate Idris (Bird-style or LaTeX) source.
  537. Additional options accepted:
  538. `litstyle`
  539. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  540. is autodetected: if the first non-whitespace character in the source
  541. is a backslash or percent character, LaTeX is assumed, else Bird.
  542. .. versionadded:: 2.0
  543. """
  544. name = 'Literate Idris'
  545. aliases = ['lidr', 'literate-idris', 'lidris']
  546. filenames = ['*.lidr']
  547. mimetypes = ['text/x-literate-idris']
  548. def __init__(self, **options):
  549. hslexer = IdrisLexer(**options)
  550. LiterateLexer.__init__(self, hslexer, **options)
  551. class LiterateAgdaLexer(LiterateLexer):
  552. """
  553. For Literate Agda source.
  554. Additional options accepted:
  555. `litstyle`
  556. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  557. is autodetected: if the first non-whitespace character in the source
  558. is a backslash or percent character, LaTeX is assumed, else Bird.
  559. .. versionadded:: 2.0
  560. """
  561. name = 'Literate Agda'
  562. aliases = ['lagda', 'literate-agda']
  563. filenames = ['*.lagda']
  564. mimetypes = ['text/x-literate-agda']
  565. def __init__(self, **options):
  566. agdalexer = AgdaLexer(**options)
  567. LiterateLexer.__init__(self, agdalexer, litstyle='latex', **options)
  568. class LiterateCryptolLexer(LiterateLexer):
  569. """
  570. For Literate Cryptol (Bird-style or LaTeX) source.
  571. Additional options accepted:
  572. `litstyle`
  573. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  574. is autodetected: if the first non-whitespace character in the source
  575. is a backslash or percent character, LaTeX is assumed, else Bird.
  576. .. versionadded:: 2.0
  577. """
  578. name = 'Literate Cryptol'
  579. aliases = ['lcry', 'literate-cryptol', 'lcryptol']
  580. filenames = ['*.lcry']
  581. mimetypes = ['text/x-literate-cryptol']
  582. def __init__(self, **options):
  583. crylexer = CryptolLexer(**options)
  584. LiterateLexer.__init__(self, crylexer, **options)
class KokaLexer(RegexLexer):
    """
    Lexer for the `Koka <http://koka.codeplex.com>`_
    language.

    .. versionadded:: 1.6
    """

    name = 'Koka'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']

    # Koka keywords.
    # NOTE(review): 'private' appears twice; harmless once joined into a
    # '|' alternation, but looks accidental.
    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public', 'private',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = r'(?![\w/])'

    # koka token abstractions
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'struct-type'),
            ((r'(%s)' % '|'.join(typeStartKeywords)) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Text, Keyword, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text,
                      Keyword, Name.Namespace)),
            (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Name.Function)),
            (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Keyword, Name.Function)),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
            (r'(%s)' % '|'.join(keywords) + boundary, Keyword),
            (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names, optionally qualified with a 'path/of/modules/' prefix
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Text, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword),
            (r'(?=((%s)' % '|'.join(keywords) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            (r'\n\s*#.*$', Comment.Preproc),
            (r'\s+', Text),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        # @"..." literal strings: only '""' escapes a quote.
        'litstring': [
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }