codehilite.py 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270
  1. """
  2. CodeHilite Extension for Python-Markdown
  3. ========================================
  4. Adds code/syntax highlighting to standard Python-Markdown code blocks.
  5. See <https://Python-Markdown.github.io/extensions/code_hilite>
  6. for documentation.
  7. Original code Copyright 2006-2008 [Waylan Limberg](http://achinghead.com/).
  8. All changes Copyright 2008-2014 The Python Markdown Project
  9. License: [BSD](https://opensource.org/licenses/bsd-license.php)
  10. """
  11. from . import Extension
  12. from ..treeprocessors import Treeprocessor
  13. try:
  14. from pygments import highlight
  15. from pygments.lexers import get_lexer_by_name, guess_lexer
  16. from pygments.formatters import get_formatter_by_name
  17. pygments = True
  18. except ImportError:
  19. pygments = False
  20. def parse_hl_lines(expr):
  21. """Support our syntax for emphasizing certain lines of code.
  22. expr should be like '1 2' to emphasize lines 1 and 2 of a code block.
  23. Returns a list of ints, the line numbers to emphasize.
  24. """
  25. if not expr:
  26. return []
  27. try:
  28. return list(map(int, expr.split()))
  29. except ValueError:
  30. return []
  31. # ------------------ The Main CodeHilite Class ----------------------
  32. class CodeHilite:
  33. """
  34. Determine language of source code, and pass it into pygments hilighter.
  35. Basic Usage:
  36. >>> code = CodeHilite(src = 'some text')
  37. >>> html = code.hilite()
  38. * src: Source string or any object with a .readline attribute.
  39. * linenums: (Boolean) Set line numbering to 'on' (True),
  40. 'off' (False) or 'auto'(None). Set to 'auto' by default.
  41. * guess_lang: (Boolean) Turn language auto-detection
  42. 'on' or 'off' (on by default).
  43. * css_class: Set class name of wrapper div ('codehilite' by default).
  44. * hl_lines: (List of integers) Lines to emphasize, 1-indexed.
  45. Low Level Usage:
  46. >>> code = CodeHilite()
  47. >>> code.src = 'some text' # String or anything with a .readline attr.
  48. >>> code.linenos = True # Turns line numbering on or of.
  49. >>> html = code.hilite()
  50. """
  51. def __init__(self, src=None, linenums=None, guess_lang=True,
  52. css_class="codehilite", lang=None, style='default',
  53. noclasses=False, tab_length=4, hl_lines=None, use_pygments=True):
  54. self.src = src
  55. self.lang = lang
  56. self.linenums = linenums
  57. self.guess_lang = guess_lang
  58. self.css_class = css_class
  59. self.style = style
  60. self.noclasses = noclasses
  61. self.tab_length = tab_length
  62. self.hl_lines = hl_lines or []
  63. self.use_pygments = use_pygments
  64. def hilite(self):
  65. """
  66. Pass code to the [Pygments](http://pygments.pocoo.org/) highliter with
  67. optional line numbers. The output should then be styled with css to
  68. your liking. No styles are applied by default - only styling hooks
  69. (i.e.: <span class="k">).
  70. returns : A string of html.
  71. """
  72. self.src = self.src.strip('\n')
  73. if self.lang is None:
  74. self._parseHeader()
  75. if pygments and self.use_pygments:
  76. try:
  77. lexer = get_lexer_by_name(self.lang)
  78. except ValueError:
  79. try:
  80. if self.guess_lang:
  81. lexer = guess_lexer(self.src)
  82. else:
  83. lexer = get_lexer_by_name('text')
  84. except ValueError:
  85. lexer = get_lexer_by_name('text')
  86. formatter = get_formatter_by_name('html',
  87. linenos=self.linenums,
  88. cssclass=self.css_class,
  89. style=self.style,
  90. noclasses=self.noclasses,
  91. hl_lines=self.hl_lines,
  92. wrapcode=True)
  93. return highlight(self.src, lexer, formatter)
  94. else:
  95. # just escape and build markup usable by JS highlighting libs
  96. txt = self.src.replace('&', '&amp;')
  97. txt = txt.replace('<', '&lt;')
  98. txt = txt.replace('>', '&gt;')
  99. txt = txt.replace('"', '&quot;')
  100. classes = []
  101. if self.lang:
  102. classes.append('language-%s' % self.lang)
  103. if self.linenums:
  104. classes.append('linenums')
  105. class_str = ''
  106. if classes:
  107. class_str = ' class="%s"' % ' '.join(classes)
  108. return '<pre class="%s"><code%s>%s</code></pre>\n' % \
  109. (self.css_class, class_str, txt)
  110. def _parseHeader(self):
  111. """
  112. Determines language of a code block from shebang line and whether the
  113. said line should be removed or left in place. If the sheband line
  114. contains a path (even a single /) then it is assumed to be a real
  115. shebang line and left alone. However, if no path is given
  116. (e.i.: #!python or :::python) then it is assumed to be a mock shebang
  117. for language identification of a code fragment and removed from the
  118. code block prior to processing for code highlighting. When a mock
  119. shebang (e.i: #!python) is found, line numbering is turned on. When
  120. colons are found in place of a shebang (e.i.: :::python), line
  121. numbering is left in the current state - off by default.
  122. Also parses optional list of highlight lines, like:
  123. :::python hl_lines="1 3"
  124. """
  125. import re
  126. # split text into lines
  127. lines = self.src.split("\n")
  128. # pull first line to examine
  129. fl = lines.pop(0)
  130. c = re.compile(r'''
  131. (?:(?:^::+)|(?P<shebang>^[#]!)) # Shebang or 2 or more colons
  132. (?P<path>(?:/\w+)*[/ ])? # Zero or 1 path
  133. (?P<lang>[\w#.+-]*) # The language
  134. \s* # Arbitrary whitespace
  135. # Optional highlight lines, single- or double-quote-delimited
  136. (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?
  137. ''', re.VERBOSE)
  138. # search first line for shebang
  139. m = c.search(fl)
  140. if m:
  141. # we have a match
  142. try:
  143. self.lang = m.group('lang').lower()
  144. except IndexError:
  145. self.lang = None
  146. if m.group('path'):
  147. # path exists - restore first line
  148. lines.insert(0, fl)
  149. if self.linenums is None and m.group('shebang'):
  150. # Overridable and Shebang exists - use line numbers
  151. self.linenums = True
  152. self.hl_lines = parse_hl_lines(m.group('hl_lines'))
  153. else:
  154. # No match
  155. lines.insert(0, fl)
  156. self.src = "\n".join(lines).strip("\n")
  157. # ------------------ The Markdown Extension -------------------------------
  158. class HiliteTreeprocessor(Treeprocessor):
  159. """ Hilight source code in code blocks. """
  160. def code_unescape(self, text):
  161. """Unescape code."""
  162. text = text.replace("&amp;", "&")
  163. text = text.replace("&lt;", "<")
  164. text = text.replace("&gt;", ">")
  165. return text
  166. def run(self, root):
  167. """ Find code blocks and store in htmlStash. """
  168. blocks = root.iter('pre')
  169. for block in blocks:
  170. if len(block) == 1 and block[0].tag == 'code':
  171. code = CodeHilite(
  172. self.code_unescape(block[0].text),
  173. linenums=self.config['linenums'],
  174. guess_lang=self.config['guess_lang'],
  175. css_class=self.config['css_class'],
  176. style=self.config['pygments_style'],
  177. noclasses=self.config['noclasses'],
  178. tab_length=self.md.tab_length,
  179. use_pygments=self.config['use_pygments']
  180. )
  181. placeholder = self.md.htmlStash.store(code.hilite())
  182. # Clear codeblock in etree instance
  183. block.clear()
  184. # Change to p element which will later
  185. # be removed when inserting raw html
  186. block.tag = 'p'
  187. block.text = placeholder
  188. class CodeHiliteExtension(Extension):
  189. """ Add source code hilighting to markdown codeblocks. """
  190. def __init__(self, **kwargs):
  191. # define default configs
  192. self.config = {
  193. 'linenums': [None,
  194. "Use lines numbers. True=yes, False=no, None=auto"],
  195. 'guess_lang': [True,
  196. "Automatic language detection - Default: True"],
  197. 'css_class': ["codehilite",
  198. "Set class name for wrapper <div> - "
  199. "Default: codehilite"],
  200. 'pygments_style': ['default',
  201. 'Pygments HTML Formatter Style '
  202. '(Colorscheme) - Default: default'],
  203. 'noclasses': [False,
  204. 'Use inline styles instead of CSS classes - '
  205. 'Default false'],
  206. 'use_pygments': [True,
  207. 'Use Pygments to Highlight code blocks. '
  208. 'Disable if using a JavaScript library. '
  209. 'Default: True']
  210. }
  211. super().__init__(**kwargs)
  212. def extendMarkdown(self, md):
  213. """ Add HilitePostprocessor to Markdown instance. """
  214. hiliter = HiliteTreeprocessor(md)
  215. hiliter.config = self.getConfigs()
  216. md.treeprocessors.register(hiliter, 'hilite', 30)
  217. md.registerExtension(self)
  218. def makeExtension(**kwargs): # pragma: no cover
  219. return CodeHiliteExtension(**kwargs)