postprocessors.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. """
  2. Python Markdown
  3. A Python implementation of John Gruber's Markdown.
  4. Documentation: https://python-markdown.github.io/
  5. GitHub: https://github.com/Python-Markdown/markdown/
  6. PyPI: https://pypi.org/project/Markdown/
  7. Started by Manfred Stienstra (http://www.dwerg.net/).
  8. Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
  9. Currently maintained by Waylan Limberg (https://github.com/waylan),
  10. Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
  11. Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
  12. Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
  13. Copyright 2004 Manfred Stienstra (the original version)
  14. License: BSD (see LICENSE.md for details).
  15. POST-PROCESSORS
  16. =============================================================================
  17. Markdown also allows post-processors, which are similar to preprocessors in
  18. that they need to implement a "run" method. However, they are run after core
  19. processing.
  20. """
  21. from collections import OrderedDict
  22. from . import util
  23. import re
  def build_postprocessors(md, **kwargs):
      """
      Assemble the registry holding Markdown's default postprocessors.

      Three postprocessors are registered, in this order:

      * `raw_html` (priority 30) -- a `RawHtmlPostprocessor` bound to `md`
      * `amp_substitute` (priority 20) -- an `AndSubstitutePostprocessor`
      * `unescape` (priority 10) -- an `UnescapePostprocessor`

      Any extra keyword arguments are accepted but not used here.

      Returns the populated `util.Registry`.
      """
      registry = util.Registry()
      defaults = (
          (RawHtmlPostprocessor(md), 'raw_html', 30),
          (AndSubstitutePostprocessor(), 'amp_substitute', 20),
          (UnescapePostprocessor(), 'unescape', 10),
      )
      for processor, name, priority in defaults:
          registry.register(processor, name, priority)
      return registry
  class Postprocessor(util.Processor):
      """
      Base class for postprocessors.

      Postprocessors are run after the ElementTree is converted back into
      text. Each Postprocessor implements a `run` method that takes a text
      string, modifies it as necessary and returns a text string.

      Postprocessors must extend markdown.Postprocessor.
      """

      def run(self, text):
          """
          Hook to be implemented by subclasses.

          A subclass's `run` receives the html document as a single text
          string and returns a (possibly modified) string. This base
          implementation does nothing and returns None.
          """
          pass  # pragma: no cover
  class RawHtmlPostprocessor(Postprocessor):
      """ Restore raw html to the document. """

      def run(self, text):
          """
          Iterate over html stash and restore html.

          Every placeholder issued by `self.md.htmlStash` is replaced with
          the raw html stored for it. When the stored html is block level
          (see `isblocklevel`), a placeholder wrapped as `<p>placeholder</p>`
          is replaced, wrapper included, by the html followed by a newline.

          Restored html may itself contain placeholders, so substitution is
          repeated until the text stops changing.

          Returns the text with the stashed html restored. If the stash is
          empty, `text` is returned unchanged.
          """
          stash = self.md.htmlStash

          # The replacement table depends only on the stash, so build it (and
          # the regex derived from it) once rather than on every pass.
          replacements = OrderedDict()
          for i in range(stash.html_counter):
              html = stash.rawHtmlBlocks[i]
              placeholder = stash.get_placeholder(i)
              if self.isblocklevel(html):
                  # Registered before the bare placeholder so the wrapped
                  # form comes first in the alternation built below.
                  replacements["<p>%s</p>" % placeholder] = html + "\n"
              replacements[placeholder] = html

          if not replacements:
              return text

          pattern = re.compile("|".join(re.escape(k) for k in replacements))

          def restore(m):
              return replacements[m.group(0)]

          # Each pass resolves one level of nesting. Without a cycle, nesting
          # can be at most one level per stash entry, so `html_counter` passes
          # plus one confirming pass always reach the fixed point. The bound
          # only matters if a stash entry (directly or indirectly) contains
          # its own placeholder, where it guarantees termination.
          for _ in range(stash.html_counter + 1):
              processed_text = pattern.sub(restore, text)
              if processed_text == text:
                  break
              text = processed_text
          return text

      def isblocklevel(self, html):
          """
          Report whether `html` starts with a block-level construct.

          The name following the leading `<` (or `</`) is extracted. A name
          beginning with `!`, `?`, `@` or `%` is always treated as block
          level; any other name is passed to `self.md.is_block_level`.
          Returns False when `html` does not start with a tag.
          """
          m = re.match(r'^\<\/?([^ >]+)', html)
          if m:
              if m.group(1)[0] in ('!', '?', '@', '%'):
                  # Comment, php etc...
                  return True
              return self.md.is_block_level(m.group(1))
          return False
  class AndSubstitutePostprocessor(Postprocessor):
      """ Restore valid entities """

      def run(self, text):
          """
          Return `text` with every occurrence of `util.AMP_SUBSTITUTE`
          replaced by a literal `&`.
          """
          return text.replace(util.AMP_SUBSTITUTE, "&")
  class UnescapePostprocessor(Postprocessor):
      """ Restore escaped chars """

      # Matches a run of decimal digits enclosed between util.STX and util.ETX;
      # the digits are captured as group 1.
      RE = re.compile(r'{}(\d+){}'.format(util.STX, util.ETX))

      def unescape(self, m):
          """
          Convert one `RE` match into the character whose code point is the
          captured decimal number.
          """
          code_point = int(m.group(1))
          return chr(code_point)

      def run(self, text):
          """
          Return `text` with every `RE` match replaced by the character that
          `unescape` produces for it.
          """
          return self.RE.sub(self.unescape, text)