blockparser.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. """
  2. Python Markdown
  3. A Python implementation of John Gruber's Markdown.
  4. Documentation: https://python-markdown.github.io/
  5. GitHub: https://github.com/Python-Markdown/markdown/
  6. PyPI: https://pypi.org/project/Markdown/
  7. Started by Manfred Stienstra (http://www.dwerg.net/).
  8. Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
  9. Currently maintained by Waylan Limberg (https://github.com/waylan),
  10. Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
  11. Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
  12. Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
  13. Copyright 2004 Manfred Stienstra (the original version)
  14. License: BSD (see LICENSE.md for details).
  15. """
  16. import xml.etree.ElementTree as etree
  17. from . import util
  18. class State(list):
  19. """ Track the current and nested state of the parser.
  20. This utility class is used to track the state of the BlockParser and
  21. support multiple levels if nesting. It's just a simple API wrapped around
  22. a list. Each time a state is set, that state is appended to the end of the
  23. list. Each time a state is reset, that state is removed from the end of
  24. the list.
  25. Therefore, each time a state is set for a nested block, that state must be
  26. reset when we back out of that level of nesting or the state could be
  27. corrupted.
  28. While all the methods of a list object are available, only the three
  29. defined below need be used.
  30. """
  31. def set(self, state):
  32. """ Set a new state. """
  33. self.append(state)
  34. def reset(self):
  35. """ Step back one step in nested state. """
  36. self.pop()
  37. def isstate(self, state):
  38. """ Test that top (current) level is of given state. """
  39. if len(self):
  40. return self[-1] == state
  41. else:
  42. return False
  43. class BlockParser:
  44. """ Parse Markdown blocks into an ElementTree object.
  45. A wrapper class that stitches the various BlockProcessors together,
  46. looping through them and creating an ElementTree object.
  47. """
  48. def __init__(self, md):
  49. self.blockprocessors = util.Registry()
  50. self.state = State()
  51. self.md = md
  52. @property
  53. @util.deprecated("Use 'md' instead.")
  54. def markdown(self):
  55. # TODO: remove this later
  56. return self.md
  57. def parseDocument(self, lines):
  58. """ Parse a markdown document into an ElementTree.
  59. Given a list of lines, an ElementTree object (not just a parent
  60. Element) is created and the root element is passed to the parser
  61. as the parent. The ElementTree object is returned.
  62. This should only be called on an entire document, not pieces.
  63. """
  64. # Create a ElementTree from the lines
  65. self.root = etree.Element(self.md.doc_tag)
  66. self.parseChunk(self.root, '\n'.join(lines))
  67. return etree.ElementTree(self.root)
  68. def parseChunk(self, parent, text):
  69. """ Parse a chunk of markdown text and attach to given etree node.
  70. While the ``text`` argument is generally assumed to contain multiple
  71. blocks which will be split on blank lines, it could contain only one
  72. block. Generally, this method would be called by extensions when
  73. block parsing is required.
  74. The ``parent`` etree Element passed in is altered in place.
  75. Nothing is returned.
  76. """
  77. self.parseBlocks(parent, text.split('\n\n'))
  78. def parseBlocks(self, parent, blocks):
  79. """ Process blocks of markdown text and attach to given etree node.
  80. Given a list of ``blocks``, each blockprocessor is stepped through
  81. until there are no blocks left. While an extension could potentially
  82. call this method directly, it's generally expected to be used
  83. internally.
  84. This is a public method as an extension may need to add/alter
  85. additional BlockProcessors which call this method to recursively
  86. parse a nested block.
  87. """
  88. while blocks:
  89. for processor in self.blockprocessors:
  90. if processor.test(parent, blocks[0]):
  91. if processor.run(parent, blocks) is not False:
  92. # run returns True or None
  93. break