| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435 |
- """
- Python Markdown
- A Python implementation of John Gruber's Markdown.
- Documentation: https://python-markdown.github.io/
- GitHub: https://github.com/Python-Markdown/markdown/
- PyPI: https://pypi.org/project/Markdown/
- Started by Manfred Stienstra (http://www.dwerg.net/).
- Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
- Currently maintained by Waylan Limberg (https://github.com/waylan),
- Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
- Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
- Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
- Copyright 2004 Manfred Stienstra (the original version)
- License: BSD (see LICENSE.md for details).
- """
- import xml.etree.ElementTree as etree
- from . import util
- from . import inlinepatterns
def build_treeprocessors(md, **kwargs):
    """Create the registry of treeprocessors that Markdown uses by default.

    Arguments:

    * md: the Markdown instance handed to each treeprocessor's constructor.
    * kwargs: accepted for signature compatibility; not used here.

    Returns: a `util.Registry` holding the 'inline' processor (registered
    with priority 20) and the 'prettify' processor (priority 10).
    """
    registry = util.Registry()
    registry.register(InlineProcessor(md), 'inline', 20)
    registry.register(PrettifyTreeprocessor(md), 'prettify', 10)
    return registry
def isString(s):
    """Tell whether `s` is an ordinary string.

    Returns True only for `str` instances that are not `util.AtomicString`;
    an AtomicString, or any non-string object, yields False.
    """
    # AtomicString is deliberately reported as "not a string".
    if isinstance(s, util.AtomicString):
        return False
    return isinstance(s, str)
class Treeprocessor(util.Processor):
    """
    Base class for processors that work on the parsed ElementTree.

    Treeprocessors are run on the ElementTree object before serialization.
    A concrete Treeprocessor supplies a "run" method which receives the
    root of the tree, changes it as needed, and may hand back a tree.

    Treeprocessors must extend markdown.Treeprocessor.
    """

    def run(self, root):
        """
        Process the tree rooted at `root`; subclasses should override this.

        An implementation may either return a new ElementTree, in which case
        the existing root is replaced by it, or modify the given tree in
        place and return None. This base implementation does nothing.
        """
        return None  # pragma: no cover
class InlineProcessor(Treeprocessor):
    """
    A Treeprocessor that traverses a tree, applying inline patterns.

    Matched inline content is replaced in the text by a placeholder while
    the generated node (an Element or a string) is kept in
    `self.stashed_nodes`; the placeholders are later swapped back for the
    stashed nodes when the text is re-attached to the tree.
    """

    def __init__(self, md):
        self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
        self.__placeholder_suffix = util.ETX
        self.__placeholder_length = 4 + len(self.__placeholder_prefix) \
            + len(self.__placeholder_suffix)
        self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
        self.md = md
        self.inlinePatterns = md.inlinePatterns
        # Lower-cased tag names of the elements enclosing the text that is
        # currently being processed.
        self.ancestors = []
        # Both are rebuilt at the start of every `run()`; they are created
        # here as well so the instance is fully initialised beforehand.
        self.stashed_nodes = {}
        self.parent_map = {}

    @property
    @util.deprecated("Use 'md' instead.")
    def markdown(self):
        # TODO: remove this later
        return self.md

    def __makePlaceholder(self, type):
        """
        Generate a placeholder.

        The id is the current number of stashed nodes, zero-padded to four
        digits. `type` is accepted but not used.

        Returns: tuple of (placeholder string, id).
        """
        node_id = "%04d" % len(self.stashed_nodes)
        placeholder = util.INLINE_PLACEHOLDER % node_id
        return placeholder, node_id

    def __findPlaceholder(self, data, index):
        """
        Extract id from data string, start from index

        Keyword arguments:

        * data: string
        * index: index, from which we start search

        Returns: placeholder id and string index, after the found placeholder.
        When no placeholder is found the id is None and the returned index
        is `index + 1`.
        """
        m = self.__placeholder_re.search(data, index)
        if m:
            return m.group(1), m.end()
        else:
            return None, index + 1

    def __stashNode(self, node, type):
        """ Add node to stash and return the placeholder that stands for it. """
        placeholder, node_id = self.__makePlaceholder(type)
        self.stashed_nodes[node_id] = node
        return placeholder

    def __handleInline(self, data, patternIndex=0):
        """
        Process string with inline patterns and replace it
        with placeholders

        Keyword arguments:

        * data: A line of Markdown text
        * patternIndex: The index of the inlinePattern to start with

        Returns: String with placeholders. An AtomicString is returned
        unchanged.
        """
        if not isinstance(data, util.AtomicString):
            startIndex = 0
            while patternIndex < len(self.inlinePatterns):
                data, matched, startIndex = self.__applyPattern(
                    self.inlinePatterns[patternIndex], data, patternIndex, startIndex
                )
                # A pattern is retried until it stops matching; only then
                # do we move on to the next one.
                if not matched:
                    patternIndex += 1
        return data

    def __processElementText(self, node, subnode, isText=True):
        """
        Process placeholders in Element.text or Element.tail
        of Elements popped from self.stashed_nodes.

        Keywords arguments:

        * node: parent node
        * subnode: processing node
        * isText: bool variable, True - it's text, False - it's tail

        Returns: None
        """
        if isText:
            text = subnode.text
            subnode.text = None
        else:
            text = subnode.tail
            subnode.tail = None

        childResult = self.__processPlaceholders(text, subnode, isText)

        if not isText and node is not subnode:
            # Elements produced from a tail go right after the subnode.
            pos = list(node).index(subnode) + 1
        else:
            pos = 0

        # Inserting the reversed list at one fixed position keeps the
        # children in their original order.
        childResult.reverse()
        for newChild in childResult:
            node.insert(pos, newChild[0])

    def __processPlaceholders(self, data, parent, isText=True):
        """
        Process string with placeholders and generate ElementTree tree.

        Keyword arguments:

        * data: string with placeholders instead of ElementTree elements.
        * parent: Element, which contains processing inline data
        * isText: True when `data` is the parent's text, False when it is
          the parent's tail

        Returns: list of `(element, ancestors)` tuples for the ElementTree
        elements recovered from the placeholders. Plain text found between
        placeholders is attached directly to `parent` or to the tail of the
        previously recovered element.
        """
        def linkText(text):
            # Attach `text` after the most recently produced element, or to
            # the parent's tail/text when nothing has been produced yet.
            if text:
                if result:
                    if result[-1][0].tail:
                        result[-1][0].tail += text
                    else:
                        result[-1][0].tail = text
                elif not isText:
                    if parent.tail:
                        parent.tail += text
                    else:
                        parent.tail = text
                else:
                    if parent.text:
                        parent.text += text
                    else:
                        parent.text = text

        result = []
        startIndex = 0
        while data:
            index = data.find(self.__placeholder_prefix, startIndex)
            if index != -1:
                node_id, phEndIndex = self.__findPlaceholder(data, index)

                if node_id in self.stashed_nodes:
                    node = self.stashed_nodes.get(node_id)

                    if index > 0:
                        text = data[startIndex:index]
                        linkText(text)

                    if not isString(node):  # it's Element
                        for child in [node] + list(node):
                            if child.tail:
                                if child.tail.strip():
                                    self.__processElementText(
                                        node, child, False
                                    )
                            if child.text:
                                if child.text.strip():
                                    self.__processElementText(child, child)
                    else:  # it's just a string
                        linkText(node)
                        startIndex = phEndIndex
                        continue

                    startIndex = phEndIndex
                    result.append((node, self.ancestors[:]))

                else:  # wrong placeholder
                    # Keep the prefix as literal text and resume after it.
                    end = index + len(self.__placeholder_prefix)
                    linkText(data[startIndex:end])
                    startIndex = end
            else:
                text = data[startIndex:]
                if isinstance(data, util.AtomicString):
                    # We don't want to lose the AtomicString
                    text = util.AtomicString(text)
                linkText(text)
                data = ""

        return result

    def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
        """
        Check if the line fits the pattern, create the necessary
        elements, add it to stashed_nodes.

        Keyword arguments:

        * data: the text to be processed
        * pattern: the pattern to be checked
        * patternIndex: index of current pattern
        * startIndex: string index, from which we start searching

        Returns: tuple of `(data, matched, startIndex)` where `data` is the
        string with placeholders instead of ElementTree elements, `matched`
        tells whether the pattern matched, and `startIndex` is the index
        the next search with this pattern should start from.
        """
        new_style = isinstance(pattern, inlinepatterns.InlineProcessor)

        for exclude in pattern.ANCESTOR_EXCLUDES:
            if exclude.lower() in self.ancestors:
                return data, False, 0

        if new_style:
            match = None
            # Since handleMatch may reject our first match,
            # we iterate over the buffer looking for matches
            # until we can't find any more.
            for match in pattern.getCompiledRegExp().finditer(data, startIndex):
                node, start, end = pattern.handleMatch(match, data)
                if start is None or end is None:
                    # Rejected: forget this match and try the next one.
                    match = None
                    continue
                break
        else:  # pragma: no cover
            match = pattern.getCompiledRegExp().match(data[startIndex:])
            leftData = data[:startIndex]

        if not match:
            return data, False, 0

        if not new_style:  # pragma: no cover
            node = pattern.handleMatch(match)
            start = match.start(0)
            end = match.end(0)

        if node is None:
            # Matched, but nothing to insert; continue searching after it.
            return data, True, end

        if not isString(node):
            if not isinstance(node.text, util.AtomicString):
                # We need to process current node too
                for child in [node] + list(node):
                    if child.text:
                        self.ancestors.append(child.tag.lower())
                        child.text = self.__handleInline(
                            child.text, patternIndex + 1
                        )
                        self.ancestors.pop()
                    if child.tail:
                        child.tail = self.__handleInline(
                            child.tail, patternIndex
                        )

        placeholder = self.__stashNode(node, pattern.type())

        if new_style:
            return "{}{}{}".format(data[:start],
                                   placeholder, data[end:]), True, 0
        else:  # pragma: no cover
            return "{}{}{}{}".format(leftData,
                                     match.group(1),
                                     placeholder, match.groups()[-1]), True, 0

    def __build_ancestors(self, parent, parents):
        """
        Build the ancestor list.

        Walks from `parent` up through `self.parent_map` and appends the
        lower-cased tag names, outermost first, to `parents` in place.
        """
        ancestors = []
        while parent is not None:
            ancestors.append(parent.tag.lower())
            parent = self.parent_map.get(parent)
        ancestors.reverse()
        parents.extend(ancestors)

    def run(self, tree, ancestors=None):
        """Apply inline patterns to a parsed Markdown tree.

        Iterate over ElementTree, find elements with inline tag, apply inline
        patterns and append newly created Elements to tree.  If you don't
        want to process your data with inline patterns, instead of normal
        string, use subclass AtomicString:

            node.text = markdown.AtomicString("This will not be processed.")

        Arguments:

        * tree: ElementTree object, representing Markdown tree.
        * ancestors: List of parent tag names that precede the tree node (if needed).

        Returns: ElementTree object with applied inline patterns.
        """
        self.stashed_nodes = {}

        # Ensure a valid parent list, but copy passed in lists
        # to ensure we don't have the user accidentally change it on us.
        tree_parents = [] if ancestors is None else ancestors[:]

        self.parent_map = {c: p for p in tree.iter() for c in p}
        stack = [(tree, tree_parents)]

        while stack:
            currElement, parents = stack.pop()

            self.ancestors = parents
            self.__build_ancestors(currElement, self.ancestors)

            insertQueue = []
            for child in currElement:
                if child.text and not isinstance(
                    child.text, util.AtomicString
                ):
                    self.ancestors.append(child.tag.lower())
                    text = child.text
                    child.text = None
                    lst = self.__processPlaceholders(
                        self.__handleInline(text), child
                    )
                    for item in lst:
                        self.parent_map[item[0]] = child
                    # Newly created elements are themselves processed later.
                    stack += lst
                    insertQueue.append((child, lst))
                    self.ancestors.pop()
                if child.tail:
                    tail = self.__handleInline(child.tail)
                    # Throw-away element that collects the leading plain
                    # text of the processed tail.
                    dummy = etree.Element('d')
                    child.tail = None
                    tailResult = self.__processPlaceholders(tail, dummy, False)
                    if dummy.tail:
                        child.tail = dummy.tail
                    pos = list(currElement).index(child) + 1
                    tailResult.reverse()
                    for newChild in tailResult:
                        self.parent_map[newChild[0]] = currElement
                        currElement.insert(pos, newChild[0])
                if len(child):
                    self.parent_map[child] = currElement
                    stack.append((child, self.ancestors[:]))

            # Elements generated from a child's text are inserted only
            # after the loop over the current element's children is done.
            for element, lst in insertQueue:
                for i, obj in enumerate(lst):
                    newChild = obj[0]
                    element.insert(i, newChild)
        return tree
class PrettifyTreeprocessor(Treeprocessor):
    """ Add linebreaks to the html document. """

    def _prettifyETree(self, elem):
        """
        Recursively add linebreaks to ElementTree children.

        For a block-level element other than `code`/`pre`, a newline is
        placed in its text when that text is blank and its first child is
        block-level, and every block-level child is processed the same way.
        Every element passed in gets a newline tail when its tail is blank.
        """
        i = "\n"
        if self.md.is_block_level(elem.tag) and elem.tag not in ['code', 'pre']:
            if (not elem.text or not elem.text.strip()) \
                    and len(elem) and self.md.is_block_level(elem[0].tag):
                elem.text = i
            for e in elem:
                if self.md.is_block_level(e.tag):
                    self._prettifyETree(e)
        # Applies to every element, block-level or not.
        if not elem.tail or not elem.tail.strip():
            elem.tail = i

    def run(self, root):
        """
        Add linebreaks to ElementTree root object.

        Modifies `root` in place and returns None.
        """
        self._prettifyETree(root)
        # Do <br />'s separately as they are often in the middle of
        # inline content and missed by _prettifyETree.
        for br in root.iter('br'):
            if not br.tail or not br.tail.strip():
                br.tail = '\n'
            else:
                br.tail = '\n%s' % br.tail
        # Clean up extra empty lines at end of code blocks.
        for pre in root.iter('pre'):
            if len(pre) and pre[0].tag == 'code':
                code = pre[0]
                # Only touch code that consists of text alone: `text` is
                # None for an empty element, and when the element has
                # children its `text` is not the end of the block.
                if not len(code) and code.text is not None:
                    code.text = util.AtomicString(code.text.rstrip() + '\n')
|