attr_list.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. """
  2. Attribute List Extension for Python-Markdown
  3. ============================================
  4. Adds attribute list syntax. Inspired by
  5. [maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
  6. feature of the same name.
  7. See <https://Python-Markdown.github.io/extensions/attr_list>
  8. for documentation.
  9. Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/).
  10. All changes Copyright 2011-2014 The Python Markdown Project
  11. License: [BSD](https://opensource.org/licenses/bsd-license.php)
  12. """
  13. from . import Extension
  14. from ..treeprocessors import Treeprocessor
  15. import re
  16. def _handle_double_quote(s, t):
  17. k, v = t.split('=', 1)
  18. return k, v.strip('"')
  19. def _handle_single_quote(s, t):
  20. k, v = t.split('=', 1)
  21. return k, v.strip("'")
  22. def _handle_key_value(s, t):
  23. return t.split('=', 1)
  24. def _handle_word(s, t):
  25. if t.startswith('.'):
  26. return '.', t[1:]
  27. if t.startswith('#'):
  28. return 'id', t[1:]
  29. return t, t
# Lexicon for attribute-list bodies: each entry pairs a token pattern with
# the handler that turns the matched text into a (key, value) item.
# The rules are listed most-specific first: the two quoted forms, then the
# unquoted key=value form, then a bare word.  Keys are runs of characters
# other than space and '='.
# The final rule matches a single space and has no handler (None) --
# presumably so separators are consumed without producing an item.
_scanner = re.Scanner([
    (r'[^ =]+=".*?"', _handle_double_quote),
    (r"[^ =]+='.*?'", _handle_single_quote),
    (r'[^ =]+=[^ =]+', _handle_key_value),
    (r'[^ =]+', _handle_word),
    (r' ', None)
])
  37. def get_attrs(str):
  38. """ Parse attribute list and return a list of attribute tuples. """
  39. return _scanner.scan(str)[0]
  40. def isheader(elem):
  41. return elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
  42. class AttrListTreeprocessor(Treeprocessor):
  43. BASE_RE = r'\{\:?([^\}\n]*)\}'
  44. HEADER_RE = re.compile(r'[ ]+%s[ ]*$' % BASE_RE)
  45. BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE)
  46. INLINE_RE = re.compile(r'^%s' % BASE_RE)
  47. NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
  48. r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
  49. r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
  50. r'\uf900-\ufdcf\ufdf0-\ufffd'
  51. r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')
  52. def run(self, doc):
  53. for elem in doc.iter():
  54. if self.md.is_block_level(elem.tag):
  55. # Block level: check for attrs on last line of text
  56. RE = self.BLOCK_RE
  57. if isheader(elem) or elem.tag == 'dt':
  58. # header or def-term: check for attrs at end of line
  59. RE = self.HEADER_RE
  60. if len(elem) and elem.tag == 'li':
  61. # special case list items. children may include a ul or ol.
  62. pos = None
  63. # find the ul or ol position
  64. for i, child in enumerate(elem):
  65. if child.tag in ['ul', 'ol']:
  66. pos = i
  67. break
  68. if pos is None and elem[-1].tail:
  69. # use tail of last child. no ul or ol.
  70. m = RE.search(elem[-1].tail)
  71. if m:
  72. self.assign_attrs(elem, m.group(1))
  73. elem[-1].tail = elem[-1].tail[:m.start()]
  74. elif pos is not None and pos > 0 and elem[pos-1].tail:
  75. # use tail of last child before ul or ol
  76. m = RE.search(elem[pos-1].tail)
  77. if m:
  78. self.assign_attrs(elem, m.group(1))
  79. elem[pos-1].tail = elem[pos-1].tail[:m.start()]
  80. elif elem.text:
  81. # use text. ul is first child.
  82. m = RE.search(elem.text)
  83. if m:
  84. self.assign_attrs(elem, m.group(1))
  85. elem.text = elem.text[:m.start()]
  86. elif len(elem) and elem[-1].tail:
  87. # has children. Get from tail of last child
  88. m = RE.search(elem[-1].tail)
  89. if m:
  90. self.assign_attrs(elem, m.group(1))
  91. elem[-1].tail = elem[-1].tail[:m.start()]
  92. if isheader(elem):
  93. # clean up trailing #s
  94. elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
  95. elif elem.text:
  96. # no children. Get from text.
  97. m = RE.search(elem.text)
  98. if not m and elem.tag == 'td':
  99. m = re.search(self.BASE_RE, elem.text)
  100. if m:
  101. self.assign_attrs(elem, m.group(1))
  102. elem.text = elem.text[:m.start()]
  103. if isheader(elem):
  104. # clean up trailing #s
  105. elem.text = elem.text.rstrip('#').rstrip()
  106. else:
  107. # inline: check for attrs at start of tail
  108. if elem.tail:
  109. m = self.INLINE_RE.match(elem.tail)
  110. if m:
  111. self.assign_attrs(elem, m.group(1))
  112. elem.tail = elem.tail[m.end():]
  113. def assign_attrs(self, elem, attrs):
  114. """ Assign attrs to element. """
  115. for k, v in get_attrs(attrs):
  116. if k == '.':
  117. # add to class
  118. cls = elem.get('class')
  119. if cls:
  120. elem.set('class', '{} {}'.format(cls, v))
  121. else:
  122. elem.set('class', v)
  123. else:
  124. # assign attr k with v
  125. elem.set(self.sanitize_name(k), v)
  126. def sanitize_name(self, name):
  127. """
  128. Sanitize name as 'an XML Name, minus the ":"'.
  129. See https://www.w3.org/TR/REC-xml-names/#NT-NCName
  130. """
  131. return self.NAME_RE.sub('_', name)
  132. class AttrListExtension(Extension):
  133. def extendMarkdown(self, md):
  134. md.treeprocessors.register(AttrListTreeprocessor(md), 'attr_list', 8)
  135. def makeExtension(**kwargs): # pragma: no cover
  136. return AttrListExtension(**kwargs)