tables.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. """
  2. Tables Extension for Python-Markdown
  3. ====================================
  4. Added parsing of tables to Python-Markdown.
  5. See <https://Python-Markdown.github.io/extensions/tables>
  6. for documentation.
  7. Original code Copyright 2009 [Waylan Limberg](http://achinghead.com)
  8. All changes Copyright 2008-2014 The Python Markdown Project
  9. License: [BSD](https://opensource.org/licenses/bsd-license.php)
  10. """
  11. from . import Extension
  12. from ..blockprocessors import BlockProcessor
  13. import xml.etree.ElementTree as etree
  14. import re
  15. PIPE_NONE = 0
  16. PIPE_LEFT = 1
  17. PIPE_RIGHT = 2
  18. class TableProcessor(BlockProcessor):
  19. """ Process Tables. """
  20. RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))')
  21. RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$')
  22. def __init__(self, parser):
  23. self.border = False
  24. self.separator = ''
  25. super().__init__(parser)
  26. def test(self, parent, block):
  27. """
  28. Ensure first two rows (column header and separator row) are valid table rows.
  29. Keep border check and separator row do avoid repeating the work.
  30. """
  31. is_table = False
  32. rows = [row.strip(' ') for row in block.split('\n')]
  33. if len(rows) > 1:
  34. header0 = rows[0]
  35. self.border = PIPE_NONE
  36. if header0.startswith('|'):
  37. self.border |= PIPE_LEFT
  38. if self.RE_END_BORDER.search(header0) is not None:
  39. self.border |= PIPE_RIGHT
  40. row = self._split_row(header0)
  41. row0_len = len(row)
  42. is_table = row0_len > 1
  43. # Each row in a single column table needs at least one pipe.
  44. if not is_table and row0_len == 1 and self.border:
  45. for index in range(1, len(rows)):
  46. is_table = rows[index].startswith('|')
  47. if not is_table:
  48. is_table = self.RE_END_BORDER.search(rows[index]) is not None
  49. if not is_table:
  50. break
  51. if is_table:
  52. row = self._split_row(rows[1])
  53. is_table = (len(row) == row0_len) and set(''.join(row)) <= set('|:- ')
  54. if is_table:
  55. self.separator = row
  56. return is_table
  57. def run(self, parent, blocks):
  58. """ Parse a table block and build table. """
  59. block = blocks.pop(0).split('\n')
  60. header = block[0].strip(' ')
  61. rows = [] if len(block) < 3 else block[2:]
  62. # Get alignment of columns
  63. align = []
  64. for c in self.separator:
  65. c = c.strip(' ')
  66. if c.startswith(':') and c.endswith(':'):
  67. align.append('center')
  68. elif c.startswith(':'):
  69. align.append('left')
  70. elif c.endswith(':'):
  71. align.append('right')
  72. else:
  73. align.append(None)
  74. # Build table
  75. table = etree.SubElement(parent, 'table')
  76. thead = etree.SubElement(table, 'thead')
  77. self._build_row(header, thead, align)
  78. tbody = etree.SubElement(table, 'tbody')
  79. if len(rows) == 0:
  80. # Handle empty table
  81. self._build_empty_row(tbody, align)
  82. else:
  83. for row in rows:
  84. self._build_row(row.strip(' '), tbody, align)
  85. def _build_empty_row(self, parent, align):
  86. """Build an empty row."""
  87. tr = etree.SubElement(parent, 'tr')
  88. count = len(align)
  89. while count:
  90. etree.SubElement(tr, 'td')
  91. count -= 1
  92. def _build_row(self, row, parent, align):
  93. """ Given a row of text, build table cells. """
  94. tr = etree.SubElement(parent, 'tr')
  95. tag = 'td'
  96. if parent.tag == 'thead':
  97. tag = 'th'
  98. cells = self._split_row(row)
  99. # We use align here rather than cells to ensure every row
  100. # contains the same number of columns.
  101. for i, a in enumerate(align):
  102. c = etree.SubElement(tr, tag)
  103. try:
  104. c.text = cells[i].strip(' ')
  105. except IndexError: # pragma: no cover
  106. c.text = ""
  107. if a:
  108. c.set('align', a)
  109. def _split_row(self, row):
  110. """ split a row of text into list of cells. """
  111. if self.border:
  112. if row.startswith('|'):
  113. row = row[1:]
  114. row = self.RE_END_BORDER.sub('', row)
  115. return self._split(row)
  116. def _split(self, row):
  117. """ split a row of text with some code into a list of cells. """
  118. elements = []
  119. pipes = []
  120. tics = []
  121. tic_points = []
  122. tic_region = []
  123. good_pipes = []
  124. # Parse row
  125. # Throw out \\, and \|
  126. for m in self.RE_CODE_PIPES.finditer(row):
  127. # Store ` data (len, start_pos, end_pos)
  128. if m.group(2):
  129. # \`+
  130. # Store length of each tic group: subtract \
  131. tics.append(len(m.group(2)) - 1)
  132. # Store start of group, end of group, and escape length
  133. tic_points.append((m.start(2), m.end(2) - 1, 1))
  134. elif m.group(3):
  135. # `+
  136. # Store length of each tic group
  137. tics.append(len(m.group(3)))
  138. # Store start of group, end of group, and escape length
  139. tic_points.append((m.start(3), m.end(3) - 1, 0))
  140. # Store pipe location
  141. elif m.group(5):
  142. pipes.append(m.start(5))
  143. # Pair up tics according to size if possible
  144. # Subtract the escape length *only* from the opening.
  145. # Walk through tic list and see if tic has a close.
  146. # Store the tic region (start of region, end of region).
  147. pos = 0
  148. tic_len = len(tics)
  149. while pos < tic_len:
  150. try:
  151. tic_size = tics[pos] - tic_points[pos][2]
  152. if tic_size == 0:
  153. raise ValueError
  154. index = tics[pos + 1:].index(tic_size) + 1
  155. tic_region.append((tic_points[pos][0], tic_points[pos + index][1]))
  156. pos += index + 1
  157. except ValueError:
  158. pos += 1
  159. # Resolve pipes. Check if they are within a tic pair region.
  160. # Walk through pipes comparing them to each region.
  161. # - If pipe position is less that a region, it isn't in a region
  162. # - If it is within a region, we don't want it, so throw it out
  163. # - If we didn't throw it out, it must be a table pipe
  164. for pipe in pipes:
  165. throw_out = False
  166. for region in tic_region:
  167. if pipe < region[0]:
  168. # Pipe is not in a region
  169. break
  170. elif region[0] <= pipe <= region[1]:
  171. # Pipe is within a code region. Throw it out.
  172. throw_out = True
  173. break
  174. if not throw_out:
  175. good_pipes.append(pipe)
  176. # Split row according to table delimeters.
  177. pos = 0
  178. for pipe in good_pipes:
  179. elements.append(row[pos:pipe])
  180. pos = pipe + 1
  181. elements.append(row[pos:])
  182. return elements
  183. class TableExtension(Extension):
  184. """ Add tables to Markdown. """
  185. def extendMarkdown(self, md):
  186. """ Add an instance of TableProcessor to BlockParser. """
  187. if '|' not in md.ESCAPED_CHARS:
  188. md.ESCAPED_CHARS.append('|')
  189. md.parser.blockprocessors.register(TableProcessor(md.parser), 'table', 75)
  190. def makeExtension(**kwargs): # pragma: no cover
  191. return TableExtension(**kwargs)