| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223 |
- """
- Tables Extension for Python-Markdown
- ====================================
- Added parsing of tables to Python-Markdown.
- See <https://Python-Markdown.github.io/extensions/tables>
- for documentation.
- Original code Copyright 2009 [Waylan Limberg](http://achinghead.com)
- All changes Copyright 2008-2014 The Python Markdown Project
- License: [BSD](https://opensource.org/licenses/bsd-license.php)
- """
- from . import Extension
- from ..blockprocessors import BlockProcessor
- import xml.etree.ElementTree as etree
- import re
# Bit flags describing which outer border pipes a table's header row has.
# They are OR-ed together into ``TableProcessor.border``.
PIPE_NONE = 0b00
PIPE_LEFT = 0b01
PIPE_RIGHT = 0b10
class TableProcessor(BlockProcessor):
    """Block processor that recognises pipe-delimited tables and builds ``<table>`` elements."""

    # Tokens of interest inside a row, by group number:
    #   1: escaped backslash (``\\``)      2: backslash followed by backticks
    #   3: run of backticks                4: escaped pipe (``\|``)
    #   5: bare pipe
    # Groups 1 and 4 are matched only so they are consumed and never mistaken
    # for a delimiter or a code marker.
    RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))')
    # A pipe at the very end of the row that is not escaped by a backslash.
    RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$')
    # The only characters allowed in the separator (second) row.
    SEPARATOR_CHARS = frozenset('|:- ')

    def __init__(self, parser):
        # ``border`` is a bit mask of PIPE_LEFT / PIPE_RIGHT and ``separator``
        # holds the split separator row; both are filled in by ``test`` and
        # consumed afterwards by ``run`` and ``_split_row``.
        self.border = PIPE_NONE
        self.separator = ''
        super().__init__(parser)

    def test(self, parent, block):
        """
        Ensure first two rows (column header and separator row) are valid table rows.

        Keep border check and separator row to avoid repeating the work.

        ``parent`` is not used. Returns ``True`` when ``block`` looks like a table.
        """
        rows = [row.strip(' ') for row in block.split('\n')]
        if len(rows) < 2:
            # A table needs at least a header row and a separator row.
            return False

        header0 = rows[0]
        self.border = PIPE_NONE
        if header0.startswith('|'):
            self.border |= PIPE_LEFT
        if self.RE_END_BORDER.search(header0) is not None:
            self.border |= PIPE_RIGHT

        row0_len = len(self._split_row(header0))
        is_table = row0_len > 1

        # Each row in a single column table needs at least one pipe.
        if not is_table and row0_len == 1 and self.border:
            is_table = all(
                row.startswith('|') or self.RE_END_BORDER.search(row) is not None
                for row in rows[1:]
            )

        if not is_table:
            return False

        # The separator row must have as many cells as the header and be made
        # up of nothing but pipes, colons, dashes and spaces.
        separator = self._split_row(rows[1])
        is_table = (
            len(separator) == row0_len
            and set(''.join(separator)) <= self.SEPARATOR_CHARS
        )
        if is_table:
            self.separator = separator
        return is_table

    def run(self, parent, blocks):
        """Parse a table block and build table.

        Pops the first entry of ``blocks``, treats its first line as the
        header, skips the second (separator) line and turns every remaining
        line into a body row appended under a new ``<table>`` child of
        ``parent``. Column alignment is read from ``self.separator``, which
        ``test`` stored.
        """
        block = blocks.pop(0).split('\n')
        header = block[0].strip(' ')
        # Slicing past the end yields an empty list, so no length guard is needed.
        rows = block[2:]

        # Get alignment of columns
        align = []
        for cell in self.separator:
            cell = cell.strip(' ')
            colon_left = cell.startswith(':')
            colon_right = cell.endswith(':')
            if colon_left and colon_right:
                align.append('center')
            elif colon_left:
                align.append('left')
            elif colon_right:
                align.append('right')
            else:
                align.append(None)

        # Build table
        table = etree.SubElement(parent, 'table')
        thead = etree.SubElement(table, 'thead')
        self._build_row(header, thead, align)
        tbody = etree.SubElement(table, 'tbody')
        if rows:
            for row in rows:
                self._build_row(row.strip(' '), tbody, align)
        else:
            # Handle empty table
            self._build_empty_row(tbody, align)

    def _build_empty_row(self, parent, align):
        """Build an empty row: one ``<tr>`` with an empty ``<td>`` per column in ``align``."""
        tr = etree.SubElement(parent, 'tr')
        for _ in align:
            etree.SubElement(tr, 'td')

    def _build_row(self, row, parent, align):
        """Given a row of text, build table cells.

        Cells are ``<th>`` when ``parent`` is a ``thead`` element and ``<td>``
        otherwise. Each cell whose ``align`` entry is truthy gets an ``align``
        attribute with that value.
        """
        tr = etree.SubElement(parent, 'tr')
        tag = 'th' if parent.tag == 'thead' else 'td'
        cells = self._split_row(row)
        # We use align here rather than cells to ensure every row
        # contains the same number of columns.
        for i, a in enumerate(align):
            c = etree.SubElement(tr, tag)
            try:
                c.text = cells[i].strip(' ')
            except IndexError:  # pragma: no cover
                # Row is shorter than the header: pad with an empty cell.
                c.text = ""
            if a:
                c.set('align', a)

    def _split_row(self, row):
        """Split a row of text into list of cells.

        When the header row had an outer border (``self.border`` is non-zero),
        a leading pipe and an unescaped trailing pipe are removed first.
        """
        if self.border:
            if row.startswith('|'):
                row = row[1:]
            row = self.RE_END_BORDER.sub('', row)
        return self._split(row)

    def _split(self, row):
        """Split a row of text with some code into a list of cells.

        Pipes that fall inside a paired backtick region are not treated as
        cell delimiters; escaped pipes (``\\|``) are never delimiters.
        """
        pipes = []
        tics = []
        tic_points = []

        # Parse row
        # Throw out \\, and \|
        for m in self.RE_CODE_PIPES.finditer(row):
            # Store ` data (len, start_pos, end_pos)
            if m.group(2):
                # \`+
                # Store length of each tic group: subtract \
                tics.append(len(m.group(2)) - 1)
                # Store start of group, end of group, and escape length
                tic_points.append((m.start(2), m.end(2) - 1, 1))
            elif m.group(3):
                # `+
                # Store length of each tic group
                tics.append(len(m.group(3)))
                # Store start of group, end of group, and escape length
                tic_points.append((m.start(3), m.end(3) - 1, 0))
            # Store pipe location
            elif m.group(5):
                pipes.append(m.start(5))

        # Pair up tics according to size if possible
        # Subtract the escape length *only* from the opening.
        # Walk through tic list and see if tic has a close.
        # Store the tic region (start of region, end of region).
        tic_region = []
        pos = 0
        tic_len = len(tics)
        while pos < tic_len:
            tic_size = tics[pos] - tic_points[pos][2]
            close = -1
            # A size of zero means the only backtick was escaped, so this
            # group cannot open a code span.
            if tic_size:
                try:
                    # Search in place from the next group onward instead of
                    # copying a slice of the list on every iteration.
                    close = tics.index(tic_size, pos + 1)
                except ValueError:
                    close = -1
            if close < 0:
                pos += 1
            else:
                tic_region.append((tic_points[pos][0], tic_points[close][1]))
                pos = close + 1

        # Resolve pipes. Check if they are within a tic pair region.
        # - If it is within a region, we don't want it, so throw it out
        # - If we didn't throw it out, it must be a table pipe
        good_pipes = [
            pipe for pipe in pipes
            if not any(start <= pipe <= end for start, end in tic_region)
        ]

        # Split row according to table delimiters.
        elements = []
        pos = 0
        for pipe in good_pipes:
            elements.append(row[pos:pipe])
            pos = pipe + 1
        elements.append(row[pos:])
        return elements
class TableExtension(Extension):
    """Markdown extension that turns on table parsing."""

    def extendMarkdown(self, md):
        """Hook a ``TableProcessor`` into ``md``'s block parser.

        Also appends ``'|'`` to ``md.ESCAPED_CHARS`` when it is not already
        listed there.
        """
        escaped_chars = md.ESCAPED_CHARS
        if '|' not in escaped_chars:
            escaped_chars.append('|')
        processor = TableProcessor(md.parser)
        md.parser.blockprocessors.register(processor, 'table', 75)
def makeExtension(**kwargs):  # pragma: no cover
    """Return a ``TableExtension`` built from the given keyword arguments."""
    extension = TableExtension(**kwargs)
    return extension
|