| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293 |
- """
- General utilities.
- MIT license.
- Copyright (c) 2017 Isaac Muse <isaacmuse@gmail.com>
- """
- from markdown.inlinepatterns import InlineProcessor
- import xml.etree.ElementTree as etree
- from collections import namedtuple
- import sys
- import copy
- import re
- import html
- from urllib.request import pathname2url, url2pathname
- from urllib.parse import urlparse
# A single ASCII letter: a parsed "scheme" that is really a Windows drive letter.
RE_WIN_DRIVE_LETTER = re.compile(r"^[A-Za-z]$")
# A drive letter and colon, optionally followed by a backslash and anything after it.
RE_WIN_DRIVE_PATH = re.compile(r"^[A-Za-z]:(?:\\.*)?$")
# Scheme prefixes treated as real URLs. Used with `match`, so only the start is anchored.
RE_URL = re.compile('(http|ftp)s?|data|mailto|tel|news')
# `///` followed by a drive letter and colon, optionally followed by `/` and anything after it.
RE_WIN_DEFAULT_PROTOCOL = re.compile(r"^///[A-Za-z]:(?:/.*)?$")

# Classify the running platform once, at import time.
# Anything that is neither Windows nor macOS is treated as Linux.
_PLATFORM = "linux"
if sys.platform.startswith('win'):
    _PLATFORM = "windows"
elif sys.platform == "darwin":  # pragma: no cover
    _PLATFORM = "osx"
def is_win():  # pragma: no cover
    """Return `True` when the platform detected at import time is Windows."""
    detected = _PLATFORM
    return detected == "windows"
def is_linux():  # pragma: no cover
    """Return `True` when the platform detected at import time is Linux."""
    detected = _PLATFORM
    return detected == "linux"
def is_mac():  # pragma: no cover
    """Return `True` when the platform detected at import time is macOS."""
    detected = _PLATFORM
    return detected == "osx"
def url2path(path):
    """
    Convert a URL path into a local file system path.

    `path` (`str`):
        The URL path component to convert. Despite the parameter name,
        this is the URL form, not the file system form.

    Returns the value produced by `urllib.request.url2pathname`.
    """
    return url2pathname(path)
def path2url(url):
    """
    Convert a local file system path into a URL path.

    `url` (`str`):
        The file system path to convert. Despite the parameter name,
        this is the path form, not the URL form.

    Returns the value produced by `urllib.request.pathname2url`. On Windows,
    a result of the form `///<drive>:...` has its leading `///` removed.
    """
    converted = pathname2url(url)
    # If on Windows, replace the notation to use a default protocol `///` with nothing.
    # Only the first occurrence is removed so the rest of the path is untouched.
    if is_win() and RE_WIN_DEFAULT_PROTOCOL.match(converted):
        converted = converted.replace('///', '', 1)
    return converted
def get_code_points(s):
    """
    Split a string into its individual Unicode code points.

    `s` (`str`):
        The text to split.

    Returns a new list with one single-character string per code point
    (the characters themselves, not their integer values).
    """
    # `list` copies the iterable directly; no identity comprehension is needed.
    return list(s)
def get_ord(c):
    """Return the integer Unicode code point of the single character `c`."""
    code_point = ord(c)
    return code_point
def get_char(value):
    """Return the one-character string for the integer code point `value`."""
    character = chr(value)
    return character
def escape_chars(md, echrs):
    """
    Register extra escapable characters on one Markdown instance.

    The current `ESCAPED_CHARS` value is copied first, and the copy is what
    gets extended and assigned back to `md`. Appending to the existing list
    in place would permanently modify the shared global list.

    `md`:
        Object exposing an `ESCAPED_CHARS` attribute.
    `echrs`:
        Iterable of characters to add; ones already present are skipped.
    """
    merged = copy.copy(md.ESCAPED_CHARS)
    for candidate in echrs:
        if candidate in merged:
            # Already escapable (or a repeat within `echrs`), nothing to add.
            continue
        merged.append(candidate)
    md.ESCAPED_CHARS = merged
def parse_url(url):
    """
    Split a link target into its parts and classify it.

    The HTML-unescaped `url` is run through `urlparse`, then classified as
    either a URL or a file path. File-like forms are normalized: the scheme
    becomes `file` where a drive letter or `//host` form is recognized,
    backslashes become forward slashes, and the network location is folded
    into the path.

    Something like `c:/` is assumed to be a Windows path regardless of the
    platform actually in use.

    Returns the tuple
    `(scheme, netloc, path, params, query, fragment, is_url, is_absolute)`.
    """
    scheme, netloc, path, params, query, fragment = urlparse(html.unescape(url))
    is_url = False
    is_absolute = False

    if RE_URL.match(scheme) or not (scheme or netloc or path):
        # Either a recognized URL scheme, or nothing but (maybe) a fragment.
        is_url = True
    elif scheme == 'file':
        # Every `file` form is absolute; some need the path rebuilt.
        is_absolute = True
        if RE_WIN_DRIVE_PATH.match(netloc):
            # file://c:/path or file://c:\path
            path = '/' + (netloc + path).replace('\\', '/')
            netloc = ''
        elif netloc.startswith('\\'):
            # file://\c:\path or file://\\path
            path = (netloc + path).replace('\\', '/')
            netloc = ''
        # Otherwise: file:///path, which is already in the desired shape.
    elif RE_WIN_DRIVE_LETTER.match(scheme):
        # c:/path -- the "scheme" is really a drive letter.
        path = '/' + scheme + ':' + path.replace('\\', '/')
        scheme = 'file'
        netloc = ''
        is_absolute = True
    elif not scheme and netloc and url.startswith('//'):
        # //file/path
        path = '//' + netloc + path
        scheme = 'file'
        netloc = ''
        is_absolute = True
    elif scheme and netloc:
        # A non-file path or strange URL
        is_url = True
    elif path.startswith(('/', '\\')):
        # /root path
        is_absolute = True

    return (scheme, netloc, path, params, query, fragment, is_url, is_absolute)
class PatSeqItem(
    namedtuple('PatSeqItem', ['pattern', 'builder', 'tags'])
):
    """
    One entry of a pattern sequence.

    `pattern`:
        Object whose `match(data, pos)` is used to test the text.
    `builder`:
        Name selecting how the element is built (`'double2'`, `'double'`,
        or anything else for a single tag).
    `tags`:
        Tag name, or two comma-separated tag names for the double builders.
    """
class PatternSequenceProcessor(InlineProcessor):
    """
    Inline processor for complex nested patterns such as strong and em.

    `PATTERNS` is an ordered list whose items expose `pattern`, `builder`
    and `tags` (see `PatSeqItem`). It is empty here and is presumably filled
    in by subclasses. The first pattern that matches decides which element
    is built; nested content is only tested against later patterns.
    """

    PATTERNS = []

    @staticmethod
    def _attach_text(parent, last, text):
        """Store `text` as the tail of `last`, or as `parent`'s text if `last` is None."""
        if not text:
            return
        if last is None:
            parent.text = text
        else:
            last.tail = text

    def build_single(self, m, tag, idx):
        """Build one `tag` element whose content is parsed from group 2 of `m`."""
        node = etree.Element(tag)
        self.parse_sub_patterns(m.group(2), node, None, idx)
        return node

    def build_double(self, m, tags, idx):
        """
        Build nested tags with the inner one first: `<strong><em>text</em> text</strong>`.

        `tags` holds the outer and inner tag names separated by a comma.
        Group 2 fills the inner element. When the match has exactly three
        groups, group 3 is parsed into the outer element after the inner one.
        """
        outer_tag, inner_tag = tags.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.Element(inner_tag)
        self.parse_sub_patterns(m.group(2), inner, None, idx)
        outer.append(inner)
        if len(m.groups()) == 3:
            # Trailing content follows the inner element, so it lands in its tail.
            self.parse_sub_patterns(m.group(3), outer, inner, idx)
        return outer

    def build_double2(self, m, tags, idx):
        """
        Build nested tags with the inner one last: `<strong>text <em>text</em></strong>`.

        `tags` holds the outer and inner tag names separated by a comma.
        Group 2 is parsed into the outer element, then the inner element is
        appended and filled from group 3.
        """
        outer_tag, inner_tag = tags.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.Element(inner_tag)
        self.parse_sub_patterns(m.group(2), outer, None, idx)
        trailing = m.group(3)
        outer.append(inner)
        self.parse_sub_patterns(trailing, inner, None, idx)
        return outer

    def parse_sub_patterns(self, data, parent, last, idx):
        """
        Parse nested patterns inside `data` and attach the results to `parent`.

        `data` (`str`):
            Text to evaluate.
        `parent` (`etree.Element`):
            Element that receives the text and the sub elements.
        `last` (`etree.Element`):
            Last child already appended to `parent`, or None if it has none.
        `idx` (`int`):
            Index of the pattern that produced `parent`; only patterns with
            a greater index are tried here.
        """
        offset = 0
        pos = 0
        length = len(data)

        while pos < length:
            matched = False
            # Only try the patterns where a potential token starts.
            if self.compiled_re.match(data, pos):
                for index, item in enumerate(self.PATTERNS):
                    # Skip the parent's own pattern and everything before it.
                    if index <= idx:
                        continue
                    m = item.pattern.match(data, pos)
                    if not m:
                        continue
                    # Text before the match goes on the previous child's tail,
                    # or becomes the parent's text when there is no child yet.
                    self._attach_text(parent, last, data[offset:m.start(0)])
                    el = self.build_element(m, item.builder, item.tags, index)
                    parent.append(el)
                    last = el
                    # Resume after the matched hunk. The remaining patterns
                    # are still tried, now at the new position.
                    offset = pos = m.end(0)
                    matched = True
            if not matched:
                # No token start, or no pattern matched: advance one character.
                pos += 1

        # Whatever remains is plain text.
        self._attach_text(parent, last, data[offset:])

    def build_element(self, m, builder, tags, index):
        """Dispatch on `builder` to the matching `build_*` method and return its element."""
        if builder == 'double2':
            return self.build_double2(m, tags, index)
        if builder == 'double':
            return self.build_double(m, tags, index)
        return self.build_single(m, tags, index)

    def handleMatch(self, m, data):
        """
        Build the element for the first pattern matching at the start of `m`.

        Returns `(element, start, end)` for the first item of `PATTERNS`
        that matches `data` at `m.start(0)`, or `(None, None, None)` when no
        pattern matches.
        """
        for index, item in enumerate(self.PATTERNS):
            found = item.pattern.match(data, m.start(0))
            if found:
                start = found.start(0)
                end = found.end(0)
                return self.build_element(found, item.builder, item.tags, index), start, end
        return None, None, None
class PymdownxDeprecationWarning(UserWarning):  # pragma: no cover
    """
    Deprecation warning for Pymdownx that is not hidden.

    It derives from `UserWarning` rather than `DeprecationWarning`.
    """
|