util.py 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. """
  2. General utilities.
  3. MIT license.
  4. Copyright (c) 2017 Isaac Muse <isaacmuse@gmail.com>
  5. """
  6. from markdown.inlinepatterns import InlineProcessor
  7. import xml.etree.ElementTree as etree
  8. from collections import namedtuple
  9. import sys
  10. import copy
  11. import re
  12. import html
  13. from urllib.request import pathname2url, url2pathname
  14. from urllib.parse import urlparse
  15. RE_WIN_DRIVE_LETTER = re.compile(r"^[A-Za-z]$")
  16. RE_WIN_DRIVE_PATH = re.compile(r"^[A-Za-z]:(?:\\.*)?$")
  17. RE_URL = re.compile('(http|ftp)s?|data|mailto|tel|news')
  18. RE_WIN_DEFAULT_PROTOCOL = re.compile(r"^///[A-Za-z]:(?:/.*)?$")
  19. if sys.platform.startswith('win'):
  20. _PLATFORM = "windows"
  21. elif sys.platform == "darwin": # pragma: no cover
  22. _PLATFORM = "osx"
  23. else:
  24. _PLATFORM = "linux"
  25. def is_win(): # pragma: no cover
  26. """Is Windows."""
  27. return _PLATFORM == "windows"
  28. def is_linux(): # pragma: no cover
  29. """Is Linux."""
  30. return _PLATFORM == "linux"
  31. def is_mac(): # pragma: no cover
  32. """Is macOS."""
  33. return _PLATFORM == "osx"
  34. def url2path(path):
  35. """Path to URL."""
  36. return url2pathname(path)
  37. def path2url(url):
  38. """URL to path."""
  39. path = pathname2url(url)
  40. # If on windows, replace the notation to use a default protocol `///` with nothing.
  41. if is_win() and RE_WIN_DEFAULT_PROTOCOL.match(path):
  42. path = path.replace('///', '', 1)
  43. return path
  44. def get_code_points(s):
  45. """Get the Unicode code points."""
  46. return [c for c in s]
  47. def get_ord(c):
  48. """Get Unicode ord."""
  49. return ord(c)
  50. def get_char(value):
  51. """Get the Unicode char."""
  52. return chr(value)
  53. def escape_chars(md, echrs):
  54. """
  55. Add chars to the escape list.
  56. Don't just append as it modifies the global list permanently.
  57. Make a copy and extend **that** copy so that only this Markdown
  58. instance gets modified.
  59. """
  60. escaped = copy.copy(md.ESCAPED_CHARS)
  61. for ec in echrs:
  62. if ec not in escaped:
  63. escaped.append(ec)
  64. md.ESCAPED_CHARS = escaped
  65. def parse_url(url):
  66. """
  67. Parse the URL.
  68. Try to determine if the following is a file path or
  69. (as we will call anything else) a URL.
  70. We return it slightly modified and combine the path parts.
  71. We also assume if we see something like c:/ it is a Windows path.
  72. We don't bother checking if this **is** a Windows system, but
  73. 'nix users really shouldn't be creating weird names like c: for their folder.
  74. """
  75. is_url = False
  76. is_absolute = False
  77. scheme, netloc, path, params, query, fragment = urlparse(html.unescape(url))
  78. if RE_URL.match(scheme):
  79. # Clearly a URL
  80. is_url = True
  81. elif scheme == '' and netloc == '' and path == '':
  82. # Maybe just a URL fragment
  83. is_url = True
  84. elif scheme == 'file' and (RE_WIN_DRIVE_PATH.match(netloc)):
  85. # file://c:/path or file://c:\path
  86. path = '/' + (netloc + path).replace('\\', '/')
  87. netloc = ''
  88. is_absolute = True
  89. elif scheme == 'file' and netloc.startswith('\\'):
  90. # file://\c:\path or file://\\path
  91. path = (netloc + path).replace('\\', '/')
  92. netloc = ''
  93. is_absolute = True
  94. elif scheme == 'file':
  95. # file:///path
  96. is_absolute = True
  97. elif RE_WIN_DRIVE_LETTER.match(scheme):
  98. # c:/path
  99. path = '/%s:%s' % (scheme, path.replace('\\', '/'))
  100. scheme = 'file'
  101. netloc = ''
  102. is_absolute = True
  103. elif scheme == '' and netloc != '' and url.startswith('//'):
  104. # //file/path
  105. path = '//' + netloc + path
  106. scheme = 'file'
  107. netloc = ''
  108. is_absolute = True
  109. elif scheme != '' and netloc != '':
  110. # A non-file path or strange URL
  111. is_url = True
  112. elif path.startswith(('/', '\\')):
  113. # /root path
  114. is_absolute = True
  115. return (scheme, netloc, path, params, query, fragment, is_url, is_absolute)
  116. class PatSeqItem(namedtuple('PatSeqItem', ['pattern', 'builder', 'tags'])):
  117. """Pattern sequence item item."""
  118. class PatternSequenceProcessor(InlineProcessor):
  119. """Processor for handling complex nested patterns such as strong and em matches."""
  120. PATTERNS = []
  121. def build_single(self, m, tag, idx):
  122. """Return single tag."""
  123. el1 = etree.Element(tag)
  124. text = m.group(2)
  125. self.parse_sub_patterns(text, el1, None, idx)
  126. return el1
  127. def build_double(self, m, tags, idx):
  128. """Return double tag."""
  129. tag1, tag2 = tags.split(",")
  130. el1 = etree.Element(tag1)
  131. el2 = etree.Element(tag2)
  132. text = m.group(2)
  133. self.parse_sub_patterns(text, el2, None, idx)
  134. el1.append(el2)
  135. if len(m.groups()) == 3:
  136. text = m.group(3)
  137. self.parse_sub_patterns(text, el1, el2, idx)
  138. return el1
  139. def build_double2(self, m, tags, idx):
  140. """Return double tags (variant 2): `<strong>text <em>text</em></strong>`."""
  141. tag1, tag2 = tags.split(",")
  142. el1 = etree.Element(tag1)
  143. el2 = etree.Element(tag2)
  144. text = m.group(2)
  145. self.parse_sub_patterns(text, el1, None, idx)
  146. text = m.group(3)
  147. el1.append(el2)
  148. self.parse_sub_patterns(text, el2, None, idx)
  149. return el1
  150. def parse_sub_patterns(self, data, parent, last, idx):
  151. """
  152. Parses sub patterns.
  153. `data` (`str`):
  154. text to evaluate.
  155. `parent` (`etree.Element`):
  156. Parent to attach text and sub elements to.
  157. `last` (`etree.Element`):
  158. Last appended child to parent. Can also be None if parent has no children.
  159. `idx` (`int`):
  160. Current pattern index that was used to evaluate the parent.
  161. """
  162. offset = 0
  163. pos = 0
  164. length = len(data)
  165. while pos < length:
  166. # Find the start of potential emphasis or strong tokens
  167. if self.compiled_re.match(data, pos):
  168. matched = False
  169. # See if the we can match an emphasis/strong pattern
  170. for index, item in enumerate(self.PATTERNS):
  171. # Only evaluate patterns that are after what was used on the parent
  172. if index <= idx:
  173. continue
  174. m = item.pattern.match(data, pos)
  175. if m:
  176. # Append child nodes to parent
  177. # Text nodes should be appended to the last
  178. # child if present, and if not, it should
  179. # be added as the parent's text node.
  180. text = data[offset:m.start(0)]
  181. if text:
  182. if last is not None:
  183. last.tail = text
  184. else:
  185. parent.text = text
  186. el = self.build_element(m, item.builder, item.tags, index)
  187. parent.append(el)
  188. last = el
  189. # Move our position past the matched hunk
  190. offset = pos = m.end(0)
  191. matched = True
  192. if not matched:
  193. # We matched nothing, move on to the next character
  194. pos += 1
  195. else:
  196. # Increment position as no potential emphasis start was found.
  197. pos += 1
  198. # Append any leftover text as a text node.
  199. text = data[offset:]
  200. if text:
  201. if last is not None:
  202. last.tail = text
  203. else:
  204. parent.text = text
  205. def build_element(self, m, builder, tags, index):
  206. """Element builder."""
  207. if builder == 'double2':
  208. return self.build_double2(m, tags, index)
  209. elif builder == 'double':
  210. return self.build_double(m, tags, index)
  211. else:
  212. return self.build_single(m, tags, index)
  213. def handleMatch(self, m, data):
  214. """Parse patterns."""
  215. el = None
  216. start = None
  217. end = None
  218. for index, item in enumerate(self.PATTERNS):
  219. m1 = item.pattern.match(data, m.start(0))
  220. if m1:
  221. start = m1.start(0)
  222. end = m1.end(0)
  223. el = self.build_element(m1, item.builder, item.tags, index)
  224. break
  225. return el, start, end
  226. class PymdownxDeprecationWarning(UserWarning): # pragma: no cover
  227. """Deprecation warning for Pymdownx that is not hidden."""