smarty.py 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. '''
  2. Smarty extension for Python-Markdown
  3. ====================================
  4. Adds conversion of ASCII dashes, quotes and ellipses to their HTML
  5. entity equivalents.
  6. See <https://Python-Markdown.github.io/extensions/smarty>
  7. for documentation.
  8. Author: 2013, Dmitry Shachnev <mitya57@gmail.com>
  9. All changes Copyright 2013-2014 The Python Markdown Project
  10. License: [BSD](https://opensource.org/licenses/bsd-license.php)
  11. SmartyPants license:
  12. Copyright (c) 2003 John Gruber <https://daringfireball.net/>
  13. All rights reserved.
  14. Redistribution and use in source and binary forms, with or without
  15. modification, are permitted provided that the following conditions are
  16. met:
  17. * Redistributions of source code must retain the above copyright
  18. notice, this list of conditions and the following disclaimer.
  19. * Redistributions in binary form must reproduce the above copyright
  20. notice, this list of conditions and the following disclaimer in
  21. the documentation and/or other materials provided with the
  22. distribution.
  23. * Neither the name "SmartyPants" nor the names of its contributors
  24. may be used to endorse or promote products derived from this
  25. software without specific prior written permission.
  26. This software is provided by the copyright holders and contributors "as
  27. is" and any express or implied warranties, including, but not limited
  28. to, the implied warranties of merchantability and fitness for a
  29. particular purpose are disclaimed. In no event shall the copyright
  30. owner or contributors be liable for any direct, indirect, incidental,
  31. special, exemplary, or consequential damages (including, but not
  32. limited to, procurement of substitute goods or services; loss of use,
  33. data, or profits; or business interruption) however caused and on any
  34. theory of liability, whether in contract, strict liability, or tort
  35. (including negligence or otherwise) arising in any way out of the use
  36. of this software, even if advised of the possibility of such damage.
  37. smartypants.py license:
  38. smartypants.py is a derivative work of SmartyPants.
  39. Copyright (c) 2004, 2007 Chad Miller <http://web.chad.org/>
  40. Redistribution and use in source and binary forms, with or without
  41. modification, are permitted provided that the following conditions are
  42. met:
  43. * Redistributions of source code must retain the above copyright
  44. notice, this list of conditions and the following disclaimer.
  45. * Redistributions in binary form must reproduce the above copyright
  46. notice, this list of conditions and the following disclaimer in
  47. the documentation and/or other materials provided with the
  48. distribution.
  49. This software is provided by the copyright holders and contributors "as
  50. is" and any express or implied warranties, including, but not limited
  51. to, the implied warranties of merchantability and fitness for a
  52. particular purpose are disclaimed. In no event shall the copyright
  53. owner or contributors be liable for any direct, indirect, incidental,
  54. special, exemplary, or consequential damages (including, but not
  55. limited to, procurement of substitute goods or services; loss of use,
  56. data, or profits; or business interruption) however caused and on any
  57. theory of liability, whether in contract, strict liability, or tort
  58. (including negligence or otherwise) arising in any way out of the use
  59. of this software, even if advised of the possibility of such damage.
  60. '''
  61. from . import Extension
  62. from ..inlinepatterns import HtmlInlineProcessor, HTML_RE
  63. from ..treeprocessors import InlineProcessor
  64. from ..util import Registry, deprecated
  65. # Constants for quote education.
  66. punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""
  67. endOfWordClass = r"[\s.,;:!?)]"
  68. closeClass = r"[^\ \t\r\n\[\{\(\-\u0002\u0003]"
  69. openingQuotesBase = (
  70. r'(\s' # a whitespace char
  71. r'|&nbsp;' # or a non-breaking space entity
  72. r'|--' # or dashes
  73. r'|–|—' # or unicode
  74. r'|&[mn]dash;' # or named dash entities
  75. r'|&#8211;|&#8212;' # or decimal entities
  76. r')'
  77. )
  78. substitutions = {
  79. 'mdash': '&mdash;',
  80. 'ndash': '&ndash;',
  81. 'ellipsis': '&hellip;',
  82. 'left-angle-quote': '&laquo;',
  83. 'right-angle-quote': '&raquo;',
  84. 'left-single-quote': '&lsquo;',
  85. 'right-single-quote': '&rsquo;',
  86. 'left-double-quote': '&ldquo;',
  87. 'right-double-quote': '&rdquo;',
  88. }
  89. # Special case if the very first character is a quote
  90. # followed by punctuation at a non-word-break. Close the quotes by brute force:
  91. singleQuoteStartRe = r"^'(?=%s\B)" % punctClass
  92. doubleQuoteStartRe = r'^"(?=%s\B)' % punctClass
  93. # Special case for double sets of quotes, e.g.:
  94. # <p>He said, "'Quoted' words in a larger quote."</p>
  95. doubleQuoteSetsRe = r""""'(?=\w)"""
  96. singleQuoteSetsRe = r"""'"(?=\w)"""
  97. # Special case for decade abbreviations (the '80s):
  98. decadeAbbrRe = r"(?<!\w)'(?=\d{2}s)"
  99. # Get most opening double quotes:
  100. openingDoubleQuotesRegex = r'%s"(?=\w)' % openingQuotesBase
  101. # Double closing quotes:
  102. closingDoubleQuotesRegex = r'"(?=\s)'
  103. closingDoubleQuotesRegex2 = '(?<=%s)"' % closeClass
  104. # Get most opening single quotes:
  105. openingSingleQuotesRegex = r"%s'(?=\w)" % openingQuotesBase
  106. # Single closing quotes:
  107. closingSingleQuotesRegex = r"(?<=%s)'(?!\s|s\b|\d)" % closeClass
  108. closingSingleQuotesRegex2 = r"(?<=%s)'(\s|s\b)" % closeClass
  109. # All remaining quotes should be opening ones
  110. remainingSingleQuotesRegex = r"'"
  111. remainingDoubleQuotesRegex = r'"'
# HTML_RE with a negative lookahead appended, so that a match may not be
# immediately followed by a '>' character.
HTML_STRICT_RE = HTML_RE + r'(?!\>)'
  113. class SubstituteTextPattern(HtmlInlineProcessor):
  114. def __init__(self, pattern, replace, md):
  115. """ Replaces matches with some text. """
  116. HtmlInlineProcessor.__init__(self, pattern)
  117. self.replace = replace
  118. self.md = md
  119. @property
  120. @deprecated("Use 'md' instead.")
  121. def markdown(self):
  122. # TODO: remove this later
  123. return self.md
  124. def handleMatch(self, m, data):
  125. result = ''
  126. for part in self.replace:
  127. if isinstance(part, int):
  128. result += m.group(part)
  129. else:
  130. result += self.md.htmlStash.store(part)
  131. return result, m.start(0), m.end(0)
  132. class SmartyExtension(Extension):
  133. def __init__(self, **kwargs):
  134. self.config = {
  135. 'smart_quotes': [True, 'Educate quotes'],
  136. 'smart_angled_quotes': [False, 'Educate angled quotes'],
  137. 'smart_dashes': [True, 'Educate dashes'],
  138. 'smart_ellipses': [True, 'Educate ellipses'],
  139. 'substitutions': [{}, 'Overwrite default substitutions'],
  140. }
  141. super().__init__(**kwargs)
  142. self.substitutions = dict(substitutions)
  143. self.substitutions.update(self.getConfig('substitutions', default={}))
  144. def _addPatterns(self, md, patterns, serie, priority):
  145. for ind, pattern in enumerate(patterns):
  146. pattern += (md,)
  147. pattern = SubstituteTextPattern(*pattern)
  148. name = 'smarty-%s-%d' % (serie, ind)
  149. self.inlinePatterns.register(pattern, name, priority-ind)
  150. def educateDashes(self, md):
  151. emDashesPattern = SubstituteTextPattern(
  152. r'(?<!-)---(?!-)', (self.substitutions['mdash'],), md
  153. )
  154. enDashesPattern = SubstituteTextPattern(
  155. r'(?<!-)--(?!-)', (self.substitutions['ndash'],), md
  156. )
  157. self.inlinePatterns.register(emDashesPattern, 'smarty-em-dashes', 50)
  158. self.inlinePatterns.register(enDashesPattern, 'smarty-en-dashes', 45)
  159. def educateEllipses(self, md):
  160. ellipsesPattern = SubstituteTextPattern(
  161. r'(?<!\.)\.{3}(?!\.)', (self.substitutions['ellipsis'],), md
  162. )
  163. self.inlinePatterns.register(ellipsesPattern, 'smarty-ellipses', 10)
  164. def educateAngledQuotes(self, md):
  165. leftAngledQuotePattern = SubstituteTextPattern(
  166. r'\<\<', (self.substitutions['left-angle-quote'],), md
  167. )
  168. rightAngledQuotePattern = SubstituteTextPattern(
  169. r'\>\>', (self.substitutions['right-angle-quote'],), md
  170. )
  171. self.inlinePatterns.register(leftAngledQuotePattern, 'smarty-left-angle-quotes', 40)
  172. self.inlinePatterns.register(rightAngledQuotePattern, 'smarty-right-angle-quotes', 35)
  173. def educateQuotes(self, md):
  174. lsquo = self.substitutions['left-single-quote']
  175. rsquo = self.substitutions['right-single-quote']
  176. ldquo = self.substitutions['left-double-quote']
  177. rdquo = self.substitutions['right-double-quote']
  178. patterns = (
  179. (singleQuoteStartRe, (rsquo,)),
  180. (doubleQuoteStartRe, (rdquo,)),
  181. (doubleQuoteSetsRe, (ldquo + lsquo,)),
  182. (singleQuoteSetsRe, (lsquo + ldquo,)),
  183. (decadeAbbrRe, (rsquo,)),
  184. (openingSingleQuotesRegex, (1, lsquo)),
  185. (closingSingleQuotesRegex, (rsquo,)),
  186. (closingSingleQuotesRegex2, (rsquo, 1)),
  187. (remainingSingleQuotesRegex, (lsquo,)),
  188. (openingDoubleQuotesRegex, (1, ldquo)),
  189. (closingDoubleQuotesRegex, (rdquo,)),
  190. (closingDoubleQuotesRegex2, (rdquo,)),
  191. (remainingDoubleQuotesRegex, (ldquo,))
  192. )
  193. self._addPatterns(md, patterns, 'quotes', 30)
  194. def extendMarkdown(self, md):
  195. configs = self.getConfigs()
  196. self.inlinePatterns = Registry()
  197. if configs['smart_ellipses']:
  198. self.educateEllipses(md)
  199. if configs['smart_quotes']:
  200. self.educateQuotes(md)
  201. if configs['smart_angled_quotes']:
  202. self.educateAngledQuotes(md)
  203. # Override HTML_RE from inlinepatterns.py so that it does not
  204. # process tags with duplicate closing quotes.
  205. md.inlinePatterns.register(HtmlInlineProcessor(HTML_STRICT_RE, md), 'html', 90)
  206. if configs['smart_dashes']:
  207. self.educateDashes(md)
  208. inlineProcessor = InlineProcessor(md)
  209. inlineProcessor.inlinePatterns = self.inlinePatterns
  210. md.treeprocessors.register(inlineProcessor, 'smarty', 2)
  211. md.ESCAPED_CHARS.extend(['"', "'"])
  212. def makeExtension(**kwargs): # pragma: no cover
  213. return SmartyExtension(**kwargs)