| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263 |
- '''
- Smarty extension for Python-Markdown
- ====================================
- Adds conversion of ASCII dashes, quotes and ellipses to their HTML
- entity equivalents.
- See <https://Python-Markdown.github.io/extensions/smarty>
- for documentation.
- Author: 2013, Dmitry Shachnev <mitya57@gmail.com>
- All changes Copyright 2013-2014 The Python Markdown Project
- License: [BSD](https://opensource.org/licenses/bsd-license.php)
- SmartyPants license:
- Copyright (c) 2003 John Gruber <https://daringfireball.net/>
- All rights reserved.
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in
- the documentation and/or other materials provided with the
- distribution.
- * Neither the name "SmartyPants" nor the names of its contributors
- may be used to endorse or promote products derived from this
- software without specific prior written permission.
- This software is provided by the copyright holders and contributors "as
- is" and any express or implied warranties, including, but not limited
- to, the implied warranties of merchantability and fitness for a
- particular purpose are disclaimed. In no event shall the copyright
- owner or contributors be liable for any direct, indirect, incidental,
- special, exemplary, or consequential damages (including, but not
- limited to, procurement of substitute goods or services; loss of use,
- data, or profits; or business interruption) however caused and on any
- theory of liability, whether in contract, strict liability, or tort
- (including negligence or otherwise) arising in any way out of the use
- of this software, even if advised of the possibility of such damage.
- smartypants.py license:
- smartypants.py is a derivative work of SmartyPants.
- Copyright (c) 2004, 2007 Chad Miller <http://web.chad.org/>
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in
- the documentation and/or other materials provided with the
- distribution.
- This software is provided by the copyright holders and contributors "as
- is" and any express or implied warranties, including, but not limited
- to, the implied warranties of merchantability and fitness for a
- particular purpose are disclaimed. In no event shall the copyright
- owner or contributors be liable for any direct, indirect, incidental,
- special, exemplary, or consequential damages (including, but not
- limited to, procurement of substitute goods or services; loss of use,
- data, or profits; or business interruption) however caused and on any
- theory of liability, whether in contract, strict liability, or tort
- (including negligence or otherwise) arising in any way out of the use
- of this software, even if advised of the possibility of such damage.
- '''
- from . import Extension
- from ..inlinepatterns import HtmlInlineProcessor, HTML_RE
- from ..treeprocessors import InlineProcessor
- from ..util import Registry, deprecated
# Constants for quote education.

# Character class matching a single ASCII punctuation character.
punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""
# Character class matching whitespace or common word-terminating punctuation.
endOfWordClass = r"[\s.,;:!?)]"
# Characters that may directly precede a closing quote: anything except
# space, tab, CR, LF, an opening bracket, a hyphen, or U+0002/U+0003.
closeClass = r"[^\ \t\r\n\[\{\(\-\u0002\u0003]"

# Capturing group for whatever may directly precede an opening quote.
# The entity spellings are listed explicitly (rather than only the literal
# characters) so that text which already contains "&nbsp;", "&#8211;" or
# "&#8212;" is recognized as well.
openingQuotesBase = (
    r'(\s'               # a whitespace char
    r'|&nbsp;'           # or a non-breaking space entity
    r'|--'               # or dashes
    r'|–|—'              # or unicode
    r'|&[mn]dash;'       # or named dash entities
    r'|&#8211;|&#8212;'  # or decimal entities
    r')'
)

# Default replacement text, keyed by substitution name.  Values are HTML
# entities; SmartyExtension copies this mapping and lets the user override
# individual entries through its 'substitutions' config option.
substitutions = {
    'mdash': '&mdash;',
    'ndash': '&ndash;',
    'ellipsis': '&hellip;',
    'left-angle-quote': '&laquo;',
    'right-angle-quote': '&raquo;',
    'left-single-quote': '&lsquo;',
    'right-single-quote': '&rsquo;',
    'left-double-quote': '&ldquo;',
    'right-double-quote': '&rdquo;',
}

# Special case if the very first character is a quote
# followed by punctuation at a non-word-break. Close the quotes by brute force:
singleQuoteStartRe = r"^'(?=%s\B)" % punctClass
doubleQuoteStartRe = r'^"(?=%s\B)' % punctClass

# Special case for double sets of quotes, e.g.:
#   <p>He said, "'Quoted' words in a larger quote."</p>
doubleQuoteSetsRe = r""""'(?=\w)"""
singleQuoteSetsRe = r"""'"(?=\w)"""

# Special case for decade abbreviations (the '80s):
decadeAbbrRe = r"(?<!\w)'(?=\d{2}s)"

# Get most opening double quotes:
openingDoubleQuotesRegex = r'%s"(?=\w)' % openingQuotesBase

# Double closing quotes:
closingDoubleQuotesRegex = r'"(?=\s)'
closingDoubleQuotesRegex2 = r'(?<=%s)"' % closeClass

# Get most opening single quotes:
openingSingleQuotesRegex = r"%s'(?=\w)" % openingQuotesBase

# Single closing quotes:
closingSingleQuotesRegex = r"(?<=%s)'(?!\s|s\b|\d)" % closeClass
closingSingleQuotesRegex2 = r"(?<=%s)'(\s|s\b)" % closeClass

# All remaining quotes should be opening ones
remainingSingleQuotesRegex = r"'"
remainingDoubleQuotesRegex = r'"'
# Raw-HTML pattern built from the imported HTML_RE with one extra constraint
# appended to it.
HTML_STRICT_RE = (
    HTML_RE
    + r'(?!\>)'  # negative lookahead: no match when another ">" follows
)
class SubstituteTextPattern(HtmlInlineProcessor):
    """Inline processor that swaps every match of a regex for fixed text.

    The replacement is described by ``replace``, a sequence whose items are
    either integers (meaning "copy that capture group of the match") or
    strings (literal replacement text, stored in the Markdown instance's
    ``htmlStash``).
    """

    def __init__(self, pattern, replace, md):
        """ Replaces matches with some text. """
        super().__init__(pattern)
        self.replace = replace
        self.md = md

    @property
    @deprecated("Use 'md' instead.")
    def markdown(self):
        # TODO: remove this later
        return self.md

    def handleMatch(self, m, data):
        """Build the replacement for match ``m``.

        Returns a ``(text, start, end)`` tuple where ``text`` is the
        assembled replacement and ``start``/``end`` delimit the whole match.
        ``data`` is accepted but not used here.
        """
        pieces = [
            # Integer items copy a capture group through unchanged; anything
            # else is stashed and represented by the value store() returns.
            m.group(part) if isinstance(part, int)
            else self.md.htmlStash.store(part)
            for part in self.replace
        ]
        return ''.join(pieces), m.start(0), m.end(0)
class SmartyExtension(Extension):
    """Markdown extension that educates quotes, dashes and ellipses.

    The individual features are switched by the ``smart_*`` config options;
    the replacement text for each feature comes from ``self.substitutions``.
    """

    def __init__(self, **kwargs):
        """Declare the config options and merge user substitutions."""
        self.config = {
            'smart_quotes': [True, 'Educate quotes'],
            'smart_angled_quotes': [False, 'Educate angled quotes'],
            'smart_dashes': [True, 'Educate dashes'],
            'smart_ellipses': [True, 'Educate ellipses'],
            'substitutions': [{}, 'Overwrite default substitutions'],
        }
        super().__init__(**kwargs)
        # Start from a copy of the module defaults so that user overrides
        # never leak back into the shared module-level mapping.
        merged = dict(substitutions)
        merged.update(self.getConfig('substitutions', default={}))
        self.substitutions = merged

    def _addPatterns(self, md, patterns, serie, priority):
        """Register a series of ``(regex, replace)`` pairs.

        Each pair becomes a SubstituteTextPattern named
        ``smarty-<serie>-<index>``; priorities count down from ``priority``
        by one per entry, so earlier entries are registered with higher
        priority values.
        """
        for index, spec in enumerate(patterns):
            processor = SubstituteTextPattern(*spec, md)
            label = 'smarty-%s-%d' % (serie, index)
            self.inlinePatterns.register(processor, label, priority - index)

    def educateDashes(self, md):
        """Register the em-dash (``---``) and en-dash (``--``) patterns."""
        dash_rules = (
            ('smarty-em-dashes', r'(?<!-)---(?!-)', 'mdash', 50),
            ('smarty-en-dashes', r'(?<!-)--(?!-)', 'ndash', 45),
        )
        for name, regex, key, priority in dash_rules:
            processor = SubstituteTextPattern(
                regex, (self.substitutions[key],), md
            )
            self.inlinePatterns.register(processor, name, priority)

    def educateEllipses(self, md):
        """Register the pattern for exactly three consecutive dots."""
        replacement = (self.substitutions['ellipsis'],)
        processor = SubstituteTextPattern(
            r'(?<!\.)\.{3}(?!\.)', replacement, md
        )
        self.inlinePatterns.register(processor, 'smarty-ellipses', 10)

    def educateAngledQuotes(self, md):
        """Register the ``<<`` and ``>>`` angled-quote patterns."""
        angle_rules = (
            ('smarty-left-angle-quotes', r'\<\<', 'left-angle-quote', 40),
            ('smarty-right-angle-quotes', r'\>\>', 'right-angle-quote', 35),
        )
        for name, regex, key, priority in angle_rules:
            processor = SubstituteTextPattern(
                regex, (self.substitutions[key],), md
            )
            self.inlinePatterns.register(processor, name, priority)

    def educateQuotes(self, md):
        """Register the ordered series of single/double quote patterns."""
        subs = self.substitutions
        left_single = subs['left-single-quote']
        right_single = subs['right-single-quote']
        left_double = subs['left-double-quote']
        right_double = subs['right-double-quote']
        # Order matters: _addPatterns assigns descending priorities, so the
        # special cases listed first are registered above the catch-alls.
        quote_rules = (
            (singleQuoteStartRe, (right_single,)),
            (doubleQuoteStartRe, (right_double,)),
            (doubleQuoteSetsRe, (left_double + left_single,)),
            (singleQuoteSetsRe, (left_single + left_double,)),
            (decadeAbbrRe, (right_single,)),
            (openingSingleQuotesRegex, (1, left_single)),
            (closingSingleQuotesRegex, (right_single,)),
            (closingSingleQuotesRegex2, (right_single, 1)),
            (remainingSingleQuotesRegex, (left_single,)),
            (openingDoubleQuotesRegex, (1, left_double)),
            (closingDoubleQuotesRegex, (right_double,)),
            (closingDoubleQuotesRegex2, (right_double,)),
            (remainingDoubleQuotesRegex, (left_double,)),
        )
        self._addPatterns(md, quote_rules, 'quotes', 30)

    def extendMarkdown(self, md):
        """Install the enabled smarty patterns on the Markdown instance."""
        options = self.getConfigs()
        # The smarty patterns live in their own registry, separate from
        # md.inlinePatterns, and are run by a dedicated tree processor.
        self.inlinePatterns = Registry()
        if options['smart_ellipses']:
            self.educateEllipses(md)
        if options['smart_quotes']:
            self.educateQuotes(md)
        if options['smart_angled_quotes']:
            self.educateAngledQuotes(md)
            # Override HTML_RE from inlinepatterns.py so that it does not
            # process tags with duplicate closing quotes.
            strict_html = HtmlInlineProcessor(HTML_STRICT_RE, md)
            md.inlinePatterns.register(strict_html, 'html', 90)
        if options['smart_dashes']:
            self.educateDashes(md)
        smarty_processor = InlineProcessor(md)
        smarty_processor.inlinePatterns = self.inlinePatterns
        md.treeprocessors.register(smarty_processor, 'smarty', 2)
        md.ESCAPED_CHARS.extend(['"', "'"])
def makeExtension(**kwargs):  # pragma: no cover
    """Build a SmartyExtension, forwarding every keyword argument to it."""
    extension = SmartyExtension(**kwargs)
    return extension
|