betterem.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. """
  2. Better Emphasis.
  3. pymdownx.betterem
  4. Add intelligent handling of em and strong notations
  5. MIT license.
  6. Copyright (c) 2014 - 2017 Isaac Muse <isaacmuse@gmail.com>
  7. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
  8. documentation files (the "Software"), to deal in the Software without restriction, including without limitation
  9. the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
  10. and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
  11. The above copyright notice and this permission notice shall be included in all copies or substantial portions
  12. of the Software.
  13. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
  14. TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  15. THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
  16. CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  17. DEALINGS IN THE SOFTWARE.
  18. """
  19. import re
  20. from markdown import Extension
  21. from . import util
  22. SMART_UNDER_CONTENT = r'((?:(?<=\s)_+?(?=\s)|.)+?_*?)'
  23. SMART_STAR_CONTENT = r'((?:(?<=\s)\*+?(?=\s)|.)+?\**?)'
  24. SMART_UNDER_MIXED_CONTENT = r'((?:_(?=[^\s])|(?<=\s)_+?(?=\s))+?_*)'
  25. SMART_STAR_MIXED_CONTENT = r'((?:\*(?=[^\s])|(?<=\s)\*+?(?=\s))+?\**)'
  26. UNDER_CONTENT = r'(_|(?:(?<=\s)_|[^_])+?)'
  27. UNDER_CONTENT2 = r'((?:[^_]|(?<!_{2})_)+?)'
  28. STAR_CONTENT = r'(\*|(?:(?<=\s)\*|[^\*])+?)'
  29. STAR_CONTENT2 = r'((?:[^\*]|(?<!\*{2})\*)+?)'
  30. # ***strong,em***
  31. STAR_STRONG_EM = r'(\*{3})(?!\s)(\*{1,2}|[^\*]+?)(?<!\s)\1'
  32. # ___strong,em___
  33. UNDER_STRONG_EM = r'(_{3})(?!\s)(_{1,2}|[^_]+?)(?<!\s)\1'
  34. # ***strong,em*strong**
  35. STAR_STRONG_EM2 = r'(\*{3})(?![\s\*])%s(?<!\s)\*%s(?<!\s)\*{2}' % (STAR_CONTENT, STAR_CONTENT2)
  36. # ___strong,em_strong__
  37. UNDER_STRONG_EM2 = r'(_{3})(?![\s_])%s(?<!\s)_%s(?<!\s)_{2}' % (UNDER_CONTENT, UNDER_CONTENT2)
  38. # ***em,strong**em*
  39. STAR_EM_STRONG = r'(\*{3})(?![\s\*])%s(?<!\s)\*{2}%s(?<!\s)\*' % (STAR_CONTENT2, STAR_CONTENT)
  40. # **strong*em,strong***
  41. STAR_STRONG_EM3 = r'(\*{2})(?![\s\*])%s\*(?![\s\*])%s(?<!\s)\*{3}' % (STAR_CONTENT2, STAR_CONTENT)
  42. # ___em,strong__em_
  43. UNDER_EM_STRONG = r'(_{3})(?![\s_])%s(?<!\s)_{2}%s(?<!\s)_' % (UNDER_CONTENT2, UNDER_CONTENT)
  44. # __strong_em,strong___
  45. UNDER_STRONG_EM3 = r'(_{2})(?![\s_])%s_(?![\s_])%s(?<!\s)_{3}' % (UNDER_CONTENT2, UNDER_CONTENT)
  46. # **strong**
  47. STAR_STRONG = r'(\*{2})(?!\s)%s(?<!\s)\1' % STAR_CONTENT2
  48. # __strong__
  49. UNDER_STRONG = r'(_{2})(?!\s)%s(?<!\s)\1' % UNDER_CONTENT2
  50. # *emphasis*
  51. STAR_EM = r'(\*)(?!\s)%s(?<!\s)\1' % STAR_CONTENT
  52. # _emphasis_
  53. UNDER_EM = r'(_)(?!\s)%s(?<!\s)\1' % UNDER_CONTENT
  54. # Smart rules for when "smart underscore" is enabled
  55. # SMART: ___strong,em___
  56. SMART_UNDER_STRONG_EM = r'(?<!\w)(_{3})(?![\s_])%s(?<!\s)\1(?!\w)' % SMART_UNDER_CONTENT
  57. # ___strong,em_ strong__
  58. SMART_UNDER_STRONG_EM2 = \
  59. r'(?<!\w)(_{3})(?![\s_])%s(?<!\s)_(?!\w)%s(?<!\s)_{2}(?!\w)' % (SMART_UNDER_MIXED_CONTENT, SMART_UNDER_CONTENT)
  60. # ___em,strong__ em_
  61. SMART_UNDER_EM_STRONG = \
  62. r'(?<!\w)(_{3})(?![\s_])%s(?<!\s)_{2}(?!\w)%s(?<!\s)_(?!\w)' % (SMART_UNDER_MIXED_CONTENT, SMART_UNDER_CONTENT)
  63. # __strong__
  64. SMART_UNDER_STRONG = r'(?<!\w)(_{2})(?![\s_])%s(?<!\s)\1(?!\w)' % SMART_UNDER_CONTENT
  65. # SMART _em_
  66. SMART_UNDER_EM = r'(?<!\w)(_)(?![\s_])%s(?<!\s)\1(?!\w)' % SMART_UNDER_CONTENT
  67. # Smart rules for when "smart asterisk" is enabled
  68. # SMART: ***strong,em***
  69. SMART_STAR_STRONG_EM = r'(?:(?<=_)|(?<![\w\*]))(\*{3})(?![\s\*])%s(?<!\s)\1(?:(?=_)|(?![\w\*]))' % SMART_STAR_CONTENT
  70. # ***strong,em* strong**
  71. SMART_STAR_STRONG_EM2 = \
  72. r'(?:(?<=_)|(?<![\w\*]))(\*{3})(?![\s\*])%s(?<!\s)\*(?:(?=_)|(?![\w\*]))%s(?<!\s)\*{2}(?:(?=_)|(?![\w\*]))' % (
  73. SMART_STAR_MIXED_CONTENT, SMART_STAR_CONTENT
  74. )
  75. # ***em,strong** em*
  76. SMART_STAR_EM_STRONG = \
  77. r'(?:(?<=_)|(?<![\w\*]))(\*{3})(?![\s\*])%s(?<!\s)\*{2}(?:(?=_)|(?![\w\*]))%s(?<!\s)\*(?:(?=_)|(?![\w\*]))' % (
  78. SMART_STAR_MIXED_CONTENT, SMART_STAR_CONTENT
  79. )
  80. # **strong**
  81. SMART_STAR_STRONG = r'(?:(?<=_)|(?<![\w\*]))(\*{2})(?![\s\*])%s(?<!\s)\1(?:(?=_)|(?![\w\*]))' % SMART_STAR_CONTENT
  82. # SMART *em*
  83. SMART_STAR_EM = r'(?:(?<=_)|(?<![\w\*]))(\*)(?![\s\*])%s(?<!\s)\1(?:(?=_)|(?![\w\*]))' % SMART_STAR_CONTENT
  84. class AsteriskProcessor(util.PatternSequenceProcessor):
  85. """Emphasis processor for handling strong and em matches."""
  86. PATTERNS = [
  87. util.PatSeqItem(re.compile(STAR_STRONG_EM, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
  88. util.PatSeqItem(re.compile(STAR_EM_STRONG, re.DOTALL | re.UNICODE), 'double', 'em,strong'),
  89. util.PatSeqItem(re.compile(STAR_STRONG_EM2, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
  90. util.PatSeqItem(re.compile(STAR_STRONG_EM3, re.DOTALL | re.UNICODE), 'double2', 'strong,em'),
  91. util.PatSeqItem(re.compile(STAR_STRONG, re.DOTALL | re.UNICODE), 'single', 'strong'),
  92. util.PatSeqItem(re.compile(STAR_EM, re.DOTALL | re.UNICODE), 'single', 'em')
  93. ]
  94. class SmartAsteriskProcessor(util.PatternSequenceProcessor):
  95. """Smart emphasis and strong processor."""
  96. PATTERNS = [
  97. util.PatSeqItem(re.compile(SMART_STAR_STRONG_EM, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
  98. util.PatSeqItem(re.compile(SMART_STAR_EM_STRONG, re.DOTALL | re.UNICODE), 'double', 'em,strong'),
  99. util.PatSeqItem(re.compile(SMART_STAR_STRONG_EM2, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
  100. util.PatSeqItem(re.compile(SMART_STAR_STRONG, re.DOTALL | re.UNICODE), 'single', 'strong'),
  101. util.PatSeqItem(re.compile(SMART_STAR_EM, re.DOTALL | re.UNICODE), 'single', 'em')
  102. ]
  103. class UnderscoreProcessor(util.PatternSequenceProcessor):
  104. """Emphasis processor for handling strong and em matches."""
  105. PATTERNS = [
  106. util.PatSeqItem(re.compile(UNDER_STRONG_EM, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
  107. util.PatSeqItem(re.compile(UNDER_EM_STRONG, re.DOTALL | re.UNICODE), 'double', 'em,strong'),
  108. util.PatSeqItem(re.compile(UNDER_STRONG_EM2, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
  109. util.PatSeqItem(re.compile(UNDER_STRONG_EM3, re.DOTALL | re.UNICODE), 'double2', 'strong,em'),
  110. util.PatSeqItem(re.compile(UNDER_STRONG, re.DOTALL | re.UNICODE), 'single', 'strong'),
  111. util.PatSeqItem(re.compile(UNDER_EM, re.DOTALL | re.UNICODE), 'single', 'em')
  112. ]
  113. class SmartUnderscoreProcessor(util.PatternSequenceProcessor):
  114. """Emphasis processor for handling strong and em matches."""
  115. PATTERNS = [
  116. util.PatSeqItem(re.compile(SMART_UNDER_STRONG_EM, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
  117. util.PatSeqItem(re.compile(SMART_UNDER_EM_STRONG, re.DOTALL | re.UNICODE), 'double', 'em,strong'),
  118. util.PatSeqItem(re.compile(SMART_UNDER_STRONG_EM2, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
  119. util.PatSeqItem(re.compile(SMART_UNDER_STRONG, re.DOTALL | re.UNICODE), 'single', 'strong'),
  120. util.PatSeqItem(re.compile(SMART_UNDER_EM, re.DOTALL | re.UNICODE), 'single', 'em')
  121. ]
  122. class BetterEmExtension(Extension):
  123. """Add extension to Markdown class."""
  124. def __init__(self, *args, **kwargs):
  125. """Initialize."""
  126. self.config = {
  127. 'smart_enable': ["underscore", "Treat connected words intelligently - Default: underscore"]
  128. }
  129. super(BetterEmExtension, self).__init__(*args, **kwargs)
  130. def extendMarkdown(self, md):
  131. """Modify inline patterns."""
  132. # Not better yet, so let's make it better
  133. md.registerExtension(self)
  134. self.make_better(md)
  135. def make_better(self, md):
  136. """
  137. Configure all the pattern rules.
  138. This should be used instead of smart_strong package.
  139. pymdownx.extra should be used in place of markdown.extensions.extra.
  140. """
  141. config = self.getConfigs()
  142. enabled = config["smart_enable"]
  143. if enabled:
  144. enable_all = enabled == "all"
  145. enable_under = enabled == "underscore" or enable_all
  146. enable_star = enabled == "asterisk" or enable_all
  147. # If we don't have to move an existing extension, use the same priority,
  148. # but if we do have to, move it closely to the relative needed position.
  149. md.inlinePatterns.deregister('not_strong', False)
  150. md.inlinePatterns.deregister('strong_em', False)
  151. md.inlinePatterns.deregister('em_strong', False)
  152. md.inlinePatterns.deregister('em_strong2', False)
  153. md.inlinePatterns.deregister('strong', False)
  154. md.inlinePatterns.deregister('emphasis', False)
  155. md.inlinePatterns.deregister('strong2', False)
  156. md.inlinePatterns.deregister('emphasis2', False)
  157. asterisk = SmartAsteriskProcessor(r'\*') if enable_star else AsteriskProcessor(r'\*')
  158. md.inlinePatterns.register(asterisk, "strong_em", 50)
  159. underscore = SmartUnderscoreProcessor('_') if enable_under else UnderscoreProcessor('_')
  160. md.inlinePatterns.register(underscore, "strong_em2", 40)
  161. def makeExtension(*args, **kwargs):
  162. """Return extension."""
  163. return BetterEmExtension(*args, **kwargs)