abbr.py 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. '''
  2. Abbreviation Extension for Python-Markdown
  3. ==========================================
  4. This extension adds abbreviation handling to Python-Markdown.
  5. See <https://Python-Markdown.github.io/extensions/abbreviations>
  6. for documentation.
  7. Original code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/) and
  8. [Seemant Kulleen](http://www.kulleen.org/)
  9. All changes Copyright 2008-2014 The Python Markdown Project
  10. License: [BSD](https://opensource.org/licenses/bsd-license.php)
  11. '''
  12. from . import Extension
  13. from ..preprocessors import Preprocessor
  14. from ..inlinepatterns import InlineProcessor
  15. from ..util import AtomicString
  16. import re
  17. import xml.etree.ElementTree as etree
# Global Vars
# Matches an abbreviation definition of the form `*[ABBR]: Title text`.
# It is applied with `.match()`, so the definition must begin at the start
# of the line. The `abbr` group captures the text between the brackets and
# the `title` group captures the remainder of the line after the colon.
ABBR_REF_RE = re.compile(r'[*]\[(?P<abbr>[^\]]*)\][ ]?:\s*(?P<title>.*)')
  20. class AbbrExtension(Extension):
  21. """ Abbreviation Extension for Python-Markdown. """
  22. def extendMarkdown(self, md):
  23. """ Insert AbbrPreprocessor before ReferencePreprocessor. """
  24. md.preprocessors.register(AbbrPreprocessor(md), 'abbr', 12)
  25. class AbbrPreprocessor(Preprocessor):
  26. """ Abbreviation Preprocessor - parse text for abbr references. """
  27. def run(self, lines):
  28. '''
  29. Find and remove all Abbreviation references from the text.
  30. Each reference is set as a new AbbrPattern in the markdown instance.
  31. '''
  32. new_text = []
  33. for line in lines:
  34. m = ABBR_REF_RE.match(line)
  35. if m:
  36. abbr = m.group('abbr').strip()
  37. title = m.group('title').strip()
  38. self.md.inlinePatterns.register(
  39. AbbrInlineProcessor(self._generate_pattern(abbr), title), 'abbr-%s' % abbr, 2
  40. )
  41. # Preserve the line to prevent raw HTML indexing issue.
  42. # https://github.com/Python-Markdown/markdown/issues/584
  43. new_text.append('')
  44. else:
  45. new_text.append(line)
  46. return new_text
  47. def _generate_pattern(self, text):
  48. '''
  49. Given a string, returns an regex pattern to match that string.
  50. 'HTML' -> r'(?P<abbr>[H][T][M][L])'
  51. Note: we force each char as a literal match (in brackets) as we don't
  52. know what they will be beforehand.
  53. '''
  54. chars = list(text)
  55. for i in range(len(chars)):
  56. chars[i] = r'[%s]' % chars[i]
  57. return r'(?P<abbr>\b%s\b)' % (r''.join(chars))
  58. class AbbrInlineProcessor(InlineProcessor):
  59. """ Abbreviation inline pattern. """
  60. def __init__(self, pattern, title):
  61. super().__init__(pattern)
  62. self.title = title
  63. def handleMatch(self, m, data):
  64. abbr = etree.Element('abbr')
  65. abbr.text = AtomicString(m.group('abbr'))
  66. abbr.set('title', self.title)
  67. return abbr, m.start(0), m.end(0)
  68. def makeExtension(**kwargs): # pragma: no cover
  69. return AbbrExtension(**kwargs)