pages.py 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. import os
  2. import logging
  3. from urllib.parse import urlparse, urlunparse, urljoin
  4. from urllib.parse import unquote as urlunquote
  5. import markdown
  6. from markdown.extensions import Extension
  7. from markdown.treeprocessors import Treeprocessor
  8. from markdown.util import AMP_SUBSTITUTE
  9. from mkdocs.structure.toc import get_toc
  10. from mkdocs.utils import meta, get_build_date, get_markdown_title, warning_filter
  11. log = logging.getLogger(__name__)
  12. log.addFilter(warning_filter)
  13. class Page:
  14. def __init__(self, title, file, config):
  15. file.page = self
  16. self.file = file
  17. self.title = title
  18. # Navigation attributes
  19. self.parent = None
  20. self.children = None
  21. self.previous_page = None
  22. self.next_page = None
  23. self.active = False
  24. self.is_section = False
  25. self.is_page = True
  26. self.is_link = False
  27. self.update_date = get_build_date()
  28. self._set_canonical_url(config.get('site_url', None))
  29. self._set_edit_url(config.get('repo_url', None), config.get('edit_uri', None))
  30. # Placeholders to be filled in later in the build process.
  31. self.markdown = None
  32. self.content = None
  33. self.toc = []
  34. self.meta = {}
  35. def __eq__(self, other):
  36. def sub_dict(d):
  37. return {key: value for key, value in d.items() if key in ['title', 'file']}
  38. return (isinstance(other, self.__class__) and sub_dict(self.__dict__) == sub_dict(other.__dict__))
  39. def __ne__(self, other):
  40. return not self.__eq__(other)
  41. def __repr__(self):
  42. title = "'{}'".format(self.title) if (self.title is not None) else '[blank]'
  43. return "Page(title={}, url='{}')".format(title, self.abs_url or self.file.url)
  44. def _indent_print(self, depth=0):
  45. return '{}{}'.format(' ' * depth, repr(self))
  46. def _get_active(self):
  47. """ Return active status of page. """
  48. return self.__active
  49. def _set_active(self, value):
  50. """ Set active status of page and ancestors. """
  51. self.__active = bool(value)
  52. if self.parent is not None:
  53. self.parent.active = bool(value)
  54. active = property(_get_active, _set_active)
  55. @property
  56. def is_index(self):
  57. return self.file.name == 'index'
  58. @property
  59. def is_top_level(self):
  60. return self.parent is None
  61. @property
  62. def is_homepage(self):
  63. return self.is_top_level and self.is_index and self.file.url == '.'
  64. @property
  65. def url(self):
  66. return '' if self.file.url == '.' else self.file.url
  67. @property
  68. def ancestors(self):
  69. if self.parent is None:
  70. return []
  71. return [self.parent] + self.parent.ancestors
  72. def _set_canonical_url(self, base):
  73. if base:
  74. if not base.endswith('/'):
  75. base += '/'
  76. self.canonical_url = urljoin(base, self.url)
  77. self.abs_url = urlparse(self.canonical_url).path
  78. else:
  79. self.canonical_url = None
  80. self.abs_url = None
  81. def _set_edit_url(self, repo_url, edit_uri):
  82. if repo_url and edit_uri:
  83. src_path = self.file.src_path.replace('\\', '/')
  84. self.edit_url = urljoin(repo_url, edit_uri + src_path)
  85. else:
  86. self.edit_url = None
  87. def read_source(self, config):
  88. source = config['plugins'].run_event(
  89. 'page_read_source', page=self, config=config
  90. )
  91. if source is None:
  92. try:
  93. with open(self.file.abs_src_path, 'r', encoding='utf-8-sig', errors='strict') as f:
  94. source = f.read()
  95. except OSError:
  96. log.error('File not found: {}'.format(self.file.src_path))
  97. raise
  98. except ValueError:
  99. log.error('Encoding error reading file: {}'.format(self.file.src_path))
  100. raise
  101. self.markdown, self.meta = meta.get_data(source)
  102. self._set_title()
  103. def _set_title(self):
  104. """
  105. Set the title for a Markdown document.
  106. Check these in order and use the first that returns a valid title:
  107. - value provided on init (passed in from config)
  108. - value of metadata 'title'
  109. - content of the first H1 in Markdown content
  110. - convert filename to title
  111. """
  112. if self.title is not None:
  113. return
  114. if 'title' in self.meta:
  115. self.title = self.meta['title']
  116. return
  117. title = get_markdown_title(self.markdown)
  118. if title is None:
  119. if self.is_homepage:
  120. title = 'Home'
  121. else:
  122. title = self.file.name.replace('-', ' ').replace('_', ' ')
  123. # Capitalize if the filename was all lowercase, otherwise leave it as-is.
  124. if title.lower() == title:
  125. title = title.capitalize()
  126. self.title = title
  127. def render(self, config, files):
  128. """
  129. Convert the Markdown source file to HTML as per the config.
  130. """
  131. extensions = [
  132. _RelativePathExtension(self.file, files)
  133. ] + config['markdown_extensions']
  134. md = markdown.Markdown(
  135. extensions=extensions,
  136. extension_configs=config['mdx_configs'] or {}
  137. )
  138. self.content = md.convert(self.markdown)
  139. self.toc = get_toc(getattr(md, 'toc_tokens', []))
  140. class _RelativePathTreeprocessor(Treeprocessor):
  141. def __init__(self, file, files):
  142. self.file = file
  143. self.files = files
  144. def run(self, root):
  145. """
  146. Update urls on anchors and images to make them relative
  147. Iterates through the full document tree looking for specific
  148. tags and then makes them relative based on the site navigation
  149. """
  150. for element in root.iter():
  151. if element.tag == 'a':
  152. key = 'href'
  153. elif element.tag == 'img':
  154. key = 'src'
  155. else:
  156. continue
  157. url = element.get(key)
  158. new_url = self.path_to_url(url)
  159. element.set(key, new_url)
  160. return root
  161. def path_to_url(self, url):
  162. scheme, netloc, path, params, query, fragment = urlparse(url)
  163. if (scheme or netloc or not path or url.startswith('/') or url.startswith('\\')
  164. or AMP_SUBSTITUTE in url or '.' not in os.path.split(path)[-1]):
  165. # Ignore URLs unless they are a relative link to a source file.
  166. # AMP_SUBSTITUTE is used internally by Markdown only for email.
  167. # No '.' in the last part of a path indicates path does not point to a file.
  168. return url
  169. # Determine the filepath of the target.
  170. target_path = os.path.join(os.path.dirname(self.file.src_path), urlunquote(path))
  171. target_path = os.path.normpath(target_path).lstrip(os.sep)
  172. # Validate that the target exists in files collection.
  173. if target_path not in self.files:
  174. log.warning(
  175. "Documentation file '{}' contains a link to '{}' which is not found "
  176. "in the documentation files.".format(self.file.src_path, target_path)
  177. )
  178. return url
  179. target_file = self.files.get_file_from_path(target_path)
  180. path = target_file.url_relative_to(self.file)
  181. components = (scheme, netloc, path, params, query, fragment)
  182. return urlunparse(components)
  183. class _RelativePathExtension(Extension):
  184. """
  185. The Extension class is what we pass to markdown, it then
  186. registers the Treeprocessor.
  187. """
  188. def __init__(self, file, files):
  189. self.file = file
  190. self.files = files
  191. def extendMarkdown(self, md):
  192. relpath = _RelativePathTreeprocessor(self.file, self.files)
  193. md.treeprocessors.register(relpath, "relpath", 0)