files.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270
  1. import fnmatch
  2. import os
  3. import logging
  4. from functools import cmp_to_key
  5. from urllib.parse import quote as urlquote
  6. from mkdocs import utils
# Module-level logger named after this module. Every record logged through it
# is passed to the filter object exposed by `mkdocs.utils`.
# NOTE(review): presumably `warning_filter` is used to track emitted warnings;
# confirm against `mkdocs.utils`.
log = logging.getLogger(__name__)
log.addFilter(utils.warning_filter)
  9. class Files:
  10. """ A collection of File objects. """
  11. def __init__(self, files):
  12. self._files = files
  13. self.src_paths = {file.src_path: file for file in files}
  14. def __iter__(self):
  15. return iter(self._files)
  16. def __len__(self):
  17. return len(self._files)
  18. def __contains__(self, path):
  19. return path in self.src_paths
  20. def get_file_from_path(self, path):
  21. """ Return a File instance with File.src_path equal to path. """
  22. return self.src_paths.get(os.path.normpath(path))
  23. def append(self, file):
  24. """ Append file to Files collection. """
  25. self._files.append(file)
  26. self.src_paths[file.src_path] = file
  27. def copy_static_files(self, dirty=False):
  28. """ Copy static files from source to destination. """
  29. for file in self:
  30. if not file.is_documentation_page():
  31. file.copy_file(dirty)
  32. def documentation_pages(self):
  33. """ Return iterable of all Markdown page file objects. """
  34. return [file for file in self if file.is_documentation_page()]
  35. def static_pages(self):
  36. """ Return iterable of all static page file objects. """
  37. return [file for file in self if file.is_static_page()]
  38. def media_files(self):
  39. """ Return iterable of all file objects which are not documentation or static pages. """
  40. return [file for file in self if file.is_media_file()]
  41. def javascript_files(self):
  42. """ Return iterable of all javascript file objects. """
  43. return [file for file in self if file.is_javascript()]
  44. def css_files(self):
  45. """ Return iterable of all CSS file objects. """
  46. return [file for file in self if file.is_css()]
  47. def add_files_from_theme(self, env, config):
  48. """ Retrieve static files from Jinja environment and add to collection. """
  49. def filter(name):
  50. # '.*' filters dot files/dirs at root level whereas '*/.*' filters nested levels
  51. patterns = ['.*', '*/.*', '*.py', '*.pyc', '*.html', '*readme*', 'mkdocs_theme.yml']
  52. patterns.extend('*{}'.format(x) for x in utils.markdown_extensions)
  53. patterns.extend(config['theme'].static_templates)
  54. for pattern in patterns:
  55. if fnmatch.fnmatch(name.lower(), pattern):
  56. return False
  57. return True
  58. for path in env.list_templates(filter_func=filter):
  59. # Theme files do not override docs_dir files
  60. path = os.path.normpath(path)
  61. if path not in self:
  62. for dir in config['theme'].dirs:
  63. # Find the first theme dir which contains path
  64. if os.path.isfile(os.path.join(dir, path)):
  65. self.append(File(path, dir, config['site_dir'], config['use_directory_urls']))
  66. break
  67. class File:
  68. """
  69. A MkDocs File object.
  70. Points to the source and destination locations of a file.
  71. The `path` argument must be a path that exists relative to `src_dir`.
  72. The `src_dir` and `dest_dir` must be absolute paths on the local file system.
  73. The `use_directory_urls` argument controls how destination paths are generated. If `False`, a Markdown file is
  74. mapped to an HTML file of the same name (the file extension is changed to `.html`). If True, a Markdown file is
  75. mapped to an HTML index file (`index.html`) nested in a directory using the "name" of the file in `path`. The
  76. `use_directory_urls` argument has no effect on non-Markdown files.
  77. File objects have the following properties, which are Unicode strings:
  78. File.src_path
  79. The pure path of the source file relative to the source directory.
  80. File.abs_src_path
  81. The absolute concrete path of the source file.
  82. File.dest_path
  83. The pure path of the destination file relative to the destination directory.
  84. File.abs_dest_path
  85. The absolute concrete path of the destination file.
  86. File.url
  87. The url of the destination file relative to the destination directory as a string.
  88. """
  89. def __init__(self, path, src_dir, dest_dir, use_directory_urls):
  90. self.page = None
  91. self.src_path = os.path.normpath(path)
  92. self.abs_src_path = os.path.normpath(os.path.join(src_dir, self.src_path))
  93. self.name = self._get_stem()
  94. self.dest_path = self._get_dest_path(use_directory_urls)
  95. self.abs_dest_path = os.path.normpath(os.path.join(dest_dir, self.dest_path))
  96. self.url = self._get_url(use_directory_urls)
  97. def __eq__(self, other):
  98. def sub_dict(d):
  99. return {key: value for key, value in d.items() if key in ['src_path', 'abs_src_path', 'url']}
  100. return (isinstance(other, self.__class__) and sub_dict(self.__dict__) == sub_dict(other.__dict__))
  101. def __ne__(self, other):
  102. return not self.__eq__(other)
  103. def _get_stem(self):
  104. """ Return the name of the file without it's extension. """
  105. filename = os.path.basename(self.src_path)
  106. stem, ext = os.path.splitext(filename)
  107. return 'index' if stem in ('index', 'README') else stem
  108. def _get_dest_path(self, use_directory_urls):
  109. """ Return destination path based on source path. """
  110. if self.is_documentation_page():
  111. parent, filename = os.path.split(self.src_path)
  112. if not use_directory_urls or self.name == 'index':
  113. # index.md or README.md => index.html
  114. # foo.md => foo.html
  115. return os.path.join(parent, self.name + '.html')
  116. else:
  117. # foo.md => foo/index.html
  118. return os.path.join(parent, self.name, 'index.html')
  119. return self.src_path
  120. def _get_url(self, use_directory_urls):
  121. """ Return url based in destination path. """
  122. url = self.dest_path.replace(os.path.sep, '/')
  123. dirname, filename = os.path.split(url)
  124. if use_directory_urls and filename == 'index.html':
  125. if dirname == '':
  126. url = '.'
  127. else:
  128. url = dirname + '/'
  129. return urlquote(url)
  130. def url_relative_to(self, other):
  131. """ Return url for file relative to other file. """
  132. return utils.get_relative_url(self.url, other.url if isinstance(other, File) else other)
  133. def copy_file(self, dirty=False):
  134. """ Copy source file to destination, ensuring parent directories exist. """
  135. if dirty and not self.is_modified():
  136. log.debug("Skip copying unmodified file: '{}'".format(self.src_path))
  137. else:
  138. log.debug("Copying media file: '{}'".format(self.src_path))
  139. utils.copy_file(self.abs_src_path, self.abs_dest_path)
  140. def is_modified(self):
  141. if os.path.isfile(self.abs_dest_path):
  142. return os.path.getmtime(self.abs_dest_path) < os.path.getmtime(self.abs_src_path)
  143. return True
  144. def is_documentation_page(self):
  145. """ Return True if file is a Markdown page. """
  146. return os.path.splitext(self.src_path)[1] in utils.markdown_extensions
  147. def is_static_page(self):
  148. """ Return True if file is a static page (html, xml, json). """
  149. return os.path.splitext(self.src_path)[1] in (
  150. '.html',
  151. '.htm',
  152. '.xml',
  153. '.json',
  154. )
  155. def is_media_file(self):
  156. """ Return True if file is not a documentation or static page. """
  157. return not (self.is_documentation_page() or self.is_static_page())
  158. def is_javascript(self):
  159. """ Return True if file is a JavaScript file. """
  160. return os.path.splitext(self.src_path)[1] in (
  161. '.js',
  162. '.javascript',
  163. )
  164. def is_css(self):
  165. """ Return True if file is a CSS file. """
  166. return os.path.splitext(self.src_path)[1] in (
  167. '.css',
  168. )
  169. def get_files(config):
  170. """ Walk the `docs_dir` and return a Files collection. """
  171. files = []
  172. exclude = ['.*', '/templates']
  173. for source_dir, dirnames, filenames in os.walk(config['docs_dir'], followlinks=True):
  174. relative_dir = os.path.relpath(source_dir, config['docs_dir'])
  175. for dirname in list(dirnames):
  176. path = os.path.normpath(os.path.join(relative_dir, dirname))
  177. # Skip any excluded directories
  178. if _filter_paths(basename=dirname, path=path, is_dir=True, exclude=exclude):
  179. dirnames.remove(dirname)
  180. dirnames.sort()
  181. for filename in _sort_files(filenames):
  182. path = os.path.normpath(os.path.join(relative_dir, filename))
  183. # Skip any excluded files
  184. if _filter_paths(basename=filename, path=path, is_dir=False, exclude=exclude):
  185. continue
  186. # Skip README.md if an index file also exists in dir
  187. if filename.lower() == 'readme.md' and 'index.md' in filenames:
  188. log.warning("Both index.md and readme.md found. Skipping readme.md from {}".format(source_dir))
  189. continue
  190. files.append(File(path, config['docs_dir'], config['site_dir'], config['use_directory_urls']))
  191. return Files(files)
  192. def _sort_files(filenames):
  193. """ Always sort `index` or `README` as first filename in list. """
  194. def compare(x, y):
  195. if x == y:
  196. return 0
  197. if os.path.splitext(y)[0] in ['index', 'README']:
  198. return 1
  199. if os.path.splitext(x)[0] in ['index', 'README'] or x < y:
  200. return -1
  201. return 1
  202. return sorted(filenames, key=cmp_to_key(compare))
  203. def _filter_paths(basename, path, is_dir, exclude):
  204. """ .gitignore style file filtering. """
  205. for item in exclude:
  206. # Items ending in '/' apply only to directories.
  207. if item.endswith('/') and not is_dir:
  208. continue
  209. # Items starting with '/' apply to the whole path.
  210. # In any other cases just the basename is used.
  211. match = path if item.startswith('/') else basename
  212. if fnmatch.fnmatch(match, item.strip('/')):
  213. return True
  214. return False