compat.py 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. # -*- coding: utf-8 -*-
  2. # Natural Language Toolkit: Compatibility
  3. #
  4. # Copyright (C) 2001-2020 NLTK Project
  5. #
  6. # URL: <http://nltk.org/>
  7. # For license information, see LICENSE.TXT
  8. import os
  9. from functools import wraps
  10. # ======= Compatibility for datasets that care about Python versions ========
  11. # The following datasets have a /PY3 subdirectory containing
  12. # a full copy of the data which has been re-encoded or repickled.
  13. DATA_UPDATES = [
  14. ("chunkers", "maxent_ne_chunker"),
  15. ("help", "tagsets"),
  16. ("taggers", "maxent_treebank_pos_tagger"),
  17. ("tokenizers", "punkt"),
  18. ]
  19. _PY3_DATA_UPDATES = [os.path.join(*path_list) for path_list in DATA_UPDATES]
  20. def add_py3_data(path):
  21. for item in _PY3_DATA_UPDATES:
  22. if item in str(path) and "/PY3" not in str(path):
  23. pos = path.index(item) + len(item)
  24. if path[pos : pos + 4] == ".zip":
  25. pos += 4
  26. path = path[:pos] + "/PY3" + path[pos:]
  27. break
  28. return path
  29. # for use in adding /PY3 to the second (filename) argument
  30. # of the file pointers in data.py
  31. def py3_data(init_func):
  32. def _decorator(*args, **kwargs):
  33. args = (args[0], add_py3_data(args[1])) + args[2:]
  34. return init_func(*args, **kwargs)
  35. return wraps(init_func)(_decorator)