| 12345678910111213141516171819202122232425262728293031323334353637383940414243 |
- # -*- coding: utf-8 -*-
- # Natural Language Toolkit: Compatibility
- #
- # Copyright (C) 2001-2020 NLTK Project
- #
- # URL: <http://nltk.org/>
- # For license information, see LICENSE.TXT
- import os
- from functools import wraps
- # ======= Compatibility for datasets that care about Python versions ========
- # The following datasets have a /PY3 subdirectory containing
- # a full copy of the data which has been re-encoded or repickled.
- DATA_UPDATES = [
- ("chunkers", "maxent_ne_chunker"),
- ("help", "tagsets"),
- ("taggers", "maxent_treebank_pos_tagger"),
- ("tokenizers", "punkt"),
- ]
- _PY3_DATA_UPDATES = [os.path.join(*path_list) for path_list in DATA_UPDATES]
- def add_py3_data(path):
- for item in _PY3_DATA_UPDATES:
- if item in str(path) and "/PY3" not in str(path):
- pos = path.index(item) + len(item)
- if path[pos : pos + 4] == ".zip":
- pos += 4
- path = path[:pos] + "/PY3" + path[pos:]
- break
- return path
- # for use in adding /PY3 to the second (filename) argument
- # of the file pointers in data.py
- def py3_data(init_func):
- def _decorator(*args, **kwargs):
- args = (args[0], add_py3_data(args[1])) + args[2:]
- return init_func(*args, **kwargs)
- return wraps(init_func)(_decorator)
|