| 1234567891011121314151617181920212223 |
- from __future__ import unicode_literals
- import re
def generate_trimmer(word_characters):
    """Returns a trimmer function from a string of word characters.

    ``word_characters`` is interpolated verbatim into a regular expression
    character class, so it may contain ranges such as ``a-z`` as well as
    literal characters.

    The returned ``trimmer(token, i=None, tokens=None)`` calls
    ``token.update`` with a function that strips leading and trailing
    non-word characters from the string it is given, and returns whatever
    ``token.update`` returns. ``i`` and ``tokens`` are accepted but unused.

    Non-word characters in the interior of a string are preserved, so
    ``"!hello-world!"`` becomes ``"hello-world"``. A string containing no
    word characters at all is returned unchanged.

    TODO: lunr-languages ships with lists of word characters for each language
    I haven't found an equivalent in Python, we may need to copy it.
    """
    # Span from the first word character to the last one. The optional greedy
    # tail extends the match to the final word character; DOTALL lets that
    # tail cross embedded newlines. A fully anchored single-run pattern would
    # fail to match (and therefore fail to trim) any string with a non-word
    # character in the middle.
    core_re = re.compile(
        r"[{0}](?:.*[{0}])?".format(word_characters), re.DOTALL
    )

    def trimmer(token, i=None, tokens=None):
        def trim(s, metadata=None):
            match = core_re.search(s)
            if match is None:
                # No word characters present: leave the string untouched.
                return s
            return match.group(0)

        return token.update(trim)

    return trimmer
|