query.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. from __future__ import unicode_literals
  2. from enum import Enum
  3. class QueryPresence(Enum):
  4. """Defines possible behaviours for the term's presence in a document."""
  5. OPTIONAL = 1 # default
  6. REQUIRED = 2
  7. PROHIBITED = 3 # documents that contain this term will not be returned
  8. class Query(object):
  9. """A `lunr.Query` provides a programmatic way of defining queries to be
  10. performed against a `lunr.Index`.
  11. Prefer constructing a `lunr.Query` using `the lunr.Index.query` method
  12. so the query object is pre-initialized with the right index fields.
  13. """
  14. # Constants for indicating what kind of automatic wildcard insertion will
  15. # be used when constructing a query clause.
  16. # This allows wildcards to be added to the beginning and end of a term
  17. # without having to manually do any string concatenation.
  18. # The wildcard constants can be bitwise combined to select both leading and
  19. # trailing wildcards.
  20. WILDCARD = "*"
  21. WILDCARD_NONE = 0
  22. WILDCARD_LEADING = 1
  23. WILDCARD_TRAILING = 2
  24. def __init__(self, all_fields):
  25. self.clauses = []
  26. self.all_fields = all_fields
  27. def __repr__(self):
  28. return '<Query fields="{}" clauses="{}">'.format(
  29. ",".join(self.all_fields), ",".join(c.term for c in self.clauses)
  30. )
  31. def clause(self, *args, **kwargs):
  32. """Adds a `lunr.Clause` to this query.
  33. Unless the clause contains the fields to be matched all fields will be
  34. matched. In addition a default boost of 1 is applied to the clause.
  35. If the first argument is a `lunr.Clause` it will be mutated and added,
  36. otherwise args and kwargs will be used in the constructor.
  37. Returns:
  38. lunr.Query: The Query itself.
  39. """
  40. if args and isinstance(args[0], Clause):
  41. clause = args[0]
  42. else:
  43. clause = Clause(*args, **kwargs)
  44. if not clause.fields:
  45. clause.fields = self.all_fields
  46. if (clause.wildcard & Query.WILDCARD_LEADING) and (
  47. clause.term[0] != Query.WILDCARD
  48. ):
  49. clause.term = Query.WILDCARD + clause.term
  50. if (clause.wildcard & Query.WILDCARD_TRAILING) and (
  51. clause.term[-1] != Query.WILDCARD
  52. ):
  53. clause.term = clause.term + Query.WILDCARD
  54. self.clauses.append(clause)
  55. return self
  56. def term(self, term, **kwargs):
  57. """Adds a term to the current query, creating a Clause and adds it to
  58. the list of clauses making up this Query.
  59. The term is not tokenized and used "as is". Any conversion to token
  60. or token-like strings should be performed before calling this method.
  61. For example:
  62. query.term(lunr.Tokenizer("foo bar"))
  63. Args:
  64. term (Token or iterable): Token or iterable of tokens to add.
  65. kwargs (dict): Additional properties to add to the Clause.
  66. """
  67. if isinstance(term, (list, tuple)):
  68. for t in term:
  69. self.term(t, **kwargs)
  70. else:
  71. self.clause(str(term), **kwargs)
  72. return self
  73. def is_negated(self):
  74. """A negated query is one in which every clause has a presence of
  75. prohibited. These queries require some special processing to return
  76. the expected results.
  77. """
  78. return all(
  79. clause.presence == QueryPresence.PROHIBITED for clause in self.clauses
  80. )
  81. class Clause(object):
  82. """A single clause in a `lunr.Query` contains a term and details on
  83. how to match that term against a `lunr.Index`
  84. Args:
  85. term (str, optional): The term for the clause.
  86. field (iterable, optional): The fields for the term to be searched
  87. against.
  88. edit_distance (int, optional): The character distance to use, defaults
  89. to 0.
  90. use_pipeline (bool, optional): Whether the clause should be pre
  91. processed by the index's pipeline, default to True.
  92. boost (int, optional): Boost to apply to the clause, defaults to 1.
  93. wildcard (Query.WILDCARD_*, optional): Any of the Query.WILDCARD
  94. constants defining if a wildcard is to be used and how, defaults
  95. to Query.WILDCARD_NONE.
  96. presence (QueryPresence, optional): Behaviour for a terms presence
  97. in a document.
  98. """
  99. def __init__(
  100. self,
  101. term=None,
  102. fields=None,
  103. edit_distance=0,
  104. use_pipeline=True,
  105. boost=1,
  106. wildcard=Query.WILDCARD_NONE,
  107. presence=QueryPresence.OPTIONAL,
  108. ):
  109. super(Clause, self).__init__()
  110. self.term = term
  111. self.fields = fields or []
  112. self.edit_distance = edit_distance
  113. self.use_pipeline = use_pipeline
  114. self.boost = boost
  115. self.wildcard = wildcard
  116. self.presence = presence
  117. def __repr__(self):
  118. return '<Clause term="{}">'.format(self.term)