query_parser.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. from __future__ import unicode_literals
  2. import six
  3. from lunr.query_lexer import QueryLexer
  4. from lunr.query import Clause, QueryPresence
  5. from lunr.exceptions import QueryParseError
  6. class QueryParser:
  7. def __init__(self, string, query):
  8. self.lexer = QueryLexer(string)
  9. self.query = query
  10. self.current_clause = Clause()
  11. self.lexeme_idx = 0
  12. def parse(self):
  13. self.lexer.run()
  14. self.lexemes = self.lexer.lexemes
  15. state = self.__class__.parse_clause
  16. while state:
  17. state = state(self)
  18. return self.query
  19. def peek_lexeme(self):
  20. try:
  21. return self.lexemes[self.lexeme_idx]
  22. except IndexError:
  23. return None
  24. def consume_lexeme(self):
  25. lexeme = self.peek_lexeme()
  26. self.lexeme_idx += 1
  27. return lexeme
  28. def next_clause(self):
  29. self.query.clause(self.current_clause)
  30. self.current_clause = Clause()
  31. @classmethod
  32. def parse_clause(cls, parser):
  33. lexeme = parser.peek_lexeme()
  34. if lexeme is None:
  35. return
  36. if lexeme["type"] == QueryLexer.FIELD:
  37. return cls.parse_field
  38. elif lexeme["type"] == QueryLexer.TERM:
  39. return cls.parse_term
  40. elif lexeme["type"] == QueryLexer.PRESENCE:
  41. return cls.parse_presence
  42. else:
  43. raise QueryParseError(
  44. "Expected either a field or a term, found {}{}".format(
  45. lexeme["type"],
  46. 'with value "' + lexeme["string"] + '"'
  47. if len(lexeme["string"])
  48. else "",
  49. )
  50. )
  51. @classmethod
  52. def parse_field(cls, parser):
  53. lexeme = parser.consume_lexeme()
  54. if lexeme["string"] not in parser.query.all_fields:
  55. raise QueryParseError(
  56. 'Unrecognized field "{}", possible fields {}'.format(
  57. lexeme["string"], ", ".join(parser.query.all_fields)
  58. )
  59. )
  60. parser.current_clause.fields = [lexeme["string"]]
  61. next_lexeme = parser.peek_lexeme()
  62. if next_lexeme is None:
  63. raise QueryParseError("Expected term, found nothing")
  64. if next_lexeme["type"] == QueryLexer.TERM:
  65. return cls.parse_term
  66. else:
  67. raise QueryParseError("Expected term, found {}".format(next_lexeme["type"]))
  68. @classmethod
  69. def parse_term(cls, parser):
  70. lexeme = parser.consume_lexeme()
  71. parser.current_clause.term = lexeme["string"].lower()
  72. if "*" in lexeme["string"]:
  73. parser.current_clause.use_pipeline = False
  74. return cls._peek_next_lexeme(parser)
  75. @classmethod
  76. def parse_presence(cls, parser):
  77. lexeme = parser.consume_lexeme()
  78. if lexeme is None:
  79. return
  80. if lexeme["string"] == "-":
  81. parser.current_clause.presence = QueryPresence.PROHIBITED
  82. elif lexeme["string"] == "+":
  83. parser.current_clause.presence = QueryPresence.REQUIRED
  84. else:
  85. raise QueryParseError(
  86. "Unrecognized parser operator: {}, expected `+` or `-`".format(
  87. lexeme.str
  88. )
  89. )
  90. next_lexeme = parser.peek_lexeme()
  91. if next_lexeme is None:
  92. raise QueryParseError("Expected either a field or a term, found nothing")
  93. if next_lexeme["type"] == QueryLexer.FIELD:
  94. return cls.parse_field
  95. elif next_lexeme["type"] == QueryLexer.TERM:
  96. return cls.parse_term
  97. else:
  98. raise QueryParseError(
  99. "Expected either a field or a term, found {}".format(lexeme["type"])
  100. )
  101. @classmethod
  102. def parse_edit_distance(cls, parser):
  103. lexeme = parser.consume_lexeme()
  104. try:
  105. edit_distance = int(lexeme["string"])
  106. except ValueError as e:
  107. six.raise_from(QueryParseError("Edit distance must be numeric"), e)
  108. parser.current_clause.edit_distance = edit_distance
  109. return cls._peek_next_lexeme(parser)
  110. @classmethod
  111. def parse_boost(cls, parser):
  112. lexeme = parser.consume_lexeme()
  113. try:
  114. boost = int(lexeme["string"])
  115. except ValueError as e:
  116. six.raise_from(QueryParseError("Boost must be numeric"), e)
  117. parser.current_clause.boost = boost
  118. return cls._peek_next_lexeme(parser)
  119. @classmethod
  120. def _peek_next_lexeme(cls, parser):
  121. next_lexeme = parser.peek_lexeme()
  122. if next_lexeme is None:
  123. parser.next_clause()
  124. return
  125. if next_lexeme["type"] == QueryLexer.TERM:
  126. parser.next_clause()
  127. return cls.parse_term
  128. elif next_lexeme["type"] == QueryLexer.FIELD:
  129. parser.next_clause()
  130. return cls.parse_field
  131. elif next_lexeme["type"] == QueryLexer.EDIT_DISTANCE:
  132. return cls.parse_edit_distance
  133. elif next_lexeme["type"] == QueryLexer.BOOST:
  134. return cls.parse_boost
  135. elif next_lexeme["type"] == QueryLexer.PRESENCE:
  136. parser.next_clause()
  137. return cls.parse_presence
  138. else:
  139. raise QueryParseError(
  140. "Unexpected lexeme type {}".format(next_lexeme["type"])
  141. )