idf.py 522 B

1234567891011121314151617
  1. from __future__ import unicode_literals
  2. import math
  3. def idf(posting, document_count):
  4. """A function to calculate the inverse document frequency for a posting.
  5. This is shared between the builder and the index.
  6. """
  7. documents_with_term = 0
  8. for field_name in posting:
  9. if field_name == "_index":
  10. continue
  11. documents_with_term += len(posting[field_name].keys())
  12. x = (document_count - documents_with_term + 0.5) / (documents_with_term + 0.5)
  13. return math.log(1 + abs(x))