# Natural Language Toolkit: Translation metrics
#
# Copyright (C) 2001-2020 NLTK Project
# Author: Will Zhang <wilzzha@gmail.com>
#         Guan Gui <ggui@student.unimelb.edu.au>
#         Steven Bird <stevenbird1@gmail.com>
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT

  9. def alignment_error_rate(reference, hypothesis, possible=None):
  10. """
  11. Return the Alignment Error Rate (AER) of an alignment
  12. with respect to a "gold standard" reference alignment.
  13. Return an error rate between 0.0 (perfect alignment) and 1.0 (no
  14. alignment).
  15. >>> from nltk.translate import Alignment
  16. >>> ref = Alignment([(0, 0), (1, 1), (2, 2)])
  17. >>> test = Alignment([(0, 0), (1, 2), (2, 1)])
  18. >>> alignment_error_rate(ref, test) # doctest: +ELLIPSIS
  19. 0.6666666666666667
  20. :type reference: Alignment
  21. :param reference: A gold standard alignment (sure alignments)
  22. :type hypothesis: Alignment
  23. :param hypothesis: A hypothesis alignment (aka. candidate alignments)
  24. :type possible: Alignment or None
  25. :param possible: A gold standard reference of possible alignments
  26. (defaults to *reference* if None)
  27. :rtype: float or None
  28. """
  29. if possible is None:
  30. possible = reference
  31. else:
  32. assert reference.issubset(possible) # sanity check
  33. return 1.0 - (len(hypothesis & reference) + len(hypothesis & possible)) / float(
  34. len(hypothesis) + len(reference)
  35. )