erroranalysis.py 1.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041
# -*- coding: utf-8 -*-
# Natural Language Toolkit: Transformation-based learning
#
# Copyright (C) 2001-2020 NLTK Project
# Author: Marcus Uneson <marcus.uneson@gmail.com>
# based on previous (nltk2) version by
# Christopher Maloof, Edward Loper, Steven Bird
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT

# returns a list of errors in string format
  11. def error_list(train_sents, test_sents):
  12. """
  13. Returns a list of human-readable strings indicating the errors in the
  14. given tagging of the corpus.
  15. :param train_sents: The correct tagging of the corpus
  16. :type train_sents: list(tuple)
  17. :param test_sents: The tagged corpus
  18. :type test_sents: list(tuple)
  19. """
  20. hdr = ("%25s | %s | %s\n" + "-" * 26 + "+" + "-" * 24 + "+" + "-" * 26) % (
  21. "left context",
  22. "word/test->gold".center(22),
  23. "right context",
  24. )
  25. errors = [hdr]
  26. for (train_sent, test_sent) in zip(train_sents, test_sents):
  27. for wordnum, (word, train_pos) in enumerate(train_sent):
  28. test_pos = test_sent[wordnum][1]
  29. if train_pos != test_pos:
  30. left = " ".join("%s/%s" % w for w in train_sent[:wordnum])
  31. right = " ".join("%s/%s" % w for w in train_sent[wordnum + 1 :])
  32. mid = "%s/%s->%s" % (word, test_pos, train_pos)
  33. errors.append(
  34. "%25s | %s | %s" % (left[-25:], mid.center(22), right[:25])
  35. )
  36. return errors