test_disagreement.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. # -*- coding: utf-8 -*-
  2. import unittest
  3. from nltk.metrics.agreement import AnnotationTask
  4. class TestDisagreement(unittest.TestCase):
  5. '''
  6. Class containing unit tests for nltk.metrics.agreement.Disagreement.
  7. '''
  8. def test_easy(self):
  9. '''
  10. Simple test, based on
  11. https://github.com/foolswood/krippendorffs_alpha/raw/master/krippendorff.pdf.
  12. '''
  13. data = [('coder1', 'dress1', 'YES'),
  14. ('coder2', 'dress1', 'NO'),
  15. ('coder3', 'dress1', 'NO'),
  16. ('coder1', 'dress2', 'YES'),
  17. ('coder2', 'dress2', 'NO'),
  18. ('coder3', 'dress3', 'NO'),
  19. ]
  20. annotation_task = AnnotationTask(data)
  21. self.assertAlmostEqual(annotation_task.alpha(), -0.3333333)
  22. def test_easy2(self):
  23. '''
  24. Same simple test with 1 rating removed.
  25. Removal of that rating should not matter: K-Apha ignores items with
  26. only 1 rating.
  27. '''
  28. data = [('coder1', 'dress1', 'YES'),
  29. ('coder2', 'dress1', 'NO'),
  30. ('coder3', 'dress1', 'NO'),
  31. ('coder1', 'dress2', 'YES'),
  32. ('coder2', 'dress2', 'NO'),
  33. ]
  34. annotation_task = AnnotationTask(data)
  35. self.assertAlmostEqual(annotation_task.alpha(), -0.3333333)
  36. def test_advanced(self):
  37. '''
  38. More advanced test, based on
  39. http://www.agreestat.com/research_papers/onkrippendorffalpha.pdf
  40. '''
  41. data = [('A', '1', '1'),
  42. ('B', '1', '1'),
  43. ('D', '1', '1'),
  44. ('A', '2', '2'),
  45. ('B', '2', '2'),
  46. ('C', '2', '3'),
  47. ('D', '2', '2'),
  48. ('A', '3', '3'),
  49. ('B', '3', '3'),
  50. ('C', '3', '3'),
  51. ('D', '3', '3'),
  52. ('A', '4', '3'),
  53. ('B', '4', '3'),
  54. ('C', '4', '3'),
  55. ('D', '4', '3'),
  56. ('A', '5', '2'),
  57. ('B', '5', '2'),
  58. ('C', '5', '2'),
  59. ('D', '5', '2'),
  60. ('A', '6', '1'),
  61. ('B', '6', '2'),
  62. ('C', '6', '3'),
  63. ('D', '6', '4'),
  64. ('A', '7', '4'),
  65. ('B', '7', '4'),
  66. ('C', '7', '4'),
  67. ('D', '7', '4'),
  68. ('A', '8', '1'),
  69. ('B', '8', '1'),
  70. ('C', '8', '2'),
  71. ('D', '8', '1'),
  72. ('A', '9', '2'),
  73. ('B', '9', '2'),
  74. ('C', '9', '2'),
  75. ('D', '9', '2'),
  76. ('B', '10', '5'),
  77. ('C', '10', '5'),
  78. ('D', '10', '5'),
  79. ('C', '11', '1'),
  80. ('D', '11', '1'),
  81. ('C', '12', '3'),
  82. ]
  83. annotation_task = AnnotationTask(data)
  84. self.assertAlmostEqual(annotation_task.alpha(), 0.743421052632)
  85. def test_advanced2(self):
  86. '''
  87. Same more advanced example, but with 1 rating removed.
  88. Again, removal of that 1 rating shoudl not matter.
  89. '''
  90. data = [('A', '1', '1'),
  91. ('B', '1', '1'),
  92. ('D', '1', '1'),
  93. ('A', '2', '2'),
  94. ('B', '2', '2'),
  95. ('C', '2', '3'),
  96. ('D', '2', '2'),
  97. ('A', '3', '3'),
  98. ('B', '3', '3'),
  99. ('C', '3', '3'),
  100. ('D', '3', '3'),
  101. ('A', '4', '3'),
  102. ('B', '4', '3'),
  103. ('C', '4', '3'),
  104. ('D', '4', '3'),
  105. ('A', '5', '2'),
  106. ('B', '5', '2'),
  107. ('C', '5', '2'),
  108. ('D', '5', '2'),
  109. ('A', '6', '1'),
  110. ('B', '6', '2'),
  111. ('C', '6', '3'),
  112. ('D', '6', '4'),
  113. ('A', '7', '4'),
  114. ('B', '7', '4'),
  115. ('C', '7', '4'),
  116. ('D', '7', '4'),
  117. ('A', '8', '1'),
  118. ('B', '8', '1'),
  119. ('C', '8', '2'),
  120. ('D', '8', '1'),
  121. ('A', '9', '2'),
  122. ('B', '9', '2'),
  123. ('C', '9', '2'),
  124. ('D', '9', '2'),
  125. ('B', '10', '5'),
  126. ('C', '10', '5'),
  127. ('D', '10', '5'),
  128. ('C', '11', '1'),
  129. ('D', '11', '1'),
  130. ('C', '12', '3'),
  131. ]
  132. annotation_task = AnnotationTask(data)
  133. self.assertAlmostEqual(annotation_task.alpha(), 0.743421052632)