test_gdfa.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. # -*- coding: utf-8 -*-
  2. """
  3. Tests GDFA alignments
  4. """
  5. import functools
  6. import io
  7. import unittest
  8. from nltk.translate.gdfa import grow_diag_final_and
  9. class TestGDFA(unittest.TestCase):
  10. def test_from_eflomal_outputs(self):
  11. """
  12. Testing GDFA with first 10 eflomal outputs from issue #1829
  13. https://github.com/nltk/nltk/issues/1829
  14. """
  15. # Input.
  16. forwards = [
  17. '0-0 1-2',
  18. '0-0 1-1',
  19. '0-0 2-1 3-2 4-3 5-4 6-5 7-6 8-7 7-8 9-9 10-10 9-11 11-12 12-13 13-14',
  20. '0-0 1-1 1-2 2-3 3-4 4-5 4-6 5-7 6-8 8-9 9-10',
  21. '0-0 14-1 15-2 16-3 20-5 21-6 22-7 5-8 6-9 7-10 8-11 9-12 10-13 11-14 12-15 13-16 14-17 17-18 18-19 19-20 20-21 23-22 24-23 25-24 26-25 27-27 28-28 29-29 30-30 31-31',
  22. '0-0 1-1 0-2 2-3',
  23. '0-0 2-2 4-4',
  24. '0-0 1-1 2-3 3-4 5-5 7-6 8-7 9-8 10-9 11-10 12-11 13-12 14-13 15-14 16-16 17-17 18-18 19-19 20-20',
  25. '3-0 4-1 6-2 5-3 6-4 7-5 8-6 9-7 10-8 11-9 16-10 9-12 10-13 12-14',
  26. '1-0',
  27. ]
  28. backwards = [
  29. '0-0 1-2',
  30. '0-0 1-1',
  31. '0-0 2-1 3-2 4-3 5-4 6-5 7-6 8-7 9-8 10-10 11-12 12-11 13-13',
  32. '0-0 1-2 2-3 3-4 4-6 6-8 7-5 8-7 9-8',
  33. '0-0 1-8 2-9 3-10 4-11 5-12 6-11 8-13 9-14 10-15 11-16 12-17 13-18 14-19 15-20 16-21 17-22 18-23 19-24 20-29 21-30 22-31 23-2 24-3 25-4 26-5 27-5 28-6 29-7 30-28 31-31',
  34. '0-0 1-1 2-3',
  35. '0-0 1-1 2-3 4-4',
  36. '0-0 1-1 2-3 3-4 5-5 7-6 8-7 9-8 10-9 11-10 12-11 13-12 14-13 15-14 16-16 17-17 18-18 19-19 20-16 21-18',
  37. '0-0 1-1 3-2 4-1 5-3 6-4 7-5 8-6 9-7 10-8 11-9 12-8 13-9 14-8 15-9 16-10',
  38. '1-0',
  39. ]
  40. source_lens = [2, 3, 3, 15, 11, 33, 4, 6, 23, 18]
  41. target_lens = [2, 4, 3, 16, 12, 33, 5, 6, 22, 16]
  42. # Expected Output.
  43. expected = [
  44. [(0, 0), (1, 2)],
  45. [(0, 0), (1, 1)],
  46. [
  47. (0, 0),
  48. (2, 1),
  49. (3, 2),
  50. (4, 3),
  51. (5, 4),
  52. (6, 5),
  53. (7, 6),
  54. (8, 7),
  55. (10, 10),
  56. (11, 12),
  57. ],
  58. [
  59. (0, 0),
  60. (1, 1),
  61. (1, 2),
  62. (2, 3),
  63. (3, 4),
  64. (4, 5),
  65. (4, 6),
  66. (5, 7),
  67. (6, 8),
  68. (7, 5),
  69. (8, 7),
  70. (8, 9),
  71. (9, 8),
  72. (9, 10),
  73. ],
  74. [
  75. (0, 0),
  76. (1, 8),
  77. (2, 9),
  78. (3, 10),
  79. (4, 11),
  80. (5, 8),
  81. (6, 9),
  82. (6, 11),
  83. (7, 10),
  84. (8, 11),
  85. (31, 31),
  86. ],
  87. [(0, 0), (0, 2), (1, 1), (2, 3)],
  88. [(0, 0), (1, 1), (2, 2), (2, 3), (4, 4)],
  89. [
  90. (0, 0),
  91. (1, 1),
  92. (2, 3),
  93. (3, 4),
  94. (5, 5),
  95. (7, 6),
  96. (8, 7),
  97. (9, 8),
  98. (10, 9),
  99. (11, 10),
  100. (12, 11),
  101. (13, 12),
  102. (14, 13),
  103. (15, 14),
  104. (16, 16),
  105. (17, 17),
  106. (18, 18),
  107. (19, 19),
  108. ],
  109. [
  110. (0, 0),
  111. (1, 1),
  112. (3, 0),
  113. (3, 2),
  114. (4, 1),
  115. (5, 3),
  116. (6, 2),
  117. (6, 4),
  118. (7, 5),
  119. (8, 6),
  120. (9, 7),
  121. (9, 12),
  122. (10, 8),
  123. (10, 13),
  124. (11, 9),
  125. (12, 8),
  126. (12, 14),
  127. (13, 9),
  128. (14, 8),
  129. (15, 9),
  130. (16, 10),
  131. ],
  132. [(1, 0)],
  133. [
  134. (0, 0),
  135. (1, 1),
  136. (3, 2),
  137. (4, 3),
  138. (5, 4),
  139. (6, 5),
  140. (7, 6),
  141. (9, 10),
  142. (10, 12),
  143. (11, 13),
  144. (12, 14),
  145. (13, 15),
  146. ],
  147. ]
  148. # Iterate through all 10 examples and check for expected outputs.
  149. for fw, bw, src_len, trg_len, expect in zip(
  150. forwards, backwards, source_lens, target_lens, expected
  151. ):
  152. self.assertListEqual(expect, grow_diag_final_and(src_len, trg_len, fw, bw))