treetransforms.doctest 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. .. Copyright (C) 2001-2020 NLTK Project
  2. .. For license information, see LICENSE.TXT
  3. -------------------------------------------
  4. Unit tests for the treetransforms module
  5. -------------------------------------------
  6. >>> from copy import deepcopy
  7. >>> from nltk.tree import *
  8. >>> from nltk.treetransforms import *
  9. >>> tree_string = "(TOP (S (S (VP (VBN Turned) (ADVP (RB loose)) (PP (IN in) (NP (NP (NNP Shane) (NNP Longman) (POS 's)) (NN trading) (NN room))))) (, ,) (NP (DT the) (NN yuppie) (NNS dealers)) (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))) (. .)))"
  10. >>> tree = Tree.fromstring(tree_string)
  11. >>> print(tree)
  12. (TOP
  13. (S
  14. (S
  15. (VP
  16. (VBN Turned)
  17. (ADVP (RB loose))
  18. (PP
  19. (IN in)
  20. (NP
  21. (NP (NNP Shane) (NNP Longman) (POS 's))
  22. (NN trading)
  23. (NN room)))))
  24. (, ,)
  25. (NP (DT the) (NN yuppie) (NNS dealers))
  26. (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
  27. (. .)))
  28. Make a copy of the original tree and collapse the subtrees with only one child
  29. >>> collapsedTree = deepcopy(tree)
  30. >>> collapse_unary(collapsedTree)
  31. >>> print(collapsedTree)
  32. (TOP
  33. (S
  34. (S+VP
  35. (VBN Turned)
  36. (ADVP (RB loose))
  37. (PP
  38. (IN in)
  39. (NP
  40. (NP (NNP Shane) (NNP Longman) (POS 's))
  41. (NN trading)
  42. (NN room))))
  43. (, ,)
  44. (NP (DT the) (NN yuppie) (NNS dealers))
  45. (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
  46. (. .)))
  47. >>> collapsedTree2 = deepcopy(tree)
  48. >>> collapse_unary(collapsedTree2, collapsePOS=True, collapseRoot=True)
  49. >>> print(collapsedTree2)
  50. (TOP+S
  51. (S+VP
  52. (VBN Turned)
  53. (ADVP+RB loose)
  54. (PP
  55. (IN in)
  56. (NP
  57. (NP (NNP Shane) (NNP Longman) (POS 's))
  58. (NN trading)
  59. (NN room))))
  60. (, ,)
  61. (NP (DT the) (NN yuppie) (NNS dealers))
  62. (VP (AUX do) (NP (NP+RB little) (ADJP+RB right)))
  63. (. .))
  64. Convert the tree to Chomsky Normal Form, i.e. each subtree has either two
  65. subtree children or a single leaf value. This conversion can be performed
  66. using either left- or right-factoring.
  67. >>> cnfTree = deepcopy(collapsedTree)
  68. >>> chomsky_normal_form(cnfTree, factor='left')
  69. >>> print(cnfTree)
  70. (TOP
  71. (S
  72. (S|<S+VP-,-NP-VP>
  73. (S|<S+VP-,-NP>
  74. (S|<S+VP-,>
  75. (S+VP
  76. (S+VP|<VBN-ADVP> (VBN Turned) (ADVP (RB loose)))
  77. (PP
  78. (IN in)
  79. (NP
  80. (NP|<NP-NN>
  81. (NP
  82. (NP|<NNP-NNP> (NNP Shane) (NNP Longman))
  83. (POS 's))
  84. (NN trading))
  85. (NN room))))
  86. (, ,))
  87. (NP (NP|<DT-NN> (DT the) (NN yuppie)) (NNS dealers)))
  88. (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))))
  89. (. .)))
  90. >>> cnfTree = deepcopy(collapsedTree)
  91. >>> chomsky_normal_form(cnfTree, factor='right')
  92. >>> print(cnfTree)
  93. (TOP
  94. (S
  95. (S+VP
  96. (VBN Turned)
  97. (S+VP|<ADVP-PP>
  98. (ADVP (RB loose))
  99. (PP
  100. (IN in)
  101. (NP
  102. (NP (NNP Shane) (NP|<NNP-POS> (NNP Longman) (POS 's)))
  103. (NP|<NN-NN> (NN trading) (NN room))))))
  104. (S|<,-NP-VP-.>
  105. (, ,)
  106. (S|<NP-VP-.>
  107. (NP (DT the) (NP|<NN-NNS> (NN yuppie) (NNS dealers)))
  108. (S|<VP-.>
  109. (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
  110. (. .))))))
  111. Employ some Markov smoothing to make the artificial node labels a bit more
  112. readable. See the treetransforms.py documentation for more details.
  113. >>> markovTree = deepcopy(collapsedTree)
  114. >>> chomsky_normal_form(markovTree, horzMarkov=2, vertMarkov=1)
  115. >>> print(markovTree)
  116. (TOP
  117. (S^<TOP>
  118. (S+VP^<S>
  119. (VBN Turned)
  120. (S+VP|<ADVP-PP>^<S>
  121. (ADVP^<S+VP> (RB loose))
  122. (PP^<S+VP>
  123. (IN in)
  124. (NP^<PP>
  125. (NP^<NP>
  126. (NNP Shane)
  127. (NP|<NNP-POS>^<NP> (NNP Longman) (POS 's)))
  128. (NP|<NN-NN>^<PP> (NN trading) (NN room))))))
  129. (S|<,-NP>^<TOP>
  130. (, ,)
  131. (S|<NP-VP>^<TOP>
  132. (NP^<S> (DT the) (NP|<NN-NNS>^<S> (NN yuppie) (NNS dealers)))
  133. (S|<VP-.>^<TOP>
  134. (VP^<S>
  135. (AUX do)
  136. (NP^<VP> (NP^<NP> (RB little)) (ADJP^<NP> (RB right))))
  137. (. .))))))
  138. Convert the transformed tree back to its original form
  139. >>> un_chomsky_normal_form(markovTree)
  140. >>> tree == markovTree
  141. True