# -*- coding: utf-8 -*-
"""
Unit tests for nltk.corpus.wordnet
See also nltk/test/wordnet.doctest
"""
import collections
import os
import unittest

from nose import SkipTest

from nltk.corpus.reader.wordnet import WordNetCorpusReader
from nltk.corpus import wordnet as wn
from nltk.corpus import wordnet_ic as wnic
from nltk.data import find as find_data

wn.ensure_loaded()
S = wn.synset
L = wn.lemma


class WordNetDemo(unittest.TestCase):
    def test_retrieve_synset(self):
        move_synset = S('go.v.21')
        self.assertEqual(move_synset.name(), "move.v.15")
        self.assertEqual(move_synset.lemma_names(), ['move', 'go'])
        self.assertEqual(
            move_synset.definition(), "have a turn; make one's move in a game"
        )
        self.assertEqual(move_synset.examples(), ['Can I go now?'])
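        # Illustrative extra checks (hedged, given the lemma names asserted
        # above): 'go.v.21' and 'move.v.15' name the same synset, and a Lemma
        # resolves back to the synset it belongs to.
        self.assertEqual(S('go.v.21'), S('move.v.15'))
        self.assertEqual(L('move.v.15.go').synset(), S('move.v.15'))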

    def test_retrieve_synsets(self):
        self.assertEqual(sorted(wn.synsets('zap', pos='n')), [S('zap.n.01')])
        self.assertEqual(
            sorted(wn.synsets('zap', pos='v')),
            [S('microwave.v.01'), S('nuke.v.01'), S('zap.v.01'), S('zap.v.02')],
        )
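        # Illustrative extra check (hedged): every synset returned for 'zap'
        # should carry 'zap' among its lemma names.
        for synset in wn.synsets('zap'):
            self.assertIn('zap', synset.lemma_names())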

    def test_hyperhyponyms(self):
        # Not every synset has hypernyms().
        self.assertEqual(S('travel.v.01').hypernyms(), [])
        self.assertEqual(S('travel.v.02').hypernyms(), [S('travel.v.03')])
        self.assertEqual(S('travel.v.03').hypernyms(), [])
        # Test hyper-/hyponyms.
        self.assertEqual(S('breakfast.n.1').hypernyms(), [S('meal.n.01')])
        first_five_meal_hypo = [
            S('banquet.n.02'),
            S('bite.n.04'),
            S('breakfast.n.01'),
            S('brunch.n.01'),
            S('buffet.n.02'),
        ]
        self.assertEqual(sorted(S('meal.n.1').hyponyms()[:5]), first_five_meal_hypo)
        self.assertEqual(S('Austen.n.1').instance_hypernyms(), [S('writer.n.01')])
        first_five_composer_hypo = [
            S('ambrose.n.01'),
            S('bach.n.01'),
            S('barber.n.01'),
            S('bartok.n.01'),
            S('beethoven.n.01'),
        ]
        self.assertEqual(
            S('composer.n.1').instance_hyponyms()[:5], first_five_composer_hypo
        )
        # Test root hyper-/hyponyms.
        self.assertEqual(S('person.n.01').root_hypernyms(), [S('entity.n.01')])
        self.assertEqual(S('sail.v.01').root_hypernyms(), [S('travel.v.01')])
        self.assertEqual(
            S('fall.v.12').root_hypernyms(), [S('act.v.01'), S('fall.v.17')]
        )
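        # Illustrative extra checks (hedged): hypernym and hyponym pointers are
        # inverses of one another, and noun hierarchies root at entity.n.01.
        self.assertIn(S('breakfast.n.01'), S('meal.n.01').hyponyms())
        self.assertEqual(S('breakfast.n.01').root_hypernyms(), [S('entity.n.01')])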

    def test_derivationally_related_forms(self):
        # Test `derivationally_related_forms()`.
        self.assertEqual(
            L('zap.v.03.nuke').derivationally_related_forms(),
            [L('atomic_warhead.n.01.nuke')],
        )
        self.assertEqual(
            L('zap.v.03.atomize').derivationally_related_forms(),
            [L('atomization.n.02.atomization')],
        )
        self.assertEqual(
            L('zap.v.03.atomise').derivationally_related_forms(),
            [L('atomization.n.02.atomisation')],
        )
        self.assertEqual(L('zap.v.03.zap').derivationally_related_forms(), [])
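        # Illustrative extra check (hedged): derivational links are symmetric
        # lexical pointers, so the noun lemma should point back to the verb.
        self.assertIn(
            L('zap.v.03.nuke'),
            L('atomic_warhead.n.01.nuke').derivationally_related_forms(),
        )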

    def test_meronyms_holonyms(self):
        # Test meronyms, holonyms.
        self.assertEqual(
            S('dog.n.01').member_holonyms(), [S('canis.n.01'), S('pack.n.06')]
        )
        self.assertEqual(S('dog.n.01').part_meronyms(), [S('flag.n.07')])
        self.assertEqual(S('faculty.n.2').member_meronyms(), [S('professor.n.01')])
        self.assertEqual(S('copilot.n.1').member_holonyms(), [S('crew.n.01')])
        self.assertEqual(
            S('table.n.2').part_meronyms(),
            [S('leg.n.03'), S('tabletop.n.01'), S('tableware.n.01')],
        )
        self.assertEqual(S('course.n.7').part_holonyms(), [S('meal.n.01')])
        self.assertEqual(
            S('water.n.1').substance_meronyms(), [S('hydrogen.n.01'), S('oxygen.n.01')]
        )
        self.assertEqual(
            S('gin.n.1').substance_holonyms(),
            [
                S('gin_and_it.n.01'),
                S('gin_and_tonic.n.01'),
                S('martini.n.01'),
                S('pink_lady.n.01'),
            ],
        )
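        # Illustrative extra checks (hedged): meronym and holonym pointers are
        # inverses, so the relations above should hold in the other direction.
        self.assertIn(S('dog.n.01'), S('pack.n.06').member_meronyms())
        self.assertIn(S('table.n.02'), S('leg.n.03').part_holonyms())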

    def test_antonyms(self):
        # Test antonyms.
        self.assertEqual(
            L('leader.n.1.leader').antonyms(), [L('follower.n.01.follower')]
        )
        self.assertEqual(
            L('increase.v.1.increase').antonyms(), [L('decrease.v.01.decrease')]
        )
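        # Illustrative extra check (hedged): antonymy is a symmetric lexical
        # relation, so it should also hold in the opposite direction.
        self.assertIn(L('leader.n.01.leader'), L('follower.n.01.follower').antonyms())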

    def test_misc_relations(self):
        # Test misc relations.
        self.assertEqual(S('snore.v.1').entailments(), [S('sleep.v.01')])
        self.assertEqual(
            S('heavy.a.1').similar_tos(),
            [
                S('dense.s.03'),
                S('doughy.s.01'),
                S('heavier-than-air.s.01'),
                S('hefty.s.02'),
                S('massive.s.04'),
                S('non-buoyant.s.01'),
                S('ponderous.s.02'),
            ],
        )
        self.assertEqual(S('light.a.1').attributes(), [S('weight.n.01')])
        self.assertEqual(S('heavy.a.1').attributes(), [S('weight.n.01')])
        # Test pertainyms.
        self.assertEqual(
            L('English.a.1.English').pertainyms(), [L('england.n.01.England')]
        )
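        # Illustrative extra check (hedged): attribute pointers link nouns and
        # adjectives in both directions, so weight.n.01 should list both.
        self.assertIn(S('heavy.a.01'), S('weight.n.01').attributes())
        self.assertIn(S('light.a.01'), S('weight.n.01').attributes())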

    def test_lch(self):
        # Test LCH.
        self.assertEqual(
            S('person.n.01').lowest_common_hypernyms(S('dog.n.01')),
            [S('organism.n.01')],
        )
        self.assertEqual(
            S('woman.n.01').lowest_common_hypernyms(S('girlfriend.n.02')),
            [S('woman.n.01')],
        )
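        # Illustrative extra checks (hedged): a synset is its own lowest common
        # hypernym, and dog/cat meet at carnivore.n.01 in the WordNet 3.0 data.
        self.assertEqual(
            S('dog.n.01').lowest_common_hypernyms(S('dog.n.01')), [S('dog.n.01')]
        )
        self.assertEqual(
            S('dog.n.01').lowest_common_hypernyms(S('cat.n.01')), [S('carnivore.n.01')]
        )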

    def test_domains(self):
        # Test domains.
        self.assertEqual(S('code.n.03').topic_domains(), [S('computer_science.n.01')])
        self.assertEqual(S('pukka.a.01').region_domains(), [S('india.n.01')])
        self.assertEqual(S('freaky.a.01').usage_domains(), [S('slang.n.02')])

    def test_in_topic_domains(self):
        # Test in domains.
        self.assertEqual(
            S('computer_science.n.01').in_topic_domains()[0], S('access.n.05')
        )
        self.assertEqual(S('germany.n.01').in_region_domains()[23], S('trillion.n.02'))
        self.assertEqual(S('slang.n.02').in_usage_domains()[1], S('airhead.n.01'))

    def test_wordnet_similarities(self):
        # Path based similarities.
        self.assertAlmostEqual(S('cat.n.01').path_similarity(S('cat.n.01')), 1.0)
        self.assertAlmostEqual(S('dog.n.01').path_similarity(S('cat.n.01')), 0.2)
        self.assertAlmostEqual(
            S('dog.n.01').lch_similarity(S('cat.n.01')), 2.028, places=3
        )
        self.assertAlmostEqual(
            S('dog.n.01').wup_similarity(S('cat.n.01')), 0.8571, places=3
        )
        # Information Content similarities.
        brown_ic = wnic.ic('ic-brown.dat')
        self.assertAlmostEqual(
            S('dog.n.01').jcn_similarity(S('cat.n.01'), brown_ic), 0.4497, places=3
        )
        semcor_ic = wnic.ic('ic-semcor.dat')
        self.assertAlmostEqual(
            S('dog.n.01').lin_similarity(S('cat.n.01'), semcor_ic), 0.8863, places=3
        )
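        # Illustrative extra checks (hedged): path similarity is symmetric for
        # nouns, and a synset is maximally similar to itself.
        self.assertAlmostEqual(S('cat.n.01').path_similarity(S('dog.n.01')), 0.2)
        self.assertAlmostEqual(S('dog.n.01').wup_similarity(S('dog.n.01')), 1.0)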

    def test_omw_lemma_no_trailing_underscore(self):
        expected = sorted([
            u'popolna_sprememba_v_mišljenju',
            u'popoln_obrat',
            u'preobrat',
            u'preobrat_v_mišljenju',
        ])
        self.assertEqual(sorted(S('about-face.n.02').lemma_names(lang='slv')), expected)
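        # Illustrative extra check (hedged, assuming the Open Multilingual
        # WordNet data is installed, as the Slovenian lookup above requires):
        # 'slv' should be among the languages reported by wn.langs().
        self.assertIn('slv', wn.langs())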

    def test_iterable_type_for_all_lemma_names(self):
        # Duck-test for iterables.
        # See https://stackoverflow.com/a/36230057/610569
        cat_lemmas = wn.all_lemma_names(lang='cat')
        eng_lemmas = wn.all_lemma_names(lang='eng')

        self.assertTrue(hasattr(eng_lemmas, '__iter__'))
        self.assertTrue(hasattr(eng_lemmas, '__next__') or hasattr(eng_lemmas, 'next'))
        self.assertTrue(eng_lemmas.__iter__() is eng_lemmas)

        self.assertTrue(hasattr(cat_lemmas, '__iter__'))
        self.assertTrue(hasattr(cat_lemmas, '__next__') or hasattr(cat_lemmas, 'next'))
        self.assertTrue(cat_lemmas.__iter__() is cat_lemmas)
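        # Illustrative extra check (hedged): consuming the iterator should
        # yield lemma-name strings (duck-typed via a string method).
        self.assertTrue(hasattr(next(eng_lemmas), 'lower'))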