# -*- coding: utf-8 -*-
"""
Unit tests for nltk.corpus.wordnet
See also nltk/test/wordnet.doctest
"""
import collections
import os
import unittest

from nose import SkipTest

from nltk.corpus.reader.wordnet import WordNetCorpusReader
from nltk.corpus import wordnet as wn
from nltk.corpus import wordnet_ic as wnic
from nltk.data import find as find_data
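
# Load the (normally lazily-loaded) WordNet corpus up front so that the
# module-level shortcuts below are bound to the real corpus reader.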
wn.ensure_loaded()
S = wn.synset
L = wn.lemma


class WordNetDemo(unittest.TestCase):
    def test_retrieve_synset(self):
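        # A synset can be retrieved through any of its lemmas: 'go.v.21'
        # resolves to the synset whose canonical name is 'move.v.15'.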
        move_synset = S('go.v.21')
        self.assertEqual(move_synset.name(), "move.v.15")
        self.assertEqual(move_synset.lemma_names(), ['move', 'go'])
        self.assertEqual(
            move_synset.definition(), "have a turn; make one's move in a game"
        )
        self.assertEqual(move_synset.examples(), ['Can I go now?'])

    def test_retrieve_synsets(self):
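        # synsets() returns every synset that contains the given word,
        # optionally restricted to a single part of speech.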
        self.assertEqual(sorted(wn.synsets('zap', pos='n')), [S('zap.n.01')])
        self.assertEqual(
            sorted(wn.synsets('zap', pos='v')),
            [S('microwave.v.01'), S('nuke.v.01'), S('zap.v.01'), S('zap.v.02')],
        )

    def test_hyperhyponyms(self):
        # Not every synset has hypernyms().
        self.assertEqual(S('travel.v.01').hypernyms(), [])
        self.assertEqual(S('travel.v.02').hypernyms(), [S('travel.v.03')])
        self.assertEqual(S('travel.v.03').hypernyms(), [])
        # Test hyper-/hyponyms.
        self.assertEqual(S('breakfast.n.1').hypernyms(), [S('meal.n.01')])
        first_five_meal_hypo = [
            S('banquet.n.02'),
            S('bite.n.04'),
            S('breakfast.n.01'),
            S('brunch.n.01'),
            S('buffet.n.02'),
        ]
        self.assertEqual(sorted(S('meal.n.1').hyponyms()[:5]), first_five_meal_hypo)
        self.assertEqual(S('Austen.n.1').instance_hypernyms(), [S('writer.n.01')])
        first_five_composer_hypo = [
            S('ambrose.n.01'),
            S('bach.n.01'),
            S('barber.n.01'),
            S('bartok.n.01'),
            S('beethoven.n.01'),
        ]
        self.assertEqual(
            S('composer.n.1').instance_hyponyms()[:5], first_five_composer_hypo
        )
        # Test root hyper-/hyponyms.
        self.assertEqual(S('person.n.01').root_hypernyms(), [S('entity.n.01')])
        self.assertEqual(S('sail.v.01').root_hypernyms(), [S('travel.v.01')])
        self.assertEqual(
            S('fall.v.12').root_hypernyms(), [S('act.v.01'), S('fall.v.17')]
        )

    def test_derivationally_related_forms(self):
        # Test `derivationally_related_forms()`.
        self.assertEqual(
            L('zap.v.03.nuke').derivationally_related_forms(),
            [L('atomic_warhead.n.01.nuke')],
        )
        self.assertEqual(
            L('zap.v.03.atomize').derivationally_related_forms(),
            [L('atomization.n.02.atomization')],
        )
        self.assertEqual(
            L('zap.v.03.atomise').derivationally_related_forms(),
            [L('atomization.n.02.atomisation')],
        )
        self.assertEqual(L('zap.v.03.zap').derivationally_related_forms(), [])

    def test_meronyms_holonyms(self):
        # Test meronyms, holonyms.
        self.assertEqual(
            S('dog.n.01').member_holonyms(), [S('canis.n.01'), S('pack.n.06')]
        )
        self.assertEqual(S('dog.n.01').part_meronyms(), [S('flag.n.07')])
        self.assertEqual(S('faculty.n.2').member_meronyms(), [S('professor.n.01')])
        self.assertEqual(S('copilot.n.1').member_holonyms(), [S('crew.n.01')])
        self.assertEqual(
            S('table.n.2').part_meronyms(),
            [S('leg.n.03'), S('tabletop.n.01'), S('tableware.n.01')],
        )
        self.assertEqual(S('course.n.7').part_holonyms(), [S('meal.n.01')])
        self.assertEqual(
            S('water.n.1').substance_meronyms(), [S('hydrogen.n.01'), S('oxygen.n.01')]
        )
        self.assertEqual(
            S('gin.n.1').substance_holonyms(),
            [
                S('gin_and_it.n.01'),
                S('gin_and_tonic.n.01'),
                S('martini.n.01'),
                S('pink_lady.n.01'),
            ],
        )

    def test_antonyms(self):
        # Test antonyms.
        self.assertEqual(
            L('leader.n.1.leader').antonyms(), [L('follower.n.01.follower')]
        )
        self.assertEqual(
            L('increase.v.1.increase').antonyms(), [L('decrease.v.01.decrease')]
        )

    def test_misc_relations(self):
        # Test misc relations.
        self.assertEqual(S('snore.v.1').entailments(), [S('sleep.v.01')])
        self.assertEqual(
            S('heavy.a.1').similar_tos(),
            [
                S('dense.s.03'),
                S('doughy.s.01'),
                S('heavier-than-air.s.01'),
                S('hefty.s.02'),
                S('massive.s.04'),
                S('non-buoyant.s.01'),
                S('ponderous.s.02'),
            ],
        )
        self.assertEqual(S('light.a.1').attributes(), [S('weight.n.01')])
        self.assertEqual(S('heavy.a.1').attributes(), [S('weight.n.01')])
        # Test pertainyms.
        self.assertEqual(
            L('English.a.1.English').pertainyms(), [L('england.n.01.England')]
        )

    def test_lch(self):
        # Test lowest common hypernyms (LCH).
        self.assertEqual(
            S('person.n.01').lowest_common_hypernyms(S('dog.n.01')),
            [S('organism.n.01')],
        )
        self.assertEqual(
            S('woman.n.01').lowest_common_hypernyms(S('girlfriend.n.02')),
            [S('woman.n.01')],
        )

    def test_domains(self):
        # Test domains.
        self.assertEqual(S('code.n.03').topic_domains(), [S('computer_science.n.01')])
        self.assertEqual(S('pukka.a.01').region_domains(), [S('india.n.01')])
        self.assertEqual(S('freaky.a.01').usage_domains(), [S('slang.n.02')])

    def test_in_topic_domains(self):
        # Test the inverse (in_*_domains) relations.
        self.assertEqual(
            S('computer_science.n.01').in_topic_domains()[0], S('access.n.05')
        )
        self.assertEqual(S('germany.n.01').in_region_domains()[23], S('trillion.n.02'))
        self.assertEqual(S('slang.n.02').in_usage_domains()[1], S('airhead.n.01'))

    def test_wordnet_similarities(self):
        # Path-based similarities.
        self.assertAlmostEqual(S('cat.n.01').path_similarity(S('cat.n.01')), 1.0)
        self.assertAlmostEqual(S('dog.n.01').path_similarity(S('cat.n.01')), 0.2)
        self.assertAlmostEqual(
            S('dog.n.01').lch_similarity(S('cat.n.01')), 2.028, places=3
        )
        self.assertAlmostEqual(
            S('dog.n.01').wup_similarity(S('cat.n.01')), 0.8571, places=3
        )
        # Information content (IC) based similarities.
        brown_ic = wnic.ic('ic-brown.dat')
        self.assertAlmostEqual(
            S('dog.n.01').jcn_similarity(S('cat.n.01'), brown_ic), 0.4497, places=3
        )
        semcor_ic = wnic.ic('ic-semcor.dat')
        self.assertAlmostEqual(
            S('dog.n.01').lin_similarity(S('cat.n.01'), semcor_ic), 0.8863, places=3
        )

    def test_omw_lemma_no_trailing_underscore(self):
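        # Multi-word lemmas in the Open Multilingual Wordnet join their parts
        # with underscores; the Slovenian ('slv') lemmas of 'about-face.n.02'
        # must come back without a stray trailing underscore.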
        expected = sorted([
            u'popolna_sprememba_v_mišljenju',
            u'popoln_obrat',
            u'preobrat',
            u'preobrat_v_mišljenju',
        ])
        self.assertEqual(sorted(S('about-face.n.02').lemma_names(lang='slv')), expected)

    def test_iterable_type_for_all_lemma_names(self):
        # Duck-test for iterables.
        # See https://stackoverflow.com/a/36230057/610569
        cat_lemmas = wn.all_lemma_names(lang='cat')
        eng_lemmas = wn.all_lemma_names(lang='eng')
        self.assertTrue(hasattr(eng_lemmas, '__iter__'))
        self.assertTrue(hasattr(eng_lemmas, '__next__') or hasattr(eng_lemmas, 'next'))
        self.assertTrue(eng_lemmas.__iter__() is eng_lemmas)
        self.assertTrue(hasattr(cat_lemmas, '__iter__'))
        self.assertTrue(hasattr(cat_lemmas, '__next__') or hasattr(cat_lemmas, 'next'))
        self.assertTrue(cat_lemmas.__iter__() is cat_lemmas)