- .. Copyright (C) 2001-2020 NLTK Project
- .. For license information, see LICENSE.TXT
- =================
- WordNet Interface
- =================
- WordNet is just another NLTK corpus reader, and can be imported like this:
- >>> from nltk.corpus import wordnet
- For more compact code, we recommend:
- >>> from nltk.corpus import wordnet as wn
- -----
- Words
- -----
- Look up a word using ``synsets()``; this function has an optional ``pos`` argument
- which lets you constrain the part of speech of the word:
- >>> wn.synsets('dog') # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
- [Synset('dog.n.01'), Synset('frump.n.01'), Synset('dog.n.03'), Synset('cad.n.01'),
- Synset('frank.n.02'), Synset('pawl.n.01'), Synset('andiron.n.01'), Synset('chase.v.01')]
- >>> wn.synsets('dog', pos=wn.VERB)
- [Synset('chase.v.01')]
- The other parts of speech are ``NOUN``, ``ADJ`` and ``ADV``.
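- For example, restricting the same lookup to nouns drops the verb sense
- (output abbreviated with ELLIPSIS; the exact list depends on the installed
- WordNet version):
- >>> wn.synsets('dog', pos=wn.NOUN) # doctest: +ELLIPSIS
- [Synset('dog.n.01'), Synset('frump.n.01'), ..., Synset('andiron.n.01')]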
- A synset is identified with a 3-part name of the form ``word.pos.nn``:
- >>> wn.synset('dog.n.01')
- Synset('dog.n.01')
- >>> print(wn.synset('dog.n.01').definition())
- a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds
- >>> len(wn.synset('dog.n.01').examples())
- 1
- >>> print(wn.synset('dog.n.01').examples()[0])
- the dog barked all night
- >>> wn.synset('dog.n.01').lemmas()
- [Lemma('dog.n.01.dog'), Lemma('dog.n.01.domestic_dog'), Lemma('dog.n.01.Canis_familiaris')]
- >>> [str(lemma.name()) for lemma in wn.synset('dog.n.01').lemmas()]
- ['dog', 'domestic_dog', 'Canis_familiaris']
- >>> wn.lemma('dog.n.01.dog').synset()
- Synset('dog.n.01')
- The WordNet corpus reader gives access to the Open Multilingual
- WordNet, using ISO-639 language codes.
- >>> sorted(wn.langs()) # doctest: +NORMALIZE_WHITESPACE
- ['als', 'arb', 'bul', 'cat', 'cmn', 'dan', 'ell', 'eng', 'eus', 'fas',
- 'fin', 'fra', 'glg', 'heb', 'hrv', 'ind', 'ita', 'jpn', 'nld', 'nno',
- 'nob', 'pol', 'por', 'qcn', 'slv', 'spa', 'swe', 'tha', 'zsm']
- >>> wn.synsets(b'\xe7\x8a\xac'.decode('utf-8'), lang='jpn')
- [Synset('dog.n.01'), Synset('spy.n.01')]
- >>> wn.synset('spy.n.01').lemma_names('jpn') # doctest: +NORMALIZE_WHITESPACE
- ['\u3044\u306c', '\u307e\u308f\u3057\u8005', '\u30b9\u30d1\u30a4', '\u56de\u3057\u8005',
- '\u56de\u8005', '\u5bc6\u5075', '\u5de5\u4f5c\u54e1', '\u5efb\u3057\u8005',
- '\u5efb\u8005', '\u63a2', '\u63a2\u308a', '\u72ac', '\u79d8\u5bc6\u635c\u67fb\u54e1',
- '\u8adc\u5831\u54e1', '\u8adc\u8005', '\u9593\u8005', '\u9593\u8adc', '\u96a0\u5bc6']
- >>> wn.synset('dog.n.01').lemma_names('ita')
- ['cane', 'Canis_familiaris']
- >>> wn.lemmas('cane', lang='ita') # doctest: +NORMALIZE_WHITESPACE
- [Lemma('dog.n.01.cane'), Lemma('cramp.n.02.cane'), Lemma('hammer.n.01.cane'), Lemma('bad_person.n.01.cane'),
- Lemma('incompetent.n.01.cane')]
- >>> sorted(wn.synset('dog.n.01').lemmas('dan')) # doctest: +NORMALIZE_WHITESPACE
- [Lemma('dog.n.01.hund'), Lemma('dog.n.01.k\xf8ter'),
- Lemma('dog.n.01.vovhund'), Lemma('dog.n.01.vovse')]
- >>> sorted(wn.synset('dog.n.01').lemmas('por'))
- [Lemma('dog.n.01.cachorra'), Lemma('dog.n.01.cachorro'), Lemma('dog.n.01.cadela'), Lemma('dog.n.01.c\xe3o')]
- >>> dog_lemma = wn.lemma(b'dog.n.01.c\xc3\xa3o'.decode('utf-8'), lang='por')
- >>> dog_lemma
- Lemma('dog.n.01.c\xe3o')
- >>> dog_lemma.lang()
- 'por'
- >>> len(list(wordnet.all_lemma_names(pos='n', lang='jpn')))
- 64797
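- The same iterator covers English as well. Note that ``all_lemma_names()``
- returns a generator, so a quick membership test (a sanity check, not part
- of the original test suite) consumes it:
- >>> 'dog' in wn.all_lemma_names(pos='n')
- True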
- -------
- Synsets
- -------
- `Synset`: a set of synonyms that share a common meaning.
- >>> dog = wn.synset('dog.n.01')
- >>> dog.hypernyms()
- [Synset('canine.n.02'), Synset('domestic_animal.n.01')]
- >>> dog.hyponyms() # doctest: +ELLIPSIS
- [Synset('basenji.n.01'), Synset('corgi.n.01'), Synset('cur.n.01'), Synset('dalmatian.n.02'), ...]
- >>> dog.member_holonyms()
- [Synset('canis.n.01'), Synset('pack.n.06')]
- >>> dog.root_hypernyms()
- [Synset('entity.n.01')]
- >>> wn.synset('dog.n.01').lowest_common_hypernyms(wn.synset('cat.n.01'))
- [Synset('carnivore.n.01')]
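- Part-whole relations other than holonymy are available as well, for
- example meronyms (these outputs assume the standard WordNet 3.0 data):
- >>> wn.synset('tree.n.01').part_meronyms()
- [Synset('burl.n.02'), Synset('crown.n.07'), Synset('limb.n.02'), Synset('stump.n.01'), Synset('trunk.n.01')]
- >>> wn.synset('tree.n.01').substance_meronyms()
- [Synset('heartwood.n.01'), Synset('sapwood.n.01')]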
- Each synset contains one or more lemmas, which represent a specific
- sense of a specific word.
- Note that some relations are defined by WordNet only over Lemmas:
- >>> good = wn.synset('good.a.01')
- >>> good.antonyms()
- Traceback (most recent call last):
- File "<stdin>", line 1, in <module>
- AttributeError: 'Synset' object has no attribute 'antonyms'
- >>> good.lemmas()[0].antonyms()
- [Lemma('bad.a.01.bad')]
- The relations that are currently defined in this way are `antonyms`,
- `derivationally_related_forms` and `pertainyms`.
- If you know the byte offset used to identify a synset in the original
- Princeton WordNet data file, you can use that to instantiate the synset
- in NLTK:
- >>> wn.synset_from_pos_and_offset('n', 4543158)
- Synset('wagon.n.01')
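- The POS and offset can be read back off the synset, giving a minimal
- round-trip check (assuming the same WordNet data file):
- >>> wagon = wn.synset_from_pos_and_offset('n', 4543158)
- >>> wagon.pos(), wagon.offset()
- ('n', 4543158)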
- ------
- Lemmas
- ------
- >>> eat = wn.lemma('eat.v.03.eat')
- >>> eat
- Lemma('feed.v.06.eat')
- >>> print(eat.key())
- eat%2:34:02::
- >>> eat.count()
- 4
- >>> wn.lemma_from_key(eat.key())
- Lemma('feed.v.06.eat')
- >>> wn.lemma_from_key(eat.key()).synset()
- Synset('feed.v.06')
- >>> wn.lemma_from_key('feebleminded%5:00:00:retarded:00')
- Lemma('backward.s.03.feebleminded')
- >>> for lemma in wn.synset('eat.v.03').lemmas():
- ... print(lemma, lemma.count())
- ...
- Lemma('feed.v.06.feed') 3
- Lemma('feed.v.06.eat') 4
- >>> for lemma in wn.lemmas('eat', 'v'):
- ... print(lemma, lemma.count())
- ...
- Lemma('eat.v.01.eat') 61
- Lemma('eat.v.02.eat') 13
- Lemma('feed.v.06.eat') 4
- Lemma('eat.v.04.eat') 0
- Lemma('consume.v.05.eat') 0
- Lemma('corrode.v.01.eat') 0
- >>> wn.lemma('jump.v.11.jump')
- Lemma('jump.v.11.jump')
- Lemmas can also have relations between them:
- >>> vocal = wn.lemma('vocal.a.01.vocal')
- >>> vocal.derivationally_related_forms()
- [Lemma('vocalize.v.02.vocalize')]
- >>> vocal.pertainyms()
- [Lemma('voice.n.02.voice')]
- >>> vocal.antonyms()
- [Lemma('instrumental.a.01.instrumental')]
- The three relations above exist only on lemmas, not on synsets.
- -----------
- Verb Frames
- -----------
- >>> wn.synset('think.v.01').frame_ids()
- [5, 9]
- >>> for lemma in wn.synset('think.v.01').lemmas():
- ... print(lemma, lemma.frame_ids())
- ... print(" | ".join(lemma.frame_strings()))
- ...
- Lemma('think.v.01.think') [5, 9]
- Something think something Adjective/Noun | Somebody think somebody
- Lemma('think.v.01.believe') [5, 9]
- Something believe something Adjective/Noun | Somebody believe somebody
- Lemma('think.v.01.consider') [5, 9]
- Something consider something Adjective/Noun | Somebody consider somebody
- Lemma('think.v.01.conceive') [5, 9]
- Something conceive something Adjective/Noun | Somebody conceive somebody
- >>> wn.synset('stretch.v.02').frame_ids()
- [8]
- >>> for lemma in wn.synset('stretch.v.02').lemmas():
- ... print(lemma, lemma.frame_ids())
- ... print(" | ".join(lemma.frame_strings()))
- ...
- Lemma('stretch.v.02.stretch') [8, 2]
- Somebody stretch something | Somebody stretch
- Lemma('stretch.v.02.extend') [8]
- Somebody extend something
- ----------
- Similarity
- ----------
- >>> dog = wn.synset('dog.n.01')
- >>> cat = wn.synset('cat.n.01')
- >>> hit = wn.synset('hit.v.01')
- >>> slap = wn.synset('slap.v.01')
- ``synset1.path_similarity(synset2):``
- Return a score denoting how similar two word senses are, based on the
- shortest path that connects the senses in the is-a (hypernym/hyponym)
- taxonomy. The score is in the range 0 to 1. By default, a fake root
- node is added to verb taxonomies, so a value is returned even in cases
- where previously no path could be found and None was returned. The old
- behavior can be restored by setting simulate_root to False. A score of
- 1 represents identity, i.e. comparing a sense with itself returns 1.
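- For instance:
- >>> dog.path_similarity(dog)
- 1.0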
- >>> dog.path_similarity(cat) # doctest: +ELLIPSIS
- 0.2...
- >>> hit.path_similarity(slap) # doctest: +ELLIPSIS
- 0.142...
- >>> wn.path_similarity(hit, slap) # doctest: +ELLIPSIS
- 0.142...
- >>> print(hit.path_similarity(slap, simulate_root=False))
- None
- >>> print(wn.path_similarity(hit, slap, simulate_root=False))
- None
- ``synset1.lch_similarity(synset2):``
- Leacock-Chodorow Similarity:
- Return a score denoting how similar two word senses are, based on the
- shortest path that connects the senses (as above) and the maximum depth
- of the taxonomy in which the senses occur. The relationship is given
- as -log(p/2d) where p is the shortest path length and d the taxonomy
- depth.
- >>> dog.lch_similarity(cat) # doctest: +ELLIPSIS
- 2.028...
- >>> hit.lch_similarity(slap) # doctest: +ELLIPSIS
- 1.312...
- >>> wn.lch_similarity(hit, slap) # doctest: +ELLIPSIS
- 1.312...
- >>> print(hit.lch_similarity(slap, simulate_root=False))
- None
- >>> print(wn.lch_similarity(hit, slap, simulate_root=False))
- None
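- As a rough sanity check of the formula (assuming WordNet 3.0, where the
- maximum noun taxonomy depth d is 19 and p counts nodes, i.e. edges + 1):
- >>> from math import log
- >>> dog.shortest_path_distance(cat)
- 4
- >>> -log((4 + 1) / (2 * 19)) # doctest: +ELLIPSIS
- 2.028...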
- ``synset1.wup_similarity(synset2):``
- Wu-Palmer Similarity:
- Return a score denoting how similar two word senses are, based on the
- depth of the two senses in the taxonomy and that of their Least Common
- Subsumer (most specific ancestor node). Note that at this time the
- scores given do _not_ always agree with those given by Pedersen's Perl
- implementation of WordNet::Similarity.
- The LCS does not necessarily lie on the shortest path connecting the
- two senses, since it is by definition the common ancestor deepest in
- the taxonomy, not the one closest to the two senses; typically,
- however, it does. Where multiple candidates for the LCS exist, the one
- whose shortest path to the root node is longest is selected. Where the
- LCS has multiple paths to the root, the longer path is used for the
- purposes of the calculation.
- >>> dog.wup_similarity(cat) # doctest: +ELLIPSIS
- 0.857...
- >>> hit.wup_similarity(slap)
- 0.25
- >>> wn.wup_similarity(hit, slap)
- 0.25
- >>> print(hit.wup_similarity(slap, simulate_root=False))
- None
- >>> print(wn.wup_similarity(hit, slap, simulate_root=False))
- None
- ``wordnet_ic``
- Information Content:
- Load an information content file from the wordnet_ic corpus.
- >>> from nltk.corpus import wordnet_ic
- >>> brown_ic = wordnet_ic.ic('ic-brown.dat')
- >>> semcor_ic = wordnet_ic.ic('ic-semcor.dat')
- Or you can create an information content dictionary from a corpus (or
- anything that has a words() method).
- >>> from nltk.corpus import genesis
- >>> genesis_ic = wn.ic(genesis, False, 0.0)
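- The two positional arguments above are ``weight_senses_equally`` and
- ``smoothing``; spelled out as keywords (assuming the current ``ic()``
- signature), the same call reads:
- >>> genesis_ic = wn.ic(genesis, weight_senses_equally=False, smoothing=0.0)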
- ``synset1.res_similarity(synset2, ic):``
- Resnik Similarity:
- Return a score denoting how similar two word senses are, based on the
- Information Content (IC) of the Least Common Subsumer (most specific
- ancestor node). Note that for any similarity measure that uses
- information content, the result is dependent on the corpus used to
- generate the information content and the specifics of how the
- information content was created.
- >>> dog.res_similarity(cat, brown_ic) # doctest: +ELLIPSIS
- 7.911...
- >>> dog.res_similarity(cat, genesis_ic) # doctest: +ELLIPSIS
- 7.204...
- ``synset1.jcn_similarity(synset2, ic):``
- Jiang-Conrath Similarity
- Return a score denoting how similar two word senses are, based on the
- Information Content (IC) of the Least Common Subsumer (most specific
- ancestor node) and that of the two input Synsets. The relationship is
- given by the equation 1 / (IC(s1) + IC(s2) - 2 * IC(lcs)).
- >>> dog.jcn_similarity(cat, brown_ic) # doctest: +ELLIPSIS
- 0.449...
- >>> dog.jcn_similarity(cat, genesis_ic) # doctest: +ELLIPSIS
- 0.285...
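- Since Resnik similarity is exactly the IC of the Least Common Subsumer,
- the Jiang-Conrath score can be re-derived from it (a minimal sanity
- check using ``information_content`` from the reader module):
- >>> from nltk.corpus.reader.wordnet import information_content
- >>> ic_lcs = dog.res_similarity(cat, brown_ic)
- >>> 1 / (information_content(dog, brown_ic) + information_content(cat, brown_ic) - 2 * ic_lcs) # doctest: +ELLIPSIS
- 0.449...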
- ``synset1.lin_similarity(synset2, ic):``
- Lin Similarity:
- Return a score denoting how similar two word senses are, based on the
- Information Content (IC) of the Least Common Subsumer (most specific
- ancestor node) and that of the two input Synsets. The relationship is
- given by the equation 2 * IC(lcs) / (IC(s1) + IC(s2)).
- >>> dog.lin_similarity(cat, semcor_ic) # doctest: +ELLIPSIS
- 0.886...
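- The same identity verifies the Lin formula:
- >>> ic_lcs = dog.res_similarity(cat, semcor_ic)
- >>> 2 * ic_lcs / (information_content(dog, semcor_ic) + information_content(cat, semcor_ic)) # doctest: +ELLIPSIS
- 0.886...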
- ---------------------
- Access to all Synsets
- ---------------------
- Iterate over all the noun synsets:
- >>> for synset in list(wn.all_synsets('n'))[:10]:
- ... print(synset)
- ...
- Synset('entity.n.01')
- Synset('physical_entity.n.01')
- Synset('abstraction.n.06')
- Synset('thing.n.12')
- Synset('object.n.01')
- Synset('whole.n.02')
- Synset('congener.n.03')
- Synset('living_thing.n.01')
- Synset('organism.n.01')
- Synset('benthos.n.02')
- Get all synsets for a word, optionally restricted by POS:
- >>> wn.synsets('dog') # doctest: +ELLIPSIS
- [Synset('dog.n.01'), Synset('frump.n.01'), Synset('dog.n.03'), Synset('cad.n.01'), ...]
- >>> wn.synsets('dog', pos='v')
- [Synset('chase.v.01')]
- Walk through the noun synsets looking at their hypernyms:
- >>> from itertools import islice
- >>> for synset in islice(wn.all_synsets('n'), 5):
- ... print(synset, synset.hypernyms())
- ...
- Synset('entity.n.01') []
- Synset('physical_entity.n.01') [Synset('entity.n.01')]
- Synset('abstraction.n.06') [Synset('entity.n.01')]
- Synset('thing.n.12') [Synset('physical_entity.n.01')]
- Synset('object.n.01') [Synset('physical_entity.n.01')]
- ------
- Morphy
- ------
- Look up forms not in WordNet, with the help of Morphy:
- >>> wn.morphy('denied', wn.NOUN)
- >>> print(wn.morphy('denied', wn.VERB))
- deny
- >>> wn.synsets('denied', wn.NOUN)
- []
- >>> wn.synsets('denied', wn.VERB) # doctest: +NORMALIZE_WHITESPACE
- [Synset('deny.v.01'), Synset('deny.v.02'), Synset('deny.v.03'), Synset('deny.v.04'),
- Synset('deny.v.05'), Synset('traverse.v.03'), Synset('deny.v.07')]
- Morphy uses a combination of inflectional ending rules and exception
- lists to handle a variety of different possibilities:
- >>> print(wn.morphy('dogs'))
- dog
- >>> print(wn.morphy('churches'))
- church
- >>> print(wn.morphy('aardwolves'))
- aardwolf
- >>> print(wn.morphy('abaci'))
- abacus
- >>> print(wn.morphy('book', wn.NOUN))
- book
- >>> wn.morphy('hardrock', wn.ADV)
- >>> wn.morphy('book', wn.ADJ)
- >>> wn.morphy('his', wn.NOUN)
- >>>
- ---------------
- Synset Closures
- ---------------
- Compute transitive closures of synsets:
- >>> dog = wn.synset('dog.n.01')
- >>> hypo = lambda s: s.hyponyms()
- >>> hyper = lambda s: s.hypernyms()
- >>> list(dog.closure(hypo, depth=1)) == dog.hyponyms()
- True
- >>> list(dog.closure(hyper, depth=1)) == dog.hypernyms()
- True
- >>> list(dog.closure(hypo)) # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
- [Synset('basenji.n.01'), Synset('corgi.n.01'), Synset('cur.n.01'),
- Synset('dalmatian.n.02'), Synset('great_pyrenees.n.01'),
- Synset('griffon.n.02'), Synset('hunting_dog.n.01'), Synset('lapdog.n.01'),
- Synset('leonberg.n.01'), Synset('mexican_hairless.n.01'),
- Synset('newfoundland.n.01'), Synset('pooch.n.01'), Synset('poodle.n.01'), ...]
- >>> list(dog.closure(hyper)) # doctest: +NORMALIZE_WHITESPACE
- [Synset('canine.n.02'), Synset('domestic_animal.n.01'), Synset('carnivore.n.01'), Synset('animal.n.01'),
- Synset('placental.n.01'), Synset('organism.n.01'), Synset('mammal.n.01'), Synset('living_thing.n.01'),
- Synset('vertebrate.n.01'), Synset('whole.n.02'), Synset('chordate.n.01'), Synset('object.n.01'),
- Synset('physical_entity.n.01'), Synset('entity.n.01')]
- ----------------
- Regression Tests
- ----------------
- Bug 85: morphy returns the base form of a word, if its input is given
- as a base form for a POS for which that word is not defined:
- >>> wn.synsets('book', wn.NOUN)
- [Synset('book.n.01'), Synset('book.n.02'), Synset('record.n.05'), Synset('script.n.01'), Synset('ledger.n.01'), Synset('book.n.06'), Synset('book.n.07'), Synset('koran.n.01'), Synset('bible.n.01'), Synset('book.n.10'), Synset('book.n.11')]
- >>> wn.synsets('book', wn.ADJ)
- []
- >>> wn.morphy('book', wn.NOUN)
- 'book'
- >>> wn.morphy('book', wn.ADJ)
- Bug 160: wup_similarity breaks when the two synsets have no common hypernym
- >>> t = wn.synsets('picasso')[0]
- >>> m = wn.synsets('male')[1]
- >>> t.wup_similarity(m) # doctest: +ELLIPSIS
- 0.631...
- >>> t = wn.synsets('titan')[1]
- >>> s = wn.synsets('say', wn.VERB)[0]
- >>> print(t.wup_similarity(s))
- None
- Bug 21: "instance of" not included in LCS (very similar to bug 160)
- >>> a = wn.synsets("writings")[0]
- >>> b = wn.synsets("scripture")[0]
- >>> brown_ic = wordnet_ic.ic('ic-brown.dat')
- >>> a.jcn_similarity(b, brown_ic) # doctest: +ELLIPSIS
- 0.175...
- Bug 221: Verb root IC is zero
- >>> from nltk.corpus.reader.wordnet import information_content
- >>> s = wn.synsets('say', wn.VERB)[0]
- >>> information_content(s, brown_ic) # doctest: +ELLIPSIS
- 4.623...
- Bug 161: Comparison between WN keys/lemmas should not be case sensitive
- >>> k = wn.synsets("jefferson")[0].lemmas()[0].key()
- >>> wn.lemma_from_key(k)
- Lemma('jefferson.n.01.Jefferson')
- >>> wn.lemma_from_key(k.upper())
- Lemma('jefferson.n.01.Jefferson')
- Bug 99: WordNet root_hypernyms gives incorrect results
- >>> from nltk.corpus import wordnet as wn
- >>> for s in wn.all_synsets(wn.NOUN):
- ... if s.root_hypernyms()[0] != wn.synset('entity.n.01'):
- ... print(s, s.root_hypernyms())
- ...
- >>>
- Bug 382: JCN Division by zero error
- >>> tow = wn.synset('tow.v.01')
- >>> shlep = wn.synset('shlep.v.02')
- >>> from nltk.corpus import wordnet_ic
- >>> brown_ic = wordnet_ic.ic('ic-brown.dat')
- >>> tow.jcn_similarity(shlep, brown_ic) # doctest: +ELLIPSIS
- 1...e+300
- Bug 428: Depth is zero for instance nouns
- >>> s = wn.synset("lincoln.n.01")
- >>> s.max_depth() > 0
- True
- Bug 429: Information content smoothing used old reference to all_synsets
- >>> genesis_ic = wn.ic(genesis, True, 1.0)
- Bug 430: all_synsets used wrong pos lookup when synsets were cached
- >>> for ii in wn.all_synsets(): pass
- >>> for ii in wn.all_synsets(): pass
- Bug 470: shortest_path_distance ignored instance hypernyms
- >>> google = wordnet.synsets("google")[0]
- >>> earth = wordnet.synsets("earth")[0]
- >>> google.wup_similarity(earth) # doctest: +ELLIPSIS
- 0.1...
- Bug 484: similarity metrics returned -1 instead of None for no LCS
- >>> t = wn.synsets('fly', wn.VERB)[0]
- >>> s = wn.synsets('say', wn.VERB)[0]
- >>> print(s.shortest_path_distance(t))
- None
- >>> print(s.path_similarity(t, simulate_root=False))
- None
- >>> print(s.lch_similarity(t, simulate_root=False))
- None
- >>> print(s.wup_similarity(t, simulate_root=False))
- None
- Bug 427: "pants" does not return all the senses it should
- >>> from nltk.corpus import wordnet
- >>> wordnet.synsets("pants",'n')
- [Synset('bloomers.n.01'), Synset('pant.n.01'), Synset('trouser.n.01'), Synset('gasp.n.01')]
- Bug 482: Some nouns not being lemmatised by WordNetLemmatizer().lemmatize
- >>> from nltk.stem.wordnet import WordNetLemmatizer
- >>> WordNetLemmatizer().lemmatize("eggs", pos="n")
- 'egg'
- >>> WordNetLemmatizer().lemmatize("legs", pos="n")
- 'leg'
- Bug 284: instance hypernyms not used in similarity calculations
- >>> wn.synset('john.n.02').lch_similarity(wn.synset('dog.n.01')) # doctest: +ELLIPSIS
- 1.335...
- >>> wn.synset('john.n.02').wup_similarity(wn.synset('dog.n.01')) # doctest: +ELLIPSIS
- 0.571...
- >>> wn.synset('john.n.02').res_similarity(wn.synset('dog.n.01'), brown_ic) # doctest: +ELLIPSIS
- 2.224...
- >>> wn.synset('john.n.02').jcn_similarity(wn.synset('dog.n.01'), brown_ic) # doctest: +ELLIPSIS
- 0.075...
- >>> wn.synset('john.n.02').lin_similarity(wn.synset('dog.n.01'), brown_ic) # doctest: +ELLIPSIS
- 0.252...
- >>> wn.synset('john.n.02').hypernym_paths() # doctest: +ELLIPSIS
- [[Synset('entity.n.01'), ..., Synset('john.n.02')]]
- Issue 541: add domains to wordnet
- >>> wn.synset('code.n.03').topic_domains()
- [Synset('computer_science.n.01')]
- >>> wn.synset('pukka.a.01').region_domains()
- [Synset('india.n.01')]
- >>> wn.synset('freaky.a.01').usage_domains()
- [Synset('slang.n.02')]
- Issue 629: wordnet failures when Python is run with -O optimizations
- >>> # Run the test suite with python -O to check this
- >>> wn.synsets("brunch")
- [Synset('brunch.n.01'), Synset('brunch.v.01')]
- Issue 395: wordnet returns incorrect result for lowest_common_hypernyms of chef and policeman
- >>> wn.synset('policeman.n.01').lowest_common_hypernyms(wn.synset('chef.n.01'))
- [Synset('person.n.01')]
- Bug https://github.com/nltk/nltk/issues/1641: Non-English lemmas containing capital letters cannot be looked up using wordnet.lemmas() or wordnet.synsets()
- >>> wn.lemmas('Londres', lang='fra')
- [Lemma('united_kingdom.n.01.Londres'), Lemma('london.n.01.Londres'), Lemma('london.n.02.Londres')]
- >>> wn.lemmas('londres', lang='fra')
- [Lemma('united_kingdom.n.01.Londres'), Lemma('london.n.01.Londres'), Lemma('london.n.02.Londres')]
- Patch https://github.com/nltk/nltk/pull/2065: adding 3 functions (relations) to the WordNet class
- >>> wn.synsets("computer_science")[0].in_topic_domains()[2]
- Synset('access_time.n.01')
- >>> wn.synsets("France")[0].in_region_domains()[18]
- Synset('french.n.01')
- >>> wn.synsets("slang")[1].in_usage_domains()[18]
- Synset('can-do.s.01')
|