# test_brill.py (1.0 KB)
  1. # -*- coding: utf-8 -*-
  2. """
  3. Tests for Brill tagger.
  4. """
  5. import unittest
  6. from nltk.tag import UnigramTagger, brill, brill_trainer
  7. from nltk.tbl import Template
  8. from nltk.corpus import treebank
  9. from nltk.tbl import demo
  10. class TestBrill(unittest.TestCase):
  11. def test_pos_template(self):
  12. train_sents = treebank.tagged_sents()[:1000]
  13. tagger = UnigramTagger(train_sents)
  14. trainer = brill_trainer.BrillTaggerTrainer(
  15. tagger, [brill.Template(brill.Pos([-1]))]
  16. )
  17. brill_tagger = trainer.train(train_sents)
  18. # Example from https://github.com/nltk/nltk/issues/769
  19. result = brill_tagger.tag('This is a foo bar sentence'.split())
  20. expected = [
  21. ('This', 'DT'),
  22. ('is', 'VBZ'),
  23. ('a', 'DT'),
  24. ('foo', None),
  25. ('bar', 'NN'),
  26. ('sentence', None),
  27. ]
  28. self.assertEqual(result, expected)
  29. @unittest.skip("Should be tested in __main__ of nltk.tbl.demo")
  30. def test_brill_demo(self):
  31. demo()