test_concordance.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. # -*- coding: utf-8 -*-
  2. import unittest
  3. import contextlib
  4. import sys
  5. from io import StringIO
  6. from nose import with_setup
  7. from nltk.corpus import gutenberg
  8. from nltk.text import Text
  9. @contextlib.contextmanager
  10. def stdout_redirect(where):
  11. sys.stdout = where
  12. try:
  13. yield where
  14. finally:
  15. sys.stdout = sys.__stdout__
  16. class TestConcordance(unittest.TestCase):
  17. """Text constructed using: http://www.nltk.org/book/ch01.html"""
  18. @classmethod
  19. def setup_class(cls):
  20. cls.corpus = gutenberg.words('melville-moby_dick.txt')
  21. @classmethod
  22. def teardown_class(cls):
  23. pass
  24. def setUp(self):
  25. self.text = Text(TestConcordance.corpus)
  26. self.query = "monstrous"
  27. self.maxDiff = None
  28. self.list_out = [
  29. 'ong the former , one was of a most monstrous size . ... This came towards us , ',
  30. 'ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r',
  31. 'll over with a heathenish array of monstrous clubs and spears . Some were thick',
  32. 'd as you gazed , and wondered what monstrous cannibal and savage could ever hav',
  33. 'that has survived the flood ; most monstrous and most mountainous ! That Himmal',
  34. 'they might scout at Moby Dick as a monstrous fable , or still worse and more de',
  35. 'th of Radney .\'" CHAPTER 55 Of the Monstrous Pictures of Whales . I shall ere l',
  36. 'ing Scenes . In connexion with the monstrous pictures of whales , I am strongly',
  37. 'ere to enter upon those still more monstrous stories of them which are to be fo',
  38. 'ght have been rummaged out of this monstrous cabinet there is no telling . But ',
  39. 'of Whale - Bones ; for Whales of a monstrous size are oftentimes cast up dead u',
  40. ]
  41. def tearDown(self):
  42. pass
  43. def test_concordance_list(self):
  44. concordance_out = self.text.concordance_list(self.query)
  45. self.assertEqual(self.list_out, [c.line for c in concordance_out])
  46. def test_concordance_width(self):
  47. list_out = [
  48. "monstrous",
  49. "monstrous",
  50. "monstrous",
  51. "monstrous",
  52. "monstrous",
  53. "monstrous",
  54. "Monstrous",
  55. "monstrous",
  56. "monstrous",
  57. "monstrous",
  58. "monstrous",
  59. ]
  60. concordance_out = self.text.concordance_list(self.query, width=0)
  61. self.assertEqual(list_out, [c.query for c in concordance_out])
  62. def test_concordance_lines(self):
  63. concordance_out = self.text.concordance_list(self.query, lines=3)
  64. self.assertEqual(self.list_out[:3], [c.line for c in concordance_out])
  65. def test_concordance_print(self):
  66. print_out = """Displaying 11 of 11 matches:
  67. ong the former , one was of a most monstrous size . ... This came towards us ,
  68. ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r
  69. ll over with a heathenish array of monstrous clubs and spears . Some were thick
  70. d as you gazed , and wondered what monstrous cannibal and savage could ever hav
  71. that has survived the flood ; most monstrous and most mountainous ! That Himmal
  72. they might scout at Moby Dick as a monstrous fable , or still worse and more de
  73. th of Radney .'" CHAPTER 55 Of the Monstrous Pictures of Whales . I shall ere l
  74. ing Scenes . In connexion with the monstrous pictures of whales , I am strongly
  75. ere to enter upon those still more monstrous stories of them which are to be fo
  76. ght have been rummaged out of this monstrous cabinet there is no telling . But
  77. of Whale - Bones ; for Whales of a monstrous size are oftentimes cast up dead u
  78. """
  79. with stdout_redirect(StringIO()) as stdout:
  80. self.text.concordance(self.query)
  81. def strip_space(raw_str):
  82. return raw_str.replace(" ", "")
  83. self.assertEqual(strip_space(print_out), strip_space(stdout.getvalue()))