test_cfd_mutation.py 1.3 KB

123456789101112131415161718192021222324252627282930313233343536373839
  1. import unittest
  2. from nltk import ConditionalFreqDist, tokenize
  3. class TestEmptyCondFreq(unittest.TestCase):
  4. def test_tabulate(self):
  5. empty = ConditionalFreqDist()
  6. self.assertEqual(empty.conditions(),[])
  7. try:
  8. empty.tabulate(conditions="BUG") # nonexistent keys shouldn't be added
  9. except:
  10. pass
  11. self.assertEqual(empty.conditions(), [])
  12. def test_plot(self):
  13. empty = ConditionalFreqDist()
  14. self.assertEqual(empty.conditions(),[])
  15. try:
  16. empty.plot(conditions=["BUG"]) # nonexistent keys shouldn't be added
  17. except:
  18. pass
  19. self.assertEqual(empty.conditions(),[])
  20. def test_increment(self):
  21. # make sure that we can still mutate cfd normally
  22. text = "cow cat mouse cat tiger"
  23. cfd = ConditionalFreqDist()
  24. # create cfd with word length as condition
  25. for word in tokenize.word_tokenize(text):
  26. condition = len(word)
  27. cfd[condition][word] += 1
  28. self.assertEqual(cfd.conditions(), [3,5])
  29. # incrementing previously unseen key is still possible
  30. cfd[2]['hi'] += 1
  31. self.assertEqual(set(cfd.conditions()),set([3,5,2])) # new condition added
  32. self.assertEqual(cfd[2]['hi'], 1) # key's frequency incremented from 0 (unseen) to 1