# wordfreq_app.py (915 B)
# Natural Language Toolkit: Wordfreq Application
#
# Copyright (C) 2001-2020 NLTK Project
# Author: Sumukh Ghodke <sghodke@csse.unimelb.edu.au>
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT
from itertools import accumulate

from matplotlib import pylab
from nltk.corpus import gutenberg
from nltk.text import Text
  10. def plot_word_freq_dist(text):
  11. fd = text.vocab()
  12. samples = [item for item, _ in fd.most_common(50)]
  13. values = [fd[sample] for sample in samples]
  14. values = [sum(values[: i + 1]) * 100.0 / fd.N() for i in range(len(values))]
  15. pylab.title(text.name)
  16. pylab.xlabel("Samples")
  17. pylab.ylabel("Cumulative Percentage")
  18. pylab.plot(values)
  19. pylab.xticks(range(len(samples)), [str(s) for s in samples], rotation=90)
  20. pylab.show()
  21. def app():
  22. t1 = Text(gutenberg.words("melville-moby_dick.txt"))
  23. plot_word_freq_dist(t1)
  24. if __name__ == "__main__":
  25. app()
  26. __all__ = ["app"]