twitter_demo.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. # -*- coding: utf-8 -*-
  2. # Natural Language Toolkit: Twitter client
  3. #
  4. # Copyright (C) 2001-2020 NLTK Project
  5. # Author: Ewan Klein <ewan@inf.ed.ac.uk>
  6. # Lorenzo Rubio <lrnzcig@gmail.com>
  7. # URL: <http://nltk.org/>
  8. # For license information, see LICENSE.TXT
  9. """
  10. Examples to demo the :py:mod:`twitterclient` code.
  11. These demo functions should all run, with the following caveats:
  12. * You must have obtained API keys from Twitter, and installed them according to
  13. the instructions in the `twitter HOWTO <http://www.nltk.org/howto/twitter.html>`_.
  14. * If you are on a slow network, some of the calls to the Twitter API may
  15. time out.
  16. * If you are being rate limited while searching, you will receive a 420
  17. error response.
  18. * Your terminal window / console must be able to display UTF-8 encoded characters.
  19. For documentation about the Twitter APIs, see `The Streaming APIs Overview
  20. <https://dev.twitter.com/streaming/overview>`_ and `The REST APIs Overview
  21. <https://dev.twitter.com/rest/public>`_.
  22. For error codes see Twitter's
  23. `Error Codes and Responses <https://dev.twitter.com/overview/api/response-codes>`_
  24. """
  25. import datetime
  26. from functools import wraps
  27. import json
  28. from io import StringIO
  29. from nltk.twitter import (
  30. Query,
  31. Streamer,
  32. Twitter,
  33. TweetViewer,
  34. TweetWriter,
  35. credsfromfile,
  36. )
  37. SPACER = "###################################"
  38. def verbose(func):
  39. """Decorator for demo functions"""
  40. @wraps(func)
  41. def with_formatting(*args, **kwargs):
  42. print()
  43. print(SPACER)
  44. print("Using %s" % (func.__name__))
  45. print(SPACER)
  46. return func(*args, **kwargs)
  47. return with_formatting
  48. def yesterday():
  49. """
  50. Get yesterday's datetime as a 5-tuple.
  51. """
  52. date = datetime.datetime.now()
  53. date -= datetime.timedelta(days=1)
  54. date_tuple = date.timetuple()[:6]
  55. return date_tuple
  56. def setup():
  57. """
  58. Initialize global variables for the demos.
  59. """
  60. global USERIDS, FIELDS
  61. USERIDS = ["759251", "612473", "15108702", "6017542", "2673523800"]
  62. # UserIDs corresponding to\
  63. # @CNN, @BBCNews, @ReutersLive, @BreakingNews, @AJELive
  64. FIELDS = ["id_str"]
  65. @verbose
  66. def twitterclass_demo():
  67. """
  68. Use the simplified :class:`Twitter` class to write some tweets to a file.
  69. """
  70. tw = Twitter()
  71. print("Track from the public stream\n")
  72. tw.tweets(keywords="love, hate", limit=10) # public stream
  73. print(SPACER)
  74. print("Search past Tweets\n")
  75. tw = Twitter()
  76. tw.tweets(keywords="love, hate", stream=False, limit=10) # search past tweets
  77. print(SPACER)
  78. print(
  79. "Follow two accounts in the public stream"
  80. + " -- be prepared to wait a few minutes\n"
  81. )
  82. tw = Twitter()
  83. tw.tweets(follow=["759251", "6017542"], stream=True, limit=5) # public stream
  84. @verbose
  85. def sampletoscreen_demo(limit=20):
  86. """
  87. Sample from the Streaming API and send output to terminal.
  88. """
  89. oauth = credsfromfile()
  90. client = Streamer(**oauth)
  91. client.register(TweetViewer(limit=limit))
  92. client.sample()
  93. @verbose
  94. def tracktoscreen_demo(track="taylor swift", limit=10):
  95. """
  96. Track keywords from the public Streaming API and send output to terminal.
  97. """
  98. oauth = credsfromfile()
  99. client = Streamer(**oauth)
  100. client.register(TweetViewer(limit=limit))
  101. client.filter(track=track)
  102. @verbose
  103. def search_demo(keywords="nltk"):
  104. """
  105. Use the REST API to search for past tweets containing a given keyword.
  106. """
  107. oauth = credsfromfile()
  108. client = Query(**oauth)
  109. for tweet in client.search_tweets(keywords=keywords, limit=10):
  110. print(tweet["text"])
  111. @verbose
  112. def tweets_by_user_demo(user="NLTK_org", count=200):
  113. """
  114. Use the REST API to search for past tweets by a given user.
  115. """
  116. oauth = credsfromfile()
  117. client = Query(**oauth)
  118. client.register(TweetWriter())
  119. client.user_tweets(user, count)
  120. @verbose
  121. def lookup_by_userid_demo():
  122. """
  123. Use the REST API to convert a userID to a screen name.
  124. """
  125. oauth = credsfromfile()
  126. client = Query(**oauth)
  127. user_info = client.user_info_from_id(USERIDS)
  128. for info in user_info:
  129. name = info["screen_name"]
  130. followers = info["followers_count"]
  131. following = info["friends_count"]
  132. print("{0}, followers: {1}, following: {2}".format(name, followers, following))
  133. @verbose
  134. def followtoscreen_demo(limit=10):
  135. """
  136. Using the Streaming API, select just the tweets from a specified list of
  137. userIDs.
  138. This is will only give results in a reasonable time if the users in
  139. question produce a high volume of tweets, and may even so show some delay.
  140. """
  141. oauth = credsfromfile()
  142. client = Streamer(**oauth)
  143. client.register(TweetViewer(limit=limit))
  144. client.statuses.filter(follow=USERIDS)
  145. @verbose
  146. def streamtofile_demo(limit=20):
  147. """
  148. Write 20 tweets sampled from the public Streaming API to a file.
  149. """
  150. oauth = credsfromfile()
  151. client = Streamer(**oauth)
  152. client.register(TweetWriter(limit=limit, repeat=False))
  153. client.statuses.sample()
  154. @verbose
  155. def limit_by_time_demo(keywords="nltk"):
  156. """
  157. Query the REST API for Tweets about NLTK since yesterday and send
  158. the output to terminal.
  159. This example makes the assumption that there are sufficient Tweets since
  160. yesterday for the date to be an effective cut-off.
  161. """
  162. date = yesterday()
  163. dt_date = datetime.datetime(*date)
  164. oauth = credsfromfile()
  165. client = Query(**oauth)
  166. client.register(TweetViewer(limit=100, lower_date_limit=date))
  167. print("Cutoff date: {}\n".format(dt_date))
  168. for tweet in client.search_tweets(keywords=keywords):
  169. print("{} ".format(tweet["created_at"]), end="")
  170. client.handler.handle(tweet)
  171. @verbose
  172. def corpusreader_demo():
  173. """
  174. Use :module:`TwitterCorpusReader` tp read a file of tweets, and print out
  175. * some full tweets in JSON format;
  176. * some raw strings from the tweets (i.e., the value of the `text` field); and
  177. * the result of tokenising the raw strings.
  178. """
  179. from nltk.corpus import twitter_samples as tweets
  180. print()
  181. print("Complete tweet documents")
  182. print(SPACER)
  183. for tweet in tweets.docs("tweets.20150430-223406.json")[:1]:
  184. print(json.dumps(tweet, indent=1, sort_keys=True))
  185. print()
  186. print("Raw tweet strings:")
  187. print(SPACER)
  188. for text in tweets.strings("tweets.20150430-223406.json")[:15]:
  189. print(text)
  190. print()
  191. print("Tokenized tweet strings:")
  192. print(SPACER)
  193. for toks in tweets.tokenized("tweets.20150430-223406.json")[:15]:
  194. print(toks)
  195. @verbose
  196. def expand_tweetids_demo():
  197. """
  198. Given a file object containing a list of Tweet IDs, fetch the
  199. corresponding full Tweets, if available.
  200. """
  201. ids_f = StringIO(
  202. """\
  203. 588665495492124672
  204. 588665495487909888
  205. 588665495508766721
  206. 588665495513006080
  207. 588665495517200384
  208. 588665495487811584
  209. 588665495525588992
  210. 588665495487844352
  211. 588665495492014081
  212. 588665495512948737"""
  213. )
  214. oauth = credsfromfile()
  215. client = Query(**oauth)
  216. hydrated = client.expand_tweetids(ids_f)
  217. for tweet in hydrated:
  218. id_str = tweet["id_str"]
  219. print("id: {}".format(id_str))
  220. text = tweet["text"]
  221. if text.startswith("@null"):
  222. text = "[Tweet not available]"
  223. print(text + "\n")
  224. ALL = [
  225. twitterclass_demo,
  226. sampletoscreen_demo,
  227. tracktoscreen_demo,
  228. search_demo,
  229. tweets_by_user_demo,
  230. lookup_by_userid_demo,
  231. followtoscreen_demo,
  232. streamtofile_demo,
  233. limit_by_time_demo,
  234. corpusreader_demo,
  235. expand_tweetids_demo,
  236. ]
  237. """
  238. Select demo functions to run. E.g. replace the following line with "DEMOS =
  239. ALL[8:]" to execute only the final three demos.
  240. """
  241. DEMOS = ALL[:]
  242. if __name__ == "__main__":
  243. setup()
  244. for demo in DEMOS:
  245. demo()
  246. print("\n" + SPACER)
  247. print("All demos completed")
  248. print(SPACER)