generate.py 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. # -*- coding: utf-8 -*-
  2. # Natural Language Toolkit: Generating from a CFG
  3. #
  4. # Copyright (C) 2001-2020 NLTK Project
  5. # Author: Steven Bird <stevenbird1@gmail.com>
  6. # Peter Ljunglöf <peter.ljunglof@heatherleaf.se>
  7. # URL: <http://nltk.org/>
  8. # For license information, see LICENSE.TXT
  9. #
  10. import itertools
  11. import sys
  12. from nltk.grammar import Nonterminal
  13. def generate(grammar, start=None, depth=None, n=None):
  14. """
  15. Generates an iterator of all sentences from a CFG.
  16. :param grammar: The Grammar used to generate sentences.
  17. :param start: The Nonterminal from which to start generate sentences.
  18. :param depth: The maximal depth of the generated tree.
  19. :param n: The maximum number of sentences to return.
  20. :return: An iterator of lists of terminal tokens.
  21. """
  22. if not start:
  23. start = grammar.start()
  24. if depth is None:
  25. depth = sys.maxsize
  26. iter = _generate_all(grammar, [start], depth)
  27. if n:
  28. iter = itertools.islice(iter, n)
  29. return iter
  30. def _generate_all(grammar, items, depth):
  31. if items:
  32. try:
  33. for frag1 in _generate_one(grammar, items[0], depth):
  34. for frag2 in _generate_all(grammar, items[1:], depth):
  35. yield frag1 + frag2
  36. except RuntimeError as _error:
  37. if _error.message == "maximum recursion depth exceeded":
  38. # Helpful error message while still showing the recursion stack.
  39. raise RuntimeError(
  40. "The grammar has rule(s) that yield infinite recursion!!"
  41. )
  42. else:
  43. raise
  44. else:
  45. yield []
  46. def _generate_one(grammar, item, depth):
  47. if depth > 0:
  48. if isinstance(item, Nonterminal):
  49. for prod in grammar.productions(lhs=item):
  50. for frag in _generate_all(grammar, prod.rhs(), depth - 1):
  51. yield frag
  52. else:
  53. yield [item]
  54. demo_grammar = """
  55. S -> NP VP
  56. NP -> Det N
  57. PP -> P NP
  58. VP -> 'slept' | 'saw' NP | 'walked' PP
  59. Det -> 'the' | 'a'
  60. N -> 'man' | 'park' | 'dog'
  61. P -> 'in' | 'with'
  62. """
  63. def demo(N=23):
  64. from nltk.grammar import CFG
  65. print("Generating the first %d sentences for demo grammar:" % (N,))
  66. print(demo_grammar)
  67. grammar = CFG.fromstring(demo_grammar)
  68. for n, sent in enumerate(generate(grammar, n=N), 1):
  69. print("%3d. %s" % (n, " ".join(sent)))
  70. if __name__ == "__main__":
  71. demo()