util.py 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. # Natural Language Toolkit: Semantic Interpretation
  2. #
  3. # Author: Ewan Klein <ewan@inf.ed.ac.uk>
  4. #
  5. # Copyright (C) 2001-2020 NLTK Project
  6. # URL: <http://nltk.org/>
  7. # For license information, see LICENSE.TXT
  8. """
  9. Utility functions for batch-processing sentences: parsing and
  10. extraction of the semantic representation of the root node of the
  11. syntax tree, followed by evaluation of the semantic representation in
  12. a first-order model.
  13. """
  14. import codecs
  15. from nltk.sem import evaluate
  16. ##############################################################
  17. ## Utility functions for connecting parse output to semantics
  18. ##############################################################
  19. def parse_sents(inputs, grammar, trace=0):
  20. """
  21. Convert input sentences into syntactic trees.
  22. :param inputs: sentences to be parsed
  23. :type inputs: list(str)
  24. :param grammar: ``FeatureGrammar`` or name of feature-based grammar
  25. :type grammar: nltk.grammar.FeatureGrammar
  26. :rtype: list(nltk.tree.Tree) or dict(list(str)): list(Tree)
  27. :return: a mapping from input sentences to a list of ``Tree``s
  28. """
  29. # put imports here to avoid circult dependencies
  30. from nltk.grammar import FeatureGrammar
  31. from nltk.parse import FeatureChartParser, load_parser
  32. if isinstance(grammar, FeatureGrammar):
  33. cp = FeatureChartParser(grammar)
  34. else:
  35. cp = load_parser(grammar, trace=trace)
  36. parses = []
  37. for sent in inputs:
  38. tokens = sent.split() # use a tokenizer?
  39. syntrees = list(cp.parse(tokens))
  40. parses.append(syntrees)
  41. return parses
  42. def root_semrep(syntree, semkey="SEM"):
  43. """
  44. Find the semantic representation at the root of a tree.
  45. :param syntree: a parse ``Tree``
  46. :param semkey: the feature label to use for the root semantics in the tree
  47. :return: the semantic representation at the root of a ``Tree``
  48. :rtype: sem.Expression
  49. """
  50. from nltk.grammar import FeatStructNonterminal
  51. node = syntree.label()
  52. assert isinstance(node, FeatStructNonterminal)
  53. try:
  54. return node[semkey]
  55. except KeyError:
  56. print(node, end=" ")
  57. print("has no specification for the feature %s" % semkey)
  58. raise
  59. def interpret_sents(inputs, grammar, semkey="SEM", trace=0):
  60. """
  61. Add the semantic representation to each syntactic parse tree
  62. of each input sentence.
  63. :param inputs: a list of sentences
  64. :type inputs: list(str)
  65. :param grammar: ``FeatureGrammar`` or name of feature-based grammar
  66. :type grammar: nltk.grammar.FeatureGrammar
  67. :return: a mapping from sentences to lists of pairs (parse-tree, semantic-representations)
  68. :rtype: list(list(tuple(nltk.tree.Tree, nltk.sem.logic.ConstantExpression)))
  69. """
  70. return [
  71. [(syn, root_semrep(syn, semkey)) for syn in syntrees]
  72. for syntrees in parse_sents(inputs, grammar, trace=trace)
  73. ]
  74. def evaluate_sents(inputs, grammar, model, assignment, trace=0):
  75. """
  76. Add the truth-in-a-model value to each semantic representation
  77. for each syntactic parse of each input sentences.
  78. :param inputs: a list of sentences
  79. :type inputs: list(str)
  80. :param grammar: ``FeatureGrammar`` or name of feature-based grammar
  81. :type grammar: nltk.grammar.FeatureGrammar
  82. :return: a mapping from sentences to lists of triples (parse-tree, semantic-representations, evaluation-in-model)
  83. :rtype: list(list(tuple(nltk.tree.Tree, nltk.sem.logic.ConstantExpression, bool or dict(str): bool)))
  84. """
  85. return [
  86. [
  87. (syn, sem, model.evaluate("%s" % sem, assignment, trace=trace))
  88. for (syn, sem) in interpretations
  89. ]
  90. for interpretations in interpret_sents(inputs, grammar)
  91. ]
  92. def demo_model0():
  93. global m0, g0
  94. # Initialize a valuation of non-logical constants."""
  95. v = [
  96. ("john", "b1"),
  97. ("mary", "g1"),
  98. ("suzie", "g2"),
  99. ("fido", "d1"),
  100. ("tess", "d2"),
  101. ("noosa", "n"),
  102. ("girl", set(["g1", "g2"])),
  103. ("boy", set(["b1", "b2"])),
  104. ("dog", set(["d1", "d2"])),
  105. ("bark", set(["d1", "d2"])),
  106. ("walk", set(["b1", "g2", "d1"])),
  107. ("chase", set([("b1", "g1"), ("b2", "g1"), ("g1", "d1"), ("g2", "d2")])),
  108. (
  109. "see",
  110. set([("b1", "g1"), ("b2", "d2"), ("g1", "b1"), ("d2", "b1"), ("g2", "n")]),
  111. ),
  112. ("in", set([("b1", "n"), ("b2", "n"), ("d2", "n")])),
  113. ("with", set([("b1", "g1"), ("g1", "b1"), ("d1", "b1"), ("b1", "d1")])),
  114. ]
  115. # Read in the data from ``v``
  116. val = evaluate.Valuation(v)
  117. # Bind ``dom`` to the ``domain`` property of ``val``
  118. dom = val.domain
  119. # Initialize a model with parameters ``dom`` and ``val``.
  120. m0 = evaluate.Model(dom, val)
  121. # Initialize a variable assignment with parameter ``dom``
  122. g0 = evaluate.Assignment(dom)
  123. def read_sents(filename, encoding="utf8"):
  124. with codecs.open(filename, "r", encoding) as fp:
  125. sents = [l.rstrip() for l in fp]
  126. # get rid of blank lines
  127. sents = [l for l in sents if len(l) > 0]
  128. sents = [l for l in sents if not l[0] == "#"]
  129. return sents
  130. def demo_legacy_grammar():
  131. """
  132. Check that interpret_sents() is compatible with legacy grammars that use
  133. a lowercase 'sem' feature.
  134. Define 'test.fcfg' to be the following
  135. """
  136. from nltk.grammar import FeatureGrammar
  137. g = FeatureGrammar.fromstring(
  138. """
  139. % start S
  140. S[sem=<hello>] -> 'hello'
  141. """
  142. )
  143. print("Reading grammar: %s" % g)
  144. print("*" * 20)
  145. for reading in interpret_sents(["hello"], g, semkey="sem"):
  146. syn, sem = reading[0]
  147. print()
  148. print("output: ", sem)
  149. def demo():
  150. import sys
  151. from optparse import OptionParser
  152. description = """
  153. Parse and evaluate some sentences.
  154. """
  155. opts = OptionParser(description=description)
  156. opts.set_defaults(
  157. evaluate=True,
  158. beta=True,
  159. syntrace=0,
  160. semtrace=0,
  161. demo="default",
  162. grammar="",
  163. sentences="",
  164. )
  165. opts.add_option(
  166. "-d",
  167. "--demo",
  168. dest="demo",
  169. help="choose demo D; omit this for the default demo, or specify 'chat80'",
  170. metavar="D",
  171. )
  172. opts.add_option(
  173. "-g", "--gram", dest="grammar", help="read in grammar G", metavar="G"
  174. )
  175. opts.add_option(
  176. "-m",
  177. "--model",
  178. dest="model",
  179. help="import model M (omit '.py' suffix)",
  180. metavar="M",
  181. )
  182. opts.add_option(
  183. "-s",
  184. "--sentences",
  185. dest="sentences",
  186. help="read in a file of test sentences S",
  187. metavar="S",
  188. )
  189. opts.add_option(
  190. "-e",
  191. "--no-eval",
  192. action="store_false",
  193. dest="evaluate",
  194. help="just do a syntactic analysis",
  195. )
  196. opts.add_option(
  197. "-b",
  198. "--no-beta-reduction",
  199. action="store_false",
  200. dest="beta",
  201. help="don't carry out beta-reduction",
  202. )
  203. opts.add_option(
  204. "-t",
  205. "--syntrace",
  206. action="count",
  207. dest="syntrace",
  208. help="set syntactic tracing on; requires '-e' option",
  209. )
  210. opts.add_option(
  211. "-T",
  212. "--semtrace",
  213. action="count",
  214. dest="semtrace",
  215. help="set semantic tracing on",
  216. )
  217. (options, args) = opts.parse_args()
  218. SPACER = "-" * 30
  219. demo_model0()
  220. sents = [
  221. "Fido sees a boy with Mary",
  222. "John sees Mary",
  223. "every girl chases a dog",
  224. "every boy chases a girl",
  225. "John walks with a girl in Noosa",
  226. "who walks",
  227. ]
  228. gramfile = "grammars/sample_grammars/sem2.fcfg"
  229. if options.sentences:
  230. sentsfile = options.sentences
  231. if options.grammar:
  232. gramfile = options.grammar
  233. if options.model:
  234. exec("import %s as model" % options.model)
  235. if sents is None:
  236. sents = read_sents(sentsfile)
  237. # Set model and assignment
  238. model = m0
  239. g = g0
  240. if options.evaluate:
  241. evaluations = evaluate_sents(sents, gramfile, model, g, trace=options.semtrace)
  242. else:
  243. semreps = interpret_sents(sents, gramfile, trace=options.syntrace)
  244. for i, sent in enumerate(sents):
  245. n = 1
  246. print("\nSentence: %s" % sent)
  247. print(SPACER)
  248. if options.evaluate:
  249. for (syntree, semrep, value) in evaluations[i]:
  250. if isinstance(value, dict):
  251. value = set(value.keys())
  252. print("%d: %s" % (n, semrep))
  253. print(value)
  254. n += 1
  255. else:
  256. for (syntree, semrep) in semreps[i]:
  257. print("%d: %s" % (n, semrep))
  258. n += 1
# Script entry point: run the main demo, then the legacy-grammar
# compatibility check.
if __name__ == "__main__":
    demo()
    demo_legacy_grammar()