lfg.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. # Natural Language Toolkit: Lexical Functional Grammar
  2. #
  3. # Author: Dan Garrette <dhgarrette@gmail.com>
  4. #
  5. # Copyright (C) 2001-2020 NLTK Project
  6. # URL: <http://nltk.org/>
  7. # For license information, see LICENSE.TXT
  8. from itertools import chain
  9. from nltk.internals import Counter
  10. class FStructure(dict):
  11. def safeappend(self, key, item):
  12. """
  13. Append 'item' to the list at 'key'. If no list exists for 'key', then
  14. construct one.
  15. """
  16. if key not in self:
  17. self[key] = []
  18. self[key].append(item)
  19. def __setitem__(self, key, value):
  20. dict.__setitem__(self, key.lower(), value)
  21. def __getitem__(self, key):
  22. return dict.__getitem__(self, key.lower())
  23. def __contains__(self, key):
  24. return dict.__contains__(self, key.lower())
  25. def to_glueformula_list(self, glue_dict):
  26. depgraph = self.to_depgraph()
  27. return glue_dict.to_glueformula_list(depgraph)
  28. def to_depgraph(self, rel=None):
  29. from nltk.parse.dependencygraph import DependencyGraph
  30. depgraph = DependencyGraph()
  31. nodes = depgraph.nodes
  32. self._to_depgraph(nodes, 0, "ROOT")
  33. # Add all the dependencies for all the nodes
  34. for address, node in nodes.items():
  35. for n2 in (n for n in nodes.values() if n["rel"] != "TOP"):
  36. if n2["head"] == address:
  37. relation = n2["rel"]
  38. node["deps"].setdefault(relation, [])
  39. node["deps"][relation].append(n2["address"])
  40. depgraph.root = nodes[1]
  41. return depgraph
  42. def _to_depgraph(self, nodes, head, rel):
  43. index = len(nodes)
  44. nodes[index].update(
  45. {
  46. "address": index,
  47. "word": self.pred[0],
  48. "tag": self.pred[1],
  49. "head": head,
  50. "rel": rel,
  51. }
  52. )
  53. for feature in sorted(self):
  54. for item in sorted(self[feature]):
  55. if isinstance(item, FStructure):
  56. item._to_depgraph(nodes, index, feature)
  57. elif isinstance(item, tuple):
  58. new_index = len(nodes)
  59. nodes[new_index].update(
  60. {
  61. "address": new_index,
  62. "word": item[0],
  63. "tag": item[1],
  64. "head": index,
  65. "rel": feature,
  66. }
  67. )
  68. elif isinstance(item, list):
  69. for n in item:
  70. n._to_depgraph(nodes, index, feature)
  71. else:
  72. raise Exception(
  73. "feature %s is not an FStruct, a list, or a tuple" % feature
  74. )
  75. @staticmethod
  76. def read_depgraph(depgraph):
  77. return FStructure._read_depgraph(depgraph.root, depgraph)
  78. @staticmethod
  79. def _read_depgraph(node, depgraph, label_counter=None, parent=None):
  80. if not label_counter:
  81. label_counter = Counter()
  82. if node["rel"].lower() in ["spec", "punct"]:
  83. # the value of a 'spec' entry is a word, not an FStructure
  84. return (node["word"], node["tag"])
  85. else:
  86. fstruct = FStructure()
  87. fstruct.pred = None
  88. fstruct.label = FStructure._make_label(label_counter.get())
  89. fstruct.parent = parent
  90. word, tag = node["word"], node["tag"]
  91. if tag[:2] == "VB":
  92. if tag[2:3] == "D":
  93. fstruct.safeappend("tense", ("PAST", "tense"))
  94. fstruct.pred = (word, tag[:2])
  95. if not fstruct.pred:
  96. fstruct.pred = (word, tag)
  97. children = [depgraph.nodes[idx] for idx in chain(*node["deps"].values())]
  98. for child in children:
  99. fstruct.safeappend(
  100. child["rel"],
  101. FStructure._read_depgraph(child, depgraph, label_counter, fstruct),
  102. )
  103. return fstruct
  104. @staticmethod
  105. def _make_label(value):
  106. """
  107. Pick an alphabetic character as identifier for an entity in the model.
  108. :param value: where to index into the list of characters
  109. :type value: int
  110. """
  111. letter = [
  112. "f",
  113. "g",
  114. "h",
  115. "i",
  116. "j",
  117. "k",
  118. "l",
  119. "m",
  120. "n",
  121. "o",
  122. "p",
  123. "q",
  124. "r",
  125. "s",
  126. "t",
  127. "u",
  128. "v",
  129. "w",
  130. "x",
  131. "y",
  132. "z",
  133. "a",
  134. "b",
  135. "c",
  136. "d",
  137. "e",
  138. ][value - 1]
  139. num = int(value) // 26
  140. if num > 0:
  141. return letter + str(num)
  142. else:
  143. return letter
  144. def __repr__(self):
  145. return self.__str__().replace("\n", "")
  146. def __str__(self):
  147. return self.pretty_format()
  148. def pretty_format(self, indent=3):
  149. try:
  150. accum = "%s:[" % self.label
  151. except NameError:
  152. accum = "["
  153. try:
  154. accum += "pred '%s'" % (self.pred[0])
  155. except NameError:
  156. pass
  157. for feature in sorted(self):
  158. for item in self[feature]:
  159. if isinstance(item, FStructure):
  160. next_indent = indent + len(feature) + 3 + len(self.label)
  161. accum += "\n%s%s %s" % (
  162. " " * (indent),
  163. feature,
  164. item.pretty_format(next_indent),
  165. )
  166. elif isinstance(item, tuple):
  167. accum += "\n%s%s '%s'" % (" " * (indent), feature, item[0])
  168. elif isinstance(item, list):
  169. accum += "\n%s%s {%s}" % (
  170. " " * (indent),
  171. feature,
  172. ("\n%s" % (" " * (indent + len(feature) + 2))).join(item),
  173. )
  174. else: # ERROR
  175. raise Exception(
  176. "feature %s is not an FStruct, a list, or a tuple" % feature
  177. )
  178. return accum + "]"
  179. def demo_read_depgraph():
  180. from nltk.parse.dependencygraph import DependencyGraph
  181. dg1 = DependencyGraph(
  182. """\
  183. Esso NNP 2 SUB
  184. said VBD 0 ROOT
  185. the DT 5 NMOD
  186. Whiting NNP 5 NMOD
  187. field NN 6 SUB
  188. started VBD 2 VMOD
  189. production NN 6 OBJ
  190. Tuesday NNP 6 VMOD
  191. """
  192. )
  193. dg2 = DependencyGraph(
  194. """\
  195. John NNP 2 SUB
  196. sees VBP 0 ROOT
  197. Mary NNP 2 OBJ
  198. """
  199. )
  200. dg3 = DependencyGraph(
  201. """\
  202. a DT 2 SPEC
  203. man NN 3 SUBJ
  204. walks VB 0 ROOT
  205. """
  206. )
  207. dg4 = DependencyGraph(
  208. """\
  209. every DT 2 SPEC
  210. girl NN 3 SUBJ
  211. chases VB 0 ROOT
  212. a DT 5 SPEC
  213. dog NN 3 OBJ
  214. """
  215. )
  216. depgraphs = [dg1, dg2, dg3, dg4]
  217. for dg in depgraphs:
  218. print(FStructure.read_depgraph(dg))
  219. if __name__ == "__main__":
  220. demo_read_depgraph()