| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258 |
- # Natural Language Toolkit: Lexical Functional Grammar
- #
- # Author: Dan Garrette <dhgarrette@gmail.com>
- #
- # Copyright (C) 2001-2020 NLTK Project
- # URL: <http://nltk.org/>
- # For license information, see LICENSE.TXT
- from itertools import chain
- from nltk.internals import Counter
class FStructure(dict):
    """
    A Lexical Functional Grammar f-structure, stored as a dictionary.

    Keys are feature names.  They are lower-cased by item assignment, item
    lookup and membership tests (``fs[key]`` and ``key in fs``); other
    ``dict`` methods such as ``get`` or ``update`` are not overridden.
    Values are expected to be lists (see ``safeappend``) whose elements are
    nested ``FStructure`` objects, ``(word, tag)`` tuples, or lists of
    ``FStructure`` objects.

    Structures built by ``read_depgraph`` additionally carry the attributes
    ``pred`` (a ``(word, tag)`` tuple), ``label`` (a short string) and
    ``parent`` (the enclosing ``FStructure``, or None for the root).
    """

    def safeappend(self, key, item):
        """
        Append 'item' to the list at 'key'.  If no list exists for 'key', then
        construct one.
        """
        if key not in self:
            self[key] = []
        self[key].append(item)

    def __setitem__(self, key, value):
        """Store 'value' under the lower-cased 'key'."""
        dict.__setitem__(self, key.lower(), value)

    def __getitem__(self, key):
        """Return the value stored under the lower-cased 'key'."""
        return dict.__getitem__(self, key.lower())

    def __contains__(self, key):
        """Return True if the lower-cased 'key' is present."""
        return dict.__contains__(self, key.lower())

    def to_glueformula_list(self, glue_dict):
        """
        Convert this structure to a dependency graph and return the result of
        ``glue_dict.to_glueformula_list`` for that graph.

        :param glue_dict: object providing ``to_glueformula_list(depgraph)``
        """
        depgraph = self.to_depgraph()
        return glue_dict.to_glueformula_list(depgraph)

    def to_depgraph(self, rel=None):
        """
        Build a ``DependencyGraph`` from this structure.

        :param rel: unused; kept so existing callers keep working
        :return: a graph whose ``root`` is the node created for this structure
        """
        from nltk.parse.dependencygraph import DependencyGraph

        depgraph = DependencyGraph()
        nodes = depgraph.nodes

        self._to_depgraph(nodes, 0, "ROOT")

        # Add all the dependencies for all the nodes.  One pass over the
        # dependents is enough: each node names its head, so it can be
        # registered with that head directly.  The membership test prevents
        # the defaultdict from creating nodes for unknown heads.
        for dependent in nodes.values():
            relation = dependent["rel"]
            if relation == "TOP":
                continue
            head = dependent["head"]
            if head in nodes:
                nodes[head]["deps"].setdefault(relation, []).append(
                    dependent["address"]
                )

        depgraph.root = nodes[1]

        return depgraph

    def _to_depgraph(self, nodes, head, rel):
        """
        Add a node for this structure, and recursively for everything it
        contains, to 'nodes'.

        :param nodes: mapping from address to node dict; a missing address
            must yield a fresh dict (as a ``defaultdict`` does)
        :param head: address of the node that governs this structure
        :param rel: relation between this structure and its head
        :raise Exception: if a value is not an FStructure, a list or a tuple
        """
        # The next free address is the current number of nodes.
        index = len(nodes)

        nodes[index].update(
            {
                "address": index,
                "word": self.pred[0],
                "tag": self.pred[1],
                "head": head,
                "rel": rel,
            }
        )

        for feature in sorted(self):
            values = self[feature]
            try:
                ordered = sorted(values)
            except TypeError:
                # FStructures are dicts and cannot be ordered in Python 3;
                # keep the insertion order rather than failing.
                ordered = list(values)

            for item in ordered:
                if isinstance(item, FStructure):
                    item._to_depgraph(nodes, index, feature)
                elif isinstance(item, tuple):
                    new_index = len(nodes)
                    nodes[new_index].update(
                        {
                            "address": new_index,
                            "word": item[0],
                            "tag": item[1],
                            "head": index,
                            "rel": feature,
                        }
                    )
                elif isinstance(item, list):
                    for n in item:
                        n._to_depgraph(nodes, index, feature)
                else:
                    raise Exception(
                        "feature %s is not an FStruct, a list, or a tuple" % feature
                    )

    @staticmethod
    def read_depgraph(depgraph):
        """
        Build an ``FStructure`` from the root node of 'depgraph'.

        :param depgraph: object providing ``root`` and ``nodes``
        """
        return FStructure._read_depgraph(depgraph.root, depgraph)

    @staticmethod
    def _read_depgraph(node, depgraph, label_counter=None, parent=None):
        """
        Convert 'node' and, recursively, its dependents.

        :param node: node dict with the keys "rel", "word", "tag" and "deps"
        :param depgraph: graph whose ``nodes`` maps addresses to node dicts
        :param label_counter: object whose ``get()`` returns the next label
            number; a new ``Counter`` is created when None
        :param parent: the FStructure that will contain the result
        :return: a ``(word, tag)`` tuple for 'spec' and 'punct' nodes,
            otherwise an ``FStructure``
        """
        if label_counter is None:
            label_counter = Counter()

        if node["rel"].lower() in ["spec", "punct"]:
            # the value of a 'spec' entry is a word, not an FStructure
            return (node["word"], node["tag"])

        fstruct = FStructure()
        fstruct.pred = None
        fstruct.label = FStructure._make_label(label_counter.get())
        fstruct.parent = parent

        word, tag = node["word"], node["tag"]
        if tag[:2] == "VB":
            # Verb tags are reduced to "VB"; a following "D" is recorded as
            # a past-tense feature instead.
            if tag[2:3] == "D":
                fstruct.safeappend("tense", ("PAST", "tense"))
            fstruct.pred = (word, tag[:2])
        else:
            fstruct.pred = (word, tag)

        for idx in chain.from_iterable(node["deps"].values()):
            child = depgraph.nodes[idx]
            fstruct.safeappend(
                child["rel"],
                FStructure._read_depgraph(child, depgraph, label_counter, fstruct),
            )

        return fstruct

    @staticmethod
    def _make_label(value):
        """
        Pick an alphabetic character as identifier for an entity in the model.

        The letters run from 'f' to 'z' and then from 'a' to 'e'.  Once all 26
        are used, the sequence restarts with a numeric suffix ('f1', 'g1', ...).

        :param value: where to index into the list of characters
        :type value: int
        """
        letters = "fghijklmnopqrstuvwxyzabcde"
        # Index and suffix are both derived from value - 1, so each run of 26
        # consecutive values shares a suffix and the index never overflows.
        num, position = divmod(int(value) - 1, len(letters))
        letter = letters[position]
        if num > 0:
            return letter + str(num)
        return letter

    def __repr__(self):
        """Return the pretty-printed form with all newlines removed."""
        return self.__str__().replace("\n", "")

    def __str__(self):
        """Return the pretty-printed form with the default indent."""
        return self.pretty_format()

    def pretty_format(self, indent=3):
        """
        Return a multi-line, bracketed rendering of this structure.

        The label and pred are included when the instance has them; each
        feature value is written on its own line.

        :param indent: number of spaces put in front of each feature line
        :type indent: int
        :raise Exception: if a value is not an FStructure, a list or a tuple
        """
        # A missing attribute raises AttributeError, so look the optional
        # attributes up with a default instead of catching an exception.
        label = getattr(self, "label", None)
        pred = getattr(self, "pred", None)

        if label is None:
            accum = "["
            label_width = 0
        else:
            accum = "%s:[" % label
            label_width = len(label)

        if pred:
            accum += "pred '%s'" % (pred[0],)

        padding = " " * indent
        for feature in sorted(self):
            for item in self[feature]:
                if isinstance(item, FStructure):
                    next_indent = indent + len(feature) + 3 + label_width
                    accum += "\n%s%s %s" % (
                        padding,
                        feature,
                        item.pretty_format(next_indent),
                    )
                elif isinstance(item, tuple):
                    accum += "\n%s%s '%s'" % (padding, feature, item[0])
                elif isinstance(item, list):
                    separator = "\n%s" % (" " * (indent + len(feature) + 2))
                    accum += "\n%s%s {%s}" % (
                        padding,
                        feature,
                        # str.join accepts only strings.
                        separator.join(str(element) for element in item),
                    )
                else:  # ERROR
                    raise Exception(
                        "feature %s is not an FStruct, a list, or a tuple" % feature
                    )
        return accum + "]"
def demo_read_depgraph():
    """
    Build four small dependency graphs and print the f-structure that
    ``FStructure.read_depgraph`` produces for each of them, in order.
    """
    from nltk.parse.dependencygraph import DependencyGraph

    # One entry per sentence; each row is "word tag head relation".
    sentences = (
        (
            "Esso NNP 2 SUB\n"
            "said VBD 0 ROOT\n"
            "the DT 5 NMOD\n"
            "Whiting NNP 5 NMOD\n"
            "field NN 6 SUB\n"
            "started VBD 2 VMOD\n"
            "production NN 6 OBJ\n"
            "Tuesday NNP 6 VMOD\n"
        ),
        (
            "John NNP 2 SUB\n"
            "sees VBP 0 ROOT\n"
            "Mary NNP 2 OBJ\n"
        ),
        (
            "a DT 2 SPEC\n"
            "man NN 3 SUBJ\n"
            "walks VB 0 ROOT\n"
        ),
        (
            "every DT 2 SPEC\n"
            "girl NN 3 SUBJ\n"
            "chases VB 0 ROOT\n"
            "a DT 5 SPEC\n"
            "dog NN 3 OBJ\n"
        ),
    )

    # Parse every sentence before printing anything.
    graphs = [DependencyGraph(text) for text in sentences]
    for graph in graphs:
        print(FStructure.read_depgraph(graph))
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo_read_depgraph()
|