api.py 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. # Natural Language Toolkit: CCG Categories
  2. #
  3. # Copyright (C) 2001-2020 NLTK Project
  4. # Author: Graeme Gange <ggange@csse.unimelb.edu.au>
  5. # URL: <http://nltk.org/>
  6. # For license information, see LICENSE.TXT
  7. from functools import total_ordering
  8. from abc import ABCMeta, abstractmethod
  9. from nltk.internals import raise_unorderable_types
  10. @total_ordering
  11. class AbstractCCGCategory(metaclass=ABCMeta):
  12. """
  13. Interface for categories in combinatory grammars.
  14. """
  15. @abstractmethod
  16. def is_primitive(self):
  17. """
  18. Returns true if the category is primitive.
  19. """
  20. @abstractmethod
  21. def is_function(self):
  22. """
  23. Returns true if the category is a function application.
  24. """
  25. @abstractmethod
  26. def is_var(self):
  27. """
  28. Returns true if the category is a variable.
  29. """
  30. @abstractmethod
  31. def substitute(self, substitutions):
  32. """
  33. Takes a set of (var, category) substitutions, and replaces every
  34. occurrence of the variable with the corresponding category.
  35. """
  36. @abstractmethod
  37. def can_unify(self, other):
  38. """
  39. Determines whether two categories can be unified.
  40. - Returns None if they cannot be unified
  41. - Returns a list of necessary substitutions if they can.
  42. """
  43. # Utility functions: comparison, strings and hashing.
  44. @abstractmethod
  45. def __str__(self):
  46. pass
  47. def __eq__(self, other):
  48. return (
  49. self.__class__ is other.__class__
  50. and self._comparison_key == other._comparison_key
  51. )
  52. def __ne__(self, other):
  53. return not self == other
  54. def __lt__(self, other):
  55. if not isinstance(other, AbstractCCGCategory):
  56. raise_unorderable_types("<", self, other)
  57. if self.__class__ is other.__class__:
  58. return self._comparison_key < other._comparison_key
  59. else:
  60. return self.__class__.__name__ < other.__class__.__name__
  61. def __hash__(self):
  62. try:
  63. return self._hash
  64. except AttributeError:
  65. self._hash = hash(self._comparison_key)
  66. return self._hash
  67. class CCGVar(AbstractCCGCategory):
  68. """
  69. Class representing a variable CCG category.
  70. Used for conjunctions (and possibly type-raising, if implemented as a
  71. unary rule).
  72. """
  73. _maxID = 0
  74. def __init__(self, prim_only=False):
  75. """Initialize a variable (selects a new identifier)
  76. :param prim_only: a boolean that determines whether the variable is
  77. restricted to primitives
  78. :type prim_only: bool
  79. """
  80. self._id = self.new_id()
  81. self._prim_only = prim_only
  82. self._comparison_key = self._id
  83. @classmethod
  84. def new_id(cls):
  85. """
  86. A class method allowing generation of unique variable identifiers.
  87. """
  88. cls._maxID = cls._maxID + 1
  89. return cls._maxID - 1
  90. @classmethod
  91. def reset_id(cls):
  92. cls._maxID = 0
  93. def is_primitive(self):
  94. return False
  95. def is_function(self):
  96. return False
  97. def is_var(self):
  98. return True
  99. def substitute(self, substitutions):
  100. """If there is a substitution corresponding to this variable,
  101. return the substituted category.
  102. """
  103. for (var, cat) in substitutions:
  104. if var == self:
  105. return cat
  106. return self
  107. def can_unify(self, other):
  108. """ If the variable can be replaced with other
  109. a substitution is returned.
  110. """
  111. if other.is_primitive() or not self._prim_only:
  112. return [(self, other)]
  113. return None
  114. def id(self):
  115. return self._id
  116. def __str__(self):
  117. return "_var" + str(self._id)
  118. @total_ordering
  119. class Direction(object):
  120. """
  121. Class representing the direction of a function application.
  122. Also contains maintains information as to which combinators
  123. may be used with the category.
  124. """
  125. def __init__(self, dir, restrictions):
  126. self._dir = dir
  127. self._restrs = restrictions
  128. self._comparison_key = (dir, tuple(restrictions))
  129. # Testing the application direction
  130. def is_forward(self):
  131. return self._dir == "/"
  132. def is_backward(self):
  133. return self._dir == "\\"
  134. def dir(self):
  135. return self._dir
  136. def restrs(self):
  137. """A list of restrictions on the combinators.
  138. '.' denotes that permuting operations are disallowed
  139. ',' denotes that function composition is disallowed
  140. '_' denotes that the direction has variable restrictions.
  141. (This is redundant in the current implementation of type-raising)
  142. """
  143. return self._restrs
  144. def is_variable(self):
  145. return self._restrs == "_"
  146. # Unification and substitution of variable directions.
  147. # Used only if type-raising is implemented as a unary rule, as it
  148. # must inherit restrictions from the argument category.
  149. def can_unify(self, other):
  150. if other.is_variable():
  151. return [("_", self.restrs())]
  152. elif self.is_variable():
  153. return [("_", other.restrs())]
  154. else:
  155. if self.restrs() == other.restrs():
  156. return []
  157. return None
  158. def substitute(self, subs):
  159. if not self.is_variable():
  160. return self
  161. for (var, restrs) in subs:
  162. if var == "_":
  163. return Direction(self._dir, restrs)
  164. return self
  165. # Testing permitted combinators
  166. def can_compose(self):
  167. return "," not in self._restrs
  168. def can_cross(self):
  169. return "." not in self._restrs
  170. def __eq__(self, other):
  171. return (
  172. self.__class__ is other.__class__
  173. and self._comparison_key == other._comparison_key
  174. )
  175. def __ne__(self, other):
  176. return not self == other
  177. def __lt__(self, other):
  178. if not isinstance(other, Direction):
  179. raise_unorderable_types("<", self, other)
  180. if self.__class__ is other.__class__:
  181. return self._comparison_key < other._comparison_key
  182. else:
  183. return self.__class__.__name__ < other.__class__.__name__
  184. def __hash__(self):
  185. try:
  186. return self._hash
  187. except AttributeError:
  188. self._hash = hash(self._comparison_key)
  189. return self._hash
  190. def __str__(self):
  191. r_str = ""
  192. for r in self._restrs:
  193. r_str = r_str + "%s" % r
  194. return "%s%s" % (self._dir, r_str)
  195. # The negation operator reverses the direction of the application
  196. def __neg__(self):
  197. if self._dir == "/":
  198. return Direction("\\", self._restrs)
  199. else:
  200. return Direction("/", self._restrs)
  201. class PrimitiveCategory(AbstractCCGCategory):
  202. """
  203. Class representing primitive categories.
  204. Takes a string representation of the category, and a
  205. list of strings specifying the morphological subcategories.
  206. """
  207. def __init__(self, categ, restrictions=[]):
  208. self._categ = categ
  209. self._restrs = restrictions
  210. self._comparison_key = (categ, tuple(restrictions))
  211. def is_primitive(self):
  212. return True
  213. def is_function(self):
  214. return False
  215. def is_var(self):
  216. return False
  217. def restrs(self):
  218. return self._restrs
  219. def categ(self):
  220. return self._categ
  221. # Substitution does nothing to a primitive category
  222. def substitute(self, subs):
  223. return self
  224. # A primitive can be unified with a class of the same
  225. # base category, given that the other category shares all
  226. # of its subclasses, or with a variable.
  227. def can_unify(self, other):
  228. if not other.is_primitive():
  229. return None
  230. if other.is_var():
  231. return [(other, self)]
  232. if other.categ() == self.categ():
  233. for restr in self._restrs:
  234. if restr not in other.restrs():
  235. return None
  236. return []
  237. return None
  238. def __str__(self):
  239. if self._restrs == []:
  240. return "%s" % self._categ
  241. restrictions = "[%s]" % ",".join(repr(r) for r in self._restrs)
  242. return "%s%s" % (self._categ, restrictions)
  243. class FunctionalCategory(AbstractCCGCategory):
  244. """
  245. Class that represents a function application category.
  246. Consists of argument and result categories, together with
  247. an application direction.
  248. """
  249. def __init__(self, res, arg, dir):
  250. self._res = res
  251. self._arg = arg
  252. self._dir = dir
  253. self._comparison_key = (arg, dir, res)
  254. def is_primitive(self):
  255. return False
  256. def is_function(self):
  257. return True
  258. def is_var(self):
  259. return False
  260. # Substitution returns the category consisting of the
  261. # substitution applied to each of its constituents.
  262. def substitute(self, subs):
  263. sub_res = self._res.substitute(subs)
  264. sub_dir = self._dir.substitute(subs)
  265. sub_arg = self._arg.substitute(subs)
  266. return FunctionalCategory(sub_res, sub_arg, self._dir)
  267. # A function can unify with another function, so long as its
  268. # constituents can unify, or with an unrestricted variable.
  269. def can_unify(self, other):
  270. if other.is_var():
  271. return [(other, self)]
  272. if other.is_function():
  273. sa = self._res.can_unify(other.res())
  274. sd = self._dir.can_unify(other.dir())
  275. if sa is not None and sd is not None:
  276. sb = self._arg.substitute(sa).can_unify(other.arg().substitute(sa))
  277. if sb is not None:
  278. return sa + sb
  279. return None
  280. # Constituent accessors
  281. def arg(self):
  282. return self._arg
  283. def res(self):
  284. return self._res
  285. def dir(self):
  286. return self._dir
  287. def __str__(self):
  288. return "(%s%s%s)" % (self._res, self._dir, self._arg)