dependencygraph.py 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779
  1. # Natural Language Toolkit: Dependency Grammars
  2. #
  3. # Copyright (C) 2001-2020 NLTK Project
  4. # Author: Jason Narad <jason.narad@gmail.com>
  5. # Steven Bird <stevenbird1@gmail.com> (modifications)
  6. #
  7. # URL: <http://nltk.org/>
  8. # For license information, see LICENSE.TXT
  9. #
  10. """
  11. Tools for reading and writing dependency trees.
  12. The input is assumed to be in Malt-TAB format
  13. (http://stp.lingfil.uu.se/~nivre/research/MaltXML.html).
  14. """
  15. from collections import defaultdict
  16. from itertools import chain
  17. from pprint import pformat
  18. import subprocess
  19. import warnings
  20. from nltk.tree import Tree
  21. #################################################################
  22. # DependencyGraph Class
  23. #################################################################
  class DependencyGraph(object):
      """
      A container for the nodes and labelled edges of a dependency structure.

      Nodes are stored in ``self.nodes``, a ``defaultdict`` keyed by address.
      Every node is a dict with the keys ``address``, ``word``, ``lemma``,
      ``ctag``, ``tag``, ``feats``, ``head``, ``rel`` and ``deps``, where
      ``deps`` maps a relation label to the list of dependent addresses.

      Because ``self.nodes`` is a ``defaultdict``, indexing a missing address
      (``self.nodes[i]``) silently creates an empty node; methods that only
      want to look something up use ``in`` / ``.get`` instead.
      """

      def __init__(
          self,
          tree_str=None,
          cell_extractor=None,
          zero_based=False,
          cell_separator=None,
          top_relation_label="ROOT",
      ):
          """Dependency graph.

          We place a dummy `TOP` node with the index 0, since the root node is
          often assigned 0 as its head. This also means that the indexing of the
          nodes corresponds directly to the Malt-TAB format, which starts at 1.

          If ``zero_based`` is True, heads read from the input are shifted by
          one, for Malt-TAB-like input with node numbers starting at 0 and the
          root node assigned -1 (as produced by, e.g., zpar).

          :param str tree_str: the graph description to parse; if empty or
              ``None`` an empty graph (only the `TOP` node) is created.
          :param cell_extractor: optional custom function turning the cells of
              a line into node fields (see ``_parse``).
          :param bool zero_based: whether heads in the input are zero-based.
          :param str cell_separator: the cell separator. If not provided, cells
              are split by whitespace.
          :param str top_relation_label: the label by which the top relation is
              identified, for example, `ROOT`, `null` or `TOP`.
          """
          self.nodes = defaultdict(
              lambda: {
                  "address": None,
                  "word": None,
                  "lemma": None,
                  "ctag": None,
                  "tag": None,
                  "feats": None,
                  "head": None,
                  "deps": defaultdict(list),
                  "rel": None,
              }
          )

          self.nodes[0].update({"ctag": "TOP", "tag": "TOP", "address": 0})

          self.root = None

          if tree_str:
              self._parse(
                  tree_str,
                  cell_extractor=cell_extractor,
                  zero_based=zero_based,
                  cell_separator=cell_separator,
                  top_relation_label=top_relation_label,
              )

      @staticmethod
      def _dep_addresses(node):
          """
          Return the addresses of all dependents of ``node`` as a flat list.

          ``deps`` is normally a mapping ``relation -> [addresses]``, but nodes
          built by hand (see the ``contains_cycle`` doctest) may carry a plain
          list of addresses; both shapes are accepted.
          """
          deps = node["deps"]
          if isinstance(deps, dict):
              return list(chain.from_iterable(deps.values()))
          return list(deps)

      def remove_by_address(self, address):
          """
          Removes the node with the given address.  References
          to this node in others will still exist.

          :raise KeyError: if there is no node with that address.
          """
          del self.nodes[address]

      def redirect_arcs(self, originals, redirect):
          """
          Redirects arcs to any of the nodes in the originals list
          to the redirect node address.

          The relation labels of the arcs are preserved; only the dependent
          addresses are rewritten.
          """
          for node in self.nodes.values():
              deps = node["deps"]
              if isinstance(deps, dict):
                  # Only values are reassigned, so iterating the dict is safe.
                  for rel, addresses in deps.items():
                      deps[rel] = [
                          redirect if dep in originals else dep for dep in addresses
                      ]
              else:
                  node["deps"] = [
                      redirect if dep in originals else dep for dep in deps
                  ]

      def add_arc(self, head_address, mod_address):
          """
          Adds an arc from the node specified by head_address to the
          node specified by the mod address.  The arc is filed under the
          modifier's current ``rel`` label.
          """
          relation = self.nodes[mod_address]["rel"]
          self.nodes[head_address]["deps"].setdefault(relation, []).append(
              mod_address
          )

      def connect_graph(self):
          """
          Fully connects all non-root nodes.  Every node receives an arc to
          every other node whose ``rel`` is not ``"TOP"``.
          """
          for node1 in self.nodes.values():
              for node2 in self.nodes.values():
                  if node1["address"] != node2["address"] and node2["rel"] != "TOP":
                      relation = node2["rel"]
                      node1["deps"].setdefault(relation, []).append(
                          node2["address"]
                      )

      def get_by_address(self, node_address):
          """Return the node with the given address.

          Note that, ``self.nodes`` being a ``defaultdict``, asking for a
          missing address creates (and returns) an empty node.
          """
          return self.nodes[node_address]

      def contains_address(self, node_address):
          """
          Returns true if the graph contains a node with the given node
          address, false otherwise.
          """
          return node_address in self.nodes

      def to_dot(self):
          """Return a dot representation suitable for using with Graphviz.

          >>> dg = DependencyGraph(
          ...     'John N 2\\n'
          ...     'loves V 0\\n'
          ...     'Mary N 2'
          ... )
          >>> print(dg.to_dot())
          digraph G{
          edge [dir=forward]
          node [shape=plaintext]
          <BLANKLINE>
          0 [label="0 (None)"]
          0 -> 2 [label="ROOT"]
          1 [label="1 (John)"]
          2 [label="2 (loves)"]
          2 -> 1 [label=""]
          2 -> 3 [label=""]
          3 [label="3 (Mary)"]
          }

          """
          # Start the digraph specification
          parts = [
              "digraph G{\n",
              "edge [dir=forward]\n",
              "node [shape=plaintext]\n",
          ]

          # Draw the nodes, each followed by its outgoing arcs
          for node in sorted(self.nodes.values(), key=lambda v: v["address"]):
              parts.append(
                  '\n%s [label="%s (%s)"]'
                  % (node["address"], node["address"], node["word"])
              )
              for rel, deps in node["deps"].items():
                  for dep in deps:
                      if rel is not None:
                          parts.append(
                              '\n%s -> %s [label="%s"]' % (node["address"], dep, rel)
                          )
                      else:
                          parts.append("\n%s -> %s " % (node["address"], dep))
          parts.append("\n}")

          return "".join(parts)

      def _repr_svg_(self):
          """Show SVG representation of the dependency graph (IPython magic).

          >>> dg = DependencyGraph(
          ...     'John N 2\\n'
          ...     'loves V 0\\n'
          ...     'Mary N 2'
          ... )
          >>> dg._repr_svg_().split('\\n')[0]
          '<?xml version="1.0" encoding="UTF-8" standalone="no"?>'

          :raise Exception: if the ``dot`` binary cannot be started or writes
              anything to its standard error stream.
          """
          dot_string = self.to_dot()

          try:
              process = subprocess.Popen(
                  ["dot", "-Tsvg"],
                  stdin=subprocess.PIPE,
                  stdout=subprocess.PIPE,
                  stderr=subprocess.PIPE,
                  universal_newlines=True,
              )
          except OSError as exc:
              # Keep the original OSError attached as the cause.
              raise Exception(
                  "Cannot find the dot binary from Graphviz package"
              ) from exc
          out, err = process.communicate(dot_string)
          if err:
              raise Exception(
                  "Cannot create svg representation by running dot from string: {}"
                  "".format(dot_string)
              )
          return out

      def __str__(self):
          return pformat(self.nodes)

      def __repr__(self):
          return "<DependencyGraph with {0} nodes>".format(len(self.nodes))

      @staticmethod
      def load(
          filename, zero_based=False, cell_separator=None, top_relation_label="ROOT"
      ):
          """
          :param filename: a name of a file in Malt-TAB format
          :param zero_based: nodes in the input file are numbered starting from 0
              rather than 1 (as produced by, e.g., zpar)
          :param str cell_separator: the cell separator. If not provided, cells
              are split by whitespace.
          :param str top_relation_label: the label by which the top relation is
              identified, for example, `ROOT`, `null` or `TOP`.

          :return: a list of DependencyGraphs, one per blank-line separated
              chunk of the file; chunks that contain only whitespace (such as
              the one after a trailing blank line) are skipped.
          """
          with open(filename) as infile:
              return [
                  DependencyGraph(
                      tree_str,
                      zero_based=zero_based,
                      cell_separator=cell_separator,
                      top_relation_label=top_relation_label,
                  )
                  for tree_str in infile.read().split("\n\n")
                  if tree_str.strip()
              ]

      def left_children(self, node_index):
          """
          Returns the number of left children under the node specified
          by the given address.
          """
          children = chain.from_iterable(self.nodes[node_index]["deps"].values())
          index = self.nodes[node_index]["address"]
          return sum(1 for c in children if c < index)

      def right_children(self, node_index):
          """
          Returns the number of right children under the node specified
          by the given address.
          """
          children = chain.from_iterable(self.nodes[node_index]["deps"].values())
          index = self.nodes[node_index]["address"]
          return sum(1 for c in children if c > index)

      def add_node(self, node):
          """
          Add ``node`` (a dict with at least an ``address`` key) to the graph.
          If a node with that address already exists, nothing happens.
          """
          if not self.contains_address(node["address"]):
              self.nodes[node["address"]].update(node)

      def _parse(
          self,
          input_,
          cell_extractor=None,
          zero_based=False,
          cell_separator=None,
          top_relation_label="ROOT",
      ):
          """Parse a sentence.

          :param input_: either a string or an iterable of lines.
          :param cell_extractor: a function that given a tuple of cells (and,
              optionally, the line index) returns ``index, word, lemma, ctag,
              tag, feats, head, rel``; older extractors that take only the
              cells and return the 7-tuple ``word, lemma, ctag, tag, feats,
              head, rel`` are supported as well.
          :param bool zero_based: if true, every head is shifted by one.
          :param str cell_separator: the cell separator. If not provided, cells
              are split by whitespace.
          :param str top_relation_label: the label by which the top relation is
              identified, for example, `ROOT`, `null` or `TOP`.
          :raise ValueError: if no extractor is given and the number of cells
              per line is not 3, 4, 7 or 10.
          """

          def extract_3_cells(cells, index):
              word, tag, head = cells
              return index, word, word, tag, tag, "", head, ""

          def extract_4_cells(cells, index):
              word, tag, head, rel = cells
              return index, word, word, tag, tag, "", head, rel

          def extract_7_cells(cells, index):
              line_index, word, lemma, tag, _, head, rel = cells
              try:
                  index = int(line_index)
              except ValueError:
                  # index can't be parsed as an integer, use default
                  pass
              return index, word, lemma, tag, tag, "", head, rel

          def extract_10_cells(cells, index):
              line_index, word, lemma, ctag, tag, feats, head, rel, _, _ = cells
              try:
                  index = int(line_index)
              except ValueError:
                  # index can't be parsed as an integer, use default
                  pass
              return index, word, lemma, ctag, tag, feats, head, rel

          extractors = {
              3: extract_3_cells,
              4: extract_4_cells,
              7: extract_7_cells,
              10: extract_10_cells,
          }

          # Always record the label, even when no root is found below.
          self.top_relation_label = top_relation_label

          if isinstance(input_, str):
              input_ = (line for line in input_.split("\n"))

          lines = (l.rstrip() for l in input_)
          lines = (l for l in lines if l)

          cell_number = None
          for index, line in enumerate(lines, start=1):
              cells = line.split(cell_separator)
              if cell_number is None:
                  cell_number = len(cells)
              else:
                  assert cell_number == len(cells), (
                      "Inconsistent number of cells: expected {0}, got {1}".format(
                          cell_number, len(cells)
                      )
                  )

              if cell_extractor is None:
                  try:
                      cell_extractor = extractors[cell_number]
                  except KeyError:
                      raise ValueError(
                          "Number of tab-delimited fields ({0}) not supported by "
                          "CoNLL(10) or Malt-Tab(4) format".format(cell_number)
                      )

              try:
                  index, word, lemma, ctag, tag, feats, head, rel = cell_extractor(
                      cells, index
                  )
              except (TypeError, ValueError):
                  # cell_extractor doesn't take 2 arguments or doesn't return 8
                  # values; assume the cell_extractor is an older external
                  # extractor and doesn't accept or return an index.
                  word, lemma, ctag, tag, feats, head, rel = cell_extractor(cells)

              if head == "_":
                  continue

              head = int(head)
              if zero_based:
                  head += 1

              self.nodes[index].update(
                  {
                      "address": index,
                      "word": word,
                      "lemma": lemma,
                      "ctag": ctag,
                      "tag": tag,
                      "feats": feats,
                      "head": head,
                      "rel": rel,
                  }
              )

              # Make sure that the fake root node has labeled dependencies.
              if (cell_number == 3) and (head == 0):
                  rel = top_relation_label
              self.nodes[head]["deps"][rel].append(index)

          if self.nodes[0]["deps"][top_relation_label]:
              root_address = self.nodes[0]["deps"][top_relation_label][0]
              self.root = self.nodes[root_address]
          else:
              warnings.warn(
                  "The graph doesn't contain a node " "that depends on the root element."
              )

      def _word(self, node, filter=True):
          """
          Return the word of ``node``.

          ``filter`` is kept for backward compatibility only: the word is
          returned unchanged whatever its value.
          """
          return node["word"]

      def _tree(self, i):
          """ Turn dependency graphs into NLTK trees.

          :param int i: index of a node
          :return: either a word (if the indexed node is a leaf) or a ``Tree``.
          """
          node = self.get_by_address(i)
          word = node["word"]
          deps = sorted(chain.from_iterable(node["deps"].values()))

          if deps:
              return Tree(word, [self._tree(dep) for dep in deps])
          else:
              return word

      def tree(self):
          """
          Starting with the ``root`` node, build a dependency tree using the NLTK
          ``Tree`` constructor. Dependency labels are omitted.
          """
          node = self.root

          word = node["word"]
          deps = sorted(chain.from_iterable(node["deps"].values()))
          return Tree(word, [self._tree(dep) for dep in deps])

      def triples(self, node=None):
          """
          Extract dependency triples of the form:
          ((head word, head tag), rel, (dep word, dep tag))

          :param node: the node to start from; defaults to the root node.
          """
          if not node:
              node = self.root

          head = (node["word"], node["ctag"])
          for i in sorted(chain.from_iterable(node["deps"].values())):
              dep = self.get_by_address(i)
              yield (head, dep["rel"], (dep["word"], dep["ctag"]))
              yield from self.triples(node=dep)

      def _hd(self, i):
          """Return the head of node ``i``, or ``None`` if there is no such node."""
          # ``.get`` avoids creating a node as a side effect of the lookup.
          node = self.nodes.get(i)
          return None if node is None else node["head"]

      def _rel(self, i):
          """Return the relation of node ``i``, or ``None`` if there is no such node."""
          node = self.nodes.get(i)
          return None if node is None else node["rel"]

      def contains_cycle(self):
          """Check whether there are cycles.

          :return: ``False`` if the graph is acyclic, otherwise the list of
              addresses along one cycle.

          >>> dg = DependencyGraph(treebank_data)
          >>> dg.contains_cycle()
          False

          >>> cyclic_dg = DependencyGraph()
          >>> top = {'word': None, 'deps': [1], 'rel': 'TOP', 'address': 0}
          >>> child1 = {'word': None, 'deps': [2], 'rel': 'NTOP', 'address': 1}
          >>> child2 = {'word': None, 'deps': [4], 'rel': 'NTOP', 'address': 2}
          >>> child3 = {'word': None, 'deps': [1], 'rel': 'NTOP', 'address': 3}
          >>> child4 = {'word': None, 'deps': [3], 'rel': 'NTOP', 'address': 4}
          >>> cyclic_dg.nodes = {
          ...     0: top,
          ...     1: child1,
          ...     2: child2,
          ...     3: child3,
          ...     4: child4,
          ... }
          >>> cyclic_dg.root = top

          >>> cyclic_dg.contains_cycle()
          [1, 2, 4, 3]

          """
          # A node lies on a cycle exactly when it can reach itself.
          for node in list(self.nodes.values()):
              path = self.get_cycle_path(node, node["address"])
              if path:
                  return path
          return False

      def get_cycle_path(self, curr_node, goal_node_index, _visited=None):
          """
          Return the addresses on a path from ``curr_node`` to a node that has
          ``goal_node_index`` as a dependent, or ``[]`` if there is none.

          :param curr_node: the node to start the search from.
          :param goal_node_index: the address to reach.
          :param _visited: internal bookkeeping of already explored addresses;
              callers should leave it unset.
          """
          if _visited is None:
              _visited = set()
          _visited.add(curr_node["address"])

          deps = self._dep_addresses(curr_node)
          if goal_node_index in deps:
              return [curr_node["address"]]
          for dep in deps:
              # Skip explored nodes (prevents endless recursion on side
              # cycles) and dangling references to nodes that do not exist.
              if dep in _visited or dep not in self.nodes:
                  continue
              path = self.get_cycle_path(self.nodes[dep], goal_node_index, _visited)
              if path:
                  path.insert(0, curr_node["address"])
                  return path
          return []

      def to_conll(self, style):
          """
          The dependency graph in CoNLL format.

          :param style: the style to use for the format (3, 4, 10 columns)
          :type style: int
          :rtype: str
          :raise ValueError: if ``style`` is not 3, 4 or 10.
          """

          if style == 3:
              template = "{word}\t{tag}\t{head}\n"
          elif style == 4:
              template = "{word}\t{tag}\t{head}\t{rel}\n"
          elif style == 10:
              template = (
                  "{i}\t{word}\t{lemma}\t{ctag}\t{tag}\t{feats}\t{head}\t{rel}\t_\t_\n"
              )
          else:
              raise ValueError(
                  "Number of tab-delimited fields ({0}) not supported by "
                  "CoNLL(10) or Malt-Tab(4) format".format(style)
              )

          return "".join(
              template.format(i=i, **node)
              for i, node in sorted(self.nodes.items())
              if node["tag"] != "TOP"
          )

      def nx_graph(self):
          """Convert the data in a ``nodelist`` into a networkx labeled directed graph.

          As a side effect the node labels (words) are stored in
          ``self.nx_labels``.
          """
          import networkx

          nx_nodelist = list(range(1, len(self.nodes)))
          nx_edgelist = [
              (n, self._hd(n), self._rel(n)) for n in nx_nodelist if self._hd(n)
          ]
          self.nx_labels = {}
          for n in nx_nodelist:
              self.nx_labels[n] = self.nodes[n]["word"]

          g = networkx.MultiDiGraph()
          g.add_nodes_from(nx_nodelist)
          g.add_edges_from(nx_edgelist)

          return g
  class DependencyGraphError(Exception):
      """Exception type reserved for errors related to dependency graphs."""

      pass
  def demo():
      """Run every demonstration in this module, one after the other."""
      for run_demo in (malt_demo, conll_demo, conll_file_demo, cycle_finding_demo):
          run_demo()
  def malt_demo(nx=False):
      """
      A demonstration of the result of reading a dependency
      version of the first sentence of the Penn Treebank.

      :param bool nx: if true, additionally try to draw the graph with
          networkx and matplotlib (marked below as currently not working).
      """
      sentence = """Pierre NNP 2 NMOD
  Vinken NNP 8 SUB
  , , 2 P
  61 CD 5 NMOD
  years NNS 6 AMOD
  old JJ 2 NMOD
  , , 2 P
  will MD 0 ROOT
  join VB 8 VC
  the DT 11 NMOD
  board NN 9 OBJ
  as IN 9 VMOD
  a DT 15 NMOD
  nonexecutive JJ 15 NMOD
  director NN 12 PMOD
  Nov. NNP 9 VMOD
  29 CD 16 NMOD
  . . 9 VMOD
  """
      dg = DependencyGraph(sentence)
      dg.tree().pprint()

      if not nx:
          return

      # currently doesn't work
      import networkx
      from matplotlib import pylab

      graph = dg.nx_graph()
      graph.info()
      layout = networkx.spring_layout(graph, dim=1)
      networkx.draw_networkx_nodes(graph, layout, node_size=50)
      # networkx.draw_networkx_edges(graph, layout, edge_color='k', width=8)
      networkx.draw_networkx_labels(graph, layout, dg.nx_labels)
      pylab.xticks([])
      pylab.yticks([])
      pylab.savefig("tree.png")
      pylab.show()
  def conll_demo():
      """
      Demonstrate reading a CoNLL-format dependency tree from a string:
      print its tree, its node table and its 4-column rendering.
      """
      graph = DependencyGraph(conll_data1)
      graph.tree().pprint()
      print(graph)
      print(graph.to_conll(4))
  def conll_file_demo():
      """
      Parse every blank-line separated sentence of ``conll_data2`` and
      pretty-print the resulting trees.
      """
      print("Mass conll_read demo...")
      # All sentences are parsed before anything is printed.
      sentences = filter(None, conll_data2.split("\n\n"))
      graphs = list(map(DependencyGraph, sentences))
      for graph in graphs:
          rendered = graph.tree()
          print("\n")
          rendered.pprint()
  def cycle_finding_demo():
      """
      Run ``contains_cycle`` on the (acyclic) treebank sentence and on a
      small hand-built cyclic graph, printing both results.
      """
      acyclic = DependencyGraph(treebank_data)
      print(acyclic.contains_cycle())

      cyclic_dg = DependencyGraph()
      # (address, relation, dependents) of the hand-built nodes.
      # NOTE: ``add_node`` ignores addresses that already exist, and a fresh
      # graph already holds node 0, so the first entry does not change it.
      node_specs = [
          (0, "TOP", [1]),
          (1, "NTOP", [2]),
          (2, "NTOP", [4]),
          (3, "NTOP", [1]),
          (4, "NTOP", [3]),
      ]
      for address, rel, deps in node_specs:
          cyclic_dg.add_node(
              {"word": None, "deps": deps, "rel": rel, "address": address}
          )

      print(cyclic_dg.contains_cycle())
  # Demo data.  Columns are whitespace separated.

  # First sentence of the Penn Treebank in 4-column Malt-TAB form:
  # word, tag, head, relation.
  treebank_data = """Pierre NNP 2 NMOD
  Vinken NNP 8 SUB
  , , 2 P
  61 CD 5 NMOD
  years NNS 6 AMOD
  old JJ 2 NMOD
  , , 2 P
  will MD 0 ROOT
  join VB 8 VC
  the DT 11 NMOD
  board NN 9 OBJ
  as IN 9 VMOD
  a DT 15 NMOD
  nonexecutive JJ 15 NMOD
  director NN 12 PMOD
  Nov. NNP 9 VMOD
  29 CD 16 NMOD
  . . 9 VMOD
  """

  # A single sentence in 10-column CoNLL form.
  conll_data1 = """
  1 Ze ze Pron Pron per|3|evofmv|nom 2 su _ _
  2 had heb V V trans|ovt|1of2of3|ev 0 ROOT _ _
  3 met met Prep Prep voor 8 mod _ _
  4 haar haar Pron Pron bez|3|ev|neut|attr 5 det _ _
  5 moeder moeder N N soort|ev|neut 3 obj1 _ _
  6 kunnen kan V V hulp|ott|1of2of3|mv 2 vc _ _
  7 gaan ga V V hulp|inf 6 vc _ _
  8 winkelen winkel V V intrans|inf 11 cnj _ _
  9 , , Punc Punc komma 8 punct _ _
  10 zwemmen zwem V V intrans|inf 11 cnj _ _
  11 of of Conj Conj neven 7 vc _ _
  12 terrassen terras N N soort|mv|neut 11 cnj _ _
  13 . . Punc Punc punt 12 punct _ _
  """

  # Several sentences in 10-column CoNLL form.  Sentences are separated by an
  # empty line, which is what ``conll_file_demo`` splits on ("\n\n"); the
  # token index restarts at 1 in every sentence.
  conll_data2 = """1 Cathy Cathy N N eigen|ev|neut 2 su _ _
  2 zag zie V V trans|ovt|1of2of3|ev 0 ROOT _ _
  3 hen hen Pron Pron per|3|mv|datofacc 2 obj1 _ _
  4 wild wild Adj Adj attr|stell|onverv 5 mod _ _
  5 zwaaien zwaai N N soort|mv|neut 2 vc _ _
  6 . . Punc Punc punt 5 punct _ _

  1 Ze ze Pron Pron per|3|evofmv|nom 2 su _ _
  2 had heb V V trans|ovt|1of2of3|ev 0 ROOT _ _
  3 met met Prep Prep voor 8 mod _ _
  4 haar haar Pron Pron bez|3|ev|neut|attr 5 det _ _
  5 moeder moeder N N soort|ev|neut 3 obj1 _ _
  6 kunnen kan V V hulp|ott|1of2of3|mv 2 vc _ _
  7 gaan ga V V hulp|inf 6 vc _ _
  8 winkelen winkel V V intrans|inf 11 cnj _ _
  9 , , Punc Punc komma 8 punct _ _
  10 zwemmen zwem V V intrans|inf 11 cnj _ _
  11 of of Conj Conj neven 7 vc _ _
  12 terrassen terras N N soort|mv|neut 11 cnj _ _
  13 . . Punc Punc punt 12 punct _ _

  1 Dat dat Pron Pron aanw|neut|attr 2 det _ _
  2 werkwoord werkwoord N N soort|ev|neut 6 obj1 _ _
  3 had heb V V hulp|ovt|1of2of3|ev 0 ROOT _ _
  4 ze ze Pron Pron per|3|evofmv|nom 6 su _ _
  5 zelf zelf Pron Pron aanw|neut|attr|wzelf 3 predm _ _
  6 uitgevonden vind V V trans|verldw|onverv 3 vc _ _
  7 . . Punc Punc punt 6 punct _ _

  1 Het het Pron Pron onbep|neut|zelfst 2 su _ _
  2 hoorde hoor V V trans|ovt|1of2of3|ev 0 ROOT _ _
  3 bij bij Prep Prep voor 2 ld _ _
  4 de de Art Art bep|zijdofmv|neut 6 det _ _
  5 warme warm Adj Adj attr|stell|vervneut 6 mod _ _
  6 zomerdag zomerdag N N soort|ev|neut 3 obj1 _ _
  7 die die Pron Pron betr|neut|zelfst 6 mod _ _
  8 ze ze Pron Pron per|3|evofmv|nom 12 su _ _
  9 ginds ginds Adv Adv gew|aanw 12 mod _ _
  10 achter achter Adv Adv gew|geenfunc|stell|onverv 12 svp _ _
  11 had heb V V hulp|ovt|1of2of3|ev 7 body _ _
  12 gelaten laat V V trans|verldw|onverv 11 vc _ _
  13 . . Punc Punc punt 12 punct _ _

  1 Ze ze Pron Pron per|3|evofmv|nom 2 su _ _
  2 hadden heb V V trans|ovt|1of2of3|mv 0 ROOT _ _
  3 languit languit Adv Adv gew|geenfunc|stell|onverv 11 mod _ _
  4 naast naast Prep Prep voor 11 mod _ _
  5 elkaar elkaar Pron Pron rec|neut 4 obj1 _ _
  6 op op Prep Prep voor 11 ld _ _
  7 de de Art Art bep|zijdofmv|neut 8 det _ _
  8 strandstoelen strandstoel N N soort|mv|neut 6 obj1 _ _
  9 kunnen kan V V hulp|inf 2 vc _ _
  10 gaan ga V V hulp|inf 9 vc _ _
  11 liggen lig V V intrans|inf 10 vc _ _
  12 . . Punc Punc punt 11 punct _ _

  1 Zij zij Pron Pron per|3|evofmv|nom 2 su _ _
  2 zou zal V V hulp|ovt|1of2of3|ev 7 cnj _ _
  3 mams mams N N soort|ev|neut 4 det _ _
  4 rug rug N N soort|ev|neut 5 obj1 _ _
  5 ingewreven wrijf V V trans|verldw|onverv 6 vc _ _
  6 hebben heb V V hulp|inf 2 vc _ _
  7 en en Conj Conj neven 0 ROOT _ _
  8 mam mam V V trans|ovt|1of2of3|ev 7 cnj _ _
  9 de de Art Art bep|zijdofmv|neut 10 det _ _
  10 hare hare Pron Pron bez|3|ev|neut|attr 8 obj1 _ _
  11 . . Punc Punc punt 10 punct _ _

  1 Of of Conj Conj onder|metfin 0 ROOT _ _
  2 ze ze Pron Pron per|3|evofmv|nom 3 su _ _
  3 had heb V V hulp|ovt|1of2of3|ev 0 ROOT _ _
  4 gewoon gewoon Adj Adj adv|stell|onverv 10 mod _ _
  5 met met Prep Prep voor 10 mod _ _
  6 haar haar Pron Pron bez|3|ev|neut|attr 7 det _ _
  7 vriendinnen vriendin N N soort|mv|neut 5 obj1 _ _
  8 rond rond Adv Adv deelv 10 svp _ _
  9 kunnen kan V V hulp|inf 3 vc _ _
  10 slenteren slenter V V intrans|inf 9 vc _ _
  11 in in Prep Prep voor 10 mod _ _
  12 de de Art Art bep|zijdofmv|neut 13 det _ _
  13 buurt buurt N N soort|ev|neut 11 obj1 _ _
  14 van van Prep Prep voor 13 mod _ _
  15 Trafalgar_Square Trafalgar_Square MWU N_N eigen|ev|neut_eigen|ev|neut 14 obj1 _ _
  16 . . Punc Punc punt 15 punct _ _
  """

  # Run all demos when the module is executed as a script.
  if __name__ == "__main__":
      demo()