cfg.py 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860
  1. # Natural Language Toolkit: CFG visualization
  2. #
  3. # Copyright (C) 2001-2020 NLTK Project
  4. # Author: Edward Loper <edloper@gmail.com>
  5. # URL: <http://nltk.org/>
  6. # For license information, see LICENSE.TXT
  7. """
  8. Visualization tools for CFGs.
  9. """
# Idea for a nice demo:
#   - 3 panes: grammar, treelet, working area
#     - grammar is a list of productions
#     - when you select a production, the treelet that it licenses appears
#       in the treelet area
#     - the working area has the text on the bottom, and S at top.  When
#       you select a production, it shows (ghosted) the locations where
#       that production's treelet could be attached to either the text
#       or the tree rooted at S.
#     - the user can drag the treelet onto one of those (or click on them?)
#     - the user can delete pieces of the tree from the working area
#       (right click?)
#     - connecting top to bottom? drag one NP onto another?
#
# +-------------------------------------------------------------+
# | S -> NP VP   |                 S                            |
# |[NP -> Det N ]|                / \                           |
# |     ...      |              NP  VP                          |
# | N -> 'dog'   |                                              |
# | N -> 'cat'   |                                              |
# |     ...      |                                              |
# +--------------+                                              |
# |      NP      |                      Det     N               |
# |     /  \     |                       |      |               |
# |   Det   N    |  the    cat    saw   the    dog              |
# |              |                                              |
# +--------------+----------------------------------------------+
#
# Operations:
#   - connect a new treelet -- drag or click shadow
#   - delete a treelet -- right click
#     - if only connected to top, delete everything below
#     - if only connected to bottom, delete everything above
#   - connect top & bottom -- drag a leaf to a root or a root to a leaf
#   - disconnect top & bottom -- right click
#     - if connected to top & bottom, then disconnect
  46. import re
  47. from tkinter import (
  48. Button,
  49. Canvas,
  50. Entry,
  51. Frame,
  52. IntVar,
  53. Label,
  54. Scrollbar,
  55. Text,
  56. Tk,
  57. Toplevel,
  58. )
  59. from nltk.grammar import CFG, _read_cfg_production, Nonterminal, nonterminals
  60. from nltk.tree import Tree
  61. from nltk.draw.tree import TreeSegmentWidget, tree_to_treesegment
  62. from nltk.draw.util import (
  63. CanvasFrame,
  64. ColorizedList,
  65. ShowText,
  66. SymbolWidget,
  67. TextWidget,
  68. )
  69. ######################################################################
  70. # Production List
  71. ######################################################################
  72. class ProductionList(ColorizedList):
  73. ARROW = SymbolWidget.SYMBOLS["rightarrow"]
  74. def _init_colortags(self, textwidget, options):
  75. textwidget.tag_config("terminal", foreground="#006000")
  76. textwidget.tag_config("arrow", font="symbol", underline="0")
  77. textwidget.tag_config(
  78. "nonterminal", foreground="blue", font=("helvetica", -12, "bold")
  79. )
  80. def _item_repr(self, item):
  81. contents = []
  82. contents.append(("%s\t" % item.lhs(), "nonterminal"))
  83. contents.append((self.ARROW, "arrow"))
  84. for elt in item.rhs():
  85. if isinstance(elt, Nonterminal):
  86. contents.append((" %s" % elt.symbol(), "nonterminal"))
  87. else:
  88. contents.append((" %r" % elt, "terminal"))
  89. return contents
  90. ######################################################################
  91. # CFG Editor
  92. ######################################################################
# Help text for the CFG editor.  CFGEditor._help() strips the
# surrounding whitespace and displays it in a ShowText window.
_CFGEditor_HELP = """
The CFG Editor can be used to create or modify context free grammars.
A context free grammar consists of a start symbol and a list of
productions. The start symbol is specified by the text entry field in
the upper right hand corner of the editor; and the list of productions
are specified in the main text editing box.
Every non-blank line specifies a single production. Each production
has the form "LHS -> RHS," where LHS is a single nonterminal, and RHS
is a list of nonterminals and terminals.
Nonterminals must be a single word, such as S or NP or NP_subj.
Currently, nonterminals must consists of alphanumeric characters and
underscores (_). Nonterminals are colored blue. If you place the
mouse over any nonterminal, then all occurrences of that nonterminal
will be highlighted.
Terminals must be surrounded by single quotes (') or double
quotes(\"). For example, "dog" and "New York" are terminals.
Currently, the string within the quotes must consist of alphanumeric
characters, underscores, and spaces.
To enter a new production, go to a blank line, and type a nonterminal,
followed by an arrow (->), followed by a sequence of terminals and
nonterminals. Note that "->" (dash + greater-than) is automatically
converted to an arrow symbol. When you move your cursor to a
different line, your production will automatically be colorized. If
there are any errors, they will be highlighted in red.
Note that the order of the productions is significant for some
algorithms. To re-order the productions, use cut and paste to move
them.
Use the buttons at the bottom of the window when you are done editing
the CFG:
- Ok: apply the new CFG, and exit the editor.
- Apply: apply the new CFG, and do not exit the editor.
- Reset: revert to the original CFG, and do not exit the editor.
- Cancel: revert to the original CFG, and exit the editor.
"""
  127. class CFGEditor(object):
  128. """
  129. A dialog window for creating and editing context free grammars.
  130. ``CFGEditor`` imposes the following restrictions:
  131. - All nonterminals must be strings consisting of word
  132. characters.
  133. - All terminals must be strings consisting of word characters
  134. and space characters.
  135. """
  136. # Regular expressions used by _analyze_line. Precompile them, so
  137. # we can process the text faster.
  138. ARROW = SymbolWidget.SYMBOLS["rightarrow"]
  139. _LHS_RE = re.compile(r"(^\s*\w+\s*)(->|(" + ARROW + "))")
  140. _ARROW_RE = re.compile("\s*(->|(" + ARROW + "))\s*")
  141. _PRODUCTION_RE = re.compile(
  142. r"(^\s*\w+\s*)"
  143. + "(->|(" # LHS
  144. + ARROW
  145. + "))\s*"
  146. + r"((\w+|'[\w ]*'|\"[\w ]*\"|\|)\s*)*$" # arrow
  147. ) # RHS
  148. _TOKEN_RE = re.compile("\\w+|->|'[\\w ]+'|\"[\\w ]+\"|(" + ARROW + ")")
  149. _BOLD = ("helvetica", -12, "bold")
  150. def __init__(self, parent, cfg=None, set_cfg_callback=None):
  151. self._parent = parent
  152. if cfg is not None:
  153. self._cfg = cfg
  154. else:
  155. self._cfg = CFG(Nonterminal("S"), [])
  156. self._set_cfg_callback = set_cfg_callback
  157. self._highlight_matching_nonterminals = 1
  158. # Create the top-level window.
  159. self._top = Toplevel(parent)
  160. self._init_bindings()
  161. self._init_startframe()
  162. self._startframe.pack(side="top", fill="x", expand=0)
  163. self._init_prodframe()
  164. self._prodframe.pack(side="top", fill="both", expand=1)
  165. self._init_buttons()
  166. self._buttonframe.pack(side="bottom", fill="x", expand=0)
  167. self._textwidget.focus()
  168. def _init_startframe(self):
  169. frame = self._startframe = Frame(self._top)
  170. self._start = Entry(frame)
  171. self._start.pack(side="right")
  172. Label(frame, text="Start Symbol:").pack(side="right")
  173. Label(frame, text="Productions:").pack(side="left")
  174. self._start.insert(0, self._cfg.start().symbol())
  175. def _init_buttons(self):
  176. frame = self._buttonframe = Frame(self._top)
  177. Button(frame, text="Ok", command=self._ok, underline=0, takefocus=0).pack(
  178. side="left"
  179. )
  180. Button(frame, text="Apply", command=self._apply, underline=0, takefocus=0).pack(
  181. side="left"
  182. )
  183. Button(frame, text="Reset", command=self._reset, underline=0, takefocus=0).pack(
  184. side="left"
  185. )
  186. Button(
  187. frame, text="Cancel", command=self._cancel, underline=0, takefocus=0
  188. ).pack(side="left")
  189. Button(frame, text="Help", command=self._help, underline=0, takefocus=0).pack(
  190. side="right"
  191. )
  192. def _init_bindings(self):
  193. self._top.title("CFG Editor")
  194. self._top.bind("<Control-q>", self._cancel)
  195. self._top.bind("<Alt-q>", self._cancel)
  196. self._top.bind("<Control-d>", self._cancel)
  197. # self._top.bind('<Control-x>', self._cancel)
  198. self._top.bind("<Alt-x>", self._cancel)
  199. self._top.bind("<Escape>", self._cancel)
  200. # self._top.bind('<Control-c>', self._cancel)
  201. self._top.bind("<Alt-c>", self._cancel)
  202. self._top.bind("<Control-o>", self._ok)
  203. self._top.bind("<Alt-o>", self._ok)
  204. self._top.bind("<Control-a>", self._apply)
  205. self._top.bind("<Alt-a>", self._apply)
  206. self._top.bind("<Control-r>", self._reset)
  207. self._top.bind("<Alt-r>", self._reset)
  208. self._top.bind("<Control-h>", self._help)
  209. self._top.bind("<Alt-h>", self._help)
  210. self._top.bind("<F1>", self._help)
  211. def _init_prodframe(self):
  212. self._prodframe = Frame(self._top)
  213. # Create the basic Text widget & scrollbar.
  214. self._textwidget = Text(
  215. self._prodframe, background="#e0e0e0", exportselection=1
  216. )
  217. self._textscroll = Scrollbar(self._prodframe, takefocus=0, orient="vertical")
  218. self._textwidget.config(yscrollcommand=self._textscroll.set)
  219. self._textscroll.config(command=self._textwidget.yview)
  220. self._textscroll.pack(side="right", fill="y")
  221. self._textwidget.pack(expand=1, fill="both", side="left")
  222. # Initialize the colorization tags. Each nonterminal gets its
  223. # own tag, so they aren't listed here.
  224. self._textwidget.tag_config("terminal", foreground="#006000")
  225. self._textwidget.tag_config("arrow", font="symbol")
  226. self._textwidget.tag_config("error", background="red")
  227. # Keep track of what line they're on. We use that to remember
  228. # to re-analyze a line whenever they leave it.
  229. self._linenum = 0
  230. # Expand "->" to an arrow.
  231. self._top.bind(">", self._replace_arrows)
  232. # Re-colorize lines when appropriate.
  233. self._top.bind("<<Paste>>", self._analyze)
  234. self._top.bind("<KeyPress>", self._check_analyze)
  235. self._top.bind("<ButtonPress>", self._check_analyze)
  236. # Tab cycles focus. (why doesn't this work??)
  237. def cycle(e, textwidget=self._textwidget):
  238. textwidget.tk_focusNext().focus()
  239. self._textwidget.bind("<Tab>", cycle)
  240. prod_tuples = [(p.lhs(), [p.rhs()]) for p in self._cfg.productions()]
  241. for i in range(len(prod_tuples) - 1, 0, -1):
  242. if prod_tuples[i][0] == prod_tuples[i - 1][0]:
  243. if () in prod_tuples[i][1]:
  244. continue
  245. if () in prod_tuples[i - 1][1]:
  246. continue
  247. print(prod_tuples[i - 1][1])
  248. print(prod_tuples[i][1])
  249. prod_tuples[i - 1][1].extend(prod_tuples[i][1])
  250. del prod_tuples[i]
  251. for lhs, rhss in prod_tuples:
  252. print(lhs, rhss)
  253. s = "%s ->" % lhs
  254. for rhs in rhss:
  255. for elt in rhs:
  256. if isinstance(elt, Nonterminal):
  257. s += " %s" % elt
  258. else:
  259. s += " %r" % elt
  260. s += " |"
  261. s = s[:-2] + "\n"
  262. self._textwidget.insert("end", s)
  263. self._analyze()
  264. # # Add the producitons to the text widget, and colorize them.
  265. # prod_by_lhs = {}
  266. # for prod in self._cfg.productions():
  267. # if len(prod.rhs()) > 0:
  268. # prod_by_lhs.setdefault(prod.lhs(),[]).append(prod)
  269. # for (lhs, prods) in prod_by_lhs.items():
  270. # self._textwidget.insert('end', '%s ->' % lhs)
  271. # self._textwidget.insert('end', self._rhs(prods[0]))
  272. # for prod in prods[1:]:
  273. # print '\t|'+self._rhs(prod),
  274. # self._textwidget.insert('end', '\t|'+self._rhs(prod))
  275. # print
  276. # self._textwidget.insert('end', '\n')
  277. # for prod in self._cfg.productions():
  278. # if len(prod.rhs()) == 0:
  279. # self._textwidget.insert('end', '%s' % prod)
  280. # self._analyze()
  281. # def _rhs(self, prod):
  282. # s = ''
  283. # for elt in prod.rhs():
  284. # if isinstance(elt, Nonterminal): s += ' %s' % elt.symbol()
  285. # else: s += ' %r' % elt
  286. # return s
  287. def _clear_tags(self, linenum):
  288. """
  289. Remove all tags (except ``arrow`` and ``sel``) from the given
  290. line of the text widget used for editing the productions.
  291. """
  292. start = "%d.0" % linenum
  293. end = "%d.end" % linenum
  294. for tag in self._textwidget.tag_names():
  295. if tag not in ("arrow", "sel"):
  296. self._textwidget.tag_remove(tag, start, end)
  297. def _check_analyze(self, *e):
  298. """
  299. Check if we've moved to a new line. If we have, then remove
  300. all colorization from the line we moved to, and re-colorize
  301. the line that we moved from.
  302. """
  303. linenum = int(self._textwidget.index("insert").split(".")[0])
  304. if linenum != self._linenum:
  305. self._clear_tags(linenum)
  306. self._analyze_line(self._linenum)
  307. self._linenum = linenum
  308. def _replace_arrows(self, *e):
  309. """
  310. Replace any ``'->'`` text strings with arrows (char \\256, in
  311. symbol font). This searches the whole buffer, but is fast
  312. enough to be done anytime they press '>'.
  313. """
  314. arrow = "1.0"
  315. while True:
  316. arrow = self._textwidget.search("->", arrow, "end+1char")
  317. if arrow == "":
  318. break
  319. self._textwidget.delete(arrow, arrow + "+2char")
  320. self._textwidget.insert(arrow, self.ARROW, "arrow")
  321. self._textwidget.insert(arrow, "\t")
  322. arrow = "1.0"
  323. while True:
  324. arrow = self._textwidget.search(self.ARROW, arrow + "+1char", "end+1char")
  325. if arrow == "":
  326. break
  327. self._textwidget.tag_add("arrow", arrow, arrow + "+1char")
  328. def _analyze_token(self, match, linenum):
  329. """
  330. Given a line number and a regexp match for a token on that
  331. line, colorize the token. Note that the regexp match gives us
  332. the token's text, start index (on the line), and end index (on
  333. the line).
  334. """
  335. # What type of token is it?
  336. if match.group()[0] in "'\"":
  337. tag = "terminal"
  338. elif match.group() in ("->", self.ARROW):
  339. tag = "arrow"
  340. else:
  341. # If it's a nonterminal, then set up new bindings, so we
  342. # can highlight all instances of that nonterminal when we
  343. # put the mouse over it.
  344. tag = "nonterminal_" + match.group()
  345. if tag not in self._textwidget.tag_names():
  346. self._init_nonterminal_tag(tag)
  347. start = "%d.%d" % (linenum, match.start())
  348. end = "%d.%d" % (linenum, match.end())
  349. self._textwidget.tag_add(tag, start, end)
  350. def _init_nonterminal_tag(self, tag, foreground="blue"):
  351. self._textwidget.tag_config(tag, foreground=foreground, font=CFGEditor._BOLD)
  352. if not self._highlight_matching_nonterminals:
  353. return
  354. def enter(e, textwidget=self._textwidget, tag=tag):
  355. textwidget.tag_config(tag, background="#80ff80")
  356. def leave(e, textwidget=self._textwidget, tag=tag):
  357. textwidget.tag_config(tag, background="")
  358. self._textwidget.tag_bind(tag, "<Enter>", enter)
  359. self._textwidget.tag_bind(tag, "<Leave>", leave)
  360. def _analyze_line(self, linenum):
  361. """
  362. Colorize a given line.
  363. """
  364. # Get rid of any tags that were previously on the line.
  365. self._clear_tags(linenum)
  366. # Get the line line's text string.
  367. line = self._textwidget.get(repr(linenum) + ".0", repr(linenum) + ".end")
  368. # If it's a valid production, then colorize each token.
  369. if CFGEditor._PRODUCTION_RE.match(line):
  370. # It's valid; Use _TOKEN_RE to tokenize the production,
  371. # and call analyze_token on each token.
  372. def analyze_token(match, self=self, linenum=linenum):
  373. self._analyze_token(match, linenum)
  374. return ""
  375. CFGEditor._TOKEN_RE.sub(analyze_token, line)
  376. elif line.strip() != "":
  377. # It's invalid; show the user where the error is.
  378. self._mark_error(linenum, line)
  379. def _mark_error(self, linenum, line):
  380. """
  381. Mark the location of an error in a line.
  382. """
  383. arrowmatch = CFGEditor._ARROW_RE.search(line)
  384. if not arrowmatch:
  385. # If there's no arrow at all, highlight the whole line.
  386. start = "%d.0" % linenum
  387. end = "%d.end" % linenum
  388. elif not CFGEditor._LHS_RE.match(line):
  389. # Otherwise, if the LHS is bad, highlight it.
  390. start = "%d.0" % linenum
  391. end = "%d.%d" % (linenum, arrowmatch.start())
  392. else:
  393. # Otherwise, highlight the RHS.
  394. start = "%d.%d" % (linenum, arrowmatch.end())
  395. end = "%d.end" % linenum
  396. # If we're highlighting 0 chars, highlight the whole line.
  397. if self._textwidget.compare(start, "==", end):
  398. start = "%d.0" % linenum
  399. end = "%d.end" % linenum
  400. self._textwidget.tag_add("error", start, end)
  401. def _analyze(self, *e):
  402. """
  403. Replace ``->`` with arrows, and colorize the entire buffer.
  404. """
  405. self._replace_arrows()
  406. numlines = int(self._textwidget.index("end").split(".")[0])
  407. for linenum in range(1, numlines + 1): # line numbers start at 1.
  408. self._analyze_line(linenum)
  409. def _parse_productions(self):
  410. """
  411. Parse the current contents of the textwidget buffer, to create
  412. a list of productions.
  413. """
  414. productions = []
  415. # Get the text, normalize it, and split it into lines.
  416. text = self._textwidget.get("1.0", "end")
  417. text = re.sub(self.ARROW, "->", text)
  418. text = re.sub("\t", " ", text)
  419. lines = text.split("\n")
  420. # Convert each line to a CFG production
  421. for line in lines:
  422. line = line.strip()
  423. if line == "":
  424. continue
  425. productions += _read_cfg_production(line)
  426. # if line.strip() == '': continue
  427. # if not CFGEditor._PRODUCTION_RE.match(line):
  428. # raise ValueError('Bad production string %r' % line)
  429. #
  430. # (lhs_str, rhs_str) = line.split('->')
  431. # lhs = Nonterminal(lhs_str.strip())
  432. # rhs = []
  433. # def parse_token(match, rhs=rhs):
  434. # token = match.group()
  435. # if token[0] in "'\"": rhs.append(token[1:-1])
  436. # else: rhs.append(Nonterminal(token))
  437. # return ''
  438. # CFGEditor._TOKEN_RE.sub(parse_token, rhs_str)
  439. #
  440. # productions.append(Production(lhs, *rhs))
  441. return productions
  442. def _destroy(self, *e):
  443. if self._top is None:
  444. return
  445. self._top.destroy()
  446. self._top = None
  447. def _ok(self, *e):
  448. self._apply()
  449. self._destroy()
  450. def _apply(self, *e):
  451. productions = self._parse_productions()
  452. start = Nonterminal(self._start.get())
  453. cfg = CFG(start, productions)
  454. if self._set_cfg_callback is not None:
  455. self._set_cfg_callback(cfg)
  456. def _reset(self, *e):
  457. self._textwidget.delete("1.0", "end")
  458. for production in self._cfg.productions():
  459. self._textwidget.insert("end", "%s\n" % production)
  460. self._analyze()
  461. if self._set_cfg_callback is not None:
  462. self._set_cfg_callback(self._cfg)
  463. def _cancel(self, *e):
  464. try:
  465. self._reset()
  466. except:
  467. pass
  468. self._destroy()
  469. def _help(self, *e):
  470. # The default font's not very legible; try using 'fixed' instead.
  471. try:
  472. ShowText(
  473. self._parent,
  474. "Help: Chart Parser Demo",
  475. (_CFGEditor_HELP).strip(),
  476. width=75,
  477. font="fixed",
  478. )
  479. except:
  480. ShowText(
  481. self._parent,
  482. "Help: Chart Parser Demo",
  483. (_CFGEditor_HELP).strip(),
  484. width=75,
  485. )
  486. ######################################################################
  487. # New Demo (built tree based on cfg)
  488. ######################################################################
  489. class CFGDemo(object):
  490. def __init__(self, grammar, text):
  491. self._grammar = grammar
  492. self._text = text
  493. # Set up the main window.
  494. self._top = Tk()
  495. self._top.title("Context Free Grammar Demo")
  496. # Base font size
  497. self._size = IntVar(self._top)
  498. self._size.set(12) # = medium
  499. # Set up the key bindings
  500. self._init_bindings(self._top)
  501. # Create the basic frames
  502. frame1 = Frame(self._top)
  503. frame1.pack(side="left", fill="y", expand=0)
  504. self._init_menubar(self._top)
  505. self._init_buttons(self._top)
  506. self._init_grammar(frame1)
  507. self._init_treelet(frame1)
  508. self._init_workspace(self._top)
  509. # //////////////////////////////////////////////////
  510. # Initialization
  511. # //////////////////////////////////////////////////
  512. def _init_bindings(self, top):
  513. top.bind("<Control-q>", self.destroy)
  514. def _init_menubar(self, parent):
  515. pass
  516. def _init_buttons(self, parent):
  517. pass
  518. def _init_grammar(self, parent):
  519. self._prodlist = ProductionList(parent, self._grammar, width=20)
  520. self._prodlist.pack(side="top", fill="both", expand=1)
  521. self._prodlist.focus()
  522. self._prodlist.add_callback("select", self._selectprod_cb)
  523. self._prodlist.add_callback("move", self._selectprod_cb)
  524. def _init_treelet(self, parent):
  525. self._treelet_canvas = Canvas(parent, background="white")
  526. self._treelet_canvas.pack(side="bottom", fill="x")
  527. self._treelet = None
  528. def _init_workspace(self, parent):
  529. self._workspace = CanvasFrame(parent, background="white")
  530. self._workspace.pack(side="right", fill="both", expand=1)
  531. self._tree = None
  532. self.reset_workspace()
  533. # //////////////////////////////////////////////////
  534. # Workspace
  535. # //////////////////////////////////////////////////
  536. def reset_workspace(self):
  537. c = self._workspace.canvas()
  538. fontsize = int(self._size.get())
  539. node_font = ("helvetica", -(fontsize + 4), "bold")
  540. leaf_font = ("helvetica", -(fontsize + 2))
  541. # Remove the old tree
  542. if self._tree is not None:
  543. self._workspace.remove_widget(self._tree)
  544. # The root of the tree.
  545. start = self._grammar.start().symbol()
  546. rootnode = TextWidget(c, start, font=node_font, draggable=1)
  547. # The leaves of the tree.
  548. leaves = []
  549. for word in self._text:
  550. leaves.append(TextWidget(c, word, font=leaf_font, draggable=1))
  551. # Put it all together into one tree
  552. self._tree = TreeSegmentWidget(c, rootnode, leaves, color="white")
  553. # Add it to the workspace.
  554. self._workspace.add_widget(self._tree)
  555. # Move the leaves to the bottom of the workspace.
  556. for leaf in leaves:
  557. leaf.move(0, 100)
  558. # self._nodes = {start:1}
  559. # self._leaves = dict([(l,1) for l in leaves])
  560. def workspace_markprod(self, production):
  561. pass
  562. def _markproduction(self, prod, tree=None):
  563. if tree is None:
  564. tree = self._tree
  565. for i in range(len(tree.subtrees()) - len(prod.rhs())):
  566. if tree["color", i] == "white":
  567. self._markproduction # FIXME: Is this necessary at all?
  568. for j, node in enumerate(prod.rhs()):
  569. widget = tree.subtrees()[i + j]
  570. if (
  571. isinstance(node, Nonterminal)
  572. and isinstance(widget, TreeSegmentWidget)
  573. and node.symbol == widget.label().text()
  574. ):
  575. pass # matching nonterminal
  576. elif (
  577. isinstance(node, str)
  578. and isinstance(widget, TextWidget)
  579. and node == widget.text()
  580. ):
  581. pass # matching nonterminal
  582. else:
  583. break
  584. else:
  585. # Everything matched!
  586. print("MATCH AT", i)
  587. # //////////////////////////////////////////////////
  588. # Grammar
  589. # //////////////////////////////////////////////////
  590. def _selectprod_cb(self, production):
  591. canvas = self._treelet_canvas
  592. self._prodlist.highlight(production)
  593. if self._treelet is not None:
  594. self._treelet.destroy()
  595. # Convert the production to a tree.
  596. rhs = production.rhs()
  597. for (i, elt) in enumerate(rhs):
  598. if isinstance(elt, Nonterminal):
  599. elt = Tree(elt)
  600. tree = Tree(production.lhs().symbol(), *rhs)
  601. # Draw the tree in the treelet area.
  602. fontsize = int(self._size.get())
  603. node_font = ("helvetica", -(fontsize + 4), "bold")
  604. leaf_font = ("helvetica", -(fontsize + 2))
  605. self._treelet = tree_to_treesegment(
  606. canvas, tree, node_font=node_font, leaf_font=leaf_font
  607. )
  608. self._treelet["draggable"] = 1
  609. # Center the treelet.
  610. (x1, y1, x2, y2) = self._treelet.bbox()
  611. w, h = int(canvas["width"]), int(canvas["height"])
  612. self._treelet.move((w - x1 - x2) / 2, (h - y1 - y2) / 2)
  613. # Mark the places where we can add it to the workspace.
  614. self._markproduction(production)
  615. def destroy(self, *args):
  616. self._top.destroy()
  617. def mainloop(self, *args, **kwargs):
  618. self._top.mainloop(*args, **kwargs)
  619. def demo2():
  620. from nltk import Nonterminal, Production, CFG
  621. nonterminals = "S VP NP PP P N Name V Det"
  622. (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s) for s in nonterminals.split()]
  623. productions = (
  624. # Syntactic Productions
  625. Production(S, [NP, VP]),
  626. Production(NP, [Det, N]),
  627. Production(NP, [NP, PP]),
  628. Production(VP, [VP, PP]),
  629. Production(VP, [V, NP, PP]),
  630. Production(VP, [V, NP]),
  631. Production(PP, [P, NP]),
  632. Production(PP, []),
  633. Production(PP, ["up", "over", NP]),
  634. # Lexical Productions
  635. Production(NP, ["I"]),
  636. Production(Det, ["the"]),
  637. Production(Det, ["a"]),
  638. Production(N, ["man"]),
  639. Production(V, ["saw"]),
  640. Production(P, ["in"]),
  641. Production(P, ["with"]),
  642. Production(N, ["park"]),
  643. Production(N, ["dog"]),
  644. Production(N, ["statue"]),
  645. Production(Det, ["my"]),
  646. )
  647. grammar = CFG(S, productions)
  648. text = "I saw a man in the park".split()
  649. d = CFGDemo(grammar, text)
  650. d.mainloop()
  651. ######################################################################
  652. # Old Demo
  653. ######################################################################
  654. def demo():
  655. from nltk import Nonterminal, CFG
  656. nonterminals = "S VP NP PP P N Name V Det"
  657. (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s) for s in nonterminals.split()]
  658. grammar = CFG.fromstring(
  659. """
  660. S -> NP VP
  661. PP -> P NP
  662. NP -> Det N
  663. NP -> NP PP
  664. VP -> V NP
  665. VP -> VP PP
  666. Det -> 'a'
  667. Det -> 'the'
  668. Det -> 'my'
  669. NP -> 'I'
  670. N -> 'dog'
  671. N -> 'man'
  672. N -> 'park'
  673. N -> 'statue'
  674. V -> 'saw'
  675. P -> 'in'
  676. P -> 'up'
  677. P -> 'over'
  678. P -> 'with'
  679. """
  680. )
  681. def cb(grammar):
  682. print(grammar)
  683. top = Tk()
  684. editor = CFGEditor(top, grammar, cb)
  685. Label(top, text="\nTesting CFG Editor\n").pack()
  686. Button(top, text="Quit", command=top.destroy).pack()
  687. top.mainloop()
  688. def demo3():
  689. from nltk import Production
  690. (S, VP, NP, PP, P, N, Name, V, Det) = nonterminals(
  691. "S, VP, NP, PP, P, N, Name, V, Det"
  692. )
  693. productions = (
  694. # Syntactic Productions
  695. Production(S, [NP, VP]),
  696. Production(NP, [Det, N]),
  697. Production(NP, [NP, PP]),
  698. Production(VP, [VP, PP]),
  699. Production(VP, [V, NP, PP]),
  700. Production(VP, [V, NP]),
  701. Production(PP, [P, NP]),
  702. Production(PP, []),
  703. Production(PP, ["up", "over", NP]),
  704. # Lexical Productions
  705. Production(NP, ["I"]),
  706. Production(Det, ["the"]),
  707. Production(Det, ["a"]),
  708. Production(N, ["man"]),
  709. Production(V, ["saw"]),
  710. Production(P, ["in"]),
  711. Production(P, ["with"]),
  712. Production(N, ["park"]),
  713. Production(N, ["dog"]),
  714. Production(N, ["statue"]),
  715. Production(Det, ["my"]),
  716. )
  717. t = Tk()
  718. def destroy(e, t=t):
  719. t.destroy()
  720. t.bind("q", destroy)
  721. p = ProductionList(t, productions)
  722. p.pack(expand=1, fill="both")
  723. p.add_callback("select", p.markonly)
  724. p.add_callback("move", p.markonly)
  725. p.focus()
  726. p.mark(productions[2])
  727. p.mark(productions[8])
# When run as a script, launch the CFG editor demo.
if __name__ == "__main__":
    demo()