# robotframework.py (18 KB)
  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers.robotframework
  4. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  5. Lexer for Robot Framework.
  6. :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. # Copyright 2012 Nokia Siemens Networks Oyj
  10. #
  11. # Licensed under the Apache License, Version 2.0 (the "License");
  12. # you may not use this file except in compliance with the License.
  13. # You may obtain a copy of the License at
  14. #
  15. # http://www.apache.org/licenses/LICENSE-2.0
  16. #
  17. # Unless required by applicable law or agreed to in writing, software
  18. # distributed under the License is distributed on an "AS IS" BASIS,
  19. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  20. # See the License for the specific language governing permissions and
  21. # limitations under the License.
  22. import re
  23. from pygments.lexer import Lexer
  24. from pygments.token import Token
  25. __all__ = ['RobotFrameworkLexer']
# Aliases for the Pygments token types emitted by this lexer.
HEADING = Token.Generic.Heading        # cells of a table header row
SETTING = Token.Keyword.Namespace      # setting names
IMPORT = Token.Name.Namespace          # first value of an import setting
TC_KW_NAME = Token.Generic.Subheading  # first column of test case/keyword tables
KEYWORD = Token.Name.Function          # name of a called keyword
ARGUMENT = Token.String                # arguments and other plain values
VARIABLE = Token.Name.Variable         # base and index text of a variable
COMMENT = Token.Comment                # commented cells and unknown tables
SEPARATOR = Token.Punctuation          # cell separators
SYNTAX = Token.Punctuation             # braces, brackets, '...', assignments
GHERKIN = Token.Generic.Emph           # Gherkin-style prefix of a name
ERROR = Token.Error                    # unrecognized settings / bad variable names
  38. def normalize(string, remove=''):
  39. string = string.lower()
  40. for char in remove + ' ':
  41. if char in string:
  42. string = string.replace(char, '')
  43. return string
  44. class RobotFrameworkLexer(Lexer):
  45. """
  46. For `Robot Framework <http://robotframework.org>`_ test data.
  47. Supports both space and pipe separated plain text formats.
  48. .. versionadded:: 1.6
  49. """
  50. name = 'RobotFramework'
  51. aliases = ['robotframework']
  52. filenames = ['*.robot']
  53. mimetypes = ['text/x-robotframework']
  54. def __init__(self, **options):
  55. options['tabsize'] = 2
  56. options['encoding'] = 'UTF-8'
  57. Lexer.__init__(self, **options)
  58. def get_tokens_unprocessed(self, text):
  59. row_tokenizer = RowTokenizer()
  60. var_tokenizer = VariableTokenizer()
  61. index = 0
  62. for row in text.splitlines():
  63. for value, token in row_tokenizer.tokenize(row):
  64. for value, token in var_tokenizer.tokenize(value, token):
  65. if value:
  66. yield index, token, str(value)
  67. index += len(value)
  68. class VariableTokenizer:
  69. def tokenize(self, string, token):
  70. var = VariableSplitter(string, identifiers='$@%&')
  71. if var.start < 0 or token in (COMMENT, ERROR):
  72. yield string, token
  73. return
  74. for value, token in self._tokenize(var, string, token):
  75. if value:
  76. yield value, token
  77. def _tokenize(self, var, string, orig_token):
  78. before = string[:var.start]
  79. yield before, orig_token
  80. yield var.identifier + '{', SYNTAX
  81. for value, token in self.tokenize(var.base, VARIABLE):
  82. yield value, token
  83. yield '}', SYNTAX
  84. if var.index:
  85. yield '[', SYNTAX
  86. for value, token in self.tokenize(var.index, VARIABLE):
  87. yield value, token
  88. yield ']', SYNTAX
  89. for value, token in self.tokenize(string[var.end:], orig_token):
  90. yield value, token
  91. class RowTokenizer:
  92. def __init__(self):
  93. self._table = UnknownTable()
  94. self._splitter = RowSplitter()
  95. testcases = TestCaseTable()
  96. settings = SettingTable(testcases.set_default_template)
  97. variables = VariableTable()
  98. keywords = KeywordTable()
  99. self._tables = {'settings': settings, 'setting': settings,
  100. 'metadata': settings,
  101. 'variables': variables, 'variable': variables,
  102. 'testcases': testcases, 'testcase': testcases,
  103. 'keywords': keywords, 'keyword': keywords,
  104. 'userkeywords': keywords, 'userkeyword': keywords}
  105. def tokenize(self, row):
  106. commented = False
  107. heading = False
  108. for index, value in enumerate(self._splitter.split(row)):
  109. # First value, and every second after that, is a separator.
  110. index, separator = divmod(index-1, 2)
  111. if value.startswith('#'):
  112. commented = True
  113. elif index == 0 and value.startswith('*'):
  114. self._table = self._start_table(value)
  115. heading = True
  116. for value, token in self._tokenize(value, index, commented,
  117. separator, heading):
  118. yield value, token
  119. self._table.end_row()
  120. def _start_table(self, header):
  121. name = normalize(header, remove='*')
  122. return self._tables.get(name, UnknownTable())
  123. def _tokenize(self, value, index, commented, separator, heading):
  124. if commented:
  125. yield value, COMMENT
  126. elif separator:
  127. yield value, SEPARATOR
  128. elif heading:
  129. yield value, HEADING
  130. else:
  131. for value, token in self._table.tokenize(value, index):
  132. yield value, token
  133. class RowSplitter:
  134. _space_splitter = re.compile('( {2,})')
  135. _pipe_splitter = re.compile(r'((?:^| +)\|(?: +|$))')
  136. def split(self, row):
  137. splitter = (row.startswith('| ') and self._split_from_pipes
  138. or self._split_from_spaces)
  139. for value in splitter(row):
  140. yield value
  141. yield '\n'
  142. def _split_from_spaces(self, row):
  143. yield '' # Start with (pseudo)separator similarly as with pipes
  144. for value in self._space_splitter.split(row):
  145. yield value
  146. def _split_from_pipes(self, row):
  147. _, separator, rest = self._pipe_splitter.split(row, 1)
  148. yield separator
  149. while self._pipe_splitter.search(rest):
  150. cell, separator, rest = self._pipe_splitter.split(rest, 1)
  151. yield cell
  152. yield separator
  153. yield rest
  154. class Tokenizer:
  155. _tokens = None
  156. def __init__(self):
  157. self._index = 0
  158. def tokenize(self, value):
  159. values_and_tokens = self._tokenize(value, self._index)
  160. self._index += 1
  161. if isinstance(values_and_tokens, type(Token)):
  162. values_and_tokens = [(value, values_and_tokens)]
  163. return values_and_tokens
  164. def _tokenize(self, value, index):
  165. index = min(index, len(self._tokens) - 1)
  166. return self._tokens[index]
  167. def _is_assign(self, value):
  168. if value.endswith('='):
  169. value = value[:-1].strip()
  170. var = VariableSplitter(value, identifiers='$@&')
  171. return var.start == 0 and var.end == len(value)
class Comment(Tokenizer):
    """Tokenizer that gives every value the COMMENT token."""
    _tokens = (COMMENT,)
  174. class Setting(Tokenizer):
  175. _tokens = (SETTING, ARGUMENT)
  176. _keyword_settings = ('suitesetup', 'suiteprecondition', 'suiteteardown',
  177. 'suitepostcondition', 'testsetup', 'testprecondition',
  178. 'testteardown', 'testpostcondition', 'testtemplate')
  179. _import_settings = ('library', 'resource', 'variables')
  180. _other_settings = ('documentation', 'metadata', 'forcetags', 'defaulttags',
  181. 'testtimeout')
  182. _custom_tokenizer = None
  183. def __init__(self, template_setter=None):
  184. Tokenizer.__init__(self)
  185. self._template_setter = template_setter
  186. def _tokenize(self, value, index):
  187. if index == 1 and self._template_setter:
  188. self._template_setter(value)
  189. if index == 0:
  190. normalized = normalize(value)
  191. if normalized in self._keyword_settings:
  192. self._custom_tokenizer = KeywordCall(support_assign=False)
  193. elif normalized in self._import_settings:
  194. self._custom_tokenizer = ImportSetting()
  195. elif normalized not in self._other_settings:
  196. return ERROR
  197. elif self._custom_tokenizer:
  198. return self._custom_tokenizer.tokenize(value)
  199. return Tokenizer._tokenize(self, value, index)
class ImportSetting(Tokenizer):
    """Tokenizer giving the first value IMPORT and the rest ARGUMENT."""
    _tokens = (IMPORT, ARGUMENT)
  202. class TestCaseSetting(Setting):
  203. _keyword_settings = ('setup', 'precondition', 'teardown', 'postcondition',
  204. 'template')
  205. _import_settings = ()
  206. _other_settings = ('documentation', 'tags', 'timeout')
  207. def _tokenize(self, value, index):
  208. if index == 0:
  209. type = Setting._tokenize(self, value[1:-1], index)
  210. return [('[', SYNTAX), (value[1:-1], type), (']', SYNTAX)]
  211. return Setting._tokenize(self, value, index)
class KeywordSetting(TestCaseSetting):
    """Bracketed settings recognized by ``KeywordTable`` rows."""
    # Only the teardown setting is followed by a keyword call here.
    _keyword_settings = ('teardown',)
    _other_settings = ('documentation', 'arguments', 'return', 'timeout', 'tags')
  215. class Variable(Tokenizer):
  216. _tokens = (SYNTAX, ARGUMENT)
  217. def _tokenize(self, value, index):
  218. if index == 0 and not self._is_assign(value):
  219. return ERROR
  220. return Tokenizer._tokenize(self, value, index)
  221. class KeywordCall(Tokenizer):
  222. _tokens = (KEYWORD, ARGUMENT)
  223. def __init__(self, support_assign=True):
  224. Tokenizer.__init__(self)
  225. self._keyword_found = not support_assign
  226. self._assigns = 0
  227. def _tokenize(self, value, index):
  228. if not self._keyword_found and self._is_assign(value):
  229. self._assigns += 1
  230. return SYNTAX # VariableTokenizer tokenizes this later.
  231. if self._keyword_found:
  232. return Tokenizer._tokenize(self, value, index - self._assigns)
  233. self._keyword_found = True
  234. return GherkinTokenizer().tokenize(value, KEYWORD)
  235. class GherkinTokenizer:
  236. _gherkin_prefix = re.compile('^(Given|When|Then|And) ', re.IGNORECASE)
  237. def tokenize(self, value, token):
  238. match = self._gherkin_prefix.match(value)
  239. if not match:
  240. return [(value, token)]
  241. end = match.end()
  242. return [(value[:end], GHERKIN), (value[end:], token)]
class TemplatedKeywordCall(Tokenizer):
    """Tokenizer that gives every value the ARGUMENT token."""
    _tokens = (ARGUMENT,)
  245. class ForLoop(Tokenizer):
  246. def __init__(self):
  247. Tokenizer.__init__(self)
  248. self._in_arguments = False
  249. def _tokenize(self, value, index):
  250. token = self._in_arguments and ARGUMENT or SYNTAX
  251. if value.upper() in ('IN', 'IN RANGE'):
  252. self._in_arguments = True
  253. return token
  254. class _Table:
  255. _tokenizer_class = None
  256. def __init__(self, prev_tokenizer=None):
  257. self._tokenizer = self._tokenizer_class()
  258. self._prev_tokenizer = prev_tokenizer
  259. self._prev_values_on_row = []
  260. def tokenize(self, value, index):
  261. if self._continues(value, index):
  262. self._tokenizer = self._prev_tokenizer
  263. yield value, SYNTAX
  264. else:
  265. for value_and_token in self._tokenize(value, index):
  266. yield value_and_token
  267. self._prev_values_on_row.append(value)
  268. def _continues(self, value, index):
  269. return value == '...' and all(self._is_empty(t)
  270. for t in self._prev_values_on_row)
  271. def _is_empty(self, value):
  272. return value in ('', '\\')
  273. def _tokenize(self, value, index):
  274. return self._tokenizer.tokenize(value)
  275. def end_row(self):
  276. self.__init__(prev_tokenizer=self._tokenizer)
class UnknownTable(_Table):
    """Table whose cells are all tokenized by ``Comment``."""
    _tokenizer_class = Comment

    def _continues(self, value, index):
        # No cell is ever treated as a row continuation in this table.
        return False
class VariableTable(_Table):
    """Table whose rows are tokenized by ``Variable``."""
    _tokenizer_class = Variable
  283. class SettingTable(_Table):
  284. _tokenizer_class = Setting
  285. def __init__(self, template_setter, prev_tokenizer=None):
  286. _Table.__init__(self, prev_tokenizer)
  287. self._template_setter = template_setter
  288. def _tokenize(self, value, index):
  289. if index == 0 and normalize(value) == 'testtemplate':
  290. self._tokenizer = Setting(self._template_setter)
  291. return _Table._tokenize(self, value, index)
  292. def end_row(self):
  293. self.__init__(self._template_setter, prev_tokenizer=self._tokenizer)
  294. class TestCaseTable(_Table):
  295. _setting_class = TestCaseSetting
  296. _test_template = None
  297. _default_template = None
  298. @property
  299. def _tokenizer_class(self):
  300. if self._test_template or (self._default_template and
  301. self._test_template is not False):
  302. return TemplatedKeywordCall
  303. return KeywordCall
  304. def _continues(self, value, index):
  305. return index > 0 and _Table._continues(self, value, index)
  306. def _tokenize(self, value, index):
  307. if index == 0:
  308. if value:
  309. self._test_template = None
  310. return GherkinTokenizer().tokenize(value, TC_KW_NAME)
  311. if index == 1 and self._is_setting(value):
  312. if self._is_template(value):
  313. self._test_template = False
  314. self._tokenizer = self._setting_class(self.set_test_template)
  315. else:
  316. self._tokenizer = self._setting_class()
  317. if index == 1 and self._is_for_loop(value):
  318. self._tokenizer = ForLoop()
  319. if index == 1 and self._is_empty(value):
  320. return [(value, SYNTAX)]
  321. return _Table._tokenize(self, value, index)
  322. def _is_setting(self, value):
  323. return value.startswith('[') and value.endswith(']')
  324. def _is_template(self, value):
  325. return normalize(value) == '[template]'
  326. def _is_for_loop(self, value):
  327. return value.startswith(':') and normalize(value, remove=':') == 'for'
  328. def set_test_template(self, template):
  329. self._test_template = self._is_template_set(template)
  330. def set_default_template(self, template):
  331. self._default_template = self._is_template_set(template)
  332. def _is_template_set(self, template):
  333. return normalize(template) not in ('', '\\', 'none', '${empty}')
class KeywordTable(TestCaseTable):
    """Table for user keywords; like ``TestCaseTable`` but without templates."""
    # A plain class attribute replaces the property of the base class, so
    # rows are always tokenized as keyword calls.
    _tokenizer_class = KeywordCall
    _setting_class = KeywordSetting

    def _is_template(self, value):
        # No setting is treated as a template setting in this table.
        return False
  339. # Following code copied directly from Robot Framework 2.7.5.
  340. class VariableSplitter:
  341. def __init__(self, string, identifiers):
  342. self.identifier = None
  343. self.base = None
  344. self.index = None
  345. self.start = -1
  346. self.end = -1
  347. self._identifiers = identifiers
  348. self._may_have_internal_variables = False
  349. try:
  350. self._split(string)
  351. except ValueError:
  352. pass
  353. else:
  354. self._finalize()
  355. def get_replaced_base(self, variables):
  356. if self._may_have_internal_variables:
  357. return variables.replace_string(self.base)
  358. return self.base
  359. def _finalize(self):
  360. self.identifier = self._variable_chars[0]
  361. self.base = ''.join(self._variable_chars[2:-1])
  362. self.end = self.start + len(self._variable_chars)
  363. if self._has_list_or_dict_variable_index():
  364. self.index = ''.join(self._list_and_dict_variable_index_chars[1:-1])
  365. self.end += len(self._list_and_dict_variable_index_chars)
  366. def _has_list_or_dict_variable_index(self):
  367. return self._list_and_dict_variable_index_chars\
  368. and self._list_and_dict_variable_index_chars[-1] == ']'
  369. def _split(self, string):
  370. start_index, max_index = self._find_variable(string)
  371. self.start = start_index
  372. self._open_curly = 1
  373. self._state = self._variable_state
  374. self._variable_chars = [string[start_index], '{']
  375. self._list_and_dict_variable_index_chars = []
  376. self._string = string
  377. start_index += 2
  378. for index, char in enumerate(string[start_index:]):
  379. index += start_index # Giving start to enumerate only in Py 2.6+
  380. try:
  381. self._state(char, index)
  382. except StopIteration:
  383. return
  384. if index == max_index and not self._scanning_list_variable_index():
  385. return
  386. def _scanning_list_variable_index(self):
  387. return self._state in [self._waiting_list_variable_index_state,
  388. self._list_variable_index_state]
  389. def _find_variable(self, string):
  390. max_end_index = string.rfind('}')
  391. if max_end_index == -1:
  392. raise ValueError('No variable end found')
  393. if self._is_escaped(string, max_end_index):
  394. return self._find_variable(string[:max_end_index])
  395. start_index = self._find_start_index(string, 1, max_end_index)
  396. if start_index == -1:
  397. raise ValueError('No variable start found')
  398. return start_index, max_end_index
  399. def _find_start_index(self, string, start, end):
  400. index = string.find('{', start, end) - 1
  401. if index < 0:
  402. return -1
  403. if self._start_index_is_ok(string, index):
  404. return index
  405. return self._find_start_index(string, index+2, end)
  406. def _start_index_is_ok(self, string, index):
  407. return string[index] in self._identifiers\
  408. and not self._is_escaped(string, index)
  409. def _is_escaped(self, string, index):
  410. escaped = False
  411. while index > 0 and string[index-1] == '\\':
  412. index -= 1
  413. escaped = not escaped
  414. return escaped
  415. def _variable_state(self, char, index):
  416. self._variable_chars.append(char)
  417. if char == '}' and not self._is_escaped(self._string, index):
  418. self._open_curly -= 1
  419. if self._open_curly == 0:
  420. if not self._is_list_or_dict_variable():
  421. raise StopIteration
  422. self._state = self._waiting_list_variable_index_state
  423. elif char in self._identifiers:
  424. self._state = self._internal_variable_start_state
  425. def _is_list_or_dict_variable(self):
  426. return self._variable_chars[0] in ('@','&')
  427. def _internal_variable_start_state(self, char, index):
  428. self._state = self._variable_state
  429. if char == '{':
  430. self._variable_chars.append(char)
  431. self._open_curly += 1
  432. self._may_have_internal_variables = True
  433. else:
  434. self._variable_state(char, index)
  435. def _waiting_list_variable_index_state(self, char, index):
  436. if char != '[':
  437. raise StopIteration
  438. self._list_and_dict_variable_index_chars.append(char)
  439. self._state = self._list_variable_index_state
  440. def _list_variable_index_state(self, char, index):
  441. self._list_and_dict_variable_index_chars.append(char)
  442. if char == ']':
  443. raise StopIteration