asm.py 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993
  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers.asm
  4. ~~~~~~~~~~~~~~~~~~~
  5. Lexers for assembly languages.
  6. :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. import re
  10. from pygments.lexer import RegexLexer, include, bygroups, using, words, \
  11. DelegatingLexer, default
  12. from pygments.lexers.c_cpp import CppLexer, CLexer
  13. from pygments.lexers.d import DLexer
  14. from pygments.token import Text, Name, Number, String, Comment, Punctuation, \
  15. Other, Keyword, Operator, Literal
# Public names exported by ``from pygments.lexers.asm import *``.
__all__ = ['GasLexer', 'ObjdumpLexer', 'DObjdumpLexer', 'CppObjdumpLexer',
           'CObjdumpLexer', 'HsailLexer', 'LlvmLexer', 'LlvmMirBodyLexer',
           'LlvmMirLexer', 'NasmLexer', 'NasmObjdumpLexer', 'TasmLexer',
           'Ca65Lexer', 'Dasm16Lexer']
  20. class GasLexer(RegexLexer):
  21. """
  22. For Gas (AT&T) assembly code.
  23. """
  24. name = 'GAS'
  25. aliases = ['gas', 'asm']
  26. filenames = ['*.s', '*.S']
  27. mimetypes = ['text/x-gas']
  28. #: optional Comment or Whitespace
  29. string = r'"(\\"|[^"])*"'
  30. char = r'[\w$.@-]'
  31. identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
  32. number = r'(?:0[xX][a-zA-Z0-9]+|\d+)'
  33. register = '%' + identifier
  34. tokens = {
  35. 'root': [
  36. include('whitespace'),
  37. (identifier + ':', Name.Label),
  38. (r'\.' + identifier, Name.Attribute, 'directive-args'),
  39. (r'lock|rep(n?z)?|data\d+', Name.Attribute),
  40. (identifier, Name.Function, 'instruction-args'),
  41. (r'[\r\n]+', Text)
  42. ],
  43. 'directive-args': [
  44. (identifier, Name.Constant),
  45. (string, String),
  46. ('@' + identifier, Name.Attribute),
  47. (number, Number.Integer),
  48. (register, Name.Variable),
  49. (r'[\r\n]+', Text, '#pop'),
  50. (r'([;#]|//).*?\n', Comment.Single, '#pop'),
  51. (r'/[*].*?[*]/', Comment.Multiline),
  52. (r'/[*].*?\n[\w\W]*?[*]/', Comment.Multiline, '#pop'),
  53. include('punctuation'),
  54. include('whitespace')
  55. ],
  56. 'instruction-args': [
  57. # For objdump-disassembled code, shouldn't occur in
  58. # actual assembler input
  59. ('([a-z0-9]+)( )(<)('+identifier+')(>)',
  60. bygroups(Number.Hex, Text, Punctuation, Name.Constant,
  61. Punctuation)),
  62. ('([a-z0-9]+)( )(<)('+identifier+')([-+])('+number+')(>)',
  63. bygroups(Number.Hex, Text, Punctuation, Name.Constant,
  64. Punctuation, Number.Integer, Punctuation)),
  65. # Address constants
  66. (identifier, Name.Constant),
  67. (number, Number.Integer),
  68. # Registers
  69. (register, Name.Variable),
  70. # Numeric constants
  71. ('$'+number, Number.Integer),
  72. (r"$'(.|\\')'", String.Char),
  73. (r'[\r\n]+', Text, '#pop'),
  74. (r'([;#]|//).*?\n', Comment.Single, '#pop'),
  75. (r'/[*].*?[*]/', Comment.Multiline),
  76. (r'/[*].*?\n[\w\W]*?[*]/', Comment.Multiline, '#pop'),
  77. include('punctuation'),
  78. include('whitespace')
  79. ],
  80. 'whitespace': [
  81. (r'\n', Text),
  82. (r'\s+', Text),
  83. (r'([;#]|//).*?\n', Comment.Single),
  84. (r'/[*][\w\W]*?[*]/', Comment.Multiline)
  85. ],
  86. 'punctuation': [
  87. (r'[-*,.()\[\]!:]+', Punctuation)
  88. ]
  89. }
  90. def analyse_text(text):
  91. if re.search(r'^\.(text|data|section)', text, re.M):
  92. return True
  93. elif re.search(r'^\.\w+', text, re.M):
  94. return 0.1
  95. def _objdump_lexer_tokens(asm_lexer):
  96. """
  97. Common objdump lexer tokens to wrap an ASM lexer.
  98. """
  99. hex_re = r'[0-9A-Za-z]'
  100. return {
  101. 'root': [
  102. # File name & format:
  103. ('(.*?)(:)( +file format )(.*?)$',
  104. bygroups(Name.Label, Punctuation, Text, String)),
  105. # Section header
  106. ('(Disassembly of section )(.*?)(:)$',
  107. bygroups(Text, Name.Label, Punctuation)),
  108. # Function labels
  109. # (With offset)
  110. ('('+hex_re+'+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$',
  111. bygroups(Number.Hex, Text, Punctuation, Name.Function,
  112. Punctuation, Number.Hex, Punctuation)),
  113. # (Without offset)
  114. ('('+hex_re+'+)( )(<)(.*?)(>:)$',
  115. bygroups(Number.Hex, Text, Punctuation, Name.Function,
  116. Punctuation)),
  117. # Code line with disassembled instructions
  118. ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)( *\t)([a-zA-Z].*?)$',
  119. bygroups(Text, Name.Label, Text, Number.Hex, Text,
  120. using(asm_lexer))),
  121. # Code line with ascii
  122. ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)( *)(.*?)$',
  123. bygroups(Text, Name.Label, Text, Number.Hex, Text, String)),
  124. # Continued code line, only raw opcodes without disassembled
  125. # instruction
  126. ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)$',
  127. bygroups(Text, Name.Label, Text, Number.Hex)),
  128. # Skipped a few bytes
  129. (r'\t\.\.\.$', Text),
  130. # Relocation line
  131. # (With offset)
  132. (r'(\t\t\t)('+hex_re+r'+:)( )([^\t]+)(\t)(.*?)([-+])(0x'+hex_re+'+)$',
  133. bygroups(Text, Name.Label, Text, Name.Property, Text,
  134. Name.Constant, Punctuation, Number.Hex)),
  135. # (Without offset)
  136. (r'(\t\t\t)('+hex_re+r'+:)( )([^\t]+)(\t)(.*?)$',
  137. bygroups(Text, Name.Label, Text, Name.Property, Text,
  138. Name.Constant)),
  139. (r'[^\n]+\n', Other)
  140. ]
  141. }
class ObjdumpLexer(RegexLexer):
    """
    For the output of ``objdump -dr``.
    """
    name = 'objdump'
    aliases = ['objdump']
    filenames = ['*.objdump']
    mimetypes = ['text/x-objdump']

    # Shared objdump token table; instruction text is lexed with GasLexer.
    tokens = _objdump_lexer_tokens(GasLexer)
class DObjdumpLexer(DelegatingLexer):
    """
    For the output of ``objdump -Sr`` on compiled D files.
    """
    name = 'd-objdump'
    aliases = ['d-objdump']
    filenames = ['*.d-objdump']
    mimetypes = ['text/x-d-objdump']

    def __init__(self, **options):
        # Hand DLexer and ObjdumpLexer (in that order) to DelegatingLexer,
        # forwarding all lexer options unchanged.
        super(DObjdumpLexer, self).__init__(DLexer, ObjdumpLexer, **options)
  161. class CppObjdumpLexer(DelegatingLexer):
  162. """
  163. For the output of 'objdump -Sr on compiled C++ files'
  164. """
  165. name = 'cpp-objdump'
  166. aliases = ['cpp-objdump', 'c++-objdumb', 'cxx-objdump']
  167. filenames = ['*.cpp-objdump', '*.c++-objdump', '*.cxx-objdump']
  168. mimetypes = ['text/x-cpp-objdump']
  169. def __init__(self, **options):
  170. super(CppObjdumpLexer, self).__init__(CppLexer, ObjdumpLexer, **options)
class CObjdumpLexer(DelegatingLexer):
    """
    For the output of ``objdump -Sr`` on compiled C files.
    """
    name = 'c-objdump'
    aliases = ['c-objdump']
    filenames = ['*.c-objdump']
    mimetypes = ['text/x-c-objdump']

    def __init__(self, **options):
        # Hand CLexer and ObjdumpLexer (in that order) to DelegatingLexer,
        # forwarding all lexer options unchanged.
        super(CObjdumpLexer, self).__init__(CLexer, ObjdumpLexer, **options)
  181. class HsailLexer(RegexLexer):
  182. """
  183. For HSAIL assembly code.
  184. .. versionadded:: 2.2
  185. """
  186. name = 'HSAIL'
  187. aliases = ['hsail', 'hsa']
  188. filenames = ['*.hsail']
  189. mimetypes = ['text/x-hsail']
  190. string = r'"[^"]*?"'
  191. identifier = r'[a-zA-Z_][\w.]*'
  192. # Registers
  193. register_number = r'[0-9]+'
  194. register = r'(\$(c|s|d|q)' + register_number + ')'
  195. # Qualifiers
  196. alignQual = r'(align\(\d+\))'
  197. widthQual = r'(width\((\d+|all)\))'
  198. allocQual = r'(alloc\(agent\))'
  199. # Instruction Modifiers
  200. roundingMod = (r'((_ftz)?(_up|_down|_zero|_near))')
  201. datatypeMod = (r'_('
  202. # packedTypes
  203. r'u8x4|s8x4|u16x2|s16x2|u8x8|s8x8|u16x4|s16x4|u32x2|s32x2|'
  204. r'u8x16|s8x16|u16x8|s16x8|u32x4|s32x4|u64x2|s64x2|'
  205. r'f16x2|f16x4|f16x8|f32x2|f32x4|f64x2|'
  206. # baseTypes
  207. r'u8|s8|u16|s16|u32|s32|u64|s64|'
  208. r'b128|b8|b16|b32|b64|b1|'
  209. r'f16|f32|f64|'
  210. # opaqueType
  211. r'roimg|woimg|rwimg|samp|sig32|sig64)')
  212. # Numeric Constant
  213. float = r'((\d+\.)|(\d*\.\d+))[eE][+-]?\d+'
  214. hexfloat = r'0[xX](([0-9a-fA-F]+\.[0-9a-fA-F]*)|([0-9a-fA-F]*\.[0-9a-fA-F]+))[pP][+-]?\d+'
  215. ieeefloat = r'0((h|H)[0-9a-fA-F]{4}|(f|F)[0-9a-fA-F]{8}|(d|D)[0-9a-fA-F]{16})'
  216. tokens = {
  217. 'root': [
  218. include('whitespace'),
  219. include('comments'),
  220. (string, String),
  221. (r'@' + identifier + ':?', Name.Label),
  222. (register, Name.Variable.Anonymous),
  223. include('keyword'),
  224. (r'&' + identifier, Name.Variable.Global),
  225. (r'%' + identifier, Name.Variable),
  226. (hexfloat, Number.Hex),
  227. (r'0[xX][a-fA-F0-9]+', Number.Hex),
  228. (ieeefloat, Number.Float),
  229. (float, Number.Float),
  230. (r'\d+', Number.Integer),
  231. (r'[=<>{}\[\]()*.,:;!]|x\b', Punctuation)
  232. ],
  233. 'whitespace': [
  234. (r'(\n|\s)+', Text),
  235. ],
  236. 'comments': [
  237. (r'/\*.*?\*/', Comment.Multiline),
  238. (r'//.*?\n', Comment.Single),
  239. ],
  240. 'keyword': [
  241. # Types
  242. (r'kernarg' + datatypeMod, Keyword.Type),
  243. # Regular keywords
  244. (r'\$(full|base|small|large|default|zero|near)', Keyword),
  245. (words((
  246. 'module', 'extension', 'pragma', 'prog', 'indirect', 'signature',
  247. 'decl', 'kernel', 'function', 'enablebreakexceptions',
  248. 'enabledetectexceptions', 'maxdynamicgroupsize', 'maxflatgridsize',
  249. 'maxflatworkgroupsize', 'requireddim', 'requiredgridsize',
  250. 'requiredworkgroupsize', 'requirenopartialworkgroups'),
  251. suffix=r'\b'), Keyword),
  252. # instructions
  253. (roundingMod, Keyword),
  254. (datatypeMod, Keyword),
  255. (r'_(' + alignQual + '|' + widthQual + ')', Keyword),
  256. (r'_kernarg', Keyword),
  257. (r'(nop|imagefence)\b', Keyword),
  258. (words((
  259. 'cleardetectexcept', 'clock', 'cuid', 'debugtrap', 'dim',
  260. 'getdetectexcept', 'groupbaseptr', 'kernargbaseptr', 'laneid',
  261. 'maxcuid', 'maxwaveid', 'packetid', 'setdetectexcept', 'waveid',
  262. 'workitemflatabsid', 'workitemflatid', 'nullptr', 'abs', 'bitrev',
  263. 'currentworkgroupsize', 'currentworkitemflatid', 'fract', 'ncos',
  264. 'neg', 'nexp2', 'nlog2', 'nrcp', 'nrsqrt', 'nsin', 'nsqrt',
  265. 'gridgroups', 'gridsize', 'not', 'sqrt', 'workgroupid',
  266. 'workgroupsize', 'workitemabsid', 'workitemid', 'ceil', 'floor',
  267. 'rint', 'trunc', 'add', 'bitmask', 'borrow', 'carry', 'copysign',
  268. 'div', 'rem', 'sub', 'shl', 'shr', 'and', 'or', 'xor', 'unpackhi',
  269. 'unpacklo', 'max', 'min', 'fma', 'mad', 'bitextract', 'bitselect',
  270. 'shuffle', 'cmov', 'bitalign', 'bytealign', 'lerp', 'nfma', 'mul',
  271. 'mulhi', 'mul24hi', 'mul24', 'mad24', 'mad24hi', 'bitinsert',
  272. 'combine', 'expand', 'lda', 'mov', 'pack', 'unpack', 'packcvt',
  273. 'unpackcvt', 'sad', 'sementp', 'ftos', 'stof', 'cmp', 'ld', 'st',
  274. '_eq', '_ne', '_lt', '_le', '_gt', '_ge', '_equ', '_neu', '_ltu',
  275. '_leu', '_gtu', '_geu', '_num', '_nan', '_seq', '_sne', '_slt',
  276. '_sle', '_sgt', '_sge', '_snum', '_snan', '_sequ', '_sneu', '_sltu',
  277. '_sleu', '_sgtu', '_sgeu', 'atomic', '_ld', '_st', '_cas', '_add',
  278. '_and', '_exch', '_max', '_min', '_or', '_sub', '_wrapdec',
  279. '_wrapinc', '_xor', 'ret', 'cvt', '_readonly', '_kernarg', '_global',
  280. 'br', 'cbr', 'sbr', '_scacq', '_screl', '_scar', '_rlx', '_wave',
  281. '_wg', '_agent', '_system', 'ldimage', 'stimage', '_v2', '_v3', '_v4',
  282. '_1d', '_2d', '_3d', '_1da', '_2da', '_1db', '_2ddepth', '_2dadepth',
  283. '_width', '_height', '_depth', '_array', '_channelorder',
  284. '_channeltype', 'querysampler', '_coord', '_filter', '_addressing',
  285. 'barrier', 'wavebarrier', 'initfbar', 'joinfbar', 'waitfbar',
  286. 'arrivefbar', 'leavefbar', 'releasefbar', 'ldf', 'activelaneid',
  287. 'activelanecount', 'activelanemask', 'activelanepermute', 'call',
  288. 'scall', 'icall', 'alloca', 'packetcompletionsig',
  289. 'addqueuewriteindex', 'casqueuewriteindex', 'ldqueuereadindex',
  290. 'stqueuereadindex', 'readonly', 'global', 'private', 'group',
  291. 'spill', 'arg', '_upi', '_downi', '_zeroi', '_neari', '_upi_sat',
  292. '_downi_sat', '_zeroi_sat', '_neari_sat', '_supi', '_sdowni',
  293. '_szeroi', '_sneari', '_supi_sat', '_sdowni_sat', '_szeroi_sat',
  294. '_sneari_sat', '_pp', '_ps', '_sp', '_ss', '_s', '_p', '_pp_sat',
  295. '_ps_sat', '_sp_sat', '_ss_sat', '_s_sat', '_p_sat')), Keyword),
  296. # Integer types
  297. (r'i[1-9]\d*', Keyword)
  298. ]
  299. }
  300. class LlvmLexer(RegexLexer):
  301. """
  302. For LLVM assembly code.
  303. """
  304. name = 'LLVM'
  305. aliases = ['llvm']
  306. filenames = ['*.ll']
  307. mimetypes = ['text/x-llvm']
  308. #: optional Comment or Whitespace
  309. string = r'"[^"]*?"'
  310. identifier = r'([-a-zA-Z$._][\w\-$.]*|' + string + ')'
  311. tokens = {
  312. 'root': [
  313. include('whitespace'),
  314. # Before keywords, because keywords are valid label names :(...
  315. (identifier + r'\s*:', Name.Label),
  316. include('keyword'),
  317. (r'%' + identifier, Name.Variable),
  318. (r'@' + identifier, Name.Variable.Global),
  319. (r'%\d+', Name.Variable.Anonymous),
  320. (r'@\d+', Name.Variable.Global),
  321. (r'#\d+', Name.Variable.Global),
  322. (r'!' + identifier, Name.Variable),
  323. (r'!\d+', Name.Variable.Anonymous),
  324. (r'c?' + string, String),
  325. (r'0[xX][a-fA-F0-9]+', Number),
  326. (r'-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?', Number),
  327. (r'[=<>{}\[\]()*.,!]|x\b', Punctuation)
  328. ],
  329. 'whitespace': [
  330. (r'(\n|\s)+', Text),
  331. (r';.*?\n', Comment)
  332. ],
  333. 'keyword': [
  334. # Regular keywords
  335. (words((
  336. 'acq_rel', 'acquire', 'add', 'addrspace', 'addrspacecast', 'afn', 'alias',
  337. 'aliasee', 'align', 'alignLog2', 'alignstack', 'alloca', 'allocsize', 'allOnes',
  338. 'alwaysinline', 'amdgpu_cs', 'amdgpu_es', 'amdgpu_gs', 'amdgpu_hs',
  339. 'amdgpu_kernel', 'amdgpu_ls', 'amdgpu_ps', 'amdgpu_vs', 'and', 'any',
  340. 'anyregcc', 'appending', 'arcp', 'argmemonly', 'args', 'arm_aapcs_vfpcc',
  341. 'arm_aapcscc', 'arm_apcscc', 'ashr', 'asm', 'atomic', 'atomicrmw', 'attributes',
  342. 'available_externally', 'avr_intrcc', 'avr_signalcc', 'bit', 'bitcast',
  343. 'bitMask', 'blockaddress', 'br', 'branchFunnel', 'builtin', 'byArg', 'byte',
  344. 'byteArray', 'byval', 'c', 'call', 'callee', 'caller', 'calls', 'catch',
  345. 'catchpad', 'catchret', 'catchswitch', 'cc', 'ccc', 'cleanup', 'cleanuppad',
  346. 'cleanupret', 'cmpxchg', 'cold', 'coldcc', 'comdat', 'common', 'constant',
  347. 'contract', 'convergent', 'critical', 'cxx_fast_tlscc', 'datalayout', 'declare',
  348. 'default', 'define', 'deplibs', 'dereferenceable', 'dereferenceable_or_null',
  349. 'distinct', 'dllexport', 'dllimport', 'double', 'dso_local', 'dso_preemptable',
  350. 'dsoLocal', 'eq', 'exact', 'exactmatch', 'extern_weak', 'external',
  351. 'externally_initialized', 'extractelement', 'extractvalue', 'fadd', 'false',
  352. 'fast', 'fastcc', 'fcmp', 'fdiv', 'fence', 'filter', 'flags', 'float', 'fmul',
  353. 'fp128', 'fpext', 'fptosi', 'fptoui', 'fptrunc', 'frem', 'from', 'fsub',
  354. 'funcFlags', 'function', 'gc', 'getelementptr', 'ghccc', 'global', 'guid', 'gv',
  355. 'half', 'hash', 'hhvm_ccc', 'hhvmcc', 'hidden', 'hot', 'hotness', 'icmp',
  356. 'ifunc', 'inaccessiblemem_or_argmemonly', 'inaccessiblememonly', 'inalloca',
  357. 'inbounds', 'indir', 'indirectbr', 'info', 'initialexec', 'inline',
  358. 'inlineBits', 'inlinehint', 'inrange', 'inreg', 'insertelement', 'insertvalue',
  359. 'insts', 'intel_ocl_bicc', 'inteldialect', 'internal', 'inttoptr', 'invoke',
  360. 'jumptable', 'kind', 'label', 'landingpad', 'largest', 'linkage', 'linkonce',
  361. 'linkonce_odr', 'live', 'load', 'local_unnamed_addr', 'localdynamic',
  362. 'localexec', 'lshr', 'max', 'metadata', 'min', 'minsize', 'module', 'monotonic',
  363. 'msp430_intrcc', 'mul', 'musttail', 'naked', 'name', 'nand', 'ne', 'nest',
  364. 'ninf', 'nnan', 'noalias', 'nobuiltin', 'nocapture', 'nocf_check',
  365. 'noduplicate', 'noduplicates', 'noimplicitfloat', 'noinline', 'none',
  366. 'nonlazybind', 'nonnull', 'norecurse', 'noRecurse', 'noredzone', 'noreturn',
  367. 'notail', 'notEligibleToImport', 'nounwind', 'nsw', 'nsz', 'null', 'nuw', 'oeq',
  368. 'offset', 'oge', 'ogt', 'ole', 'olt', 'one', 'opaque', 'optforfuzzing',
  369. 'optnone', 'optsize', 'or', 'ord', 'path', 'personality', 'phi', 'ppc_fp128',
  370. 'prefix', 'preserve_allcc', 'preserve_mostcc', 'private', 'prologue',
  371. 'protected', 'ptrtoint', 'ptx_device', 'ptx_kernel', 'readnone', 'readNone',
  372. 'readonly', 'readOnly', 'reassoc', 'refs', 'relbf', 'release', 'resByArg',
  373. 'resume', 'ret', 'returnDoesNotAlias', 'returned', 'returns_twice', 'safestack',
  374. 'samesize', 'sanitize_address', 'sanitize_hwaddress', 'sanitize_memory',
  375. 'sanitize_thread', 'sdiv', 'section', 'select', 'seq_cst', 'sext', 'sge', 'sgt',
  376. 'shadowcallstack', 'shl', 'shufflevector', 'sideeffect', 'signext', 'single',
  377. 'singleImpl', 'singleImplName', 'sitofp', 'sizeM1', 'sizeM1BitWidth', 'sle',
  378. 'slt', 'source_filename', 'speculatable', 'spir_func', 'spir_kernel', 'srem',
  379. 'sret', 'ssp', 'sspreq', 'sspstrong', 'store', 'strictfp', 'sub', 'summaries',
  380. 'summary', 'swiftcc', 'swifterror', 'swiftself', 'switch', 'syncscope', 'tail',
  381. 'target', 'thread_local', 'to', 'token', 'triple', 'true', 'trunc', 'type',
  382. 'typeCheckedLoadConstVCalls', 'typeCheckedLoadVCalls', 'typeid', 'typeIdInfo',
  383. 'typeTestAssumeConstVCalls', 'typeTestAssumeVCalls', 'typeTestRes', 'typeTests',
  384. 'udiv', 'ueq', 'uge', 'ugt', 'uitofp', 'ule', 'ult', 'umax', 'umin', 'undef',
  385. 'une', 'uniformRetVal', 'uniqueRetVal', 'unknown', 'unnamed_addr', 'uno',
  386. 'unordered', 'unreachable', 'unsat', 'unwind', 'urem', 'uselistorder',
  387. 'uselistorder_bb', 'uwtable', 'va_arg', 'variable', 'vFuncId',
  388. 'virtualConstProp', 'void', 'volatile', 'weak', 'weak_odr', 'webkit_jscc',
  389. 'win64cc', 'within', 'wpdRes', 'wpdResolutions', 'writeonly', 'x',
  390. 'x86_64_sysvcc', 'x86_fastcallcc', 'x86_fp80', 'x86_intrcc', 'x86_mmx',
  391. 'x86_regcallcc', 'x86_stdcallcc', 'x86_thiscallcc', 'x86_vectorcallcc', 'xchg',
  392. 'xor', 'zeroext', 'zeroinitializer', 'zext', 'immarg', 'willreturn'),
  393. suffix=r'\b'), Keyword),
  394. # Types
  395. (words(('void', 'half', 'float', 'double', 'x86_fp80', 'fp128',
  396. 'ppc_fp128', 'label', 'metadata', 'token')), Keyword.Type),
  397. # Integer types
  398. (r'i[1-9]\d*', Keyword)
  399. ]
  400. }
class LlvmMirBodyLexer(RegexLexer):
    """
    For LLVM MIR examples without the YAML wrapper.

    For more information on LLVM MIR see https://llvm.org/docs/MIRLangRef.html.

    .. versionadded:: 2.6
    """
    name = 'LLVM-MIR Body'
    aliases = ['llvm-mir-body']
    # No file patterns or MIME types are registered for this lexer.
    filenames = []
    mimetypes = []

    # Rules within a state are tried in the order listed, so the more
    # specific ``%...`` forms must stay ahead of the generic
    # virtual-register rule.
    tokens = {
        'root': [
            # Attributes on basic blocks
            (words(('liveins', 'successors'), suffix=':'), Keyword),
            # Basic Block Labels
            (r'bb\.[0-9]+(\.[0-9a-zA-Z_.-]+)?( \(address-taken\))?:', Name.Label),
            (r'bb\.[0-9]+ \(%[0-9a-zA-Z_.-]+\)( \(address-taken\))?:', Name.Label),
            (r'%bb\.[0-9]+(\.\w+)?', Name.Label),
            # Stack references
            (r'%stack\.[0-9]+(\.\w+\.addr)?', Name),
            # Subreg indices
            (r'%subreg\.\w+', Name),
            # Virtual registers; the 'vreg' state handles an optional
            # bank/class and type that may follow.
            (r'%[0-9a-zA-Z_]+ *', Name.Variable, 'vreg'),
            # Reference to LLVM-IR global
            include('global'),
            # Reference to Intrinsic
            (r'intrinsic\(\@[0-9a-zA-Z_.]+\)', Name.Variable.Global),
            # Comparison predicates
            (words(('eq', 'ne', 'sgt', 'sge', 'slt', 'sle', 'ugt', 'uge', 'ult',
                    'ule'), prefix=r'intpred\(', suffix=r'\)'), Name.Builtin),
            (words(('oeq', 'one', 'ogt', 'oge', 'olt', 'ole', 'ugt', 'uge',
                    'ult', 'ule'), prefix=r'floatpred\(', suffix=r'\)'),
             Name.Builtin),
            # Physical registers
            (r'\$\w+', String.Single),
            # Assignment operator
            (r'[=]', Operator),
            # gMIR Opcodes
            (r'(G_ANYEXT|G_[SZ]EXT|G_SEXT_INREG|G_TRUNC|G_IMPLICIT_DEF|G_PHI|'
             r'G_FRAME_INDEX|G_GLOBAL_VALUE|G_INTTOPTR|G_PTRTOINT|G_BITCAST|'
             r'G_CONSTANT|G_FCONSTANT|G_VASTART|G_VAARG|G_CTLZ|G_CTLZ_ZERO_UNDEF|'
             r'G_CTTZ|G_CTTZ_ZERO_UNDEF|G_CTPOP|G_BSWAP|G_BITREVERSE|'
             r'G_ADDRSPACE_CAST|G_BLOCK_ADDR|G_JUMP_TABLE|G_DYN_STACKALLOC|'
             r'G_ADD|G_SUB|G_MUL|G_[SU]DIV|G_[SU]REM|G_AND|G_OR|G_XOR|G_SHL|'
             r'G_[LA]SHR|G_[IF]CMP|G_SELECT|G_GEP|G_PTR_MASK|G_SMIN|G_SMAX|'
             r'G_UMIN|G_UMAX|G_[US]ADDO|G_[US]ADDE|G_[US]SUBO|G_[US]SUBE|'
             r'G_[US]MULO|G_[US]MULH|G_FNEG|G_FPEXT|G_FPTRUNC|G_FPTO[US]I|'
             r'G_[US]ITOFP|G_FABS|G_FCOPYSIGN|G_FCANONICALIZE|G_FMINNUM|'
             r'G_FMAXNUM|G_FMINNUM_IEEE|G_FMAXNUM_IEEE|G_FMINIMUM|G_FMAXIMUM|'
             r'G_FADD|G_FSUB|G_FMUL|G_FMA|G_FMAD|G_FDIV|G_FREM|G_FPOW|G_FEXP|'
             r'G_FEXP2|G_FLOG|G_FLOG2|G_FLOG10|G_FCEIL|G_FCOS|G_FSIN|G_FSQRT|'
             r'G_FFLOOR|G_FRINT|G_FNEARBYINT|G_INTRINSIC_TRUNC|'
             r'G_INTRINSIC_ROUND|G_LOAD|G_[ZS]EXTLOAD|G_INDEXED_LOAD|'
             r'G_INDEXED_[ZS]EXTLOAD|G_STORE|G_INDEXED_STORE|'
             r'G_ATOMIC_CMPXCHG_WITH_SUCCESS|G_ATOMIC_CMPXCHG|'
             r'G_ATOMICRMW_(XCHG|ADD|SUB|AND|NAND|OR|XOR|MAX|MIN|UMAX|UMIN|FADD|'
             r'FSUB)'
             r'|G_FENCE|G_EXTRACT|G_UNMERGE_VALUES|G_INSERT|G_MERGE_VALUES|'
             r'G_BUILD_VECTOR|G_BUILD_VECTOR_TRUNC|G_CONCAT_VECTORS|'
             r'G_INTRINSIC|G_INTRINSIC_W_SIDE_EFFECTS|G_BR|G_BRCOND|'
             r'G_BRINDIRECT|G_BRJT|G_INSERT_VECTOR_ELT|G_EXTRACT_VECTOR_ELT|'
             r'G_SHUFFLE_VECTOR)\b',
             Name.Builtin),
            # Target independent opcodes
            (r'(COPY|PHI|INSERT_SUBREG|EXTRACT_SUBREG|REG_SEQUENCE)\b',
             Name.Builtin),
            # Flags
            (words(('killed', 'implicit')), Keyword),
            # ConstantInt values
            (r'[i][0-9]+ +', Keyword.Type, 'constantint'),
            # ConstantFloat values
            (r'(half|float|double) +', Keyword.Type, 'constantfloat'),
            # Bare immediates
            include('integer'),
            # MMO's
            (r':: *', Operator, 'mmo'),
            # MIR Comments
            (r';.*', Comment),
            # If we get here, assume it's a target instruction
            (r'[0-9a-zA-Z_]+', Name),
            # Everything else that isn't highlighted
            (r'[(), \n]+', Text),
        ],
        # The integer constant from a ConstantInt value
        'constantint': [
            include('integer'),
            # Zero-width match: leave the state as soon as anything that is
            # not an integer follows.
            (r'(?=.)', Text, '#pop'),
        ],
        # The floating point constant from a ConstantFloat value
        'constantfloat': [
            include('float'),
            # Zero-width match: leave the state on anything else.
            (r'(?=.)', Text, '#pop'),
        ],
        # What may follow a virtual register name
        'vreg': [
            # The bank or class if there is one; a single ':' only, so that
            # '::' is not consumed here.
            (r' *:(?!:)', Keyword, ('#pop', 'vreg_bank_or_class')),
            # The LLT if there is one
            (r' *\(', Text, 'vreg_type'),
            (r'(?=.)', Text, '#pop'),
        ],
        'vreg_bank_or_class': [
            # The unassigned bank/class
            (r' *_', Name.Variable.Magic),
            (r' *[0-9a-zA-Z_]+', Name.Variable),
            # The LLT if there is one
            (r' *\(', Text, 'vreg_type'),
            (r'(?=.)', Text, '#pop'),
        ],
        'vreg_type': [
            # Scalar and pointer types
            (r' *[sp][0-9]+', Keyword.Type),
            # Vector types of the form "<N x sM>" / "<N x pM>"
            (r' *<[0-9]+ *x *[sp][0-9]+>', Keyword.Type),
            (r'\)', Text, '#pop'),
            (r'(?=.)', Text, '#pop'),
        ],
        # Contents following a '::' operator
        'mmo': [
            (r'\(', Text),
            (r' +', Text),
            (words(('load', 'store', 'on', 'into', 'from', 'align', 'monotonic',
                    'acquire', 'release', 'acq_rel', 'seq_cst')),
             Keyword),
            # IR references
            (r'%ir\.[0-9a-zA-Z_.-]+', Name),
            (r'%ir-block\.[0-9a-zA-Z_.-]+', Name),
            (r'[-+]', Operator),
            include('integer'),
            include('global'),
            (r',', Punctuation),
            # '), (' is consumed as one token so the state is not left
            # between consecutive parenthesised groups.
            (r'\), \(', Text),
            (r'\)', Text, '#pop'),
        ],
        # Small reusable states pulled in via include()
        'integer': [(r'-?[0-9]+', Number.Integer),],
        'float': [(r'-?[0-9]+\.[0-9]+(e[+-][0-9]+)?', Number.Float)],
        'global': [(r'\@[0-9a-zA-Z_.]+', Name.Variable.Global)],
    }
  537. class LlvmMirLexer(RegexLexer):
  538. """
  539. Lexer for the overall LLVM MIR document format
  540. MIR is a human readable serialization format that's used to represent LLVM's
  541. machine specific intermediate representation. It allows LLVM's developers to
  542. see the state of the compilation process at various points, as well as test
  543. individual pieces of the compiler.
  544. For more information on LLVM MIR see https://llvm.org/docs/MIRLangRef.html.
  545. .. versionadded:: 2.6
  546. """
  547. name = 'LLVM-MIR'
  548. aliases = ['llvm-mir']
  549. filenames = ['*.mir']
  550. tokens = {
  551. 'root': [
  552. # Comments are hashes at the YAML level
  553. (r'#.*', Comment),
  554. # Documents starting with | are LLVM-IR
  555. (r'--- \|$', Keyword, 'llvm_ir'),
  556. # Other documents are MIR
  557. (r'---', Keyword, 'llvm_mir'),
  558. # Consume everything else in one token for efficiency
  559. (r'[^-#]+|.', Text),
  560. ],
  561. 'llvm_ir': [
  562. # Documents end with '...' or '---'
  563. (r'(\.\.\.|(?=---))', Keyword, '#pop'),
  564. # Delegate to the LlvmLexer
  565. (r'((?:.|\n)+?)(?=(\.\.\.|---))', bygroups(using(LlvmLexer))),
  566. ],
  567. 'llvm_mir': [
  568. # Comments are hashes at the YAML level
  569. (r'#.*', Comment),
  570. # Documents end with '...' or '---'
  571. (r'(\.\.\.|(?=---))', Keyword, '#pop'),
  572. # Handle the simple attributes
  573. (r'name:', Keyword, 'name'),
  574. (words(('alignment', ),
  575. suffix=':'), Keyword, 'number'),
  576. (words(('legalized', 'regBankSelected', 'tracksRegLiveness',
  577. 'selected', 'exposesReturnsTwice'),
  578. suffix=':'), Keyword, 'boolean'),
  579. # Handle the attributes don't highlight inside
  580. (words(('registers', 'stack', 'fixedStack', 'liveins', 'frameInfo',
  581. 'machineFunctionInfo'),
  582. suffix=':'), Keyword),
  583. # Delegate the body block to the LlvmMirBodyLexer
  584. (r'body: *\|', Keyword, 'llvm_mir_body'),
  585. # Consume everything else
  586. (r'.+', Text),
  587. (r'\n', Text),
  588. ],
  589. 'name': [ (r'[^\n]+', Name), default('#pop') ],
  590. 'boolean': [ (r' *(true|false)', Name.Builtin), default('#pop') ],
  591. 'number': [ (r' *[0-9]+', Number), default('#pop') ],
  592. 'llvm_mir_body': [
  593. # Documents end with '...' or '---'.
  594. # We have to pop llvm_mir_body and llvm_mir
  595. (r'(\.\.\.|(?=---))', Keyword, '#pop:2'),
  596. # Delegate the body block to the LlvmMirBodyLexer
  597. (r'((?:.|\n)+?)(?=\.\.\.|---)', bygroups(using(LlvmMirBodyLexer))),
  598. # The '...' is optional. If we didn't already find it then it isn't
  599. # there. There might be a '---' instead though.
  600. (r'(?!\.\.\.|---)((.|\n)+)', bygroups(using(LlvmMirBodyLexer), Keyword)),
  601. ],
  602. }
  603. class NasmLexer(RegexLexer):
  604. """
  605. For Nasm (Intel) assembly code.
  606. """
  607. name = 'NASM'
  608. aliases = ['nasm']
  609. filenames = ['*.asm', '*.ASM']
  610. mimetypes = ['text/x-nasm']
  611. # Tasm uses the same file endings, but TASM is not as common as NASM, so
  612. # we prioritize NASM higher by default
  613. priority = 1.0
  614. identifier = r'[a-z$._?][\w$.?#@~]*'
  615. hexn = r'(?:0x[0-9a-f]+|$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
  616. octn = r'[0-7]+q'
  617. binn = r'[01]+b'
  618. decn = r'[0-9]+'
  619. floatn = decn + r'\.e?' + decn
  620. string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"
  621. declkw = r'(?:res|d)[bwdqt]|times'
  622. register = (r'r[0-9][0-5]?[bwd]|'
  623. r'[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|'
  624. r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]')
  625. wordop = r'seg|wrt|strict'
  626. type = r'byte|[dq]?word'
  627. # Directives must be followed by whitespace, otherwise CPU will match
  628. # cpuid for instance.
  629. directives = (r'(?:BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
  630. r'ORG|ALIGN|STRUC|ENDSTRUC|COMMON|CPU|GROUP|UPPERCASE|IMPORT|'
  631. r'EXPORT|LIBRARY|MODULE)\s+')
  632. flags = re.IGNORECASE | re.MULTILINE
  633. tokens = {
  634. 'root': [
  635. (r'^\s*%', Comment.Preproc, 'preproc'),
  636. include('whitespace'),
  637. (identifier + ':', Name.Label),
  638. (r'(%s)(\s+)(equ)' % identifier,
  639. bygroups(Name.Constant, Keyword.Declaration, Keyword.Declaration),
  640. 'instruction-args'),
  641. (directives, Keyword, 'instruction-args'),
  642. (declkw, Keyword.Declaration, 'instruction-args'),
  643. (identifier, Name.Function, 'instruction-args'),
  644. (r'[\r\n]+', Text)
  645. ],
  646. 'instruction-args': [
  647. (string, String),
  648. (hexn, Number.Hex),
  649. (octn, Number.Oct),
  650. (binn, Number.Bin),
  651. (floatn, Number.Float),
  652. (decn, Number.Integer),
  653. include('punctuation'),
  654. (register, Name.Builtin),
  655. (identifier, Name.Variable),
  656. (r'[\r\n]+', Text, '#pop'),
  657. include('whitespace')
  658. ],
  659. 'preproc': [
  660. (r'[^;\n]+', Comment.Preproc),
  661. (r';.*?\n', Comment.Single, '#pop'),
  662. (r'\n', Comment.Preproc, '#pop'),
  663. ],
  664. 'whitespace': [
  665. (r'\n', Text),
  666. (r'[ \t]+', Text),
  667. (r';.*', Comment.Single)
  668. ],
  669. 'punctuation': [
  670. (r'[,():\[\]]+', Punctuation),
  671. (r'[&|^<>+*/%~-]+', Operator),
  672. (r'[$]+', Keyword.Constant),
  673. (wordop, Operator.Word),
  674. (type, Keyword.Type)
  675. ],
  676. }
  677. def analyse_text(text):
  678. # Probably TASM
  679. if re.match(r'PROC', text, re.IGNORECASE):
  680. return False
  681. class NasmObjdumpLexer(ObjdumpLexer):
  682. """
  683. For the output of 'objdump -d -M intel'.
  684. .. versionadded:: 2.0
  685. """
  686. name = 'objdump-nasm'
  687. aliases = ['objdump-nasm']
  688. filenames = ['*.objdump-intel']
  689. mimetypes = ['text/x-nasm-objdump']
  690. tokens = _objdump_lexer_tokens(NasmLexer)
  691. class TasmLexer(RegexLexer):
  692. """
  693. For Tasm (Turbo Assembler) assembly code.
  694. """
  695. name = 'TASM'
  696. aliases = ['tasm']
  697. filenames = ['*.asm', '*.ASM', '*.tasm']
  698. mimetypes = ['text/x-tasm']
  699. identifier = r'[@a-z$._?][\w$.?#@~]*'
  700. hexn = r'(?:0x[0-9a-f]+|$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
  701. octn = r'[0-7]+q'
  702. binn = r'[01]+b'
  703. decn = r'[0-9]+'
  704. floatn = decn + r'\.e?' + decn
  705. string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"
  706. declkw = r'(?:res|d)[bwdqt]|times'
  707. register = (r'r[0-9][0-5]?[bwd]|'
  708. r'[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|'
  709. r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]')
  710. wordop = r'seg|wrt|strict'
  711. type = r'byte|[dq]?word'
  712. directives = (r'BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
  713. r'ORG|ALIGN|STRUC|ENDSTRUC|ENDS|COMMON|CPU|GROUP|UPPERCASE|INCLUDE|'
  714. r'EXPORT|LIBRARY|MODULE|PROC|ENDP|USES|ARG|DATASEG|UDATASEG|END|IDEAL|'
  715. r'P386|MODEL|ASSUME|CODESEG|SIZE')
  716. # T[A-Z][a-z] is more of a convention. Lexer should filter out STRUC definitions
  717. # and then 'add' them to datatype somehow.
  718. datatype = (r'db|dd|dw|T[A-Z][a-z]+')
  719. flags = re.IGNORECASE | re.MULTILINE
  720. tokens = {
  721. 'root': [
  722. (r'^\s*%', Comment.Preproc, 'preproc'),
  723. include('whitespace'),
  724. (identifier + ':', Name.Label),
  725. (directives, Keyword, 'instruction-args'),
  726. (r'(%s)(\s+)(%s)' % (identifier, datatype),
  727. bygroups(Name.Constant, Keyword.Declaration, Keyword.Declaration),
  728. 'instruction-args'),
  729. (declkw, Keyword.Declaration, 'instruction-args'),
  730. (identifier, Name.Function, 'instruction-args'),
  731. (r'[\r\n]+', Text)
  732. ],
  733. 'instruction-args': [
  734. (string, String),
  735. (hexn, Number.Hex),
  736. (octn, Number.Oct),
  737. (binn, Number.Bin),
  738. (floatn, Number.Float),
  739. (decn, Number.Integer),
  740. include('punctuation'),
  741. (register, Name.Builtin),
  742. (identifier, Name.Variable),
  743. # Do not match newline when it's preceeded by a backslash
  744. (r'(\\\s*)(;.*)([\r\n])', bygroups(Text, Comment.Single, Text)),
  745. (r'[\r\n]+', Text, '#pop'),
  746. include('whitespace')
  747. ],
  748. 'preproc': [
  749. (r'[^;\n]+', Comment.Preproc),
  750. (r';.*?\n', Comment.Single, '#pop'),
  751. (r'\n', Comment.Preproc, '#pop'),
  752. ],
  753. 'whitespace': [
  754. (r'[\n\r]', Text),
  755. (r'\\[\n\r]', Text),
  756. (r'[ \t]+', Text),
  757. (r';.*', Comment.Single)
  758. ],
  759. 'punctuation': [
  760. (r'[,():\[\]]+', Punctuation),
  761. (r'[&|^<>+*=/%~-]+', Operator),
  762. (r'[$]+', Keyword.Constant),
  763. (wordop, Operator.Word),
  764. (type, Keyword.Type)
  765. ],
  766. }
  767. def analyse_text(text):
  768. # See above
  769. if re.match(r'PROC', text, re.I):
  770. return True
  771. class Ca65Lexer(RegexLexer):
  772. """
  773. For ca65 assembler sources.
  774. .. versionadded:: 1.6
  775. """
  776. name = 'ca65 assembler'
  777. aliases = ['ca65']
  778. filenames = ['*.s']
  779. flags = re.IGNORECASE
  780. tokens = {
  781. 'root': [
  782. (r';.*', Comment.Single),
  783. (r'\s+', Text),
  784. (r'[a-z_.@$][\w.@$]*:', Name.Label),
  785. (r'((ld|st)[axy]|(in|de)[cxy]|asl|lsr|ro[lr]|adc|sbc|cmp|cp[xy]'
  786. r'|cl[cvdi]|se[cdi]|jmp|jsr|bne|beq|bpl|bmi|bvc|bvs|bcc|bcs'
  787. r'|p[lh][ap]|rt[is]|brk|nop|ta[xy]|t[xy]a|txs|tsx|and|ora|eor'
  788. r'|bit)\b', Keyword),
  789. (r'\.\w+', Keyword.Pseudo),
  790. (r'[-+~*/^&|!<>=]', Operator),
  791. (r'"[^"\n]*.', String),
  792. (r"'[^'\n]*.", String.Char),
  793. (r'\$[0-9a-f]+|[0-9a-f]+h\b', Number.Hex),
  794. (r'\d+', Number.Integer),
  795. (r'%[01]+', Number.Bin),
  796. (r'[#,.:()=\[\]]', Punctuation),
  797. (r'[a-z_.@$][\w.@$]*', Name),
  798. ]
  799. }
  800. def analyse_text(self, text):
  801. # comments in GAS start with "#"
  802. if re.search(r'^\s*;', text, re.MULTILINE):
  803. return 0.9
  804. class Dasm16Lexer(RegexLexer):
  805. """
  806. Simple lexer for DCPU-16 Assembly
  807. Check http://0x10c.com/doc/dcpu-16.txt
  808. .. versionadded:: 2.4
  809. """
  810. name = 'DASM16'
  811. aliases = ['dasm16']
  812. filenames = ['*.dasm16', '*.dasm']
  813. mimetypes = ['text/x-dasm16']
  814. INSTRUCTIONS = [
  815. 'SET',
  816. 'ADD', 'SUB',
  817. 'MUL', 'MLI',
  818. 'DIV', 'DVI',
  819. 'MOD', 'MDI',
  820. 'AND', 'BOR', 'XOR',
  821. 'SHR', 'ASR', 'SHL',
  822. 'IFB', 'IFC', 'IFE', 'IFN', 'IFG', 'IFA', 'IFL', 'IFU',
  823. 'ADX', 'SBX',
  824. 'STI', 'STD',
  825. 'JSR',
  826. 'INT', 'IAG', 'IAS', 'RFI', 'IAQ', 'HWN', 'HWQ', 'HWI',
  827. ]
  828. REGISTERS = [
  829. 'A', 'B', 'C',
  830. 'X', 'Y', 'Z',
  831. 'I', 'J',
  832. 'SP', 'PC', 'EX',
  833. 'POP', 'PEEK', 'PUSH'
  834. ]
  835. # Regexes yo
  836. char = r'[a-zA-Z$._0-9@]'
  837. identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
  838. number = r'[+-]?(?:0[xX][a-zA-Z0-9]+|\d+)'
  839. binary_number = r'0b[01_]+'
  840. instruction = r'(?i)(' + '|'.join(INSTRUCTIONS) + ')'
  841. single_char = r"'\\?" + char + "'"
  842. string = r'"(\\"|[^"])*"'
  843. def guess_identifier(lexer, match):
  844. ident = match.group(0)
  845. klass = Name.Variable if ident.upper() in lexer.REGISTERS else Name.Label
  846. yield match.start(), klass, ident
  847. tokens = {
  848. 'root': [
  849. include('whitespace'),
  850. (':' + identifier, Name.Label),
  851. (identifier + ':', Name.Label),
  852. (instruction, Name.Function, 'instruction-args'),
  853. (r'\.' + identifier, Name.Function, 'data-args'),
  854. (r'[\r\n]+', Text)
  855. ],
  856. 'numeric' : [
  857. (binary_number, Number.Integer),
  858. (number, Number.Integer),
  859. (single_char, String),
  860. ],
  861. 'arg' : [
  862. (identifier, guess_identifier),
  863. include('numeric')
  864. ],
  865. 'deref' : [
  866. (r'\+', Punctuation),
  867. (r'\]', Punctuation, '#pop'),
  868. include('arg'),
  869. include('whitespace')
  870. ],
  871. 'instruction-line' : [
  872. (r'[\r\n]+', Text, '#pop'),
  873. (r';.*?$', Comment, '#pop'),
  874. include('whitespace')
  875. ],
  876. 'instruction-args': [
  877. (r',', Punctuation),
  878. (r'\[', Punctuation, 'deref'),
  879. include('arg'),
  880. include('instruction-line')
  881. ],
  882. 'data-args' : [
  883. (r',', Punctuation),
  884. include('numeric'),
  885. (string, String),
  886. include('instruction-line')
  887. ],
  888. 'whitespace': [
  889. (r'\n', Text),
  890. (r'\s+', Text),
  891. (r';.*?\n', Comment)
  892. ],
  893. }