@@ -255,3 +255,110 @@ def __init__(self, code):
def lex(self):
    """Run ``simple_lexer`` over this lexer's code using its rule list.

    Returns whatever ``simple_lexer`` returns for ``self.rules`` and
    ``self.code``.
    """
    rules, code = self.rules, self.code
    return simple_lexer(rules, code)
257257
258+
# https://sourceware.org/binutils/docs/as.html#Syntax
class GasLexer:
    """Lexer for GNU assembler (gas) sources.

    The comment syntax depends on the target architecture, so the comment
    rules are generated per instance from ``gasm_comment_chars_map`` and
    inserted between ``rules_before_comments`` and ``rules_after_comments``.
    """

    # https://sourceware.org/binutils/docs/as.html#Symbol-Intro
    # Apparently dots are okay, BUT ctags removes the first dot from labels,
    # for example. Same with dollars.
    # /musl/v1.2.5/source/src/string/aarch64/memcpy.S#L92
    gasm_identifier = r'[a-zA-Z0-9_][a-zA-Z0-9_$.]*'

    gasm_flonum = r'0?[a-zA-Z][+-]?([0-9]|\\s*\n\s*)*\.([0-9]|\\s*\n\s*)*([eE][+-]?[0-9]+)?'
    gasm_number = regex_or(
        gasm_flonum,
        shared.common_hexidecimal_integer,
        shared.common_binary_integer,
        shared.common_decimal_integer,
    )

    # A character constant is a single quote followed by one (possibly
    # backslash-escaped) character; there is no closing quote in the pattern.
    gasm_char = r"'(\\.|.|\n)"
    # No whitespace inside the groups: a stray space here would become a
    # literal space that the string token would be required to end with.
    gasm_string = f'(({shared.double_quote_string_with_escapes})|({gasm_char}))'

    # Maps an architecture name to the patterns that start a line comment.
    # Each entry is either
    #   * a single character (escaped by get_arch_rules() if it is a regex
    #     metacharacter),
    #   * a longer regex fragment, used verbatim (e.g. r'#\s'), or
    #   * '^' followed by a comment introducer that only counts when it is
    #     the first thing in the line (wrapped in FirstInLine).
    gasm_comment_chars_map = {
        'generic': (r'#\s',),

        'nios2': (r'#',),
        'openrisc': (r'#',),
        'powerpc': (r'#',),
        's390': (r'#',),
        'xtensa': (r'#',),
        'microblaze': (r'#',),
        'mips': (r'#',),
        'alpha': (r'#',),
        'csky': (r'#',),
        # BUT double pipe in macros is an operator... and # is not only used
        # at the start of the line in
        # /linux/v6.10.7/source/arch/m68k/ifpsp060/src/fplsp.S
        'm68k': ('|', '^#', r'#\s'),
        'arc': ('# ', ';'),

        # https://sourceware.org/binutils/docs/as.html#HPPA-Syntax
        # /linux/v6.10.7/source/arch/parisc/kernel/perf_asm.S#L28
        'parisc': (';',),
        'x86': (';',),
        'tic6x': (';', '*'),  # cx6, tms320, although the star is sketchy

        # In the entries below, # starts a comment only if it is the first
        # character of the line.

        # https://sourceware.org/binutils/docs/as.html#SH-Syntax
        # /linux/v6.10.7/source/arch/sh/kernel/head_32.S#L58
        'sh': ('!', '^#'),
        # https://sourceware.org/binutils/docs/as.html#Sparc_002dSyntax
        # /linux/v6.10.7/source/arch/sparc/lib/memset.S#L125
        'sparc': ('!', '^#'),
        # used in ARM https://sourceware.org/binutils/docs/as.html#ARM-Syntax
        # /linux/v6.10.7/source/arch/arm/mach-sa1100/sleep.S#L33
        'arm32': ('@', '^#'),
        'cris': (';', '^#'),
        'avr': (';', '^#'),
        # blackfin, tile
    }

    gasm_punctuation = r'[.,\[\]()<>{}%&+*!|@#$;:^/\\=~-]'
    # TODO make sure all relevant directives are listed here
    gasm_preprocessor = r'#[ \t]*(define|ifdef|ifndef|undef|if|else|elif|endif)'

    # Rules placed ahead of the architecture-specific comment rules.
    rules_before_comments = [
        (shared.whitespace, TokenType.WHITESPACE),
        # don't interpret macro concatenation as a comment
        ('##', TokenType.PUNCTUATION),
        # don't interpret logical or as a comment
        (r'\|\|', TokenType.PUNCTUATION),
        (
            FirstInLine(regex_or(shared.c_preproc_include, shared.c_preproc_warning_and_error)),
            TokenType.SPECIAL,
        ),
        (FirstInLine(gasm_preprocessor), TokenType.SPECIAL),
        (shared.common_slash_comment, TokenType.COMMENT),
    ]

    # Rules placed after the architecture-specific comment rules.
    rules_after_comments = [
        (gasm_string, TokenType.STRING),
        (gasm_number, TokenType.NUMBER),
        (gasm_identifier, TokenType.IDENTIFIER),
        (gasm_punctuation, TokenType.PUNCTUATION),
    ]

    # Single characters that must be backslash-escaped to be matched
    # literally by a regular expression.
    _regex_metachars = frozenset('*?+^.$\\[]|()')

    def __init__(self, code, arch='generic'):
        """Store the source text and select the comment patterns for ``arch``.

        ``arch`` must be a key of ``gasm_comment_chars_map``; an unknown
        architecture raises ``KeyError``.
        """
        self.code = code
        self.comment_chars = self.gasm_comment_chars_map[arch]

    @staticmethod
    def _escape_comment_char(ch):
        """Return ``ch`` as a regex fragment that matches it literally.

        Only a single regex metacharacter gets a backslash prefix. Anything
        else (ordinary characters, or multi-character entries that are
        already regex fragments such as ``#`` followed by a whitespace
        class) is returned unchanged.
        """
        if ch in GasLexer._regex_metachars:
            # Exactly one backslash and nothing else: any extra character in
            # the prefix would change what the resulting pattern matches.
            return '\\' + ch
        return ch

    def get_arch_rules(self):
        """Build the comment rules for the architecture chosen in ``__init__``.

        Returns a list of ``(pattern, TokenType.COMMENT)`` tuples, one per
        entry of ``self.comment_chars`` and in the same order. Entries that
        start with ``^`` are wrapped in ``FirstInLine``.
        """
        comment_tail = shared.singleline_comment_with_escapes_base
        result = []

        for comment_char in self.comment_chars:
            if comment_char.startswith('^'):
                # Everything after the marker is the actual comment introducer.
                pattern = FirstInLine(self._escape_comment_char(comment_char[1:]) + comment_tail)
            else:
                pattern = self._escape_comment_char(comment_char) + comment_tail
            result.append((pattern, TokenType.COMMENT))

        return result

    def lex(self):
        """Lex ``self.code`` with the generic rules plus the arch comment rules.

        The rule list is ``rules_before_comments``, then the result of
        ``get_arch_rules()``, then ``rules_after_comments``; it is handed to
        ``simple_lexer`` together with the source text.
        """
        rules = [
            *self.rules_before_comments,
            *self.get_arch_rules(),
            *self.rules_after_comments,
        ]
        return simple_lexer(rules, self.code)
364+
0 commit comments