import re
import sys

from lark import Lark, Tree
from lark.visitors import Interpreter

# This grammar derived from:
# https://mesonbuild.com/Syntax.html#grammar
meson_grammar = r"""
    ?start: (statement | COMMENT | NEWLINE)*

    ?additive_expression: multiplicative_expression | (additive_expression additive_operator multiplicative_expression)
    additive_operator: PLUS | MINUS
    argument_list: positional_arguments [COMMA keyword_arguments] [COMMA] | keyword_arguments
    array_literal: LBRACKET [expression_list] RBRACKET
    ?assignment_statement: assignment_expression
    assignment_expression: expression assignment_operator expression
    assignment_operator: EQUALS | PLUS_EQUALS
    binary_literal: "0b" BINARY_NUMBER
    BINARY_NUMBER: /[01]+/
    boolean_literal: TRUE | FALSE
    build_definition: (NEWLINE | statement)*
    condition: expression
    ?conditional_expression: logical_or_expression | (logical_or_expression "?" expression ":" expression)
    decimal_literal: DECIMAL_NUMBER
    DECIMAL_NUMBER: /[0-9][0-9]*/
    dictionary_literal: LBRACE [key_value_list] RBRACE
    ?equality_expression: relational_expression | (equality_expression equality_operator relational_expression)
    equality_operator: DOUBLE_EQUAL | NOT_EQUAL
    ?expression: conditional_expression | logical_or_expression
    expression_list: expression (COMMA expression)* COMMA?
    ?expression_statement: expression
    ?function_expression: id_expression LPAREN [argument_list] RPAREN
    hex_literal: "0x" HEX_NUMBER
    HEX_NUMBER: /[a-fA-F0-9]+/
    id_expression: IDENTIFIER
    IDENTIFIER: /[a-zA-Z_][a-zA-Z_0-9]*/
    identifier_list: id_expression (COMMA id_expression)*
    integer_literal: decimal_literal | octal_literal | hex_literal
    iteration_statement: FOREACH identifier_list COLON expression NEWLINE (statement | jump_statement)* ENDFOREACH
    jump_statement: (BREAK | CONTINUE) NEWLINE
    key_value_item: expression COLON expression
    key_value_list: key_value_item (COMMA key_value_item)* COMMA?
    keyword_item: id_expression ":" expression
    keyword_arguments: keyword_item (COMMA keyword_item)* COMMA?
    ?literal: integer_literal | string_literal | boolean_literal | array_literal | dictionary_literal
    ?logical_and_expression: equality_expression | (logical_and_expression AND ["\\"] equality_expression)
    ?logical_or_expression: logical_and_expression | (logical_or_expression OR ["\\"] logical_and_expression)
    ?method_expression: postfix_expression ["\\"] DOT function_expression
    ?multiplicative_expression: unary_expression | (multiplicative_expression multiplicative_operator unary_expression)
    multiplicative_operator: ASTERISK | SLASH | PERCENT
    octal_literal: "0o" OCTAL_NUMBER
    OCTAL_NUMBER: /[0-7]+/
    positional_arguments: expression (COMMA expression)*
    postfix_expression: primary_expression | subscript_expression | function_expression | method_expression
    ?primary_expression: literal | (LPAREN expression RPAREN) | id_expression
    ?relational_expression: additive_expression | (relational_expression relational_operator additive_expression)
    relational_operator: GREATER | LESSTHAN | GREATER_OR_EQUAL | LESSTHAN_OR_EQUAL | IN | (NOT IN)
    selection_statement: IF condition NEWLINE (statement)* (ELIF condition NEWLINE (statement)*)* [ELSE NEWLINE (statement)*] ENDIF
    statement: (expression_statement | selection_statement | iteration_statement | assignment_statement) NEWLINE
    string_literal: STRING_SIMPLE_VALUE | STRING_MULTILINE_VALUE
    ?subscript_expression: postfix_expression LBRACKET expression RBRACKET
    ?unary_expression: postfix_expression | (unary_operator unary_expression)
    unary_operator: NOT | DASH

    AND: /and/
    ASTERISK: /\*/
    BREAK: /break/
    CONTINUE: /continue/
    COLON: /:/
    COMMA: /,/
    DASH: /-/
    DOT: /\./
    DOUBLE_EQUAL: /==/
    EQUALS: /=/
    FOREACH: /foreach/
    GREATER: />/
    GREATER_OR_EQUAL: />=/
    # Raise priorities to avoid elif parsed as a statement
    ELIF.1: /elif/
    ELSE.1: /else/
    ENDIF.1: /endif/
    ENDFOREACH: /endforeach/
    FALSE: /false/
    IF: /if /
    IN: / in /
    LBRACKET: /\[/
    NOT: /not /
    NOT_EQUAL: /!=/
    RBRACKET: /\]/
    LESSTHAN: /</
    LESSTHAN_OR_EQUAL: /<=/
    LBRACE: /{/
    LPAREN: /\(/
    RBRACE: /}/
    RPAREN: /\)/
    OR: /or/
    PERCENT: /%/
    PLUS: /\+/
    MINUS: /-/
    PLUS_EQUALS: /\+=/
    NEWLINE: ( / *\r?\n/ | COMMENT )+
    COMMENT: / *\#.*\n/
    SLASH: /\//
    STRING_SIMPLE_VALUE: /f?'(.*\\')*.*?'/
    STRING_MULTILINE_VALUE: /f?'''.*?'''/s
    TRUE: /true/

    %import common.WS

    %ignore WS
    # Comments would be nice to keep, but parsing fails end-of-line comments
    %ignore COMMENT
"""


class TreeToCode(Interpreter):
    """Render a tree parsed with ``meson_grammar`` as Python source text.

    Every method named after a grammar rule receives that rule's subtree and
    returns the text generated for it. Rules without a dedicated method are
    handled by ``__default__``, which concatenates the rendered children.
    """

    # Indentation prefix prepended to each emitted statement.
    indent = ''
    # Text added by more_indent() and removed by less_indent(); keeping it in
    # one place guarantees the two operations stay symmetric.
    indent_step = '  '

    def _render_children(self, children):
        """Return the concatenated text of ``children``.

        Subtrees are visited, tokens are used verbatim and ``None``
        placeholders (unmatched optional items) are skipped.
        """
        parts = []
        for child in children:
            if isinstance(child, Tree):
                parts.append(self.visit(child))
            elif child is not None:
                parts.append(child)
        return ''.join(parts)

    def statement(self, tree):
        """Render one statement, prefixed with the current indentation."""
        return self.indent + self._render_children(tree.children)

    def jump_statement(self, tree):
        """Render ``break``/``continue`` at the current indentation.

        Jump statements are direct children of a foreach body rather than
        being wrapped in ``statement``, so the indentation is added here.
        """
        return self.indent + self._render_children(tree.children)

    def more_indent(self):
        """Increase the indentation by one level."""
        self.indent += self.indent_step

    def less_indent(self):
        """Decrease the indentation by one level."""
        self.indent = self.indent[: len(self.indent) - len(self.indent_step)]

    # Ensure spaces around 'and'
    def logical_and_expression(self, tree):
        """Render ``lhs and rhs``; the operator token itself is discarded."""
        assert len(tree.children) == 3
        lhs = self.visit(tree.children[0])
        rhs = self.visit(tree.children[2])
        return lhs + ' and ' + rhs

    # Ensure spaces around 'or'
    def logical_or_expression(self, tree):
        """Render ``lhs or rhs``; the operator token itself is discarded."""
        assert len(tree.children) == 3
        lhs = self.visit(tree.children[0])
        rhs = self.visit(tree.children[2])
        return lhs + ' or ' + rhs

    # A ? B : C becomes B if A else C
    def conditional_expression(self, tree):
        """Render a ternary as a Python conditional expression."""
        assert len(tree.children) == 3
        expr = self.visit(tree.children[0])
        first = self.visit(tree.children[1])
        second = self.visit(tree.children[2])
        return first + ' if ' + expr + ' else ' + second

    def assignment_expression(self, tree):
        """Render ``lhs = rhs`` / ``lhs += rhs``.

        ``+=`` is special-cased when the rendered right-hand side starts with
        ``{`` (emitted as ``|=``) or with ``'`` (emitted so that the string is
        wrapped in a list when the target is a list at runtime).
        """
        assert len(tree.children) == 3
        lhs = self.visit(tree.children[0])
        operator = self.visit(tree.children[1])
        rhs = self.visit(tree.children[2])
        if operator == '+=' and rhs.startswith('{'):
            # Convert += to |= for dictionaries
            return lhs + ' |= ' + rhs
        if operator == '+=' and rhs.startswith("'"):
            # Handle literal string append to list or string
            return (
                lhs
                + ' += '
                + '['
                + rhs
                + '] if isinstance('
                + lhs
                + ', list) else '
                + rhs
            )
        return lhs + operator + rhs

    def iteration_statement(self, tree):
        """Render a ``foreach`` block as a Python ``for`` loop.

        Children are: FOREACH, identifier_list, COLON, expression, NEWLINE,
        the body items, ENDFOREACH. A comma in the identifier list means
        several loop variables, in which case ``.items()`` is appended to the
        iterated expression.
        """
        identifier_list = self.visit(tree.children[1])
        iterable = self.visit(tree.children[3])
        header_end = '.items():\n' if ',' in identifier_list else ':\n'
        string = 'for ' + identifier_list + ' in ' + iterable + header_end
        self.more_indent()
        # Everything between the header's NEWLINE and the closing ENDFOREACH
        string += self._render_children(tree.children[5:-1])
        self.less_indent()
        return string

    def selection_statement(self, tree):
        """Render an ``if``/``elif``/``else``/``endif`` chain.

        Walks the flat child list: each branch starts with a keyword token
        (followed by a condition for ``if``/``elif``), then a NEWLINE, then
        zero or more statement subtrees. An empty branch gets a ``noop()``
        call so that the generated block is not empty.

        Exits the program (``SystemExit``) if the child list does not have
        the expected shape.
        """
        string = ''
        index = 0
        while index < len(tree.children):
            prefix = tree.children[index]
            index += 1
            if prefix is None:
                # Placeholder left by an absent optional item
                continue
            if isinstance(prefix, Tree):
                sys.exit('unexpected prefix: ' + prefix.pretty())
            if re.match(r' *endif', prefix) is not None:
                break

            if re.match(r'if', prefix) is not None:
                condition = self.visit(tree.children[index])
                index += 1
                # Skip indent here because all statements are prepended with the indentation
                string += 'if ' + condition + ':\n'
            elif re.match(r'elif', prefix) is not None:
                condition = self.visit(tree.children[index])
                index += 1
                string += self.indent + 'elif ' + condition + ':\n'
            elif re.match(r'else', prefix) is not None:
                string += self.indent + 'else:\n'
            else:
                sys.exit('Not a prefix: ' + prefix)

            # Skip the NEWLINE that follows the branch header
            index += 1

            statement_count = 0
            self.more_indent()
            while index < len(tree.children):
                statement = tree.children[index]
                if not isinstance(statement, Tree):
                    # Next keyword token (or placeholder): the branch is over
                    break
                string += self.visit(statement)
                index += 1
                statement_count += 1
            if statement_count == 0:
                string += self.indent + 'noop()\n'
            self.less_indent()

        return string

    def postfix_expression(self, tree):
        """Render a postfix expression, rewriting some method calls.

        ``to_int``, ``to_string``, ``length``, ``to_upper`` and
        ``underscorify`` calls found in rendered subtrees are replaced by the
        Python expressions given in the substitutions below.
        """
        string = ''
        for child in tree.children:
            if isinstance(child, Tree):
                subtree = self.visit(child)
                subtree = re.sub(r'(.+)\.to_int\(\)', r'int(\g<1>)', subtree)
                subtree = re.sub(r'(.+)\.to_string\(\)', r'str(\g<1>)', subtree)
                subtree = re.sub(r'(.+)\.length\(\)', r'len(\g<1>)', subtree)
                subtree = re.sub(r'(.+)\.to_upper\(\)', r'\g<1>.upper()', subtree)
                subtree = re.sub(
                    r'(.+)\.underscorify\(\)',
                    r"\g<1>.replace('.', '_').replace('/', '_')",
                    subtree,
                )
                string += subtree
            elif child is not None:
                string += child
        return string

    def function_expression(self, tree):
        """Render a call: identifier, ``(``, optional arguments, ``)``.

        ``import`` is renamed to ``module_import`` and ``contains(...)`` is
        emitted as ``count(...) > 0``.
        """
        assert len(tree.children) == 4
        identifier = self.visit(tree.children[0])
        if identifier == 'import':
            identifier = 'module_import'
        lparen = tree.children[1]
        # The argument list is None when the call has no arguments
        args = (
            self.visit(tree.children[2]) if isinstance(tree.children[2], Tree) else ''
        )
        rparen = tree.children[3]
        if identifier == 'contains':
            return 'count' + lparen + args + rparen + ' > 0'
        return identifier + lparen + args + rparen

    def multiplicative_expression(self, tree):
        """Render ``lhs <op> rhs``.

        ``/`` is emitted as a runtime choice: concatenation with ``+`` when
        the left operand is a ``str``, division otherwise.
        """
        assert len(tree.children) == 3
        lhs = self.visit(tree.children[0])
        operator = self.visit(tree.children[1])
        rhs = self.visit(tree.children[2])
        # Slash used mostly to concatenate strings
        if operator == '/':
            return (
                '('
                + lhs
                + ' + '
                + rhs
                + ') if isinstance('
                + lhs
                + ', str) else ('
                + lhs
                + ' / '
                + rhs
                + ')'
            )
        return lhs + operator + rhs

    # Switch from colon to equals
    def keyword_item(self, tree):
        """Render a keyword argument as ``name=value``."""
        id_ = self.visit(tree.children[0])
        args = self.visit(tree.children[1])
        return id_ + '=' + args

    def boolean_literal(self, tree):
        """Render ``true``/``false`` as ``True``/``False``.

        Exits the program (``SystemExit``) on any other token value.
        """
        assert len(tree.children) == 1
        value = tree.children[0]
        if value == 'true':
            return 'True'
        if value == 'false':
            return 'False'
        sys.exit('Unhandled value: ' + value)

    # The "0x" / "0o" / "0b" prefixes are anonymous literals in the grammar.
    # Lark leaves such tokens out of the tree by default, so only the digits
    # arrive here and the prefix has to be put back.
    def hex_literal(self, tree):
        """Render a hexadecimal literal including its ``0x`` prefix."""
        return '0x' + self._render_children(tree.children)

    def octal_literal(self, tree):
        """Render an octal literal including its ``0o`` prefix."""
        return '0o' + self._render_children(tree.children)

    def binary_literal(self, tree):
        """Render a binary literal including its ``0b`` prefix."""
        return '0b' + self._render_children(tree.children)

    def string_literal(self, tree):
        """Render a string literal, converting ``@...@`` placeholders.

        ``@<digit>@`` becomes ``{}`` in every string. In strings starting
        with ``f``, each ``@identifier@`` becomes ``{identifier}``; the
        pattern matches one identifier at a time so that several
        placeholders in the same string are converted independently.
        """
        assert len(tree.children) == 1
        string = tree.children[0]
        string = re.sub(r'(@[0-9]@)', r'{}', string)
        if string.startswith('f'):
            string = re.sub(r'@([a-zA-Z_][a-zA-Z_0-9]*)@', r'{\g<1>}', string)
        return string

    def __default__(self, tree):
        """Fallback for rules without a dedicated method."""
        return self._render_children(tree.children)


# Converts the given file from meson to python and returns the content as a string
def meson2python(file_name):
    """Parse the Meson file ``file_name`` and return the generated Python code.

    Parsing errors raised by Lark and I/O errors raised by ``open`` propagate
    to the caller.
    """
    meson_parser = Lark(meson_grammar, parser='earley')
    with open(file_name, encoding='utf-8') as f:
        # Ensure newline before end of file
        tree = meson_parser.parse(f.read() + '\n')
    return TreeToCode().visit(tree)


if __name__ == '__main__':
    if len(sys.argv) != 2:
        sys.exit('usage: ' + sys.argv[0] + ' <meson-file>')
    print(meson2python(sys.argv[1]))