#!/usr/bin/python
#
# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Parse assembler definition file.

Definition JSON file for this script has the following form:
{
  "arch": "XXX",
  "insns": [
    {
      "name": "ShlbRegReg",
      "args": [
         {"class": "GeneralReg8", "usage": "use_def"},
         {"class": "RCX", "usage": "use"},
         {"class": "FLAGS", "usage": "def"}
      ],
      "asm": "ShlbByCl",
      "mnemo": "SHLB"
    },
    ...
  ]
}

'arch' is primarily used for C++ namespace in LIR generator, and is ignored by
this script.

'insns' is an array of objects, each describing a single instruction variant.
Each instruction is an object with the following fields:
  'name' - instruction unique name, used in LIR generator, typical name is
           InsnOp1Op2, where 'Insn' is instruction name, 'Op1', 'Op2' -
           operand types, such as Imm, Reg, Mem(Op), Base, Disp.
  'args' - describes the ordered list of instruction arguments.
           For each argument: its 'class' (any GP register, fixed GP register,
           any XMM register, immediate, memory operand, flags register)
           and how it is treated by the instruction (used, defined,
           both used and defined)
  'asm' - which internal assembler's mnemonic is used
  'opcode' | 'opcodes' - optional flag for autogeneration:
                         if opcode bytes are specified then implementation
                         would be automatically generated
  'type' - optional flag to specify extra information (encoded in the name).
  'mnemo' - how instruction shall be named in LIR dumps (ignored here)

Memory operand for assembler instructions can be described as either opaque
Operand class, which provides full power of x86 addressing modes, or as
explicit BaseDisp format, which translates to reg+disp form.

For some instructions (such as pop, push, jmp reg) exact register width is not
specified, and 'GeneralReg' class is used, as the same encoding is used for 32
and 64 bit operands, depending on current CPU mode.

This script produces an inline file for the internal assembler's header; for
the above example it would yield a single line

  void ShlbByCl(Register);

Fixed arguments (such as 'RCX') and flags ('FLAGS') are ignored when generating
assembler's header, while for the others an argument is emitted whose type
depends on the argument's class.
"""

import copy
import json
import re


def is_imm(arg_type):
    """Return True if arg_type names an immediate operand class."""
    return arg_type in (
        'Imm2', 'Imm8', 'Imm16', 'Imm32', 'Imm64',  # x86 immediates
        'B-Imm', 'I-Imm', 'J-Imm', 'P-Imm', 'S-Imm', 'U-Imm',  # Official RISC-V immediates
        'Csr-Imm', 'Shift32-Imm', 'Shift64-Imm',  # Extra RISC-V immediates
    )


def is_disp(arg_type):
    """Return True if arg_type is the 'Disp' operand class."""
    return arg_type == 'Disp'


def is_mem_op(arg_type):
    """Return True if arg_type names a memory operand class."""
    return arg_type in (
        # Universal memory operands
        'Mem', 'Mem8', 'Mem16', 'Mem32', 'Mem64', 'Mem128',
        # x86 memory operands
        'MemX87', 'MemX8716', 'MemX8732', 'MemX8764', 'MemX8780',
        'VecMem32', 'VecMem64', 'VecMem128', 'VecMem256')


def is_cond(arg_type):
    """Return True if arg_type is the 'Cond' operand class."""
    return arg_type == 'Cond'


def is_csr(arg_type):
    """Return True if arg_type is the 'CsrReg' operand class."""
    return arg_type == 'CsrReg'


def is_rm(arg_type):
    """Return True if arg_type is the 'Rm' operand class."""
    return arg_type == 'Rm'


def is_label(arg_type):
    """Return True if arg_type is the 'Label' operand class."""
    return arg_type == 'Label'


def is_x87reg(arg_type):
    """Return True if arg_type is the 'RegX87' operand class."""
    return arg_type == 'RegX87'


def is_greg(arg_type):
    """Return True if arg_type names a general-purpose register class."""
    return arg_type in ('GeneralReg',
                        'GeneralReg8', 'GeneralReg16',
                        'GeneralReg32', 'GeneralReg64')


def is_freg(arg_type):
    """Return True if arg_type is the 'FpReg' operand class."""
    return arg_type == 'FpReg'


def is_xreg(arg_type):
    """Return True if arg_type is a class that get_mem_macro_name tags 'XReg'."""
    return arg_type in ('XmmReg',
                        'VecReg64', 'VecReg128',
                        'FpReg32', 'FpReg64')


def is_yreg(arg_type):
    """Return True if arg_type is a class that get_mem_macro_name tags 'YReg'."""
    return arg_type in ('YmmReg', 'VecReg256')


# Operands of this type are NOT passed to assembler
def is_implicit_reg(arg_type):
    """Return True if arg_type names a fixed (implicit) register or FLAGS."""
    return arg_type in ('RAX', 'EAX', 'AX', 'AL',
                        'RCX', 'ECX', 'CL', 'ST', 'ST1',
                        'RDX', 'EDX', 'DX', 'CC',
                        'RBX', 'EBX', 'BX', 'SW',
                        'RDI', 'RSI', 'RSP', 'FLAGS')


def exactly_one_of(iterable):
    """Return True if exactly one element of iterable is truthy."""
    return sum(1 for elem in iterable if elem) == 1


def get_mem_macro_name(insn, addr_mode=None):
    """Build an instruction name from its 'asm' stem and operand classes.

    The name starts with insn['asm'] (with a trailing 'ByCl' removed) and gets
    one suffix per argument, in order: 'Rm', 'Reg', 'FReg', 'XReg', 'YReg',
    'Imm', or, for memory operands, 'Op' (when addr_mode is None) or
    'Mem' + addr_mode.

    Args:
      insn: instruction dict with 'asm' and 'args' (each arg has a 'class').
      addr_mode: optional addressing-mode suffix used for memory operands.

    Returns:
      The composed name string.

    Raises:
      Exception: if an argument class is not one of the recognized kinds.
    """
    macro_name = insn.get('asm')
    if macro_name.endswith('ByCl'):
        macro_name = macro_name[:-4]
    for arg in insn['args']:
        clazz = arg['class']
        # Don't reflect FLAGS/Conditions/Csrs/Labels in the name - we don't ever
        # have two different instructions where these cause the difference.
        if clazz == 'FLAGS' or is_cond(clazz) or is_label(clazz) or is_csr(clazz):
            pass
        elif is_rm(clazz):
            macro_name += 'Rm'
        elif is_x87reg(clazz) or is_greg(clazz) or is_implicit_reg(clazz):
            macro_name += 'Reg'
        elif is_freg(clazz):
            macro_name += 'FReg'
        elif is_xreg(clazz):
            macro_name += 'XReg'
        elif is_yreg(clazz):
            macro_name += 'YReg'
        elif is_imm(clazz):
            macro_name += 'Imm'
        elif is_mem_op(clazz):
            if addr_mode is not None:
                macro_name += 'Mem' + addr_mode
            else:
                macro_name += 'Op'
        else:
            raise Exception('arg type %s is not supported' % clazz)
    return macro_name


def _get_cxx_name(name):
    """Turn a native assembler stem into a CamelCase identifier.

    The stem is split on '-', '_', '.' and ' '; words that already contain an
    uppercase ASCII letter are kept verbatim, all others are capitalized, and
    the pieces are concatenated (e.g. 'add.w' -> 'AddW').
    """
    return ''.join(w if re.search(r'[A-Z]', w) else w.capitalize()
                   for w in re.split(r'[-_. ]', name))


def _expand_name(insn, stem, encoding=None):
    """Return a copy of insn specialized for one stem.

    Args:
      insn: instruction dict; it is not modified.
      stem: native assembler name for this variant.
      encoding: optional dict of extra fields merged into the result last, so
        its keys override the generated ones.

    Returns:
      A new dict with 'native-asm', 'asm', 'name' and 'mnemo' filled in.
    """
    # Make deep copy of the instruction to make sure consumers could treat them
    # as independent entities and add/remove marks freely.
    #
    # JSON never have "merged" objects thus having them in result violates
    # expectations.
    expanded_insn = copy.deepcopy(insn)
    # Native assembler name may include dots, spaces, etc. Keep it for text assembler.
    expanded_insn["native-asm"] = stem
    expanded_insn['asm'] = _get_cxx_name(stem)
    expanded_insn['name'] = get_mem_macro_name(expanded_insn)
    expanded_insn['mnemo'] = stem.upper()
    if encoding:
        # Copy the encoding as well: otherwise nested values (e.g. an 'opcodes'
        # list) would stay shared with the caller's data, defeating the
        # independence the deep copy above is meant to provide.
        expanded_insn.update(copy.deepcopy(encoding))
    return expanded_insn


def _expand_insn_by_encodings(insns):
    """Expand instructions described via 'encodings' or 'stems'.

    An instruction with a non-empty 'encodings' mapping (stem -> encoding dict)
    yields one instruction per stem, with the encoding fields merged in. An
    instruction with a non-empty 'stems' list yields one instruction per stem.
    Any other instruction must already carry 'name', 'asm' and 'mnemo' and is
    passed through as the same object.

    Returns:
      A new list of instruction dicts.
    """
    expanded_insns = []
    for insn in insns:
        if insn.get('encodings'):
            assert all((f not in insn) for f in ['stems', 'name', 'asm', 'mnemo'])
            # If we have encoding then we must have at least opcodes
            assert all('opcode' in encoding or 'opcodes' in encoding
                       for encoding in insn['encodings'].values())
            expanded_insns.extend([_expand_name(insn, stem, encoding)
                                   for stem, encoding in insn['encodings'].items()])
        elif insn.get('stems'):
            # NOTE(review): this checks the key 'encoding' (singular) while the
            # branch above handles 'encodings' - confirm which one is intended.
            assert all((f not in insn) for f in ['encoding', 'name', 'asm', 'mnemo'])
            expanded_insns.extend([_expand_name(insn, stem)
                                   for stem in insn['stems']])
        else:
            assert all((f in insn) for f in ['name', 'asm', 'mnemo'])
            expanded_insns.append(insn)
    return expanded_insns


def _expand_insns_by_operands(insns):
    """Expand instructions whose argument class lists alternatives with '/'.

    An argument class such as 'GeneralReg32/Mem32' produces one independent
    copy of the instruction per alternative, in the listed order. At most one
    argument per instruction may contain '/'. Instructions without such an
    argument are passed through as the same object. Input instructions are
    never modified.

    Returns:
      A new list of instruction dicts.
    """
    expanded_insns = []
    for insn in insns:
        split_positions = [pos for pos, arg in enumerate(insn['args'])
                           if '/' in arg['class']]
        assert len(split_positions) <= 1, \
            'only one argument per instruction may list alternative classes'
        if not split_positions:
            expanded_insns.append(insn)
            continue
        pos = split_positions[0]
        for subclass in insn['args'][pos]['class'].split('/'):
            # Copy first and patch the copy, so the caller's instruction keeps
            # its original 'A/B' class string.
            expanded_insn = copy.deepcopy(insn)
            expanded_insn['args'][pos]['class'] = subclass
            expanded_insns.append(expanded_insn)
    return expanded_insns


def load_asm_defs(asm_def):
    """Load and expand an assembler definition JSON file.

    Args:
      asm_def: path of the JSON definition file.

    Returns:
      A tuple (arch, insns): the file's 'arch' value and the list of fully
      expanded instruction dicts, sorted by their 'asm' field.
    """
    with open(asm_def) as asm:
        obj = json.load(asm)
    insns = obj.get('insns')
    insns = _expand_insns_by_operands(insns)
    insns = _expand_insn_by_encodings(insns)
    # sorted() is stable, so variants sharing an 'asm' keep expansion order.
    insns = sorted(insns, key=lambda i: i.get('asm'))
    return obj.get('arch'), insns