#!/usr/bin/python # # Copyright (C) 2023 The Android Open Source Project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Generate LIR files out of the definition file. * Operand usage Register allocator needs operand usage to learn which operands can share the same register. To understand register sharing options, register allocator assumes insn works in these steps: - read input operands - do the job - write output operands So, input-output operands should have dedicated registers, while input-only operands can share registers with output-only operands. There might be an exception when output-only operand is written before all input-only operands are read, so its register can't be shared. Such operands are usually referred as output-only-early-clobber operands. For register sharing, output-only-early-clobber operand is the same as input-output operand, but it is unnatural to describe output-only as input-output, so we use a special keyword for it. Finally, keywords are: use - input-only def - output-only def_early_clobber - output-only-early-clobber use_def - input-output * Scratch operands Scratch operands are actually output operands - indeed, their original value is not used and they get some new value after the insn is done. However, they are usually written before all input operands are read, so it makes sense to describe scratch operands as output-only-early-clobber. """ import asm_defs import json import sys def _is_reg(arg_type): return (asm_defs.is_greg(arg_type) or asm_defs.is_xreg(arg_type) or asm_defs.is_implicit_reg(arg_type)) class Operand(object): pass def _get_reg_operand_info(usage, kind): if usage == 'use': return '{ &k%s, MachineRegKind::kUse }' % (kind) if usage == 'def': return '{ &k%s, MachineRegKind::kDef }' % (kind) if usage == 'use_def': return '{ &k%s, MachineRegKind::kUseDef }' % (kind) if usage == 'def_early_clobber': return '{ &k%s, MachineRegKind::kDefEarlyClobber }' % (kind) assert False, 'unknown operand usage %s' % (usage) def _make_reg_operand(r, usage, kind): op = Operand() op.type = 'MachineReg' op.name = 'r%d' % (r) op.reg_operand_info = _get_reg_operand_info(usage, kind) op.initializer = 'SetRegAt(%d, r%d)' % (r, r) if asm_defs.is_greg(kind): op.asm_arg = 'GetGReg(RegAt(%d))' % (r) elif asm_defs.is_xreg(kind): op.asm_arg = 'GetXReg(RegAt(%d))' % (r) elif asm_defs.is_implicit_reg(kind): op.asm_arg = None else: assert False, 'unknown register kind %s' % (kind) return op def _make_imm_operand(bits): op = Operand() op.type = 'int%s_t' % (bits) op.name = 'imm' op.reg_operand_info = None op.initializer = 'set_imm(imm)' op.asm_arg = 'static_cast<%s>(imm())' % (op.type) return op def _make_scale_operand(): op = Operand() op.type = 'MachineMemOperandScale' op.name = 'scale' op.reg_operand_info = None op.initializer = 'set_scale(scale)' op.asm_arg = 'ToScaleFactor(scale())' return op def _make_disp_operand(): op = Operand() op.type = 'uint32_t' op.name = 'disp' op.reg_operand_info = None op.initializer = 'set_disp(disp)' op.asm_arg = 'disp()' return op def _make_cond_operand(): op = Operand() op.type = 'Assembler::Condition' op.name = 'cond' op.reg_operand_info = None op.initializer = 'set_cond(cond)' op.asm_arg = 'cond()' return op def _make_label_operand(): op = Operand() # We never have both immediate and Label in same insn. op.type = 'Label*' op.name = 'label' op.reg_operand_info = None op.initializer = 'set_imm(reinterpret_cast(label))' op.asm_arg = '*reinterpret_cast(imm())' return op def _check_insn_defs(insn, skip_unsupported=False): seen_imm = False seen_memop = False seen_disp = False for arg in insn.get('args'): kind = arg.get('class') if _is_reg(kind): pass elif asm_defs.is_imm(kind): # We share field for immediate and label in 'insn'. assert not seen_imm seen_imm = True elif asm_defs.is_mem_op(kind): # No insn can have more than one memop. assert not seen_memop addr_mode = insn.get('addr_mode') if skip_unsupported: if addr_mode not in ('Absolute', 'BaseDisp', 'IndexDisp', 'BaseIndexDisp'): return False assert addr_mode in ('Absolute', 'BaseDisp', 'IndexDisp', 'BaseIndexDisp'), \ 'unknown addressing mode %s' % (addr_mode) seen_memop = True elif asm_defs.is_disp(kind): assert not seen_disp seen_disp = True elif asm_defs.is_cond(kind): pass elif asm_defs.is_label(kind): assert not seen_imm seen_imm = True else: assert False, 'unknown operand class %s' % (kind) return True def _get_insn_operands(insn): """For each operand, define: - type - name - reg_operand_info - initializer - asm_arg """ res = [] r = 0 # Int3, Lfence, Mfence, Sfence, and UD2 have side effects not related to arguments. side_effects = insn['name'] in ('Int3', 'Lfence', 'Mfence', 'Sfence', 'UD2') for arg in insn.get('args'): kind = arg.get('class') if _is_reg(kind): res.append(_make_reg_operand(r, arg.get('usage'), kind)) r += 1 elif asm_defs.is_imm(kind): # We share field for immediate and label in 'insn'. bits = kind[3:] res.append(_make_imm_operand(bits)) elif asm_defs.is_mem_op(kind): # If operand is memory and it's not "use" then we have side_effects if arg['usage'] != 'use': side_effects = True # No insn can have more than one memop. addr_mode = insn.get('addr_mode') assert addr_mode in ('Absolute', 'BaseDisp', 'IndexDisp', 'BaseIndexDisp'), \ 'unknown addressing mode %s' % (addr_mode) if addr_mode in ('BaseDisp', 'BaseIndexDisp'): res.append(_make_reg_operand(r, 'use', 'GeneralReg32')) r += 1 if addr_mode in ('IndexDisp', 'BaseIndexDisp'): res.append(_make_reg_operand(r, 'use', 'GeneralReg32')) r += 1 res.append(_make_scale_operand()) res.append(_make_disp_operand()) elif asm_defs.is_disp(kind): res.append(_make_disp_operand()) elif asm_defs.is_cond(kind): res.append(_make_cond_operand()) elif asm_defs.is_label(kind): res.append(_make_label_operand()) else: assert False, 'unknown operand class %s' % (kind) return res, side_effects def _get_insn_debug_operands(insn): res = [] r = 0 for arg in insn.get('args'): kind = arg.get('class') if _is_reg(kind): if asm_defs.is_greg(kind) or asm_defs.is_xreg(kind): res.append('GetRegOperandDebugString(this, %d)' % (r)) elif asm_defs.is_implicit_reg(kind): res.append('GetImplicitRegOperandDebugString(this, %d)' % (r)) else: assert False, 'unknown register kind %s' % (kind) r += 1 elif asm_defs.is_imm(kind): # We share field for immediate and label in 'insn'. res.append('GetImmOperandDebugString(this)') elif asm_defs.is_mem_op(kind): # No insn can have more than one memop. addr_mode = insn.get('addr_mode') if addr_mode == 'Absolute': res.append('GetAbsoluteMemOperandDebugString(this)') elif addr_mode in ('BaseDisp', 'IndexDisp', 'BaseIndexDisp'): res.append('Get%sMemOperandDebugString(this, %d)' % (addr_mode, r)) r += {'BaseDisp': 1, 'IndexDisp': 1, 'BaseIndexDisp': 2}[addr_mode] else: assert False, 'unknown addr_mode %s' % (addr_mode) elif asm_defs.is_disp(kind): # Hack: replace previous reg helper with mem helper. assert res assert res[-1].startswith('GetRegOperandDebugString') res[-1] = 'GetBaseDispMemOperandDebugString' + res[-1][24:] elif asm_defs.is_cond(kind): res.append('GetCondOperandDebugString(this)') elif asm_defs.is_label(kind): res.append('GetLabelOperandDebugString(this)') else: assert False, 'unknown operand class %s' % (kind) return res INDENT = ' ' def _gen_insn_ctor(f, insn): name = insn.get('name') operands, _ = _get_insn_operands(insn) params = ['%s %s' % (op.type, op.name) for op in operands] inits = ['%s%s;' % (INDENT, op.initializer) for op in operands] print('constexpr MachineInsnInfo %s::kInfo;' % (name), file=f) print('%s::%s(%s) : MachineInsnForArch(&kInfo) {' % (name, name, ', '.join(params)), file=f) print('\n'.join(inits), file=f) print('}', file=f) # TODO(b/232598137): Maybe we should just implement generic printing in C++ # instead of generating it for every instruction. def _gen_insn_debug(f, insn): name = insn.get('name') mnemo = insn.get('mnemo') print('std::string %s::GetDebugString() const {' % (name), file=f) operands = _get_insn_debug_operands(insn) if not operands: print(' return "%s";' % (mnemo), file=f) else: print(' std::string s("%s ");' % (mnemo), file=f) print(' s += %s;' % (operands[0]), file=f) for op in operands[1:]: print(' s += ", ";', file=f) print(' s += %s;' % (op), file=f) # We don't print recovery_bb() since it can be found by edges outgoing from basic block. print(' if (recovery_pc()) {', file=f) print(' s += StringPrintf(" <0x%" PRIxPTR ">", recovery_pc());', file=f) print(' }', file=f) print(' return s;', file=f) print('}', file=f) def _gen_insn_emit(f, insn): name = insn.get('name') asm = insn.get('asm') operands, _ = _get_insn_operands(insn) asm_args = [op.asm_arg for op in operands if op.asm_arg] print('void %s::Emit(CodeEmitter* as) const {' % (name), file=f) print('%sas->%s(%s);' % (INDENT, asm, ', '.join(asm_args)), file=f) print('}', file=f) def _gen_insn_class(f, insn): name = insn.get('name') operands, side_effects = _get_insn_operands(insn) regs = [op.reg_operand_info for op in operands if op.reg_operand_info] if side_effects: kind = 'kMachineInsnSideEffects' else: kind = 'kMachineInsnDefault' params = ['%s %s' % (op.type, op.name) for op in operands] print('class %s : public MachineInsnForArch {' % (name), file=f) print(' public:', file=f) print(' explicit %s(%s);' % (name, ', '.join(params)), file=f) print(' static constexpr MachineInsnInfo kInfo =', file=f) print(' MachineInsnInfo({kMachineOp%s,' % (name), file=f) print(' %d,' % (len(regs)), file=f) print(' {%s},' % (', '.join(regs)), file=f) print(' %s});' % (kind), file=f) print(' static constexpr int NumRegOperands() { return kInfo.num_reg_operands; }', file=f) print(' static constexpr const MachineRegKind& RegKindAt(int i) { return kInfo.reg_kinds[i]; }', file=f) print(' std::string GetDebugString() const override;', file=f) print(' void Emit(CodeEmitter* as) const override;', file=f) print('};', file=f) def gen_code_2_cc(out, arch, insns): with open(out, 'w') as f: for insn in insns: _gen_insn_ctor(f, insn) def gen_code_debug_cc(out, arch, insns): with open(out, 'w') as f: print("""\ // This file automatically generated by gen_lir.py // DO NOT EDIT! #include "berberis/base/stringprintf.h" #include "berberis/backend/%s/code_debug.h" namespace berberis { namespace %s { """ % (arch, arch), file=f) for insn in insns: _gen_insn_debug(f, insn) print("""\ } // namespace %s } // namespace berberis""" % (arch), file=f) def gen_code_emit_cc(out, arch, insns): with open(out, 'w') as f: print("""\ // This file automatically generated by gen_lir.py // DO NOT EDIT! #include "berberis/backend/code_emitter.h" #include "berberis/backend/%s/code_emit.h" namespace berberis { namespace %s { """ % (arch, arch), file=f) for insn in insns: _gen_insn_emit(f, insn) print("""\ } // namespace %s } // namespace berberis""" % (arch), file=f) def gen_machine_info_h(out, arch, insns): with open(out, 'w') as f: for insn in insns: name = insn.get('name') print('using %s = %s;' % (name, name), file=f) def gen_machine_opcode_h(out, arch, insns): with open(out, 'w') as f: for insn in insns: name = insn.get('name') print('kMachineOp%s,' % (name), file=f) def _gen_mem_insn_groups(f, insns): # Build a dictionary to map a memory insn group name to another dictionary, # which in turn maps an addressing mode to an individual memory insn. groups = {} for i in insns: group_name = i.get('mem_group_name') if group_name: groups.setdefault(group_name, {})[i.get('addr_mode')] = i.get('name') for group_name in sorted(groups): # The order of the addressing modes here is important. It must # match what MemInsns expects. mem_insns = [groups[group_name][addr_mode] for addr_mode in ('Absolute', 'BaseDisp', 'IndexDisp', 'BaseIndexDisp')] print('using %s = MemInsns<%s>;' % (group_name, ', '.join(mem_insns)), file=f) def gen_machine_ir_h(out, arch, insns): with open(out, 'w') as f: for insn in insns: _gen_insn_class(f, insn) print('', file=f) _gen_mem_insn_groups(f, insns) def _contains_mem(insn): return any(asm_defs.is_mem_op(arg['class']) for arg in insn.get('args')) def _create_mem_insn(insn, addr_mode): new_insn = insn.copy() macro_name = asm_defs.get_mem_macro_name(insn, addr_mode) new_insn['name'] = macro_name new_insn['addr_mode'] = addr_mode new_insn['asm'] = macro_name new_insn['mem_group_name'] = asm_defs.get_mem_macro_name(insn, '') + 'Insns' return new_insn def _expand_mem_insns(insns): result = [] for insn in insns: if _contains_mem(insn): result.extend([_create_mem_insn(insn, addr_mode) for addr_mode in ('Absolute', 'BaseDisp', 'IndexDisp', 'BaseIndexDisp')]) result.append(insn) return result def _load_lir_def(allowlist_looked, allowlist_found, asm_def): arch, insns = asm_defs.load_asm_defs(asm_def) insns = _expand_mem_insns(insns) # Mark all instructions to remove and remember instructions we kept for insn in insns: insn_name = insn.get('mem_group_name', insn['name']) if insn_name in allowlist_looked: allowlist_found.add(insn_name) else: insn['skip_lir'] = 1 # Filter out disabled instructions. insns = [i for i in insns if not i.get('skip_lir')] return arch, insns def _allowlist_instructions(allowlist_files, machine_ir_intrinsic_binding_files): allowlisted_names = set() for allowlist_file in allowlist_files: with open(allowlist_file) as allowlist_json: for insn_name in json.load(allowlist_json)['insns']: allowlisted_names.add(insn_name) for machine_ir_intrinsic_binding_file in machine_ir_intrinsic_binding_files: with open(machine_ir_intrinsic_binding_file) as machine_ir_intrinsic_binding_json: json_array = json.load(machine_ir_intrinsic_binding_json) # insn of type str is actually part of the file license. while isinstance(json_array[0], str): json_array.pop(0) for insn in json_array: if insn.get('usage', '') != 'interpret-only': allowlisted_names.add(insn['insn']) return allowlisted_names def load_all_lir_defs(allowlist_files, machine_ir_intrinsic_binding_files, lir_defs): allowlist_looked = _allowlist_instructions( allowlist_files, machine_ir_intrinsic_binding_files) allowlist_found = set() arch = None insns = [] macro_insns = [] for lir_def in lir_defs: def_arch, def_insns = _load_lir_def(allowlist_looked, allowlist_found, lir_def) if arch and not arch.startswith('common_'): assert def_arch is None or arch == def_arch else: arch = def_arch if def_arch is None: macro_insns.extend(def_insns) else: insns.extend(def_insns) for insn in insns: _check_insn_defs(insn) # Some macroinstructions can only be used in Lite translator for now. Ignore them here. insns.extend(insn for insn in macro_insns if _check_insn_defs(insn, True)) assert allowlist_looked == allowlist_found return arch, insns