1#!/usr/bin/python
2#
3# Copyright (C) 2018 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""Parse assembler definition file.
19
The definition JSON file for this script has the following form:
21{
22  "arch": "XXX",
23  "insns": [
24    {
25      "name": "ShlbRegReg",
26      "args": [
27         {"class": "GeneralReg8", "usage": "use_def"},
28         {"class": "RCX", "usage": "use"},
29         {"class": "FLAGS", "usage": "def"}
30       ],
31       "asm": "ShlbByCl",
32       "mnemo": "SHLB"
33    },
34    ...
  ]
}
'arch' is primarily used for the C++ namespace in the LIR generator; this
script does not interpret it and only returns it from load_asm_defs.
38
'insns' is an array of objects, each describing a single instruction variant.
Each instruction is an object with the following fields:
41  'name' - instruction unique name, used in LIR generator, typical name is
42           InsnOp1Op2, where 'Insn' is instruction name, 'Op1', 'Op2' -
43           operand types, such as Imm, Reg, Mem(Op), Base, Disp.
  'args' - ordered list describing the instruction arguments.
           Each argument specifies its 'class' (any GP register, fixed GP
           register, any XMM register, immediate, memory operand, flags
           register) and its 'usage', i.e. how it is treated by the
           instruction (used, defined, both used and defined).
49  'asm' - which internal assembler's mnemonic is used
50  'opcode' | 'opcodes' - optional flag for autogeneration:
51                         if opcode bytes are specified then implementation
52                         would be automatically generated
53  'type' - optional flag to specify extra information (encoded in the name).
54  'mnemo' - how instruction shall be named in LIR dumps (ignored here)
55
56Memory operand for assembler instructions can be described as either opaque
57Operand class, which provides full power of x86 addressing modes, or as
58explicit BaseDisp format, which translates to reg+disp form.
59
60For some instructions (such as pop, push, jmp reg) exact register width is not
61specified, and 'GeneralReg' class is used, as same encoding is used for 32 and
6264 bit operands, depending on current CPU mode.
63
This script produces an inline file for the internal assembler's header; for
the example above it would yield the single line
66
67  void ShlbByCl(Register);
68
Fixed arguments (such as 'RCX') and flags ('FLAGS') are ignored when generating
the assembler's header, while for the others an argument is emitted whose type
depends on the argument's class.
72"""
73
74import copy
75import json
76import re
77
78
def is_imm(arg_type):
  """Return True if arg_type is one of the recognized immediate classes."""
  x86_immediates = ('Imm2', 'Imm8', 'Imm16', 'Imm32', 'Imm64')
  # Official RISC-V immediates
  riscv_official = ('B-Imm', 'I-Imm', 'J-Imm', 'P-Imm', 'S-Imm', 'U-Imm')
  # Extra RISC-V immediates
  riscv_extra = ('Csr-Imm', 'Shift32-Imm', 'Shift64-Imm')
  return arg_type in x86_immediates + riscv_official + riscv_extra
85
86
def is_disp(arg_type):
  """Return True if arg_type is the 'Disp' operand class."""
  disp_class = 'Disp'
  return arg_type == disp_class
89
90
def is_mem_op(arg_type):
  """Return True if arg_type is one of the recognized memory operand classes."""
  # Universal memory operands
  universal_operands = ('Mem', 'Mem8', 'Mem16', 'Mem32', 'Mem64', 'Mem128')
  # x86 memory operands
  x86_operands = (
      'MemX87', 'MemX8716', 'MemX8732', 'MemX8764', 'MemX8780',
      'VecMem32', 'VecMem64', 'VecMem128', 'VecMem256',
  )
  return arg_type in universal_operands + x86_operands
98
99
def is_cond(arg_type):
  """Return True if arg_type is the 'Cond' operand class."""
  cond_class = 'Cond'
  return arg_type == cond_class
102
103
def is_csr(arg_type):
  """Return True if arg_type is the 'CsrReg' operand class."""
  csr_class = 'CsrReg'
  return arg_type == csr_class
106
107
def is_rm(arg_type):
  """Return True if arg_type is the 'Rm' operand class."""
  rm_class = 'Rm'
  return arg_type == rm_class
110
111
def is_label(arg_type):
  """Return True if arg_type is the 'Label' operand class."""
  label_class = 'Label'
  return arg_type == label_class
114
115
def is_x87reg(arg_type):
  """Return True if arg_type is the 'RegX87' operand class."""
  x87_class = 'RegX87'
  return arg_type == x87_class
118
119
def is_greg(arg_type):
  """Return True if arg_type is one of the 'GeneralReg*' operand classes.

  Both the width-less 'GeneralReg' and the 8/16/32/64 variants match.
  """
  general_reg_classes = (
      'GeneralReg',
      'GeneralReg8',
      'GeneralReg16',
      'GeneralReg32',
      'GeneralReg64',
  )
  return arg_type in general_reg_classes
124
125
def is_freg(arg_type):
  """Return True if arg_type is the 'FpReg' operand class."""
  fp_class = 'FpReg'
  return arg_type == fp_class
128
129
def is_xreg(arg_type):
  """Return True if arg_type is a class handled as an 'XReg' operand.

  Matches 'XmmReg', 'VecReg64', 'VecReg128', 'FpReg32' and 'FpReg64'.
  """
  xreg_classes = (
      'XmmReg',
      'VecReg64',
      'VecReg128',
      'FpReg32',
      'FpReg64',
  )
  return arg_type in xreg_classes
134
135
def is_yreg(arg_type):
  """Return True if arg_type is 'YmmReg' or 'VecReg256'."""
  yreg_classes = ('YmmReg', 'VecReg256')
  return arg_type in yreg_classes
138
139
def is_implicit_reg(arg_type):
  """Return True if arg_type is one of the fixed (implicit) operand classes.

  Operands of this type are NOT passed to assembler.
  """
  implicit_classes = (
      'RAX', 'EAX', 'AX', 'AL',
      'RCX', 'ECX', 'CL', 'ST', 'ST1',
      'RDX', 'EDX', 'DX', 'CC',
      'RBX', 'EBX', 'BX', 'SW',
      'RDI', 'RSI', 'RSP', 'FLAGS',
  )
  return arg_type in implicit_classes
147
148
def exactly_one_of(iterable):
  """Return True if exactly one element of iterable is truthy.

  The iterable is always consumed to the end.
  """
  truthy_count = 0
  for item in iterable:
    if item:
      truthy_count += 1
  return truthy_count == 1
151
152
def get_mem_macro_name(insn, addr_mode = None):
  """Compose a name for insn from its 'asm' value and its argument classes.

  The name starts with insn's 'asm' string (with a trailing 'ByCl' removed)
  and gets one suffix appended per argument, in argument order.

  Args:
    insn: instruction dict with an 'asm' string and an 'args' list whose
      items each carry a 'class' key.
    addr_mode: when not None, memory operands contribute 'Mem' + addr_mode;
      otherwise they contribute 'Op'.

  Returns:
    The composed name string.

  Raises:
    Exception: if an argument class is not recognized by any predicate.
  """
  stem = insn.get('asm')
  if stem.endswith('ByCl'):
    stem = stem[:-len('ByCl')]
  pieces = [stem]
  for operand in insn['args']:
    kind = operand['class']
    # Don't reflect FLAGS/Conditions/Csrs/Labels in the name - we don't ever
    # have two different instructions where these cause the difference.
    if kind == 'FLAGS' or is_cond(kind) or is_label(kind) or is_csr(kind):
      continue
    if is_rm(kind):
      suffix = 'Rm'
    elif is_x87reg(kind) or is_greg(kind) or is_implicit_reg(kind):
      suffix = 'Reg'
    elif is_freg(kind):
      suffix = 'FReg'
    elif is_xreg(kind):
      suffix = 'XReg'
    elif is_yreg(kind):
      suffix = 'YReg'
    elif is_imm(kind):
      suffix = 'Imm'
    elif is_mem_op(kind):
      suffix = 'Op' if addr_mode is None else 'Mem' + addr_mode
    else:
      raise Exception('arg type %s is not supported' % kind)
    pieces.append(suffix)
  return ''.join(pieces)
183
184
185def _get_cxx_name(name):
186  return ''.join(w if re.search('[A-Z]', w) else w.capitalize()
187                 for w in re.split('[-_. ]', name))
188
189
def _expand_name(insn, stem, encoding = None):
  """Create an independent instruction variant of insn named after stem.

  Args:
    insn: instruction dict; it is deep-copied and never modified.
    stem: native assembler name of the variant (may include dots, spaces,
      etc.).
    encoding: optional dict of extra fields merged into the result last, so
      its keys override the generated ones. None means no extra fields.

  Returns:
    A new dict with 'native-asm', 'asm', 'name' and 'mnemo' derived from
    stem, plus a deep copy of the fields in encoding.
  """
  # Make deep copy of the instruction to make sure consumers could treat them
  # as independent entities and add/remove marks freely.
  #
  # JSON never have "merged" objects thus having them in result violates
  # expectations.
  expanded_insn = copy.deepcopy(insn)
  # Native assembler name may include dots, spaces, etc. Keep it for text assembler.
  expanded_insn['native-asm'] = stem
  expanded_insn['asm'] = _get_cxx_name(stem)
  expanded_insn['name'] = get_mem_macro_name(expanded_insn)
  expanded_insn['mnemo'] = stem.upper()
  if encoding is not None:
    # Copy the encoding too: merging it by reference would make the result
    # share nested lists/dicts with the caller's definition, defeating the
    # purpose of the deep copy above.
    expanded_insn.update(copy.deepcopy(encoding))
  return expanded_insn
204
205
206def _expand_insn_by_encodings(insns):
207  expanded_insns = []
208  for insn in insns:
209    if insn.get('encodings'):
210      assert all((f not in insn) for f in ['stems', 'name', 'asm', 'mnemo'])
211      # If we have encoding then we must have at least opcodes
212      assert all('opcode' in encoding or 'opcodes' in encoding
213                  for _, encoding in insn['encodings'].items())
214      expanded_insns.extend([_expand_name(insn, stem, encoding)
215                            for stem, encoding in insn['encodings'].items()])
216    elif insn.get('stems'):
217      assert all((f not in insn) for f in ['encoding', 'name', 'asm', 'mnemo'])
218      expanded_insns.extend([_expand_name(insn, stem)
219                            for stem in insn['stems']])
220    else:
221      assert all((f in insn) for f in ['name', 'asm', 'mnemo'])
222      expanded_insns.append(insn)
223  return expanded_insns
224
225
226def _expand_insns_by_operands(insns):
227  expanded_insns = []
228  for insn in insns:
229    split_done = False
230    for arg in insn['args']:
231      if '/' in arg['class']:
232        assert not split_done
233        operand_classes = arg['class'].split('/')
234        for subclass in operand_classes:
235          arg['class'] = subclass
236          expanded_insn = copy.deepcopy(insn)
237          expanded_insns.append(expanded_insn)
238        split_done = True
239    if not split_done:
240      expanded_insns.append(insn)
241  return expanded_insns
242
243
def load_asm_defs(asm_def):
  """Load an assembler definition JSON file and expand its instructions.

  Args:
    asm_def: path of the definition file, passed to open().

  Returns:
    A pair (arch, insns): the value of the top-level 'arch' key (None when
    absent) and a new list of instruction dicts, expanded first by operand
    classes and then by encodings/stems, sorted by their 'asm' value.
  """
  with open(asm_def) as definition_file:
    definition = json.load(definition_file)
    expanded = _expand_insn_by_encodings(
        _expand_insns_by_operands(definition.get('insns')))
    ordered = list(expanded)
    ordered.sort(key=lambda insn: insn.get('asm'))
  return definition.get('arch'), ordered
254