1// Copyright 2014 The Go Authors.  All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package armasm
6
7import (
8	"encoding/binary"
9	"fmt"
10)
11
// An instFormat describes the format of an instruction encoding.
// An instruction with 32-bit value x matches the format if x&mask == value
// and the condition matches.
// The condition matches if x>>28 == 0xF && value>>28==0xF
// or if x>>28 != 0xF and value>>28 == 0.
// If x matches the format, then the rest of the fields describe how to interpret x.
// The opBits describe bits that should be extracted from x and added to the opcode.
// opBits is read as a sequence of 16-bit groups, least significant group first.
// In each group the high byte is a bit offset into x and the low byte is a bit count.
// The first group supplies the low bits of the extracted value and each later
// group is placed directly above the bits already collected.
// For example opBits = 0x01020304 means that the value
//
//	(2 bits at offset 1) followed by (4 bits at offset 3)
//
// should be added to op.
// Finally the args describe how to decode the instruction arguments.
// args is stored as a fixed-size array; if there are fewer than len(args) arguments,
// args[i] == 0 marks the end of the argument list.
type instFormat struct {
	// mask selects the bits of x that are compared against value.
	mask uint32
	// value is the required result of x&mask (with the condition handled as described above).
	value uint32
	// priority ranks formats that match the same x: Decode keeps the match
	// with the highest priority, and never selects a format whose priority is <= 0.
	priority int8
	// op is the base opcode; the value extracted via opBits is added to it.
	op Op
	// opBits lists the bit fields of x that are added to op.
	opBits uint64
	// args lists the argument decoders to apply, terminated by a zero entry.
	args instArgs
}

// instArgs is the fixed-size list of argument kinds for one instFormat.
// Unused trailing entries are zero.
type instArgs [4]instArg
37
// errMode is returned by Decode when asked to decode for a mode other than ModeARM.
var errMode = fmt.Errorf("unsupported execution mode")

// errShort is returned by Decode when fewer than 4 bytes of input are available.
var errShort = fmt.Errorf("truncated instruction")

// errUnknown is returned by Decode when no instruction format matches the input.
var errUnknown = fmt.Errorf("unknown instruction")

// decoderCover records which entries of instFormats have produced a decoded
// instruction: Decode allocates it on first use with one element per format
// and sets element i when format i is used.
// NOTE(review): Decode updates this slice without any locking; presumably it
// exists for table-coverage checks in tests — confirm before relying on it.
var decoderCover []bool
45
46// Decode decodes the leading bytes in src as a single instruction.
47func Decode(src []byte, mode Mode) (inst Inst, err error) {
48	if mode != ModeARM {
49		return Inst{}, errMode
50	}
51	if len(src) < 4 {
52		return Inst{}, errShort
53	}
54
55	if decoderCover == nil {
56		decoderCover = make([]bool, len(instFormats))
57	}
58
59	x := binary.LittleEndian.Uint32(src)
60
61	// The instFormat table contains both conditional and unconditional instructions.
62	// Considering only the top 4 bits, the conditional instructions use mask=0, value=0,
63	// while the unconditional instructions use mask=f, value=f.
64	// Prepare a version of x with the condition cleared to 0 in conditional instructions
65	// and then assume mask=f during matching.
66	const condMask = 0xf0000000
67	xNoCond := x
68	if x&condMask != condMask {
69		xNoCond &^= condMask
70	}
71	var priority int8
72Search:
73	for i := range instFormats {
74		f := &instFormats[i]
75		if xNoCond&(f.mask|condMask) != f.value || f.priority <= priority {
76			continue
77		}
78		delta := uint32(0)
79		deltaShift := uint(0)
80		for opBits := f.opBits; opBits != 0; opBits >>= 16 {
81			n := uint(opBits & 0xFF)
82			off := uint((opBits >> 8) & 0xFF)
83			delta |= (x >> off) & (1<<n - 1) << deltaShift
84			deltaShift += n
85		}
86		op := f.op + Op(delta)
87
88		// Special case: BKPT encodes with condition but cannot have one.
89		if op&^15 == BKPT_EQ && op != BKPT {
90			continue Search
91		}
92
93		var args Args
94		for j, aop := range f.args {
95			if aop == 0 {
96				break
97			}
98			arg := decodeArg(aop, x)
99			if arg == nil { // cannot decode argument
100				continue Search
101			}
102			args[j] = arg
103		}
104
105		decoderCover[i] = true
106
107		inst = Inst{
108			Op:   op,
109			Args: args,
110			Enc:  x,
111			Len:  4,
112		}
113		priority = f.priority
114		continue Search
115	}
116	if inst.Op != 0 {
117		return inst, nil
118	}
119	return Inst{}, errUnknown
120}
121
// An instArg describes the encoding of a single argument.
// In the names used for arguments, _p_ means +, _m_ means -,
// _pm_ means ± (usually keyed by the U bit).
// The _W suffix indicates a general addressing mode based on the P and W bits.
// The _offset and _postindex suffixes force the given addressing mode.
// A number following a register name (arg_R_0, arg_R_8, arg_R_12, arg_R_16)
// is the bit offset of the 4-bit register field within the instruction.
// In immediate names, NatM means an N-bit field located at bit offset M;
// the fields are concatenated in the order they are named.
// The rest should be somewhat self-explanatory, at least given
// the decodeArg function.
type instArg uint8

const (
	// The zero value is reserved: it marks the end of an instArgs list.
	_ instArg = iota
	arg_APSR
	arg_FPSCR
	arg_Dn_half
	arg_R1_0
	arg_R1_12
	arg_R2_0
	arg_R2_12
	arg_R_0
	arg_R_12
	arg_R_12_nzcv
	arg_R_16
	arg_R_16_WB
	arg_R_8
	arg_R_rotate
	arg_R_shift_R
	arg_R_shift_imm
	arg_SP
	arg_Sd
	arg_Sd_Dd
	arg_Dd_Sd
	arg_Sm
	arg_Sm_Dm
	arg_Sn
	arg_Sn_Dn
	arg_const
	arg_endian
	arg_fbits
	arg_fp_0
	arg_imm24
	arg_imm5
	arg_imm5_32
	arg_imm5_nz
	arg_imm_12at8_4at0
	arg_imm_4at16_12at0
	arg_imm_vfp
	arg_label24
	arg_label24H
	arg_label_m_12
	arg_label_p_12
	arg_label_pm_12
	arg_label_pm_4_4
	arg_lsb_width
	arg_mem_R
	arg_mem_R_pm_R_W
	arg_mem_R_pm_R_postindex
	arg_mem_R_pm_R_shift_imm_W
	arg_mem_R_pm_R_shift_imm_offset
	arg_mem_R_pm_R_shift_imm_postindex
	arg_mem_R_pm_imm12_W
	arg_mem_R_pm_imm12_offset
	arg_mem_R_pm_imm12_postindex
	arg_mem_R_pm_imm8_W
	arg_mem_R_pm_imm8_postindex
	arg_mem_R_pm_imm8at0_offset
	arg_option
	arg_registers
	arg_registers1
	arg_registers2
	arg_satimm4
	arg_satimm5
	arg_satimm4m1
	arg_satimm5m1
	arg_widthm1
)
197
198// decodeArg decodes the arg described by aop from the instruction bits x.
199// It returns nil if x cannot be decoded according to aop.
200func decodeArg(aop instArg, x uint32) Arg {
201	switch aop {
202	default:
203		return nil
204
205	case arg_APSR:
206		return APSR
207	case arg_FPSCR:
208		return FPSCR
209
210	case arg_R_0:
211		return Reg(x & (1<<4 - 1))
212	case arg_R_8:
213		return Reg((x >> 8) & (1<<4 - 1))
214	case arg_R_12:
215		return Reg((x >> 12) & (1<<4 - 1))
216	case arg_R_16:
217		return Reg((x >> 16) & (1<<4 - 1))
218
219	case arg_R_12_nzcv:
220		r := Reg((x >> 12) & (1<<4 - 1))
221		if r == R15 {
222			return APSR_nzcv
223		}
224		return r
225
226	case arg_R_16_WB:
227		mode := AddrLDM
228		if (x>>21)&1 != 0 {
229			mode = AddrLDM_WB
230		}
231		return Mem{Base: Reg((x >> 16) & (1<<4 - 1)), Mode: mode}
232
233	case arg_R_rotate:
234		Rm := Reg(x & (1<<4 - 1))
235		typ, count := decodeShift(x)
236		// ROR #0 here means ROR #0, but decodeShift rewrites to RRX #1.
237		if typ == RotateRightExt {
238			return Rm
239		}
240		return RegShift{Rm, typ, count}
241
242	case arg_R_shift_R:
243		Rm := Reg(x & (1<<4 - 1))
244		Rs := Reg((x >> 8) & (1<<4 - 1))
245		typ := Shift((x >> 5) & (1<<2 - 1))
246		return RegShiftReg{Rm, typ, Rs}
247
248	case arg_R_shift_imm:
249		Rm := Reg(x & (1<<4 - 1))
250		typ, count := decodeShift(x)
251		if typ == ShiftLeft && count == 0 {
252			return Reg(Rm)
253		}
254		return RegShift{Rm, typ, count}
255
256	case arg_R1_0:
257		return Reg((x & (1<<4 - 1)))
258	case arg_R1_12:
259		return Reg(((x >> 12) & (1<<4 - 1)))
260	case arg_R2_0:
261		return Reg((x & (1<<4 - 1)) | 1)
262	case arg_R2_12:
263		return Reg(((x >> 12) & (1<<4 - 1)) | 1)
264
265	case arg_SP:
266		return SP
267
268	case arg_Sd_Dd:
269		v := (x >> 12) & (1<<4 - 1)
270		vx := (x >> 22) & 1
271		sz := (x >> 8) & 1
272		if sz != 0 {
273			return D0 + Reg(vx<<4+v)
274		} else {
275			return S0 + Reg(v<<1+vx)
276		}
277
278	case arg_Dd_Sd:
279		return decodeArg(arg_Sd_Dd, x^(1<<8))
280
281	case arg_Sd:
282		v := (x >> 12) & (1<<4 - 1)
283		vx := (x >> 22) & 1
284		return S0 + Reg(v<<1+vx)
285
286	case arg_Sm_Dm:
287		v := (x >> 0) & (1<<4 - 1)
288		vx := (x >> 5) & 1
289		sz := (x >> 8) & 1
290		if sz != 0 {
291			return D0 + Reg(vx<<4+v)
292		} else {
293			return S0 + Reg(v<<1+vx)
294		}
295
296	case arg_Sm:
297		v := (x >> 0) & (1<<4 - 1)
298		vx := (x >> 5) & 1
299		return S0 + Reg(v<<1+vx)
300
301	case arg_Dn_half:
302		v := (x >> 16) & (1<<4 - 1)
303		vx := (x >> 7) & 1
304		return RegX{D0 + Reg(vx<<4+v), int((x >> 21) & 1)}
305
306	case arg_Sn_Dn:
307		v := (x >> 16) & (1<<4 - 1)
308		vx := (x >> 7) & 1
309		sz := (x >> 8) & 1
310		if sz != 0 {
311			return D0 + Reg(vx<<4+v)
312		} else {
313			return S0 + Reg(v<<1+vx)
314		}
315
316	case arg_Sn:
317		v := (x >> 16) & (1<<4 - 1)
318		vx := (x >> 7) & 1
319		return S0 + Reg(v<<1+vx)
320
321	case arg_const:
322		v := x & (1<<8 - 1)
323		rot := (x >> 8) & (1<<4 - 1) * 2
324		if rot > 0 && v&3 == 0 {
325			// could rotate less
326			return ImmAlt{uint8(v), uint8(rot)}
327		}
328		if rot >= 24 && ((v<<(32-rot))&0xFF)>>(32-rot) == v {
329			// could wrap around to rot==0.
330			return ImmAlt{uint8(v), uint8(rot)}
331		}
332		return Imm(v>>rot | v<<(32-rot))
333
334	case arg_endian:
335		return Endian((x >> 9) & 1)
336
337	case arg_fbits:
338		return Imm((16 << ((x >> 7) & 1)) - ((x&(1<<4-1))<<1 | (x>>5)&1))
339
340	case arg_fp_0:
341		return Imm(0)
342
343	case arg_imm24:
344		return Imm(x & (1<<24 - 1))
345
346	case arg_imm5:
347		return Imm((x >> 7) & (1<<5 - 1))
348
349	case arg_imm5_32:
350		x = (x >> 7) & (1<<5 - 1)
351		if x == 0 {
352			x = 32
353		}
354		return Imm(x)
355
356	case arg_imm5_nz:
357		x = (x >> 7) & (1<<5 - 1)
358		if x == 0 {
359			return nil
360		}
361		return Imm(x)
362
363	case arg_imm_4at16_12at0:
364		return Imm((x>>16)&(1<<4-1)<<12 | x&(1<<12-1))
365
366	case arg_imm_12at8_4at0:
367		return Imm((x>>8)&(1<<12-1)<<4 | x&(1<<4-1))
368
369	case arg_imm_vfp:
370		x = (x>>16)&(1<<4-1)<<4 | x&(1<<4-1)
371		return Imm(x)
372
373	case arg_label24:
374		imm := (x & (1<<24 - 1)) << 2
375		return PCRel(int32(imm<<6) >> 6)
376
377	case arg_label24H:
378		h := (x >> 24) & 1
379		imm := (x&(1<<24-1))<<2 | h<<1
380		return PCRel(int32(imm<<6) >> 6)
381
382	case arg_label_m_12:
383		d := int32(x & (1<<12 - 1))
384		return Mem{Base: PC, Mode: AddrOffset, Offset: int16(-d)}
385
386	case arg_label_p_12:
387		d := int32(x & (1<<12 - 1))
388		return Mem{Base: PC, Mode: AddrOffset, Offset: int16(d)}
389
390	case arg_label_pm_12:
391		d := int32(x & (1<<12 - 1))
392		u := (x >> 23) & 1
393		if u == 0 {
394			d = -d
395		}
396		return Mem{Base: PC, Mode: AddrOffset, Offset: int16(d)}
397
398	case arg_label_pm_4_4:
399		d := int32((x>>8)&(1<<4-1)<<4 | x&(1<<4-1))
400		u := (x >> 23) & 1
401		if u == 0 {
402			d = -d
403		}
404		return PCRel(d)
405
406	case arg_lsb_width:
407		lsb := (x >> 7) & (1<<5 - 1)
408		msb := (x >> 16) & (1<<5 - 1)
409		if msb < lsb || msb >= 32 {
410			return nil
411		}
412		return Imm(msb + 1 - lsb)
413
414	case arg_mem_R:
415		Rn := Reg((x >> 16) & (1<<4 - 1))
416		return Mem{Base: Rn, Mode: AddrOffset}
417
418	case arg_mem_R_pm_R_postindex:
419		// Treat [<Rn>],+/-<Rm> like [<Rn>,+/-<Rm>{,<shift>}]{!}
420		// by forcing shift bits to <<0 and P=0, W=0 (postindex=true).
421		return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^((1<<7-1)<<5|1<<24|1<<21))
422
423	case arg_mem_R_pm_R_W:
424		// Treat [<Rn>,+/-<Rm>]{!} like [<Rn>,+/-<Rm>{,<shift>}]{!}
425		// by forcing shift bits to <<0.
426		return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^((1<<7-1)<<5))
427
428	case arg_mem_R_pm_R_shift_imm_offset:
429		// Treat [<Rn>],+/-<Rm>{,<shift>} like [<Rn>,+/-<Rm>{,<shift>}]{!}
430		// by forcing P=1, W=0 (index=false, wback=false).
431		return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^(1<<21)|1<<24)
432
433	case arg_mem_R_pm_R_shift_imm_postindex:
434		// Treat [<Rn>],+/-<Rm>{,<shift>} like [<Rn>,+/-<Rm>{,<shift>}]{!}
435		// by forcing P=0, W=0 (postindex=true).
436		return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^(1<<24|1<<21))
437
438	case arg_mem_R_pm_R_shift_imm_W:
439		Rn := Reg((x >> 16) & (1<<4 - 1))
440		Rm := Reg(x & (1<<4 - 1))
441		typ, count := decodeShift(x)
442		u := (x >> 23) & 1
443		w := (x >> 21) & 1
444		p := (x >> 24) & 1
445		if p == 0 && w == 1 {
446			return nil
447		}
448		sign := int8(+1)
449		if u == 0 {
450			sign = -1
451		}
452		mode := AddrMode(uint8(p<<1) | uint8(w^1))
453		return Mem{Base: Rn, Mode: mode, Sign: sign, Index: Rm, Shift: typ, Count: count}
454
455	case arg_mem_R_pm_imm12_offset:
456		// Treat [<Rn>,#+/-<imm12>] like [<Rn>{,#+/-<imm12>}]{!}
457		// by forcing P=1, W=0 (index=false, wback=false).
458		return decodeArg(arg_mem_R_pm_imm12_W, x&^(1<<21)|1<<24)
459
460	case arg_mem_R_pm_imm12_postindex:
461		// Treat [<Rn>],#+/-<imm12> like [<Rn>{,#+/-<imm12>}]{!}
462		// by forcing P=0, W=0 (postindex=true).
463		return decodeArg(arg_mem_R_pm_imm12_W, x&^(1<<24|1<<21))
464
465	case arg_mem_R_pm_imm12_W:
466		Rn := Reg((x >> 16) & (1<<4 - 1))
467		u := (x >> 23) & 1
468		w := (x >> 21) & 1
469		p := (x >> 24) & 1
470		if p == 0 && w == 1 {
471			return nil
472		}
473		sign := int8(+1)
474		if u == 0 {
475			sign = -1
476		}
477		imm := int16(x & (1<<12 - 1))
478		mode := AddrMode(uint8(p<<1) | uint8(w^1))
479		return Mem{Base: Rn, Mode: mode, Offset: int16(sign) * imm}
480
481	case arg_mem_R_pm_imm8_postindex:
482		// Treat [<Rn>],#+/-<imm8> like [<Rn>{,#+/-<imm8>}]{!}
483		// by forcing P=0, W=0 (postindex=true).
484		return decodeArg(arg_mem_R_pm_imm8_W, x&^(1<<24|1<<21))
485
486	case arg_mem_R_pm_imm8_W:
487		Rn := Reg((x >> 16) & (1<<4 - 1))
488		u := (x >> 23) & 1
489		w := (x >> 21) & 1
490		p := (x >> 24) & 1
491		if p == 0 && w == 1 {
492			return nil
493		}
494		sign := int8(+1)
495		if u == 0 {
496			sign = -1
497		}
498		imm := int16((x>>8)&(1<<4-1)<<4 | x&(1<<4-1))
499		mode := AddrMode(uint8(p<<1) | uint8(w^1))
500		return Mem{Base: Rn, Mode: mode, Offset: int16(sign) * imm}
501
502	case arg_mem_R_pm_imm8at0_offset:
503		Rn := Reg((x >> 16) & (1<<4 - 1))
504		u := (x >> 23) & 1
505		sign := int8(+1)
506		if u == 0 {
507			sign = -1
508		}
509		imm := int16(x&(1<<8-1)) << 2
510		return Mem{Base: Rn, Mode: AddrOffset, Offset: int16(sign) * imm}
511
512	case arg_option:
513		return Imm(x & (1<<4 - 1))
514
515	case arg_registers:
516		return RegList(x & (1<<16 - 1))
517
518	case arg_registers2:
519		x &= 1<<16 - 1
520		n := 0
521		for i := 0; i < 16; i++ {
522			if x>>uint(i)&1 != 0 {
523				n++
524			}
525		}
526		if n < 2 {
527			return nil
528		}
529		return RegList(x)
530
531	case arg_registers1:
532		Rt := (x >> 12) & (1<<4 - 1)
533		return RegList(1 << Rt)
534
535	case arg_satimm4:
536		return Imm((x >> 16) & (1<<4 - 1))
537
538	case arg_satimm5:
539		return Imm((x >> 16) & (1<<5 - 1))
540
541	case arg_satimm4m1:
542		return Imm((x>>16)&(1<<4-1) + 1)
543
544	case arg_satimm5m1:
545		return Imm((x>>16)&(1<<5-1) + 1)
546
547	case arg_widthm1:
548		return Imm((x>>16)&(1<<5-1) + 1)
549
550	}
551}
552
553// decodeShift decodes the shift-by-immediate encoded in x.
554func decodeShift(x uint32) (Shift, uint8) {
555	count := (x >> 7) & (1<<5 - 1)
556	typ := Shift((x >> 5) & (1<<2 - 1))
557	switch typ {
558	case ShiftRight, ShiftRightSigned:
559		if count == 0 {
560			count = 32
561		}
562	case RotateRight:
563		if count == 0 {
564			typ = RotateRightExt
565			count = 1
566		}
567	}
568	return typ, uint8(count)
569}
570