; xref: /aosp_15_r20/external/arm-neon-tests/Init.s (revision f37826520a923688f9e110915f3811e385d8b6d1)
;==================================================================
; Copyright ARM Ltd 2005. All rights reserved.
;
; Cortex-A8 Dhrystone example - Startup Code
;==================================================================

; Assembler set-up: 8-byte stack alignment is declared for this file, the
; code lives in the read-only CORTEXA8 code area, and ENTRY marks this area
; as containing the image entry point.

        PRESERVE8
        AREA   CORTEXA8, CODE, READONLY

        ENTRY

; Standard definitions of mode bits and interrupt (I & F) flags in PSRs.
; None of these symbols is referenced by the code visible in this file;
; they are kept for use with MSR/CPS sequences.

Mode_USR        EQU     0x10
Mode_FIQ        EQU     0x11
Mode_IRQ        EQU     0x12
Mode_SVC        EQU     0x13
Mode_ABT        EQU     0x17
Mode_UNDEF      EQU     0x1B
Mode_SYS        EQU     0x1F

I_Bit           EQU     0x80 ; when I bit is set, IRQ is disabled
F_Bit           EQU     0x40 ; when F bit is set, FIQ is disabled

;==================================================================
; Start - exported entry label of the startup sequence
;
; Disable Cortex-A8 MMU if enabled
;
; Uses r0 and the condition flags.
;==================================================================

        EXPORT Start

Start

        MRC     p15, 0, r0, c1, c0, 0       ; Read CP15 Control Register into r0
        TST     r0, #0x1                    ; Is the MMU enabled? (bit 0; NE if set)
        BICNE   r0, r0, #0x1                ; MMU was on: clear bit 0 ...
        MCRNE   p15, 0, r0, c1, c0, 0       ; ... and write the value back
                                            ; (both skipped when the MMU is already off)

;==================================================================
; Initialise Supervisor Mode Stack
; Note stack must be 8 byte aligned.
;
; Only the SP of the mode the core is currently in is loaded; there is
; no mode switch here.
; NOTE(review): "Supervisor" presumes Start is entered in SVC mode
; (e.g. straight from reset) - confirm for other entry paths.
;==================================================================

        IMPORT  ||Image$$STACK$$ZI$$Limit|| ; Linker symbol from scatter file
        LDR     SP, =||Image$$STACK$$ZI$$Limit|| ; SP = limit (highest address) of the STACK region

;==================================================================
; TLB maintenance, Invalidate Data and Instruction TLB's
;
; Uses r0 (left holding zero).
;==================================================================

        MOV    r0,#0                 ; operand register for the CP15 write below
        MCR    p15, 0, r0, c8, c7, 0 ; Cortex-A8 I-TLB and D-TLB invalidation

;==================================================================
; Cache Invalidation code for Cortex-A8
;
; 1. Invalidates the instruction cache when CLIDR reports a cache at
;    level 1.
; 2. Walks every cache level reported by CLIDR and issues a
;    clean-and-invalidate by set/way for each data or unified cache.
; 3. Issues a data synchronization barrier so the maintenance operations
;    have completed before the code that follows runs.
;
; Register use in the data/unified loop:
;   r0  = CLIDR value
;   r3  = CLIDR[26:24] << 1  (number of levels to process, times 2)
;   r10 = current cache level << 1 (value written to the Cache Size
;         Selection register and OR-ed into the set/way operand)
;   r1  = scratch: cache type of the level, then Cache Size ID value
;   r2  = scratch: 3 * level (bit offset of the level's type field in
;         CLIDR), then line-length field + 4 (shift for the set number)
;   r4  = maximum way number      r5 = CLZ(r4), shift for the way number
;   r7  = set number, counting down from the maximum
;   r9  = way number, counting down from r4
;   r11 = set/way operand being assembled
;==================================================================

        ; Invalidate L1 Instruction Cache

        MRC p15, 1, r0, c0, c0, 1   ; Read CLIDR
        TST r0, #0x3                ; Harvard Cache? (CLIDR bits [1:0] non-zero)
        MOV r0, #0                  ; MOV does not alter the flags set by TST
        MCRNE p15, 0, r0, c7, c5, 0 ; Invalidate Instruction Cache

        ; Invalidate Data/Unified Caches

        MRC p15, 1, r0, c0, c0, 1   ; Read CLIDR
        ANDS r3, r0, #&7000000      ; Isolate CLIDR[26:24]; Z is set when the field is zero
        MOV r3, r3, LSR #23         ; Total cache levels << 1 (flags from ANDS are kept)
        BEQ Finished                ; No cache levels to process

        MOV r10, #0                 ; R10 holds current cache level << 1
Loop1   ADD r2, r10, r10, LSR #1    ; R2 = 3 * level: bit offset of this level's type field in CLIDR
        MOV r1, r0, LSR r2          ; Bottom 3 bits are the Cache-type for this level
        AND r1, R1, #7              ; Get those 3 bits alone
        CMP r1, #2
        BLT Skip                    ; No cache or only instruction cache at this level

        MCR p15, 2, r10, c0, c0, 0  ; Write the Cache Size selection register
        MOV r1, #0
        MCR p15, 0, r1, c7, c5, 4   ; PrefetchFlush to sync the change to the CacheSizeID reg
        MRC p15, 1, r1, c0, c0, 0   ; Reads current Cache Size ID register
        AND r2, r1, #&7             ; Extract the line length field
        ADD r2, r2, #4              ; Add 4 for the line length offset (log2 16 bytes)
        LDR r4, =0x3FF
        ANDS r4, r4, r1, LSR #3     ; R4 is the max number on the way size (right aligned)
        CLZ r5, r4                  ; R5 is the bit position of the way size increment
        LDR r7, =0x00007FFF
        ANDS r7, r7, r1, LSR #13    ; R7 is the max number of the index size (right aligned)

Loop2   MOV r9, r4                  ; R9 working copy of the max way size (right aligned)

Loop3   ORR r11, r10, r9, LSL r5    ; Factor in the Way number and cache number into R11
        ORR r11, r11, r7, LSL r2    ; Factor in the Set number
        MCR p15, 0, r11, c7, c14, 2 ; Clean and Invalidate by set/way
        SUBS r9, r9, #1             ; Decrement the Way number
        BGE Loop3                   ; Inner loop: every way of the current set
        SUBS r7, r7, #1             ; Decrement the Set number
        BGE Loop2                   ; Outer loop: every set of the current level
Skip    ADD r10, r10, #2            ; increment the cache number
        CMP r3, r10
        BGT Loop1                   ; More levels to process

Finished
        MOV r1, #0
        MCR p15, 0, r1, c7, c10, 4  ; Data Synchronization Barrier: wait for the cache
                                    ; maintenance operations above to complete


;===================================================================
; Cortex-A8 MMU Configuration
; Set translation table base
;
; On exit r0 holds the translation table base address; the page table
; generation code that follows relies on that value.
;===================================================================


        IMPORT ||Image$$TTB$$ZI$$Base||  ; from scatter file.

        ; Cortex-A8 supports two translation tables
        ; Configure translation table base (TTB) control register cp15,c2
        ; to a value of all zeros, indicates we are using TTB register 0.

        MOV     r0,#0x0
        MCR     p15, 0, r0, c2, c0, 2

        ; Write the address of our page table base to TTB register 0.
        ; Only the base address is written, so the attribute bits in the low
        ; part of the register are whatever the low address bits are.
        ; We are setting to outer-noncachable [4:3] is zero
        ; NOTE(review): that holds only if the TTB region base has bits [4:3]
        ; clear - confirm against the scatter file.

        LDR     r0,=||Image$$TTB$$ZI$$Base||
        MCR     p15, 0, r0, c2, c0, 0


;===================================================================
; Cortex-A8 PAGE TABLE generation, using standard Arch v6 tables
;
; AP[11:10]   - Access Permissions = b11, Read/Write Access
; Domain[8:5] - Domain = b1111, Domain 15
; Type[1:0]   - Descriptor Type = b10, 1Mb descriptors
;
; TEX  C  B
; 000  0  0  Strongly Ordered
; 001  1  1  Outer and inner write back, write allocate Normal
;
; In:   r0 = translation table base address (left there by the TTB
;            register 0 setup that precedes this section)
; Uses: r1, r2, r3 and the condition flags; r0 is preserved.
;
; Step 1 fills all 4096 level-1 entries with flat (virtual == physical)
;        1MB section descriptors using the TEX/C/B = 000/0/0 attributes.
; Step 2 rewrites the first CACHED_SECTIONS entries with TEX/C/B = 001/1/1
;        so that the lowest CACHED_SECTIONS megabytes are write-back
;        cacheable.
;===================================================================

CACHED_SECTIONS EQU     13                  ; 1MB sections, starting at address 0, made cacheable

        LDR     r1,=0xfff                   ; loop counter
        LDR     r2,=2_00000000000000000000110111100010

        ; r0 contains the address of the translation table base
        ; r1 is loop counter
        ; r2 is level1 descriptor (bits 19:0)

        ; use loop counter to create 4096 individual table entries
        ; this writes from TTB base + 0x3FFC down to TTB base + 0 in word
        ; steps (4bytes).
        ; NOTE(review): the original comment quoted 0x7FFC down to 0x4000,
        ; which assumes the TTB region is placed at 0x4000 - confirm against
        ; the scatter file.

init_ttb_1

        ORR     r3, r2, r1, LSL#20          ; r3 now contains full level1 descriptor to write
        STR     r3, [r0, r1, LSL#2]         ; str table entry at TTB base + loopcount*4
        SUBS    r1, r1, #1                  ; decrement loop counter
        BPL     init_ttb_1                  ; repeat until the counter goes negative (entry 0 done)

        ; The last pass of the loop ran with r1 = 0, so r3 now holds the
        ; descriptor for section 0 (section base address bits are zero).
        ; Change the cacheable attribute of that descriptor:
        ; TEX[14:12]=001 and CB[3:2]= 11, Outer and inner write back, write allocate.

        ORR     r3,r3,#2_0000000001100      ; Set CB bits
        ORR     r3,r3,#2_1000000000000      ; Set TEX bits
        STR     r3,[r0]                     ; entry 0: virtual memory from 0 to 1MB

        ; Entries 1 .. CACHED_SECTIONS-1 get the same attributes with the
        ; section base address advanced by 1MB per entry (flat mapping).
        ; r3 is kept as the template for section 0.

        MOV     r1, #1                      ; r1 = section index
init_ttb_2

        ADD     r2, r3, r1, LSL#20          ; r2 = template + index * 1MB
        STR     r2, [r0, r1, LSL#2]         ; overwrite table entry at TTB base + index*4
        ADD     r1, r1, #1                  ; next section
        CMP     r1, #CACHED_SECTIONS
        BLO     init_ttb_2                  ; loop while index < CACHED_SECTIONS

;===================================================================
; Setup domain control register - Enable all domains to client mode
;
; Uses r0. The register is written with a complete new value, so its
; previous contents are not read first.
;===================================================================

        LDR     r0, =0x55555555           ; Initialize every domain entry to b01 (client)
        MCR     p15, 0, r0, c3, c0, 0     ; Write Domain Access Control Register

;===================================================================
; Setup L2 Cache - L2 Cache Auxiliary Control
;
; The register write is disabled in this file. The instruction that
; prepared its operand is disabled with it so the two are re-enabled
; together; nothing after this point reads r0 before reloading it.
;===================================================================

        ;MOV     r0, #0
        ;MCR     p15, 1, r0, c9, c0, 2      ; Write L2 Auxilary Control Register

;==================================================================
; Enable access to NEON/VFP by enabling access to Coprocessors 10 and 11.
; Enables Full Access i.e. in both priv and non priv modes
;
; Uses r0.
;==================================================================

        MRC     p15, 0, r0, c1, c0, 2      ; read CP access register
        ORR     r0, r0, #(0x3  <<20)       ; enable access CP 10
        ORR     r0, r0, #(0x3  <<22)       ; enable access CP 11
        MCR     p15, 0, r0, c1, c0, 2      ; write CP access register back

        MOV     r0, #0
        MCR     p15, 0, r0, c7, c5, 4      ; PrefetchFlush so the new access rights are in
                                           ; effect before the FPEXC write below

;==================================================================
; Switch on the VFP and Neon Hardware
;=================================================================

        MOV     r0, #(0x1 << 30)            ; only bit 30 (EN) set
        FMXR    FPEXC, r0                   ; Write FPEXC register, EN bit set.

;===================================================================
; Enable MMU and Branch to __main
;
; Uses r0 and r12. Does not return: control passes to __main.
;===================================================================

        IMPORT  __main                      ; before MMU enabled import label to __main
        LDR     r12,=__main                 ; save this in register for possible long jump

        ; Make sure the translation table stores and the earlier CP15
        ; maintenance operations have completed before translation is on.

        MOV     r0, #0
        MCR     p15, 0, r0, c7, c10, 4      ; Data Synchronization Barrier
        MCR     p15, 0, r0, c7, c5, 4       ; PrefetchFlush

        MRC     p15, 0, r0, c1, c0, 0       ; read CP15 register 1 into r0
        ORR     r0, r0, #0x1                ; enable MMU before scatter loading
        MCR     p15, 0, r0, c1, c0, 0       ; write CP15 register 1


; Now the MMU is enabled, virtual to physical address translations will occur.
; This will affect the next instruction fetches.
;
; The two instructions currently in the ARM pipeline will have been fetched
; before the MMU was enabled. This property is useful because the next two
; instructions are safe even if new instruction fetches fail. If this routine
; was mapped out of the new virtual memory map, the branch to __main would
; still succeed.
;
; NOTE(review): the pipeline argument above is inherited from the original
; comment and is not something this code verifies. The level-1 table built
; earlier in this file maps every 1MB section to the identical physical
; address, so the address of the BX below is the same with translation on.

        BX      r12                 ; branch to __main  C library entry point

        END                         ; mark the end of this file