/*********************************************************************
*                   (c) SEGGER Microcontroller GmbH                  *
*                        The Embedded Experts                        *
*                           www.segger.com                           *
**********************************************************************

-------------------------- END-OF-HEADER -----------------------------

File    : SEGGER_RTT_ASM_ARMv7M.S
Purpose : Assembler implementation of RTT functions for ARMv7M

Additional information:
  This module is written to be assembler-independent and works with
  GCC and clang (Embedded Studio) and IAR.
*/

#define SEGGER_RTT_ASM      // Used to control processed input from header file
#include "SEGGER_RTT.h"

/*********************************************************************
*
*       Defines, fixed
*
*  Toolchain abstraction: every assembler directive used below is
*  routed through one of these macros so that the same source can be
*  assembled by GNU as / clang (incl. Embedded Studio) and by IAR.
*
**********************************************************************
*/

#define _CCIAR   0
#define _CCCLANG 1

#if (defined __SES_ARM) || (defined __GNUC__) || (defined __clang__)
  #define _CC_TYPE             _CCCLANG
  #define _PUB_SYM             .global
  #define _EXT_SYM             .extern
  #define _END                 .end
  #define _WEAK                .weak
  #define _THUMB_FUNC          .thumb_func
  #define _THUMB_CODE          .code 16
  #define _WORD                .word
  //
  // Note: The GNU variant only uses <Sect>. <Type> and <AlignExp> are accepted
  //       for source compatibility with the IAR variant and are ignored.
  //
  #define _SECTION(Sect, Type, AlignExp) .section Sect ##, "ax"
  #define _ALIGN(Exp)          .align Exp
  #define _PLACE_LITS          .ltorg
  #define _DATA_SECT_START
  #define _C_STARTUP           _start
  #define _STACK_END           __stack_end__
  #define _RAMFUNC
  //
  // .text     => Link to flash
  // .fast     => Link to RAM
  // OtherSect => Usually link to RAM
  // Alignment is 2^x
  //
#elif defined (__IASMARM__)
  #define _CC_TYPE             _CCIAR
  #define _PUB_SYM             PUBLIC
  #define _EXT_SYM             EXTERN
  #define _END                 END
  #define _WEAK                PUBWEAK           // IAR directive for a weak public symbol
  #define _THUMB_FUNC
  #define _THUMB_CODE          THUMB
  #define _WORD                DCD
  #define _SECTION(Sect, Type, AlignExp) SECTION Sect ## : ## Type ## :REORDER:NOROOT ## (AlignExp)
  #define _ALIGN(Exp)          alignrom Exp
  #define _PLACE_LITS
  #define _DATA_SECT_START     DATA
  #define _C_STARTUP           __iar_program_start
  #define _STACK_END           sfe(CSTACK)
  #define _RAMFUNC             SECTION_TYPE SHT_PROGBITS, SHF_WRITE | SHF_EXECINSTR
  //
  // .text     => Link to flash
  // .textrw   => Link to RAM
  // OtherSect => Usually link to RAM
  // NOROOT    => Allows linker to throw away the function, if not referenced
  // Alignment is 2^x
  //
#else
  //
  // Without this branch an unknown assembler leaves _CC_TYPE undefined, which the
  // preprocessor evaluates as 0 (== _CCIAR), so the IAR path would be selected silently.
  //
  #error "SEGGER_RTT_ASM_ARMv7M.S: Unsupported assembler (expected GCC, clang, Embedded Studio or IAR)"
#endif

//
// Module prologue: IAR wants a module name, GNU-style assemblers need unified syntax for the Thumb-2 code below
//
#if (_CC_TYPE == _CCIAR)
        NAME SEGGER_RTT_ASM_ARMv7M
#else
        .syntax unified
#endif

#if defined (RTT_USE_ASM) && (RTT_USE_ASM == 1)
        #define SHT_PROGBITS 0x1

/*********************************************************************
*
*       Defines, structure layout
*
*  Byte offsets used to address the RTT control block from assembler.
*  The values are the ones this routine has always used as literals;
*  they have to match the C declaration of _SEGGER_RTT (SEGGER_RTT.h).
*  Verify them whenever that declaration changes.
*
*  Note: The size of one up-buffer descriptor (24 bytes) is not a
*        define because it is encoded in the shift/add sequence
*        (Index * 3) << 3 at the start of the function.
*
**********************************************************************
*/
        #define _RTT_OFF_AUP            24       // Offset of aUp[0] inside _SEGGER_RTT
        #define _RTT_UP_OFF_PBUFFER      4       // pRing->pBuffer
        #define _RTT_UP_OFF_SIZE         8       // pRing->SizeOfBuffer
        #define _RTT_UP_OFF_WROFF       12       // pRing->WrOff
        #define _RTT_UP_OFF_RDOFF       16       // pRing->RdOff

/*********************************************************************
*
*       Public / external symbols
*
**********************************************************************
*/

        //
        // Note: The two memcpy symbols are declared but not referenced by the code in this file (all copy loops are inline).
        //
        _EXT_SYM __aeabi_memcpy
        _EXT_SYM __aeabi_memcpy4
        _EXT_SYM _SEGGER_RTT

        _PUB_SYM SEGGER_RTT_ASM_WriteSkipNoLock

/*********************************************************************
*
*       SEGGER_RTT_ASM_WriteSkipNoLock
*
*  Function description
*    Stores a specified number of characters in SEGGER RTT
*    control block which is then read by the host.
*    SEGGER_RTT_ASM_WriteSkipNoLock does not lock the application and
*    skips all data, if the data does not fit into the buffer.
*
*  Parameters
*    BufferIndex  (R0) Index of "Up"-buffer to be used (e.g. 0 for "Terminal").
*    pBuffer      (R1) Pointer to character array. Does not need to point to a \0 terminated string.
*    NumBytes     (R2) Number of bytes to be stored in the SEGGER RTT control block.
*                 MUST be > 0!!!
*                 This is done for performance reasons, so no initial check has to be done.
*                 (The copy loops are do-while style: they copy one byte before testing the counter.)
*
*  Return value (R0)
*    1: Data has been copied
*    0: No space, data has not been copied
*
*  Register / stack usage
*    Leaf function, LR is not modified.
*    R4-R7 are saved on entry and restored on every exit path.
*    R0-R3 and the condition flags are modified.
*
*  Notes
*    (1) If there is not enough space in the "Up"-buffer, all data is dropped.
*    (2) For performance reasons this function does not call Init()
*        and may only be called after RTT has been initialized.
*        Either by calling SEGGER_RTT_Init() or calling another RTT API function first.
*    (3) <WrOff> is written only after all payload bytes have been stored,
*        because the host may read the control block asynchronously.
*/
        _SECTION(.text, CODE, 2)
        _ALIGN(2)
        _THUMB_FUNC
SEGGER_RTT_ASM_WriteSkipNoLock:   // unsigned SEGGER_RTT_ASM_WriteSkipNoLock(unsigned BufferIndex, const void* pData, unsigned NumBytes) {
        //
        // Cases:
        //   1) RdOff <= WrOff => Space until wrap-around is sufficient
        //   2) RdOff <= WrOff => Space after wrap-around needed (copy in 2 chunks)
        //   3) RdOff <= WrOff => No space in buf
        //   4) RdOff >  WrOff => Space is sufficient
        //   5) RdOff >  WrOff => No space in buf
        //
        // 1) is the most common case for large buffers and assuming that J-Link reads the data fast enough
        //
        // Register usage:
        //   R0 BufferIndex on entry, then RdOff, <Tmp> register later on, return value on exit
        //   R1 pData
        //   R2 <NumBytes>
        //   R3 <Tmp> register. Hold free for subroutine calls
        //   R4 <Rem>
        //   R5 pRing->pBuffer
        //   R6 pRing (Points to the active up-buffer descriptor, &_SEGGER_RTT.aUp[BufferIndex])
        //   R7 WrOff
        //
        PUSH     {R4-R7}
        ADD      R3,R0,R0, LSL #+1               // R3 = BufferIndex * 3
        LDR.W    R0,=_SEGGER_RTT                 // pRing = &_SEGGER_RTT.aUp[BufferIndex];
        ADD      R0,R0,R3, LSL #+3               // += (BufferIndex * 3) << 3  => BufferIndex * 24 (size of one descriptor)
        ADD      R6,R0,#+_RTT_OFF_AUP
        LDR      R0,[R6, #+_RTT_UP_OFF_RDOFF]    // RdOff = pRing->RdOff;
        LDR      R7,[R6, #+_RTT_UP_OFF_WROFF]    // WrOff = pRing->WrOff;
        LDR      R5,[R6, #+_RTT_UP_OFF_PBUFFER]  // pRing->pBuffer
        CMP      R7,R0
        BCC.N    _CheckCase4                     // if (RdOff <= WrOff) {                           => Case 1), 2) or 3)
        //
        // Handling for case 1, later on identical to case 4
        //
        LDR      R3,[R6, #+_RTT_UP_OFF_SIZE]     //  Avail = pRing->SizeOfBuffer - WrOff - 1u;      => Space until wrap-around (assume 1 byte not usable for case that RdOff == 0)
        SUBS     R4,R3,R7                        // <Rem> (Used in case we jump into case 2 afterwards)
        SUBS     R3,R4,#+1                       // <Avail>
        CMP      R3,R2
        BCC.N    _CheckCase2                     // if (Avail >= NumBytes) {  => Case 1)?
_Case4:
        ADDS     R5,R7,R5                        // pBuffer += WrOff
        ADDS     R0,R2,R7                        // v = WrOff + NumBytes
        //
        // 2x unrolling for the copy loop that is used most of the time
        // This is a special optimization for small SystemView packets and makes them even faster
        //
        _ALIGN(2)
_LoopCopyStraight:                               // memcpy(pRing->pBuffer + WrOff, pData, NumBytes);
        LDRB     R3,[R1], #+1
        STRB     R3,[R5], #+1                    // *pDest++ = *pSrc++
        SUBS     R2,R2,#+1
        BEQ      _CSDone
        LDRB     R3,[R1], #+1
        STRB     R3,[R5], #+1                    // *pDest++ = *pSrc++
        SUBS     R2,R2,#+1
        BNE      _LoopCopyStraight
_CSDone:
#if _CORE_NEEDS_DMB                              // Do not slow down cores that do not need a DMB instruction here
        DMB                                      // Cortex-M7 may delay memory writes and also change the order in which the writes happen. Therefore, make sure that all buffer writes are finished, before updating the <WrOff> in the struct
#endif
        STR      R0,[R6, #+_RTT_UP_OFF_WROFF]    // pRing->WrOff = WrOff + NumBytes;
        MOVS     R0,#+1
        POP      {R4-R7}
        BX       LR                              // Return 1
_CheckCase2:
        ADDS     R0,R0,R3                        // Avail += RdOff; => Space incl. wrap-around
        CMP      R0,R2
        BCC.N    _Case3                          // if (Avail >= NumBytes) {           => Case 2? => If not, we have case 3) (does not fit)
        //
        // Handling for case 2
        //
        ADDS     R0,R7,R5                        // v = pRing->pBuffer + WrOff => Do not change pRing->pBuffer here because 2nd chunk needs org. value
        SUBS     R2,R2,R4                        // NumBytes -= Rem;  (Rem = pRing->SizeOfBuffer - WrOff; => Space until end of buffer)
_LoopCopyBeforeWrapAround:                       // memcpy(pRing->pBuffer + WrOff, pData, Rem); => Copy 1st chunk
        LDRB     R3,[R1], #+1
        STRB     R3,[R0], #+1                    // *pDest++ = *pSrc++
        SUBS     R4,R4,#+1
        BNE      _LoopCopyBeforeWrapAround
        //
        // Special case: First check that assumed RdOff == 0 calculated that last element before wrap-around could not be used
        // But 2nd check (considering space until wrap-around and until RdOff) revealed that RdOff is not 0, so we can use the last element
        // In this case, we may use a copy straight until buffer end anyway without needing to copy 2 chunks
        // Therefore, check if 2nd memcpy is necessary at all
        //
        ADDS     R4,R2,#+0                       // Save <NumBytes> (needed as counter in loop but must be written to <WrOff> after the loop). Also use this inst to update the flags to skip 2nd loop if possible
        BEQ.N    _No2ChunkNeeded                 // if (NumBytes) {
_LoopCopyAfterWrapAround:                        // memcpy(pRing->pBuffer, pData + Rem, NumBytes);
        LDRB     R3,[R1], #+1                    // pData already points to the next src byte due to copy loop increment before this loop
        STRB     R3,[R5], #+1                    // *pDest++ = *pSrc++
        SUBS     R2,R2,#+1
        BNE      _LoopCopyAfterWrapAround
_No2ChunkNeeded:
#if _CORE_NEEDS_DMB                              // Do not slow down cores that do not need a DMB instruction here
        DMB                                      // Cortex-M7 may delay memory writes and also change the order in which the writes happen. Therefore, make sure that all buffer writes are finished, before updating the <WrOff> in the struct
#endif
        STR      R4,[R6, #+_RTT_UP_OFF_WROFF]    // pRing->WrOff = NumBytes; (NumBytes was reduced by <Rem> above, so this is the offset behind the 2nd chunk) => Must be written after copying data because J-Link may read control block asynchronously while writing into buffer
        MOVS     R0,#+1
        POP      {R4-R7}
        BX       LR                              // Return 1
_CheckCase4:
        SUBS     R0,R0,R7
        SUBS     R0,R0,#+1                       // Avail = RdOff - WrOff - 1u;
        CMP      R0,R2
        BCS.N    _Case4                          // if (Avail >= NumBytes) {      => Case 4) == 1) ? => If not, we have case 5) == 3) (does not fit)
_Case3:
        MOVS     R0,#+0
        POP      {R4-R7}
        BX       LR                              // Return 0
        _PLACE_LITS                              // Literal pool for the LDR =_SEGGER_RTT above (expands to nothing for IAR)

#endif  // defined (RTT_USE_ASM) && (RTT_USE_ASM == 1)
        _END

/*************************** End of file ****************************/