Lines matching +full:16 +full:- +full:byte in arch/xtensa/lib/hal/memcopy.S

2  * arch/xtensa/lib/hal/memcopy.S -- Core HAL library functions
9 * Copyright (C) 2002 - 2012 Tensilica Inc.
24 * 32-bit load and store instructions (as required for these
35 * do 16 bytes with a loop, and then finish up with
36 * 8, 4, 2, and 1 byte copies conditional on the length;
39 * This code tries to use fall-through branches for the common
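
The comments above describe the overall strategy: move the bulk of the data in 16-byte chunks, then finish the leftover 0..15 bytes with 8-, 4-, 2- and 1-byte copies gated on the low bits of the length. A rough C sketch of that shape (illustrative only, names made up, alignment handling ignored here; the real routine aligns the pointers first, as the lines further down show):

#include <stddef.h>
#include <string.h>

/* Illustrative sketch of the copy strategy: bulk 16-byte chunks,
 * then a tail driven by bits 3..0 of the length. */
static void copy_strategy_sketch(unsigned char *d, const unsigned char *s,
                                 size_t n)
{
    size_t chunks = n >> 4;              /* 16 bytes per loop iteration */

    while (chunks--) {
        memcpy(d, s, 16);
        d += 16;
        s += 16;
    }
    if (n & 8) { memcpy(d, s, 8); d += 8; s += 8; }
    if (n & 4) { memcpy(d, s, 4); d += 4; s += 4; }
    if (n & 2) { memcpy(d, s, 2); d += 2; s += 2; }
    if (n & 1) { *d = *s; }
}
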
61 * Byte by byte copy
64 .byte 0 # 1 mod 4 alignment for LOOPNEZ
89 .Ldst1mod2: # dst is only byte aligned
90 _bltui a4, 7, .Lbytecopy # do short copies byte by byte
92 # copy 1 byte
95 addi a4, a4, -1
100 .Ldst2mod4: # dst 16-bit aligned
102 _bltui a4, 6, .Lbytecopy # do short copies byte by byte
106 addi a4, a4, -2
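
The .Ldst1mod2 / .Ldst2mod4 lines above align the destination before the word loop, falling back to a plain byte loop when the copy is too short to be worth aligning (the _bltui 7 and 6 checks). A rough C equivalent, with hypothetical helper names:

#include <stddef.h>
#include <stdint.h>

/* Fallback used for copies too short to be worth aligning. */
static void bytecopy(unsigned char *d, const unsigned char *s, size_t n)
{
    while (n--)
        *d++ = *s++;
}

/* Copy 1 and/or 2 leading bytes so that dst becomes 4-byte aligned,
 * or fall back to bytecopy for very short copies.  Returns nonzero
 * when the whole copy has already been handled. */
static int align_dst_sketch(unsigned char **dp, const unsigned char **sp,
                            size_t *np)
{
    unsigned char *d = *dp;
    const unsigned char *s = *sp;
    size_t n = *np;

    if ((uintptr_t)d & 1) {              /* dst is only byte aligned */
        if (n < 7) { bytecopy(d, s, n); return 1; }
        *d++ = *s++;                     /* copy 1 byte */
        n--;
    }
    if ((uintptr_t)d & 2) {              /* dst is only 16-bit aligned */
        if (n < 6) { bytecopy(d, s, n); return 1; }
        *d++ = *s++;                     /* copy 2 bytes */
        *d++ = *s++;
        n -= 2;
    }
    *dp = d; *sp = s; *np = n;
    return 0;
}
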
122 srli a7, a4, 4 # number of loop iterations with 16B per iteration
127 * Destination and source are word-aligned, use word copy.
129 # copy 16 bytes per iteration for word-aligned dst and word-aligned src
135 add a8, a8, a3 # a8 = end of last 16B source chunk
145 addi a3, a3, 16
147 addi a5, a5, 16
183 # copy 1 byte
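
Once destination and source are both word aligned, the main loop moves four 32-bit words (16 bytes) per iteration and bumps both pointers by 16, as in the addi lines above; the leftover bytes are then finished by the 8/4/2/1 tail. A sketch of that inner loop, assuming 4-byte-aligned pointers:

#include <stddef.h>
#include <stdint.h>

/* Sketch of the aligned inner loop: four 32-bit loads and stores per
 * iteration, pointers advanced by 16 as in the "addi ..., 16" lines. */
static void word_loop_sketch(uint32_t *d, const uint32_t *s, size_t len)
{
    size_t iters = len >> 4;             /* number of 16-byte iterations */

    while (iters--) {
        d[0] = s[0];
        d[1] = s[1];
        d[2] = s[2];
        d[3] = s[3];
        s += 4;                          /* like "addi a3, a3, 16" (src) */
        d += 4;                          /* like "addi a5, a5, 16" (dst) */
    }
    /* the remaining len & 15 bytes are finished by the 8/4/2/1 tail */
}
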
194 _beqz a4, .Ldone # avoid loading anything for zero-length copies
195 # copy 16 bytes per iteration for word-aligned dst and unaligned src
196 __ssa8 a3 # set shift amount from byte offset
211 add a10, a10, a3 # a10 = end of last 16B source chunk
221 l32i a6, a3, 16
224 addi a3, a3, 16
227 addi a5, a5, 16
270 # copy 1 byte
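
For a word-aligned destination with an unaligned source, __ssa8 derives a shift amount from the low source-address bits and the loop merges pairs of neighbouring aligned source words into each aligned destination word (the Xtensa SRC funnel-shift instruction in the full source). A little-endian C sketch of the same idea, one word per step rather than the 16-byte unroll used above:

#include <stddef.h>
#include <stdint.h>

/* Little-endian sketch of the shift-and-merge copy for an unaligned
 * source: round src down to a word boundary, then combine each pair of
 * neighbouring aligned words into one destination word.  Like the
 * assembly, it reads whole aligned source words, so it may touch a few
 * bytes past the last source byte within the final aligned word.
 * Assumes src is not word aligned, so shift is 8, 16 or 24. */
static void shifted_copy_sketch(uint32_t *d, const unsigned char *s, size_t len)
{
    unsigned shift = 8 * ((uintptr_t)s & 3);          /* like __ssa8 */
    const uint32_t *ws = (const uint32_t *)((uintptr_t)s & ~(uintptr_t)3);
    uint32_t prev = *ws++;                            /* first partial word */
    size_t words = len >> 2;

    while (words--) {
        uint32_t next = *ws++;
        /* funnel shift of two source words, as SRC does in hardware */
        *d++ = (prev >> shift) | (next << (32 - shift));
        prev = next;
    }
    /* the remaining len & 3 bytes are copied one at a time */
}
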
287 * 32-bit load and store instructions (as required for these
314 * Byte by byte copy
317 .byte 0 # 1 mod 4 alignment for LOOPNEZ
327 addi a3, a3, -1
329 addi a5, a5, -1
343 .Lbackdst1mod2: # dst is only byte aligned
344 _bltui a4, 7, .Lbackbytecopy # do short copies byte by byte
346 # copy 1 byte
347 addi a3, a3, -1
349 addi a5, a5, -1
351 addi a4, a4, -1
354 .Lbackdst2mod4: # dst 16-bit aligned
356 _bltui a4, 6, .Lbackbytecopy # do short copies byte by byte
357 addi a3, a3, -2
360 addi a5, a5, -2
363 addi a4, a4, -2
383 srli a7, a4, 4 # number of loop iterations with 16B per iteration
388 * Destination and source are word-aligned, use word copy.
390 # copy 16 bytes per iteration for word-aligned dst and word-aligned src
396 sub a8, a3, a8 # a8 = start of first 16B source chunk
399 addi a3, a3, -16
402 addi a5, a5, -16
415 addi a3, a3, -8
418 addi a5, a5, -8
428 addi a3, a3, -4
430 addi a5, a5, -4
437 addi a3, a3, -2
439 addi a5, a5, -2
444 # copy 1 byte
445 addi a3, a3, -1
447 addi a5, a5, -1
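
The .Lback* labels belong to the backward variant, which copies from the highest address downward (useful for memmove-style copies where the destination overlaps the source from above): align the destination end pointer by copying the trailing 1 and/or 2 bytes, move 16 bytes per iteration downward (the "addi ..., -16" updates above), then finish with the 8/4/2/1 tail at the low end. A C sketch, using memmove for the fixed-size pieces so overlap stays safe and omitting the short-copy fast path:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Sketch of a backward copy: walk from the end so that, when dst > src
 * and the buffers overlap, no source byte is overwritten before it has
 * been copied.  memmove() keeps each fixed-size piece overlap-safe. */
static void backward_copy_sketch(unsigned char *dst, const unsigned char *src,
                                 size_t n)
{
    unsigned char *d = dst + n;
    const unsigned char *s = src + n;

    /* align the destination *end* pointer (.Lbackdst1mod2 / 2mod4) */
    if (n && ((uintptr_t)d & 1)) { *--d = *--s; n--; }
    if (n >= 2 && ((uintptr_t)d & 2)) { d -= 2; s -= 2; memmove(d, s, 2); n -= 2; }

    size_t iters = n >> 4;                   /* 16 bytes per iteration */
    while (iters--) {
        d -= 16; s -= 16;                    /* like "addi ..., -16" */
        memmove(d, s, 16);
    }
    if (n & 8) { d -= 8; s -= 8; memmove(d, s, 8); }
    if (n & 4) { d -= 4; s -= 4; memmove(d, s, 4); }
    if (n & 2) { d -= 2; s -= 2; memmove(d, s, 2); }
    if (n & 1) { *--d = *--s; }
}
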
457 _beqz a4, .Lbackdone # avoid loading anything for zero-length copies
458 # copy 16 bytes per iteration for word-aligned dst and unaligned src
459 __ssa8 a3 # set shift amount from byte offset
473 sub a10, a3, a10 # a10 = start of first 16B source chunk
476 addi a3, a3, -16
479 addi a5, a5, -16
496 addi a3, a3, -8
499 addi a5, a5, -8
508 addi a3, a3, -4
510 addi a5, a5, -4
524 addi a3, a3, -2
527 addi a5, a5, -2
533 # copy 1 byte
534 addi a3, a3, -1
535 addi a5, a5, -1
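
The final group of lines is the backward counterpart of the unaligned-source loop: the shift amount again comes from the low bits of the source end address, and the walk is downward, starting from the aligned word that holds the last source bytes. A little-endian sketch mirroring the forward version (assumes the source end is not word aligned, so the shift is 8, 16 or 24, and that the destination end pointer is already word aligned):

#include <stddef.h>
#include <stdint.h>

/* Little-endian sketch of the backward shift-and-merge copy: keep the
 * aligned word holding the highest source bytes and walk downward,
 * funnel-shifting each pair of neighbouring aligned words. */
static void backward_shifted_sketch(uint32_t *d_end, const unsigned char *s_end,
                                    size_t len)
{
    unsigned shift = 8 * ((uintptr_t)s_end & 3);      /* from __ssa8 */
    const uint32_t *ws = (const uint32_t *)((uintptr_t)s_end & ~(uintptr_t)3);
    uint32_t hi = *ws;                   /* word holding the last source bytes */
    size_t words = len >> 2;

    while (words--) {
        uint32_t lo = *--ws;             /* next lower aligned word */
        *--d_end = (lo >> shift) | (hi << (32 - shift));
        hi = lo;
    }
    /* the remaining len & 3 bytes below this point go byte by byte */
}
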