Lines matching +full:16 +full:- +full:byte in arch/xtensa/lib/hal/memcopy.S

2  * arch/xtensa/lib/hal/memcopy.S -- Core HAL library functions
9 * Copyright (C) 2002 - 2012 Tensilica Inc.
24 * 32-bit load and store instructions (as required for these
35 * do 16 bytes with a loop, and then finish up with
36 * 8, 4, 2, and 1 byte copies conditional on the length;
39 * This code tries to use fall-through branches for the common
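
The comments above describe the overall strategy: move the bulk of the data in 16-byte chunks, then finish the leftover 0..15 bytes with 8-, 4-, 2- and 1-byte copies gated on the low bits of the length. A rough C sketch of that shape (illustrative only, names made up, alignment handling ignored here; the real routine aligns the pointers first, as the lines further down show):

#include <stddef.h>
#include <string.h>

/* Illustrative sketch of the copy strategy: bulk 16-byte chunks,
 * then a tail driven by bits 3..0 of the length. */
static void copy_strategy_sketch(unsigned char *d, const unsigned char *s,
                                 size_t n)
{
    size_t chunks = n >> 4;              /* 16 bytes per loop iteration */

    while (chunks--) {
        memcpy(d, s, 16);
        d += 16;
        s += 16;
    }
    if (n & 8) { memcpy(d, s, 8); d += 8; s += 8; }
    if (n & 4) { memcpy(d, s, 4); d += 4; s += 4; }
    if (n & 2) { memcpy(d, s, 2); d += 2; s += 2; }
    if (n & 1) { *d = *s; }
}
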
61 * Byte by byte copy
64 .byte 0 # 1 mod 4 alignment for LOOPNEZ
89 .Ldst1mod2: # dst is only byte aligned
90 _bltui a4, 7, .Lbytecopy # do short copies byte by byte
92 # copy 1 byte
95 addi a4, a4, -1
100 .Ldst2mod4: # dst 16-bit aligned
102 _bltui a4, 6, .Lbytecopy # do short copies byte by byte
106 addi a4, a4, -2
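
The .Ldst1mod2 / .Ldst2mod4 lines above align the destination before the word loop, falling back to a plain byte loop when the copy is too short to be worth aligning (the _bltui 7 and 6 checks). A rough C equivalent, with hypothetical helper names:

#include <stddef.h>
#include <stdint.h>

/* Fallback used for copies too short to be worth aligning. */
static void bytecopy(unsigned char *d, const unsigned char *s, size_t n)
{
    while (n--)
        *d++ = *s++;
}

/* Copy 1 and/or 2 leading bytes so that dst becomes 4-byte aligned,
 * or fall back to bytecopy for very short copies.  Returns nonzero
 * when the whole copy has already been handled. */
static int align_dst_sketch(unsigned char **dp, const unsigned char **sp,
                            size_t *np)
{
    unsigned char *d = *dp;
    const unsigned char *s = *sp;
    size_t n = *np;

    if ((uintptr_t)d & 1) {              /* dst is only byte aligned */
        if (n < 7) { bytecopy(d, s, n); return 1; }
        *d++ = *s++;                     /* copy 1 byte */
        n--;
    }
    if ((uintptr_t)d & 2) {              /* dst is only 16-bit aligned */
        if (n < 6) { bytecopy(d, s, n); return 1; }
        *d++ = *s++;                     /* copy 2 bytes */
        *d++ = *s++;
        n -= 2;
    }
    *dp = d; *sp = s; *np = n;
    return 0;
}
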
122 srli a7, a4, 4 # number of loop iterations with 16B per iteration
127 * Destination and source are word-aligned, use word copy.
129 # copy 16 bytes per iteration for word-aligned dst and word-aligned src
135 add a8, a8, a3 # a8 = end of last 16B source chunk
145 addi a3, a3, 16
147 addi a5, a5, 16
183 # copy 1 byte
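
Once destination and source are both word aligned, the main loop moves four 32-bit words (16 bytes) per iteration and bumps both pointers by 16, as in the addi lines above; the leftover bytes are then finished by the 8/4/2/1 tail. A sketch of that inner loop, assuming 4-byte-aligned pointers:

#include <stddef.h>
#include <stdint.h>

/* Sketch of the aligned inner loop: four 32-bit loads and stores per
 * iteration, pointers advanced by 16 as in the "addi ..., 16" lines. */
static void word_loop_sketch(uint32_t *d, const uint32_t *s, size_t len)
{
    size_t iters = len >> 4;             /* number of 16-byte iterations */

    while (iters--) {
        d[0] = s[0];
        d[1] = s[1];
        d[2] = s[2];
        d[3] = s[3];
        s += 4;                          /* like "addi a3, a3, 16" (src) */
        d += 4;                          /* like "addi a5, a5, 16" (dst) */
    }
    /* the remaining len & 15 bytes are finished by the 8/4/2/1 tail */
}
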
194 _beqz a4, .Ldone # avoid loading anything for zero-length copies
195 # copy 16 bytes per iteration for word-aligned dst and unaligned src
196 __ssa8 a3 # set shift amount from byte offset
211 add a10, a10, a3 # a10 = end of last 16B source chunk
221 l32i a6, a3, 16
224 addi a3, a3, 16
227 addi a5, a5, 16
270 # copy 1 byte
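
For a word-aligned destination with an unaligned source, __ssa8 derives a shift amount from the low source-address bits and the loop merges pairs of neighbouring aligned source words into each aligned destination word (the Xtensa SRC funnel-shift instruction in the full source). A little-endian C sketch of the same idea, one word per step rather than the 16-byte unroll used above:

#include <stddef.h>
#include <stdint.h>

/* Little-endian sketch of the shift-and-merge copy for an unaligned
 * source: round src down to a word boundary, then combine each pair of
 * neighbouring aligned words into one destination word.  Like the
 * assembly, it reads whole aligned source words, so it may touch a few
 * bytes past the last source byte within the final aligned word.
 * Assumes src is not word aligned, so shift is 8, 16 or 24. */
static void shifted_copy_sketch(uint32_t *d, const unsigned char *s, size_t len)
{
    unsigned shift = 8 * ((uintptr_t)s & 3);          /* like __ssa8 */
    const uint32_t *ws = (const uint32_t *)((uintptr_t)s & ~(uintptr_t)3);
    uint32_t prev = *ws++;                            /* first partial word */
    size_t words = len >> 2;

    while (words--) {
        uint32_t next = *ws++;
        /* funnel shift of two source words, as SRC does in hardware */
        *d++ = (prev >> shift) | (next << (32 - shift));
        prev = next;
    }
    /* the remaining len & 3 bytes are copied one at a time */
}
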
287 * 32-bit load and store instructions (as required for these
314 * Byte by byte copy
317 .byte 0 # 1 mod 4 alignment for LOOPNEZ
327 addi a3, a3, -1
329 addi a5, a5, -1
343 .Lbackdst1mod2: # dst is only byte aligned
344 _bltui a4, 7, .Lbackbytecopy # do short copies byte by byte
346 # copy 1 byte
347 addi a3, a3, -1
349 addi a5, a5, -1
351 addi a4, a4, -1
354 .Lbackdst2mod4: # dst 16-bit aligned
356 _bltui a4, 6, .Lbackbytecopy # do short copies byte by byte
357 addi a3, a3, -2
360 addi a5, a5, -2
363 addi a4, a4, -2
383 srli a7, a4, 4 # number of loop iterations with 16B per iteration
388 * Destination and source are word-aligned, use word copy.
390 # copy 16 bytes per iteration for word-aligned dst and word-aligned src
396 sub a8, a3, a8 # a8 = start of first 16B source chunk
399 addi a3, a3, -16
402 addi a5, a5, -16
415 addi a3, a3, -8
418 addi a5, a5, -8
428 addi a3, a3, -4
430 addi a5, a5, -4
437 addi a3, a3, -2
439 addi a5, a5, -2
444 # copy 1 byte
445 addi a3, a3, -1
447 addi a5, a5, -1
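
The .Lback* labels belong to the backward variant, which copies from the highest address downward (useful for memmove-style copies where the destination overlaps the source from above): align the destination end pointer by copying the trailing 1 and/or 2 bytes, move 16 bytes per iteration downward (the "addi ..., -16" updates above), then finish with the 8/4/2/1 tail at the low end. A C sketch, using memmove for the fixed-size pieces so overlap stays safe and omitting the short-copy fast path:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Sketch of a backward copy: walk from the end so that, when dst > src
 * and the buffers overlap, no source byte is overwritten before it has
 * been copied.  memmove() keeps each fixed-size piece overlap-safe. */
static void backward_copy_sketch(unsigned char *dst, const unsigned char *src,
                                 size_t n)
{
    unsigned char *d = dst + n;
    const unsigned char *s = src + n;

    /* align the destination *end* pointer (.Lbackdst1mod2 / 2mod4) */
    if (n && ((uintptr_t)d & 1)) { *--d = *--s; n--; }
    if (n >= 2 && ((uintptr_t)d & 2)) { d -= 2; s -= 2; memmove(d, s, 2); n -= 2; }

    size_t iters = n >> 4;                   /* 16 bytes per iteration */
    while (iters--) {
        d -= 16; s -= 16;                    /* like "addi ..., -16" */
        memmove(d, s, 16);
    }
    if (n & 8) { d -= 8; s -= 8; memmove(d, s, 8); }
    if (n & 4) { d -= 4; s -= 4; memmove(d, s, 4); }
    if (n & 2) { d -= 2; s -= 2; memmove(d, s, 2); }
    if (n & 1) { *--d = *--s; }
}
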
457 _beqz a4, .Lbackdone # avoid loading anything for zero-length copies
458 # copy 16 bytes per iteration for word-aligned dst and unaligned src
459 __ssa8 a3 # set shift amount from byte offset
473 sub a10, a3, a10 # a10 = start of first 16B source chunk
476 addi a3, a3, -16
479 addi a5, a5, -16
496 addi a3, a3, -8
499 addi a5, a5, -8
508 addi a3, a3, -4
510 addi a5, a5, -4
524 addi a3, a3, -2
527 addi a5, a5, -2
533 # copy 1 byte
534 addi a3, a3, -1
535 addi a5, a5, -1
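
The final group of lines is the backward counterpart of the unaligned-source loop: the shift amount again comes from the low bits of the source end address, and the walk is downward, starting from the aligned word that holds the last source bytes. A little-endian sketch mirroring the forward version (assumes the source end is not word aligned, so the shift is 8, 16 or 24, and that the destination end pointer is already word aligned):

#include <stddef.h>
#include <stdint.h>

/* Little-endian sketch of the backward shift-and-merge copy: keep the
 * aligned word holding the highest source bytes and walk downward,
 * funnel-shifting each pair of neighbouring aligned words. */
static void backward_shifted_sketch(uint32_t *d_end, const unsigned char *s_end,
                                    size_t len)
{
    unsigned shift = 8 * ((uintptr_t)s_end & 3);      /* from __ssa8 */
    const uint32_t *ws = (const uint32_t *)((uintptr_t)s_end & ~(uintptr_t)3);
    uint32_t hi = *ws;                   /* word holding the last source bytes */
    size_t words = len >> 2;

    while (words--) {
        uint32_t lo = *--ws;             /* next lower aligned word */
        *--d_end = (lo >> shift) | (hi << (32 - shift));
        hi = lo;
    }
    /* the remaining len & 3 bytes below this point go byte by byte */
}
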