/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <private/bionic_asm.h>


/* L(name) produces an assembler-local (.L-prefixed) label. */
#ifndef L
# define L(label)	.L##label
#endif

/* ALIGN(n) aligns the next instruction to a 2^n-byte boundary. */
#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

	.section .text.avx2,"ax",@progbits

/*
 * __memset_chk_avx2: length-checked front end for memset_avx2.
 *
 * In:   %rdi = dst, %rsi = byte, %rdx = n, %rcx = dst_len
 * If n > dst_len (unsigned compare) control transfers to __memset_chk_fail;
 * otherwise execution runs off the end of this symbol into memset_avx2,
 * which is defined immediately below.
 *
 * NOTE(review): the fall-through relies on whatever ENTRY() emits between
 * the two symbols being safe to execute -- confirm against bionic_asm.h.
 */
ENTRY(__memset_chk_avx2)
	// %rdi = dst, %rsi = byte, %rdx = n, %rcx = dst_len
	cmp	%rcx, %rdx
	ja	__memset_chk_fail
	// Fall through to memset...
END(__memset_chk_avx2)

/*
 * memset_avx2: fill n bytes at dst with the low byte of %rsi.
 *
 * In:    %rdi = dst, %rsi = byte (only the low 8 bits are used), %rdx = n
 * Out:   %rax = dst
 * Uses:  %rcx, %rdx, %rsi, %r8, %xmm0/%ymm0, flags
 *
 * Strategy: every size class writes possibly-overlapping stores anchored at
 * the start (dst + k) and at the end (dst + n - k), so no byte-granular tail
 * loop is needed.  Only sizes above 256 bytes enter a loop, which runs over
 * the 128-byte-aligned interior of the buffer.
 */
ENTRY(memset_avx2)
	movq	%rdi, %rax			// return value = dst
	and	$0xff, %rsi			// keep only the fill byte
	mov	$0x0101010101010101, %rcx
	imul	%rsi, %rcx			// %rcx = fill byte replicated 8 times
	cmpq	$16, %rdx
	jae	L(16bytesormore)

	// n < 16: dispatch on the highest set bit of n (bits 3..0).
	testb	$8, %dl
	jnz	L(8_15bytes)
	testb	$4, %dl
	jnz	L(4_7bytes)
	testb	$2, %dl
	jnz	L(2_3bytes)
	testb	$1, %dl
	jz	1f				// n == 0: nothing to store
	movb	%cl, (%rdi)			// n == 1
1:	ret

L(8_15bytes):
	// Two 8-byte stores, first and last; they overlap when n < 16.
	movq	%rcx, (%rdi)
	movq	%rcx, -8(%rdi, %rdx)
	ret

L(4_7bytes):
	// Two 4-byte stores, first and last; they overlap when n < 8.
	movl	%ecx, (%rdi)
	movl	%ecx, -4(%rdi, %rdx)
	ret

L(2_3bytes):
	// Two 2-byte stores, first and last; they overlap when n == 3.
	movw	%cx, (%rdi)
	movw	%cx, -2(%rdi, %rdx)
	ret

	ALIGN (4)
L(16bytesormore):
	// Broadcast the pattern to all 16 bytes of %xmm0.  Every byte of %rcx
	// is already the fill byte, so replicating its low dword is enough.
	movq	%rcx, %xmm0
	pshufd	$0, %xmm0, %xmm0

	// First and last 16 bytes: complete for 16 <= n <= 32.
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm0, -16(%rdi, %rdx)
	cmpq	$32, %rdx
	jbe	L(done)

	// First and last 32 bytes now written: complete for n <= 64.
	movdqu	%xmm0, 16(%rdi)
	movdqu	%xmm0, -32(%rdi, %rdx)
	cmpq	$64, %rdx
	jbe	L(done)

	// First and last 64 bytes now written: complete for n <= 128.
	movdqu	%xmm0, 32(%rdi)
	movdqu	%xmm0, 48(%rdi)
	movdqu	%xmm0, -64(%rdi, %rdx)
	movdqu	%xmm0, -48(%rdi, %rdx)
	cmpq	$128, %rdx
	jbe	L(done)

	// Widen the pattern to 32 bytes.  Bytes [64, 128) from the start and
	// [n-128, n-64) from the end complete the first and last 128 bytes,
	// which is everything for n <= 256.
	vpbroadcastb	%xmm0, %ymm0
	vmovdqu	%ymm0, 64(%rdi)
	vmovdqu	%ymm0, 96(%rdi)
	vmovdqu	%ymm0, -128(%rdi, %rdx)
	vmovdqu	%ymm0, -96(%rdi, %rdx)
	cmpq	$256, %rdx
	jbe	L(done)

	// n > 256.  The first and last 128 bytes are already filled, so only
	// the 128-byte-aligned interior [%rcx, %rdx) remains:
	//   %rcx = (dst + 128) rounded down to 128  -> lies in (dst, dst + 128]
	//   %rdx = (dst + n)   rounded down to 128  -> lies in (dst + n - 128, dst + n]
	//   %r8  = original n, kept for the cache-size comparison below
	ALIGN (4)
	leaq	128(%rdi), %rcx
	andq	$-128, %rcx
	movq	%rdx, %r8
	addq	%rdi, %rdx
	andq	$-128, %rdx
	// With n > 256 the bounds above give %rdx - %rcx >= 128, so this
	// branch is never taken on this path; it is kept as a guard so the
	// jne-terminated loops below can never start with an empty range.
	cmpq	%rcx, %rdx
	je	L(done)

	// Fills larger than __x86_shared_cache_size use non-temporal stores.
	// NOTE(review): presumably the shared-cache size in bytes -- the symbol
	// is defined outside this file; confirm.
	cmp	__x86_shared_cache_size(%rip), %r8

	ja	L(non_temporal_loop)

	ALIGN (4)
L(normal_loop):
	// 128 bytes per iteration; %rcx is 128-byte aligned, so the aligned
	// store form is valid at every offset used here.
	vmovdqa	%ymm0, (%rcx)
	vmovdqa	%ymm0, 32(%rcx)
	vmovdqa	%ymm0, 64(%rcx)
	vmovdqa	%ymm0, 96(%rcx)
	addq	$128, %rcx
	cmpq	%rcx, %rdx
	jne	L(normal_loop)
	jmp	L(done)

	ALIGN (4)
L(non_temporal_loop):
	// 128 bytes per iteration with non-temporal stores.  The VEX-encoded
	// 32-byte form is used because the upper half of %ymm0 is live here
	// (written by vpbroadcastb above); a legacy-SSE store at this point
	// would mix SSE and AVX encodings inside the loop.  %rcx is 128-byte
	// aligned, which satisfies the 32-byte alignment these stores require.
	vmovntdq	%ymm0, (%rcx)
	vmovntdq	%ymm0, 32(%rcx)
	vmovntdq	%ymm0, 64(%rcx)
	vmovntdq	%ymm0, 96(%rcx)
	leaq	128(%rcx), %rcx
	cmpq	%rcx, %rdx
	jne	L(non_temporal_loop)
	// We used non-temporal stores, so we need a fence here.
	sfence

L(done):
	// We used the ymm registers, and that can break SSE2 performance
	// unless you do this.
	vzeroupper
	ret

END(memset_avx2)