/*
 * Copyright © 2019, VideoLAN and dav1d authors
 * Copyright © 2019, Martin Storsjo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "src/arm/asm.S"
#include "util.S"

// Byte offsets of the decoder-context fields that the routines in this file
// access relative to x0.
#define BUF_POS 0
#define BUF_END 8
#define DIF 16
#define RNG 24
#define CNT 28
#define ALLOW_UPDATE_CDF 32

// COEFFS_BASE_OFFSET is the byte offset of the 16th short (index 15, value 0)
// of the first row of 'coeffs'. Stepping back 2*n_symbols bytes from it yields
// a vector starting at 4*n_symbols and counting down in steps of 4 to 0.
// MASKS8_OFFSET is the distance from that base to the start of the 'masks8'
// row (byte 64 of the table).
#define COEFFS_BASE_OFFSET 30
#define MASKS8_OFFSET (64-COEFFS_BASE_OFFSET)

const coeffs
        // Descending multiples of 4, followed by a row of zero padding so that
        // a 16-entry load starting anywhere in the first row stays in bounds.
        .short 60, 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4, 0
        .short 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        // masks8: per-lane constants that are masked with the compare result
        // and summed with addv in the n == 8 symbol decoder.
        .short -0x202, -0x202, -0x202, -0x202, -0x202, -0x202, -0x202, 0xF0E
endconst

// ld1_n: load one vector (n <= 8) or two vectors (n > 8) with arrangement
// \sz from [\src] into \d0 (and \d1).
.macro ld1_n d0, d1, src, sz, n
.if \n <= 8
        ld1             {\d0\sz}, [\src]
.else
        ld1             {\d0\sz, \d1\sz}, [\src]
.endif
.endm

// st1_n: store one vector (n <= 8) or two vectors (n > 8) with arrangement
// \sz from \s0 (and \s1) to [\dst].
.macro st1_n s0, s1, dst, sz, n
.if \n <= 8
        st1             {\s0\sz}, [\dst]
.else
        st1             {\s0\sz, \s1\sz}, [\dst]
.endif
.endm

// ushr_n: \d0 = \s0 >> \shift (unsigned); the second register pair is only
// processed when n == 16.
.macro ushr_n d0, d1, s0, s1, shift, sz, n
        ushr            \d0\sz, \s0\sz, \shift
.if \n == 16
        ushr            \d1\sz, \s1\sz, \shift
.endif
.endm

// add_n: \d0 = \s0 + \s2; additionally \d1 = \s1 + \s3 when n == 16.
.macro add_n d0, d1, s0, s1, s2, s3, sz, n
        add             \d0\sz, \s0\sz, \s2\sz
.if \n == 16
        add             \d1\sz, \s1\sz, \s3\sz
.endif
.endm

// sub_n: \d0 = \s0 - \s2; additionally \d1 = \s1 - \s3 when n == 16.
.macro sub_n d0, d1, s0, s1, s2, s3, sz, n
        sub             \d0\sz, \s0\sz, \s2\sz
.if \n == 16
        sub             \d1\sz, \s1\sz, \s3\sz
.endif
.endm

// and_n: \d0 = \s0 & \s2; additionally \d1 = \s1 & \s3 when n == 16.
.macro and_n d0, d1, s0, s1, s2, s3, sz, n
        and             \d0\sz, \s0\sz, \s2\sz
.if \n == 16
        and             \d1\sz, \s1\sz, \s3\sz
.endif
.endm

// cmhs_n: unsigned per-lane compare \d0 = (\s0 >= \s2) ? -1 : 0; additionally
// \d1 = (\s1 >= \s3) ? -1 : 0 when n == 16.
.macro cmhs_n d0, d1, s0, s1, s2, s3, sz, n
        cmhs            \d0\sz, \s0\sz, \s2\sz
.if \n == 16
        cmhs            \d1\sz, \s1\sz, \s3\sz
.endif
.endm

// sshl_n: signed per-lane shift of \s0 by \s2 into \d0 (negative amounts
// shift right); additionally \s1 by \s3 into \d1 when n == 16.
.macro sshl_n d0, d1, s0, s1, s2, s3, sz, n
        sshl            \d0\sz, \s0\sz, \s2\sz
.if \n == 16
        sshl            \d1\sz, \s1\sz, \s3\sz
.endif
.endm

// sqdmulh_n: \d0 = sqdmulh(\s0, \s2); additionally \d1 = sqdmulh(\s1, \s3)
// when n == 16.
.macro sqdmulh_n d0, d1, s0, s1, s2, s3, sz, n
        sqdmulh         \d0\sz, \s0\sz, \s2\sz
.if \n == 16
        sqdmulh         \d1\sz, \s1\sz, \s3\sz
.endif
.endm

// str_n: store \idx0 at [\dstreg, \dstoff]; additionally store \idx1 16 bytes
// further on when n == 16.
.macro str_n idx0, idx1, dstreg, dstoff, n
        str             \idx0, [\dstreg, \dstoff]
.if \n == 16
        str             \idx1, [\dstreg, \dstoff + 16]
.endif
.endm

// unsigned dav1d_msac_decode_symbol_adapt4_neon(MsacContext *s, uint16_t *cdf,
//                                               size_t n_symbols);

function msac_decode_symbol_adapt4_neon, export=1
// decode_update sz, szb, n
//   sz  - halfword arrangement used for the cdf vectors (.4h or .8h)
//   szb - matching byte arrangement (.8b or .16b)
//   n   - number of cdf lanes processed: 4, 8 or 16
//
// Expects x0 = s, x1 = cdf, x2 = n_symbols (see the prototype above).
// The macro computes the candidate 'v' values for all lanes at once, compares
// them against the top 16 bits of dif to find the decoded symbol, optionally
// adapts the cdf (when the ALLOW_UPDATE_CDF field is non-zero) and then
// renormalizes rng/dif/cnt. While decoding, w15 holds the symbol in an
// n-specific encoding: 16*ret for n == 4, 2*ret for n == 8 and ret - n for
// n == 16; it is converted to ret just before returning or before branching
// to L(refill). For n == 16 a 48 byte stack scratch area holds the original
// rng (at sp+14) followed by the 16 'v' values (from sp+16) so that u and v
// can be fetched by index.
.macro decode_update sz, szb, n
.if \n == 16
        sub             sp, sp, #48
.endif
        add             x8, x0, #RNG
        ld1_n           v0, v1, x1, \sz, \n                    // cdf
        ld1r            {v29\sz}, [x8]                         // rng
        movrel          x9, coeffs, COEFFS_BASE_OFFSET
        movi            v31\sz, #0x7f, lsl #8                  // 0x7f00
        sub             x10, x9, x2, lsl #1
        mvni            v30\sz, #0x3f                          // 0xffc0
        and             v7\szb, v29\szb, v31\szb               // rng & 0x7f00
.if \n == 16
        str             h29, [sp, #14]                         // store original u = s->rng
.endif
        and_n           v2, v3, v0, v1, v30, v30, \szb, \n     // cdf & 0xffc0

        ld1_n           v4, v5, x10, \sz, \n                   // EC_MIN_PROB * (n_symbols - ret)
        sqdmulh_n       v6, v7, v2, v3, v7, v7, \sz, \n        // ((cdf >> EC_PROB_SHIFT) * (r - 128)) >> 1
        ldr             d28, [x0, #DIF]

        add_n           v4, v5, v2, v3, v4, v5, \sz, \n        // v = cdf + EC_MIN_PROB * (n_symbols - ret)
        add_n           v4, v5, v6, v7, v4, v5, \sz, \n        // v = ((cdf >> EC_PROB_SHIFT) * r) >> 1 + EC_MIN_PROB * (n_symbols - ret)

        dup             v30\sz, v28.h[3]                       // dif >> (EC_WIN_SIZE - 16)
.if \n == 8
        ldur            q31, [x9, #MASKS8_OFFSET]
.elseif \n == 16
        str_n           q4, q5, sp, #16, \n                    // store v values to allow indexed access
.endif

        // After the condition starts being true it continues, such that the vector looks like:
        // 0, 0, 0 ... -1, -1
        cmhs_n          v2, v3, v30, v30, v4, v5, \sz, \n      // c >= v
.if \n == 4
        ext             v29\szb, v29\szb, v4\szb, #6           // u
        umov            x15, v2.d[0]
        ldr             w4, [x0, #ALLOW_UPDATE_CDF]
        rev             x15, x15
        sub             v29\sz, v29\sz, v4\sz                  // rng = u-v
        // rev + clz = count trailing zeros
        clz             x15, x15                               // 16*ret
.elseif \n == 8
        // The final short of the compare is always set.
        // Using addv, subtract -0x202*ret from this value to create a lookup table for a short.
        // For n == 8:
        //     -0x202 + -0x202 + ... + 0xF0E
        //                             (0x202*7) | (1 << 8)
        //                                          ^-------offset for second byte of the short
        and             v31\szb, v31\szb, v2\szb
        ext             v29\szb, v29\szb, v4\szb, #14          // u
        addv            h31, v31\sz                            // ((2*ret + 1) << 8) | (2*ret)
        ldr             w4, [x0, #ALLOW_UPDATE_CDF]
        sub             v30\sz, v30\sz, v4\sz                  // (dif >> 48) - v
        smov            w15, v31.b[0]                          // 2*ret
        sub             v29\sz, v29\sz, v4\sz                  // rng = u-v
.elseif \n == 16
        add             v6\sz, v2\sz, v3\sz
        addv            h31, v6\sz                             // -n + ret
        ldr             w4, [x0, #ALLOW_UPDATE_CDF]
        smov            w15, v31.h[0]
.endif

        cbz             w4, 0f

        // update_cdf
        ldrh            w3, [x1, x2, lsl #1]                   // count = cdf[n_symbols]
.if \n == 16
        // 16 case has a lower bound that guarantees n_symbols > 2
        mov             w4, #-5
.elseif \n == 8
        mvn             w14, w2
        mov             w4, #-4
        cmn             w14, #3                                // set C if n_symbols <= 2
.else
        // if n_symbols < 4 (or < 6 even) then
        // (1 + n_symbols) >> 2 == n_symbols > 2
        add             w14, w2, #17                           // (1 + n_symbols) + (4 << 2)
.endif
        sub_n           v16, v17, v0, v1, v2, v3, \sz, \n      // cdf + (i >= val ? 1 : 0)
        orr             v2\sz, #0x80, lsl #8
.if \n == 16
        orr             v3\sz, #0x80, lsl #8
.endif
.if \n == 16
        sub             w4, w4, w3, lsr #4                     // -((count >> 4) + 5)
.elseif \n == 8
        lsr             w14, w3, #4                            // count >> 4
        sbc             w4, w4, w14                            // -((count >> 4) + (n_symbols > 2) + 4)
.else
        neg             w4, w14, lsr #2                        // -((n_symbols > 2) + 4)
        sub             w4, w4, w3, lsr #4                     // -((count >> 4) + (n_symbols > 2) + 4)
.endif
        sub_n           v2, v3, v2, v3, v0, v1, \sz, \n        // (32768 - cdf[i]) or (-1 - cdf[i])
        dup             v6\sz, w4                              // -rate

        sub             w3, w3, w3, lsr #5                     // count - (count == 32)
        sshl_n          v2, v3, v2, v3, v6, v6, \sz, \n        // ({32768,-1} - cdf[i]) >> rate
        add             w3, w3, #1                             // count + (count < 32)
        add_n           v0, v1, v16, v17, v2, v3, \sz, \n      // cdf + (32768 - cdf[i]) >> rate
        st1_n           v0, v1, x1, \sz, \n
        strh            w3, [x1, x2, lsl #1]

0:
        // renorm
.if \n == 4
        ldr             w6, [x0, #CNT]
        ldr             x7, [x0, #DIF]
        mov             x4, v29.d[0]                           // rng (packed)
        mov             x3, v4.d[0]                            // v (packed)

        // Shift 'v'/'rng' for ret into the 16 least sig bits. There is
        // garbage in the remaining bits, but we can work around this.
        lsr             x4, x4, x15                            // rng
        lsr             x3, x3, x15                            // v
        lsl             w5, w4, #16                            // rng << 16
        sub             x7, x7, x3, lsl #48                    // dif - (v << 48)
        clz             w5, w5                                 // d = clz(rng << 16)
        lsl             w4, w4, w5                             // rng << d
        subs            w6, w6, w5                             // cnt -= d
        lsl             x7, x7, x5                             // (dif - (v << 48)) << d
        strh            w4, [x0, #RNG]
        b.lo            1f
        str             w6, [x0, #CNT]
        str             x7, [x0, #DIF]
        lsr             w0, w15, #4
        ret
1:
        lsr             w15, w15, #4
        b               L(refill)
.elseif \n == 8
        ldr             w6, [x0, #CNT]
        tbl             v30.8b, {v30.16b}, v31.8b
        tbl             v29.8b, {v29.16b}, v31.8b
        ins             v28.h[3], v30.h[0]                     // dif - (v << 48)
        clz             v0.4h, v29.4h                          // d = clz(rng)
        umov            w5, v0.h[0]
        ushl            v29.4h, v29.4h, v0.4h                  // rng << d

        // The vec for clz(rng) is filled with garbage after the first short,
        // but ushl/sshl conveniently uses only the first byte for the shift
        // amount.
        ushl            d28, d28, d0                           // (dif - (v << 48)) << d

        subs            w6, w6, w5                             // cnt -= d
        str             h29, [x0, #RNG]
        b.lo            1f
        str             w6, [x0, #CNT]
        str             d28, [x0, #DIF]
        lsr             w0, w15, #1                            // ret
        ret
1:
        lsr             w15, w15, #1                           // ret
        mov             x7, v28.d[0]                           // L(refill) expects dif in x7
        b               L(refill)
.elseif \n == 16
        add             x8, sp, w15, sxtw #1                   // w15 = ret - n, so x8 = sp + 2*(ret - 16)
        ldrh            w3, [x8, #48]                          // v
        ldurh           w4, [x8, #46]                          // u
        ldr             w6, [x0, #CNT]
        ldr             x7, [x0, #DIF]
        sub             w4, w4, w3                             // rng = u - v
        clz             w5, w4                                 // clz(rng)
        eor             w5, w5, #16                            // d = clz(rng) ^ 16
        sub             x7, x7, x3, lsl #48                    // dif - (v << 48)
        lsl             w4, w4, w5                             // rng << d
        subs            w6, w6, w5                             // cnt -= d
        lsl             x7, x7, x5                             // (dif - (v << 48)) << d
        str             w4, [x0, #RNG]
        add             sp, sp, #48                            // release the scratch area; add does not alter the flags from subs
        b.lo            1f
        str             w6, [x0, #CNT]
        str             x7, [x0, #DIF]
        add             w0, w15, #\n                           // ret
        ret
1:
        add             w15, w15, #\n                          // ret
        b               L(refill)
.endif
.endm

        decode_update   .4h, .8b, 4

// Shared refill tail, branched to by the decoders in this file when the
// 'cnt -= d' subtraction borrows. Expects x0 = s, w6 = cnt, x7 = dif and
// w15 = the value to return. Reads new input bytes between BUF_POS and
// BUF_END, ORs them (inverted and byte swapped) into dif, stores BUF_POS,
// CNT and DIF back to the context and returns w15 in w0.
L(refill):
        // refill
        ldp             x3, x4, [x0]                           // BUF_POS, BUF_END
        add             x5, x3, #8
        subs            x5, x5, x4
        b.hi            6f                                     // fewer than 8 bytes left before BUF_END

        ldr             x8, [x3]                               // next_bits
        add             w4, w6, #-48                           // shift_bits = cnt + 16 (- 64)
        mvn             x8, x8
        neg             w5, w4
        rev             x8, x8                                 // next_bits = bswap(next_bits)
        lsr             w5, w5, #3                             // num_bytes_read
        lsr             x8, x8, x4                             // next_bits >>= (shift_bits & 63)

2:      // refill_end
        add             x3, x3, x5
        add             w6, w6, w5, lsl #3                     // cnt += num_bits_read
        str             x3, [x0, #BUF_POS]

3:      // refill_end2
        orr             x7, x7, x8                             // dif |= next_bits

4:      // end
        str             w6, [x0, #CNT]
        str             x7, [x0, #DIF]

        mov             w0, w15
        ret

5:      // pad_with_ones
        add             w8, w6, #-16
        ror             x8, x8, x8
        b               3b

6:      // refill_eob
        cmp             x3, x4
        b.hs            5b                                     // BUF_POS >= BUF_END: no input bytes left

        ldr             x8, [x4, #-8]                          // load the last 8 bytes before BUF_END
        lsl             w5, w5, #3
        lsr             x8, x8, x5
        add             w5, w6, #-48
        mvn             x8, x8
        sub             w4, w4, w3                             // num_bytes_left
        rev             x8, x8
        lsr             x8, x8, x5
        neg             w5, w5
        lsr             w5, w5, #3
        cmp             w5, w4
        csel            w5, w5, w4, lo                         // num_bytes_read
        b               2b
endfunc

// Same decoder as msac_decode_symbol_adapt4_neon (x0 = s, x1 = cdf,
// x2 = n_symbols), instantiated for 8 cdf lanes.
function msac_decode_symbol_adapt8_neon, export=1
        decode_update   .8h, .16b, 8
endfunc

// Same decoder as msac_decode_symbol_adapt4_neon (x0 = s, x1 = cdf,
// x2 = n_symbols), instantiated for 16 cdf lanes.
function msac_decode_symbol_adapt16_neon, export=1
        decode_update   .8h, .16b, 16
endfunc

// msac_decode_hi_tok_neon: x0 = decoder context, x1 = cdf (4 halfwords, with
// the adaptation count stored at byte offset 6).
// Runs the 4-lane symbol decode (including cdf adaptation, renormalization and
// an inlined copy of the refill sequence) in a loop. w13 starts at -24*8 and
// accumulates 5*8 plus (16*ret - 5*8) per iteration; the loop ends when that
// accumulation produces a carry. Returns (w13 + 30*8) >> 4 in w0.
function msac_decode_hi_tok_neon, export=1
        ld1             {v0.4h}, [x1]                          // cdf
        add             x16, x0, #RNG
        movi            v31.4h, #0x7f, lsl #8                  // 0x7f00
        movrel          x17, coeffs, COEFFS_BASE_OFFSET-2*3
        mvni            v30.4h, #0x3f                          // 0xffc0
        ldrh            w9, [x1, #6]                           // count = cdf[n_symbols]
        ld1r            {v3.4h}, [x16]                         // rng
        ld1             {v29.4h}, [x17]                        // EC_MIN_PROB * (n_symbols - ret)
        add             x17, x0, #DIF + 6
        mov             w13, #-24*8
        and             v17.8b, v0.8b, v30.8b                  // cdf & 0xffc0
        ldr             w10, [x0, #ALLOW_UPDATE_CDF]
        ld1r            {v1.8h}, [x17]                         // dif >> (EC_WIN_SIZE - 16)
        ldr             w6, [x0, #CNT]
        ldr             x7, [x0, #DIF]
1:
        and             v7.8b, v3.8b, v31.8b                   // rng & 0x7f00
        sqdmulh         v6.4h, v17.4h, v7.4h                   // ((cdf >> EC_PROB_SHIFT) * (r - 128)) >> 1
        add             v4.4h, v17.4h, v29.4h                  // v = cdf + EC_MIN_PROB * (n_symbols - ret)
        add             v4.4h, v6.4h, v4.4h                    // v = ((cdf >> EC_PROB_SHIFT) * r) >> 1 + EC_MIN_PROB * (n_symbols - ret)
        cmhs            v2.4h, v1.4h, v4.4h                    // c >= v
        add             w13, w13, #5*8
        ext             v18.8b, v3.8b, v4.8b, #6               // u
        umov            x15, v2.d[0]
        rev             x15, x15
        sub             v18.4h, v18.4h, v4.4h                  // rng = u-v
        // rev + clz = count trailing zeros
        clz             x15, x15                               // 16*ret

        cbz             w10, 2f
        // update_cdf
        sub             v5.4h, v0.4h, v2.4h                    // cdf[i] + (i >= val ? 1 : 0)
        mov             w4, #-5
        orr             v2.4h, #0x80, lsl #8                   // i >= val ? -1 : 32768
        sub             w4, w4, w9, lsr #4                     // -((count >> 4) + 5)
        sub             v2.4h, v2.4h, v0.4h                    // (32768 - cdf[i]) or (-1 - cdf[i])
        dup             v6.4h, w4                              // -rate

        sub             w9, w9, w9, lsr #5                     // count - (count == 32)
        sshl            v2.4h, v2.4h, v6.4h                    // ({32768,-1} - cdf[i]) >> rate
        add             w9, w9, #1                             // count + (count < 32)
        add             v0.4h, v5.4h, v2.4h                    // cdf[i] + (32768 - cdf[i]) >> rate
        st1             {v0.4h}, [x1]
        and             v17.8b, v0.8b, v30.8b                  // cdf & 0xffc0
        strh            w9, [x1, #6]

2:
        mov             x4, v18.d[0]                           // rng (packed)
        mov             x3, v4.d[0]                            // v (packed)

        // Shift 'v'/'rng' for ret into the 16 least sig bits. There is
        // garbage in the remaining bits, but we can work around this.
        lsr             x4, x4, x15                            // rng
        lsr             x3, x3, x15                            // v
        lsl             w5, w4, #16                            // rng << 16
        sub             x7, x7, x3, lsl #48                    // dif - (v << 48)
        clz             w5, w5                                 // d = clz(rng << 16)
        lsl             w4, w4, w5                             // rng << d
        subs            w6, w6, w5                             // cnt -= d
        lsl             x7, x7, x5                             // (dif - (v << 48)) << d
        strh            w4, [x0, #RNG]
        dup             v3.4h, w4                              // rng for the next loop iteration
        b.hs            5f

        // refill
        ldp             x3, x4, [x0]                           // BUF_POS, BUF_END
        add             x5, x3, #8
        subs            x5, x5, x4
        b.hi            7f

        ldr             x8, [x3]                               // next_bits
        add             w4, w6, #-48                           // shift_bits = cnt + 16 (- 64)
        mvn             x8, x8
        neg             w5, w4
        rev             x8, x8                                 // next_bits = bswap(next_bits)
        lsr             w5, w5, #3                             // num_bytes_read
        lsr             x8, x8, x4                             // next_bits >>= (shift_bits & 63)

3:      // refill_end
        add             x3, x3, x5
        add             w6, w6, w5, lsl #3                     // cnt += num_bits_read
        str             x3, [x0, #BUF_POS]

4:      // refill_end2
        orr             x7, x7, x8                             // dif |= next_bits

5:      // end
        sub             w15, w15, #5*8
        lsr             x12, x7, #48
        adds            w13, w13, w15                          // carry = tok_br < 3 || tok == 15
        dup             v1.8h, w12                             // dif >> 48 for the next loop iteration
        b.cc            1b                                     // loop if !carry
        add             w13, w13, #30*8
        str             w6, [x0, #CNT]
        str             x7, [x0, #DIF]
        lsr             w0, w13, #4
        ret

6:      // pad_with_ones
        add             w8, w6, #-16
        ror             x8, x8, x8
        b               4b

7:      // refill_eob
        cmp             x3, x4
        b.hs            6b

        ldr             x8, [x4, #-8]
        lsl             w5, w5, #3
        lsr             x8, x8, x5
        add             w5, w6, #-48
        mvn             x8, x8
        sub             w4, w4, w3                             // num_bytes_left
        rev             x8, x8
        lsr             x8, x8, x5
        neg             w5, w5
        lsr             w5, w5, #3
        cmp             w5, w4
        csel            w5, w5, w4, lo                         // num_bytes_read
        b               3b
endfunc

// msac_decode_bool_equi_neon: x0 = decoder context.
// Uses v = ((rng & ~0xff) + 8) >> 1 as the split point. Returns 1 in w0 when
// dif < (v << 48) (unsigned), otherwise 0; in the latter case dif is reduced
// by (v << 48) and the new range is r - v instead of v. Renormalizes and
// continues in L(refill) when cnt underflows.
function msac_decode_bool_equi_neon, export=1
        ldp             w5, w6, [x0, #RNG]                     // + CNT
        ldr             x7, [x0, #DIF]
        bic             w4, w5, #0xff                          // r &= 0xff00
        add             w4, w4, #8
        subs            x8, x7, x4, lsl #47                    // dif - vw
        lsr             w4, w4, #1                             // v
        sub             w5, w5, w4                             // r - v
        cset            w15, lo
        csel            w4, w5, w4, hs                         // if (ret) v = r - v;
        csel            x7, x8, x7, hs                         // if (ret) dif = dif - vw;

        clz             w5, w4                                 // clz(rng)
        eor             w5, w5, #16                            // d = clz(rng) ^ 16
        lsl             w4, w4, w5                             // rng << d
        subs            w6, w6, w5                             // cnt -= d
        lsl             x7, x7, x5                             // (dif - (v << 48)) << d
        str             w4, [x0, #RNG]
        b.lo            L(refill)

        str             w6, [x0, #CNT]
        str             x7, [x0, #DIF]
        mov             w0, w15
        ret
endfunc

// msac_decode_bool_neon: x0 = decoder context, w1 = f.
// Uses v = (((rng >> 8) * (f & ~63)) >> 7) + 4 as the split point. Returns 1
// in w0 when dif < (v << 48) (unsigned), otherwise 0; in the latter case dif
// is reduced by (v << 48) and the new range is r - v instead of v.
// Renormalizes and continues in L(refill) when cnt underflows.
function msac_decode_bool_neon, export=1
        ldp             w5, w6, [x0, #RNG]                     // + CNT
        ldr             x7, [x0, #DIF]
        lsr             w4, w5, #8                             // r >> 8
        bic             w1, w1, #0x3f                          // f &= ~63
        mul             w4, w4, w1
        lsr             w4, w4, #7
        add             w4, w4, #4                             // v
        subs            x8, x7, x4, lsl #48                    // dif - vw
        sub             w5, w5, w4                             // r - v
        cset            w15, lo
        csel            w4, w5, w4, hs                         // if (ret) v = r - v;
        csel            x7, x8, x7, hs                         // if (ret) dif = dif - vw;

        clz             w5, w4                                 // clz(rng)
        eor             w5, w5, #16                            // d = clz(rng) ^ 16
        lsl             w4, w4, w5                             // rng << d
        subs            w6, w6, w5                             // cnt -= d
        lsl             x7, x7, x5                             // (dif - (v << 48)) << d
        str             w4, [x0, #RNG]
        b.lo            L(refill)

        str             w6, [x0, #CNT]
        str             x7, [x0, #DIF]
        mov             w0, w15
        ret
Build Coastguard Workerendfunc 545*c0909341SAndroid Build Coastguard Worker 546*c0909341SAndroid Build Coastguard Workerfunction msac_decode_bool_adapt_neon, export=1 547*c0909341SAndroid Build Coastguard Worker ldr w9, [x1] // cdf[0-1] 548*c0909341SAndroid Build Coastguard Worker ldp w5, w6, [x0, #RNG] // + CNT 549*c0909341SAndroid Build Coastguard Worker ldr x7, [x0, #DIF] 550*c0909341SAndroid Build Coastguard Worker lsr w4, w5, #8 // r >> 8 551*c0909341SAndroid Build Coastguard Worker and w2, w9, #0xffc0 // f &= ~63 552*c0909341SAndroid Build Coastguard Worker mul w4, w4, w2 553*c0909341SAndroid Build Coastguard Worker lsr w4, w4, #7 554*c0909341SAndroid Build Coastguard Worker add w4, w4, #4 // v 555*c0909341SAndroid Build Coastguard Worker subs x8, x7, x4, lsl #48 // dif - vw 556*c0909341SAndroid Build Coastguard Worker sub w5, w5, w4 // r - v 557*c0909341SAndroid Build Coastguard Worker cset w15, lo 558*c0909341SAndroid Build Coastguard Worker csel w4, w5, w4, hs // if (ret) v = r - v; 559*c0909341SAndroid Build Coastguard Worker csel x7, x8, x7, hs // if (ret) dif = dif - vw; 560*c0909341SAndroid Build Coastguard Worker 561*c0909341SAndroid Build Coastguard Worker ldr w10, [x0, #ALLOW_UPDATE_CDF] 562*c0909341SAndroid Build Coastguard Worker 563*c0909341SAndroid Build Coastguard Worker clz w5, w4 // clz(rng) 564*c0909341SAndroid Build Coastguard Worker eor w5, w5, #16 // d = clz(rng) ^ 16 565*c0909341SAndroid Build Coastguard Worker 566*c0909341SAndroid Build Coastguard Worker cbz w10, 1f 567*c0909341SAndroid Build Coastguard Worker 568*c0909341SAndroid Build Coastguard Worker lsr w2, w9, #16 // count = cdf[1] 569*c0909341SAndroid Build Coastguard Worker and w9, w9, #0xffff // cdf[0] 570*c0909341SAndroid Build Coastguard Worker 571*c0909341SAndroid Build Coastguard Worker sub w3, w2, w2, lsr #5 // count - (count >= 32) 572*c0909341SAndroid Build Coastguard Worker lsr w2, w2, #4 // count >> 4 573*c0909341SAndroid Build Coastguard Worker add w10, w3, #1 // 
count + (count < 32) 574*c0909341SAndroid Build Coastguard Worker add w2, w2, #4 // rate = (count >> 4) | 4 575*c0909341SAndroid Build Coastguard Worker 576*c0909341SAndroid Build Coastguard Worker sub w9, w9, w15 // cdf[0] -= bit 577*c0909341SAndroid Build Coastguard Worker sub w11, w9, w15, lsl #15 // {cdf[0], cdf[0] - 32769} 578*c0909341SAndroid Build Coastguard Worker asr w11, w11, w2 // {cdf[0], cdf[0] - 32769} >> rate 579*c0909341SAndroid Build Coastguard Worker sub w9, w9, w11 // cdf[0] 580*c0909341SAndroid Build Coastguard Worker 581*c0909341SAndroid Build Coastguard Worker strh w9, [x1] 582*c0909341SAndroid Build Coastguard Worker strh w10, [x1, #2] 583*c0909341SAndroid Build Coastguard Worker 584*c0909341SAndroid Build Coastguard Worker1: 585*c0909341SAndroid Build Coastguard Worker lsl w4, w4, w5 // rng << d 586*c0909341SAndroid Build Coastguard Worker subs w6, w6, w5 // cnt -= d 587*c0909341SAndroid Build Coastguard Worker lsl x7, x7, x5 // (dif - (v << 48)) << d 588*c0909341SAndroid Build Coastguard Worker str w4, [x0, #RNG] 589*c0909341SAndroid Build Coastguard Worker b.lo L(refill) 590*c0909341SAndroid Build Coastguard Worker 591*c0909341SAndroid Build Coastguard Worker str w6, [x0, #CNT] 592*c0909341SAndroid Build Coastguard Worker str x7, [x0, #DIF] 593*c0909341SAndroid Build Coastguard Worker mov w0, w15 594*c0909341SAndroid Build Coastguard Worker ret 595*c0909341SAndroid Build Coastguard Workerendfunc 596