@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/


@******************************************************************************
@*
@*
@* @brief
@*  This file contains definitions of routines for variance calculation
@*
@* @author
@*  Ittiam
@*
@* @par List of Functions:
@*  - icv_variance_8x4_a9()
@*
@* @remarks
@*  Syntax : GNU assembler, ARM (AArch32) with NEON; '@' starts a comment.
@*
@*******************************************************************************


@******************************************************************************
@*
@* @brief computes variance of a 8x4 block
@*
@*
@* @par Description
@*  This function computes the variance of an 8x4 block of unsigned 8-bit
@*  samples using integer arithmetic only:
@*
@*      variance = (32 * SumOfSquares - Sum * Sum) >> 10
@*
@*  where 32 = 8 * 4 is the number of samples and 1 << 10 = 32 * 32.
@*  The result is truncated, not rounded.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source (r0)
@*
@* @param[in] src_strd
@*  integer source stride, added to the pointer after each row (r1)
@*
@* @param[in] wd
@*  Width (assumed to be 8); not read by this routine
@*
@* @param[in] ht
@*  Height (assumed to be 4); not read by this routine
@*
@* @returns
@*  variance value in r0
@*
@* @remarks
@*  Registers written : r0, r1, r3, d0-d7 (q0-q3), d20-d27 (q10-q13)
@*  Stack usage       : none (leaf routine, returns with bx lr)
@*  Value ranges      : Sum          <= 32 * 255       =     8160 (fits u16)
@*                      each square  <= 255 * 255      =    65025 (fits u16)
@*                      32 * SumSq   <= 32 * 32 * 65025 = 66585600 (fits u32)
@*
@******************************************************************************

    .text
    .p2align 2

    .global icv_variance_8x4_a9
    @ Type the symbol as a function so that tools (and the linker, when it
    @ has to decide on ARM/Thumb interworking) treat it as code.
    .type   icv_variance_8x4_a9, %function

icv_variance_8x4_a9:

    @ Load 8x4 source: one 8-byte row per D register; r0 is post-incremented
    @ by the stride in r1 after every row.
    vld1.8      {d0}, [r0], r1              @ row 0
    vld1.8      {d1}, [r0], r1              @ row 1
    vld1.8      {d2}, [r0], r1              @ row 2
    vld1.8      {d3}, [r0], r1              @ row 3

    @ Calculate Sum(values)
    vaddl.u8    q2, d0, d1                  @ q2 = row0 + row1, widened to 8 x u16
    vaddl.u8    q3, d2, d3                  @ q3 = row2 + row3, widened to 8 x u16
    vadd.u16    q2, q2, q3                  @ q2 = 8 column sums

    vadd.u16    d4, d4, d5                  @ fold 8 lanes into 4
    vpadd.u16   d4, d4, d4                  @ fold 4 partial sums into 2
    vpadd.u16   d4, d4, d4                  @ every lane of d4 = Sum(values)

    @ Calculate SumOfSquares
    vmull.u8    q10, d0, d0                 @ row0 squared, 8 x u16
    vmull.u8    q11, d1, d1                 @ row1 squared
    vmull.u8    q12, d2, d2                 @ row2 squared
    vmull.u8    q13, d3, d3                 @ row3 squared

    @ Widen to 32 bits while adding the two halves of each row: the 16-bit
    @ lanes are already close to full scale, so further adds need u32.
    vaddl.u16   q10, d20, d21               @ row0: 4 x u32
    vaddl.u16   q11, d22, d23               @ row1: 4 x u32
    vaddl.u16   q12, d24, d25               @ row2: 4 x u32
    vaddl.u16   q13, d26, d27               @ row3: 4 x u32

    vadd.u32    q10, q10, q11               @ rows 0 + 1
    vadd.u32    q11, q12, q13               @ rows 2 + 3
    vadd.u32    q10, q10, q11               @ all rows, 4 partial sums
    vadd.u32    d20, d20, d21               @ fold 4 lanes into 2
    vpadd.u32   d20, d20, d20               @ both lanes of d20 = SumOfSquares

    @ Sum(values)
    vmov.u16    r0, d4[0]                   @ zero-extended 16-bit lane

    @ SumOfSquares
    vmov.u32    r1, d20[0]

    @ SquareOfSums
    mul         r3, r0, r0

    @ SumOfSquares * 8 * 4 - SquareOfSums
    @ (never negative: N * sum(x^2) >= (sum x)^2 for any N samples)
    rsb         r0, r3, r1, LSL #5

    @ Divide by 32 * 32
    mov         r0, r0, ASR #10

    @ Leaf routine: nothing was pushed, so return straight through lr.
    bx          lr

    .size   icv_variance_8x4_a9, . - icv_variance_8x4_a9