//******************************************************************************
//*
//* Copyright (C) 2015 The Android Open Source Project
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************
//* Originally developed and contributed by Ittiam Systems Pvt.
//* Ltd, Bangalore
//*/

//******************************************************************************
//*
//*
//* @brief
//*  This file contains definitions of routines for variance calculation
//*
//* @author
//*  Ittiam
//*
//* @par List of Functions:
//*  - icv_variance_8x4_av8()
//*
//* @remarks
//*  None
//*
//*******************************************************************************


//******************************************************************************
//*
//*  @brief computes variance of a 8x4 block
//*
//*
//*  @par   Description
//*   This function computes the variance of an 8x4 block of unsigned 8-bit
//*   samples as (32 * SumOfSquares - Sum * Sum) >> 10, i.e.
//*   E[x^2] - (E[x])^2 with N = 32 and the division by 32 * 32 done as a shift.
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source (x0)
//*
//* @param[in] src_strd
//*  integer source stride (w1). Sign-extended to 64 bits on entry before it
//*  is used for address arithmetic.
//*
//* @param[in] wd
//*  Width (assumed to be 8). Not read by this routine.
//*
//* @param[in] ht
//*  Height (assumed to be 4). Not read by this routine.
//*
//* @returns
//*     variance value in x0
//*
//* @remarks
//*  Leaf routine; does not touch the stack.
//*  Registers written: x0, x1, x3, v0-v4, v6, v20-v22, v24-v26.
//*
//******************************************************************************

    .global icv_variance_8x4_av8

icv_variance_8x4_av8:

    // The stride is documented as an integer argument. Assuming it is passed
    // as a 32-bit int (TODO confirm against the C prototype), the upper half
    // of x1 is unspecified on entry, so widen it before the post-indexed loads
    // consume the full 64-bit register.
    sxtw    x1,         w1

    // Load 8x4 source: one 8-byte row per register, x0 advances by the stride
    ld1     {v0.8b},    [x0],   x1
    ld1     {v1.8b},    [x0],   x1
    ld1     {v2.8b},    [x0],   x1
    ld1     {v3.8b},    [x0],   x1

    // Calculate Sum(values)
    // Widen to 16 bits while adding rows; 32 * 255 = 8160 fits in a lane
    uaddl   v4.8h,      v0.8b,  v1.8b
    uaddl   v6.8h,      v2.8b,  v3.8b
    add     v4.8h,      v4.8h,  v6.8h

    // Pairwise reduce the eight column sums down to lane 0
    addp    v4.8h,      v4.8h,  v4.8h
    addp    v4.4h,      v4.4h,  v4.4h
    addp    v4.4h,      v4.4h,  v4.4h

    // Calculate SumOfSquares
    // 255 * 255 = 65025 still fits in an unsigned 16-bit lane
    umull   v20.8h,     v0.8b,  v0.8b
    umull   v22.8h,     v1.8b,  v1.8b
    umull   v24.8h,     v2.8b,  v2.8b
    umull   v26.8h,     v3.8b,  v3.8b

    // Widen to 32 bits while adding rows (low halves, then high halves)
    uaddl   v21.4s,     v20.4h, v22.4h
    uaddl   v25.4s,     v24.4h, v26.4h
    uaddl2  v20.4s,     v20.8h, v22.8h
    uaddl2  v24.4s,     v24.8h, v26.8h

    // Fold the four partial vectors together, then reduce to lane 0
    add     v20.4s,     v20.4s, v21.4s
    add     v22.4s,     v24.4s, v25.4s
    add     v20.4s,     v20.4s, v22.4s
    addp    v20.4s,     v20.4s, v20.4s
    addp    v20.2s,     v20.2s, v20.2s

    // Sum(values): at most 8160, so the sign-extending move yields the
    // same value as a zero-extending one
    smov    x0,         v4.h[0]

    // SumOfSquares: at most 32 * 65025 = 2080800, positive as a signed word
    smov    x1,         v20.s[0]

    // SquareOfSums
    mul     x3,         x0,     x0

    // SumOfSquares * 8 * 4 - SquareOfSums
    // (computed as SquareOfSums - (SumOfSquares << 5), then negated)
    sub     x1,         x3,     x1,     lsl #5
    neg     x0,         x1

    // Divide by 32 * 32
    asr     x0,         x0,     #10
    ret