@ xref: /aosp_15_r20/external/libmpeg2/common/arm/icv_variance_a9.s (revision a97c2a1f0a796dc32bed80d3353c69c5fc07c750)
@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/


@******************************************************************************
@*
@*
@* @brief
@*  This file contains definitions of routines for variance calculation
@*
@* @author
@*  Ittiam
@*
@* @par List of Functions:
@*  - icv_variance_8x4_a9()
@*
@* @remarks
@*  None
@*
@*******************************************************************************


@******************************************************************************
@*
@*  @brief Computes the variance of an 8x4 block of 8-bit samples
@*
@*  @par   Description
@*   Loads four rows of eight bytes each, accumulates Sum(x) and Sum(x * x)
@*   over the 32 samples and returns
@*       (32 * Sum(x * x) - Sum(x) * Sum(x)) >> 10
@*   i.e. the variance with the 32 * 32 scale divided out, truncated to an
@*   integer.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source (r0)
@*
@* @param[in] src_strd
@*  integer source stride in bytes (r1); added to the pointer after each row
@*
@* @param[in] wd
@*  Width (assumed to be 8); never read by this routine
@*
@* @param[in] ht
@*  Height (assumed to be 4); never read by this routine
@*
@* @returns
@*  variance value in r0
@*
@* @remarks
@*  Leaf routine: makes no calls and does not use the stack.
@*  Overwrites r0, r1, r3, d0-d7 and d20-d27; condition flags are untouched.
@*
@******************************************************************************

    .text
    .p2align    2
    .global     icv_variance_8x4_a9
    .type       icv_variance_8x4_a9, %function

icv_variance_8x4_a9:

    @ Load the 8x4 source block: one 8-byte row per D register, with r0
    @ post-incremented by src_strd (r1) after every row
    vld1.8      d0,     [r0],   r1
    vld1.8      d1,     [r0],   r1
    vld1.8      d2,     [r0],   r1
    vld1.8      d3,     [r0],   r1

    @ Sum(values): widen to 16 bit while adding row pairs, then add the two
    @ partial results. Worst case is 32 * 255 = 8160, so 16 bits suffice.
    vaddl.u8    q2,     d0,     d1
    vaddl.u8    q3,     d2,     d3
    vadd.u16    q2,     q2,     q3

    @ Fold 8 lanes -> 4 -> 2 -> 1; afterwards every lane of d4 holds the total
    vadd.u16    d4,     d4,     d5
    vpadd.u16   d4,     d4,     d4
    vpadd.u16   d4,     d4,     d4

    @ SumOfSquares: square every sample (8 bit x 8 bit -> 16 bit) ...
    vmull.u8    q10,    d0,     d0
    vmull.u8    q11,    d1,     d1
    vmull.u8    q12,    d2,     d2
    vmull.u8    q13,    d3,     d3

    @ ... widen to 32 bit while adding the low and high half of each row ...
    vaddl.u16   q10,    d20,    d21
    vaddl.u16   q11,    d22,    d23
    vaddl.u16   q12,    d24,    d25
    vaddl.u16   q13,    d26,    d27

    @ ... and reduce the four rows down to a single 32-bit total in d20[0]
    vadd.u32    q10,    q10,    q11
    vadd.u32    q11,    q12,    q13
    vadd.u32    q10,    q10,    q11
    vadd.u32    d20,    d20,    d21
    vpadd.u32   d20,    d20,    d20

    @ r0 = Sum(values), zero-extended from 16 bit
    vmov.u16     r0,    d4[0]

    @ r1 = SumOfSquares
    vmov.u32     r1,    d20[0]

    @ r3 = SquareOfSums
    mul         r3,     r0,     r0

    @ r0 = SumOfSquares * 8 * 4 - SquareOfSums
    rsb         r0,     r3,     r1,     LSL #5

    @ Divide by 32 * 32
    mov         r0,     r0,     ASR #10

    @ Leaf function: lr is never overwritten, so return through it directly
    @ instead of saving and restoring it on the stack
    bx          lr

    .size       icv_variance_8x4_a9, . - icv_variance_8x4_a9