xref: /aosp_15_r20/external/libmpeg2/common/armv8/icv_variance_av8.s (revision a97c2a1f0a796dc32bed80d3353c69c5fc07c750)
1*a97c2a1fSXin Li//******************************************************************************
2*a97c2a1fSXin Li//*
3*a97c2a1fSXin Li//* Copyright (C) 2015 The Android Open Source Project
4*a97c2a1fSXin Li//*
5*a97c2a1fSXin Li//* Licensed under the Apache License, Version 2.0 (the "License");
6*a97c2a1fSXin Li//* you may not use this file except in compliance with the License.
7*a97c2a1fSXin Li//* You may obtain a copy of the License at:
8*a97c2a1fSXin Li//*
9*a97c2a1fSXin Li//* http://www.apache.org/licenses/LICENSE-2.0
10*a97c2a1fSXin Li//*
11*a97c2a1fSXin Li//* Unless required by applicable law or agreed to in writing, software
12*a97c2a1fSXin Li//* distributed under the License is distributed on an "AS IS" BASIS,
13*a97c2a1fSXin Li//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*a97c2a1fSXin Li//* See the License for the specific language governing permissions and
15*a97c2a1fSXin Li//* limitations under the License.
16*a97c2a1fSXin Li//*
17*a97c2a1fSXin Li//*****************************************************************************
18*a97c2a1fSXin Li//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*a97c2a1fSXin Li//*/
20*a97c2a1fSXin Li
21*a97c2a1fSXin Li//******************************************************************************
22*a97c2a1fSXin Li//*
23*a97c2a1fSXin Li//*
24*a97c2a1fSXin Li//* @brief
//*  This file contains definitions of routines for variance calculation
26*a97c2a1fSXin Li//*
27*a97c2a1fSXin Li//* @author
28*a97c2a1fSXin Li//*  Ittiam
29*a97c2a1fSXin Li//*
30*a97c2a1fSXin Li//* @par List of Functions:
31*a97c2a1fSXin Li//*  - icv_variance_8x4_av8()
32*a97c2a1fSXin Li//*
33*a97c2a1fSXin Li//* @remarks
34*a97c2a1fSXin Li//*  None
35*a97c2a1fSXin Li//*
36*a97c2a1fSXin Li//*******************************************************************************
37*a97c2a1fSXin Li
38*a97c2a1fSXin Li
//******************************************************************************
//*
//*  @brief computes variance of a 8x4  block
//*
//*
//*  @par   Description
//*   This function computes the variance of an 8x4 block of unsigned bytes as
//*       (32 * SumOfSquares - Sum * Sum) >> 10
//*   i.e. (N * sum(x^2) - sum(x)^2) / (N * N) with N = 32, truncated by the
//*   arithmetic shift.
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source (x0)
//*
//* @param[in] src_strd
//*  integer source stride in bytes (w1). It is sign-extended to 64 bits on
//*  entry before being used as the post-index of the row loads.
//*  NOTE(review): assumes the C prototype declares this as a 32-bit signed
//*  integer - confirm against the header that declares this routine.
//*
//* @param[in] wd
//*  Width (assumed to be 8); not read by this routine
//*
//* @param[in] ht
//*  Height (assumed to be 4); not read by this routine
//*
//* @returns
//*     variance value in x0
//*
//* @remarks
//*  Registers written: x0, x1, x3, v0-v4, v6, v20-v22, v24-v26.
//*  The stack, the flags and the callee-saved registers are not touched.
//*
//******************************************************************************

    .global icv_variance_8x4_av8

icv_variance_8x4_av8:

    // AAPCS64 leaves bits [63:32] of a register that carries a 32-bit
    // argument unspecified, so extend the stride before it is used as a
    // 64-bit post-index below.
    sxtw    x1,     w1

    // Load 8x4 source: one row of 8 bytes per load, stepping by the stride
    ld1     {v0.8b},    [x0],     x1
    ld1     {v1.8b},    [x0],     x1
    ld1     {v2.8b},    [x0],     x1
    ld1     {v3.8b},    [x0],     x1

    // Calculate Sum(values): widen to 16 bits and add the four rows,
    // leaving 8 column sums (each at most 4 * 255) in v4
    uaddl   v4.8h,  v0.8b,  v1.8b
    uaddl   v6.8h,  v2.8b,  v3.8b
    add     v4.8h,  v4.8h,  v6.8h

    // Pairwise reduction 8 -> 4 -> 2 -> 1; lane 0 ends up holding the total
    addp    v4.8h,  v4.8h,  v4.8h
    addp    v4.4h,  v4.4h,  v4.4h
    addp    v4.4h,  v4.4h,  v4.4h

    // Calculate SumOfSquares: each byte squared fits an unsigned 16-bit lane
    umull   v20.8h, v0.8b,  v0.8b
    umull   v22.8h, v1.8b,  v1.8b
    umull   v24.8h, v2.8b,  v2.8b
    umull   v26.8h, v3.8b,  v3.8b

    // Widen to 32 bits while adding rows 0+1 and rows 2+3
    // (uaddl takes the low four lanes, uaddl2 the high four lanes)
    uaddl   v21.4s,    v20.4h,    v22.4h
    uaddl   v25.4s,    v24.4h,    v26.4h
    uaddl2  v20.4s,    v20.8h,    v22.8h
    uaddl2  v24.4s,    v24.8h,    v26.8h

    // Combine the four partial vectors, then reduce 4 -> 2 -> 1 into lane 0
    add     v20.4s,     v20.4s,  v21.4s
    add     v22.4s,     v24.4s,  v25.4s
    add     v20.4s,     v20.4s,  v22.4s
    addp    v20.4s,     v20.4s,  v20.4s
    addp    v20.2s,     v20.2s,  v20.2s

    // Sum(values): at most 32 * 255 = 8160, so the sign extension done by
    // smov never sees a set sign bit
    smov    x0,     v4.h[0]

    // SumOfSquares: at most 32 * 255 * 255 = 2080800, also non-negative
    smov    x1,     v20.s[0]

    // SquareOfSums
    mul     x3,     x0,     x0

    // SumOfSquares * 8 * 4 - SquareOfSums
    // (computed as SquareOfSums - (SumOfSquares << 5), then negated)
    sub     x1,     x3,     x1,        lsl #5
    neg     x0,     x1

    // Divide by 32 * 32

    asr     x0,     x0,     #10
    ret
119