xref: /aosp_15_r20/external/libmpeg2/common/armv8/icv_sad_av8.s (revision a97c2a1f0a796dc32bed80d3353c69c5fc07c750)
1*a97c2a1fSXin Li//******************************************************************************
2*a97c2a1fSXin Li//*
3*a97c2a1fSXin Li//* Copyright (C) 2015 The Android Open Source Project
4*a97c2a1fSXin Li//*
5*a97c2a1fSXin Li//* Licensed under the Apache License, Version 2.0 (the "License");
6*a97c2a1fSXin Li//* you may not use this file except in compliance with the License.
7*a97c2a1fSXin Li//* You may obtain a copy of the License at:
8*a97c2a1fSXin Li//*
9*a97c2a1fSXin Li//* http://www.apache.org/licenses/LICENSE-2.0
10*a97c2a1fSXin Li//*
11*a97c2a1fSXin Li//* Unless required by applicable law or agreed to in writing, software
12*a97c2a1fSXin Li//* distributed under the License is distributed on an "AS IS" BASIS,
13*a97c2a1fSXin Li//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*a97c2a1fSXin Li//* See the License for the specific language governing permissions and
15*a97c2a1fSXin Li//* limitations under the License.
16*a97c2a1fSXin Li//*
17*a97c2a1fSXin Li//*****************************************************************************
18*a97c2a1fSXin Li//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*a97c2a1fSXin Li//*/
20*a97c2a1fSXin Li
21*a97c2a1fSXin Li
22*a97c2a1fSXin Li//******************************************************************************
23*a97c2a1fSXin Li//*
24*a97c2a1fSXin Li//*
25*a97c2a1fSXin Li//* @brief
//*  This file contains definitions of routines for SAD calculation
27*a97c2a1fSXin Li//*
28*a97c2a1fSXin Li//* @author
29*a97c2a1fSXin Li//*  Ittiam
30*a97c2a1fSXin Li//*
31*a97c2a1fSXin Li//* @par List of Functions:
32*a97c2a1fSXin Li//*  - icv_sad_8x4_av8()
33*a97c2a1fSXin Li//*
34*a97c2a1fSXin Li//* @remarks
35*a97c2a1fSXin Li//*  None
36*a97c2a1fSXin Li//*
37*a97c2a1fSXin Li//*******************************************************************************
38*a97c2a1fSXin Li
39*a97c2a1fSXin Li
40*a97c2a1fSXin Li//******************************************************************************
41*a97c2a1fSXin Li//*
42*a97c2a1fSXin Li//*  @brief computes distortion (SAD) between 2 8x4  blocks
43*a97c2a1fSXin Li//*
44*a97c2a1fSXin Li//*
45*a97c2a1fSXin Li//*  @par   Description
//*   This function computes the SAD between two 8x4 blocks.
47*a97c2a1fSXin Li//*
48*a97c2a1fSXin Li//* @param[in] pu1_src
49*a97c2a1fSXin Li//*  UWORD8 pointer to the source
50*a97c2a1fSXin Li//*
//* @param[in] pu1_ref
52*a97c2a1fSXin Li//*  UWORD8 pointer to the reference buffer
53*a97c2a1fSXin Li//*
54*a97c2a1fSXin Li//* @param[in] src_strd
55*a97c2a1fSXin Li//*  integer source stride
56*a97c2a1fSXin Li//*
57*a97c2a1fSXin Li//* @param[in] ref_strd
58*a97c2a1fSXin Li//*  integer reference stride
59*a97c2a1fSXin Li//*
60*a97c2a1fSXin Li//* @param[in] wd
61*a97c2a1fSXin Li//*  Width (assumed to be 8)
62*a97c2a1fSXin Li//*
63*a97c2a1fSXin Li//* @param[in] ht
64*a97c2a1fSXin Li//*  Height (assumed to be 4)
65*a97c2a1fSXin Li//*
66*a97c2a1fSXin Li//* @returns
//*     SAD value in x0
68*a97c2a1fSXin Li//*
69*a97c2a1fSXin Li//* @remarks
70*a97c2a1fSXin Li//*
71*a97c2a1fSXin Li//******************************************************************************
72*a97c2a1fSXin Li
73*a97c2a1fSXin Li    .global icv_sad_8x4_av8
74*a97c2a1fSXin Li
icv_sad_8x4_av8:

    // Sum of absolute differences between an 8x4 source block and an 8x4
    // reference block.
    //
    // Register roles on entry:
    //   x0 = pu1_src   (row pointer, advanced by src_strd after each load)
    //   x1 = pu1_ref   (row pointer, advanced by ref_strd after each load)
    //   w2 = src_strd
    //   w3 = ref_strd
    //   w4 (wd) and w5 (ht) are never read: the block size is fixed at 8x4.
    // Result:   w0 = SAD (upper half of x0 is zero).
    // Clobbers: x0-x3, v0-v7 (all caller-saved); no stack is used.

    // Widen the strides to 64 bits before they are used as post-index
    // offsets. The procedure call standard leaves the upper 32 bits of a
    // 32-bit argument unspecified, so x2/x3 cannot be used as received.
    // NOTE(review): assumes the C prototype declares both strides as 32-bit
    // signed ints, as the "integer ... stride" wording in the header
    // suggests - confirm against the declaration.
    sxtw    x2,         w2
    sxtw    x3,         w3

    // Load 4 source rows of 8 bytes each
    ld1     {v0.8b},    [x0],     x2
    ld1     {v1.8b},    [x0],     x2
    ld1     {v2.8b},    [x0],     x2
    ld1     {v3.8b},    [x0],     x2

    // Load 4 reference rows of 8 bytes each
    ld1     {v4.8b},    [x1],     x3
    ld1     {v5.8b},    [x1],     x3
    ld1     {v6.8b},    [x1],     x3
    ld1     {v7.8b},    [x1],     x3

    // Per-column |src - ref|, widened to 16 bits and accumulated over the
    // four rows. Each lane holds at most 4 * 255 = 1020.
    uabdl   v0.8h,      v0.8b,      v4.8b
    uabal   v0.8h,      v1.8b,      v5.8b
    uabal   v0.8h,      v2.8b,      v6.8b
    uabal   v0.8h,      v3.8b,      v7.8b

    // Horizontal sum of the 8 column totals. The total is at most
    // 32 * 255 = 8160, so it fits in the 16-bit result lane.
    addv    h0,         v0.8h

    // Return the (always non-negative) sum, zero-extended.
    umov    w0,         v0.h[0]

    ret
101