/*
 * Copyright (C) 2013-2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* ENTRY(f): switch to .text, apply `.align 4`, export f as a global symbol of
 * type function and place the label f.
 * END(f):   record the size of f as the distance from its label to here.
 */
#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f:
#define END(f) .size f, .-f;

/* X-macro list of the enabled blend operations.  X is invoked once per entry
 * as X(index, NAME); each NAME has a matching params_NAME definition and
 * blend_kernel_NAME macro in this file.
 * NOTE(review): the indices presumably have to match the blend-mode numbering
 * used by the caller -- confirm before renumbering or reordering.
 */
#define BLEND_LIST(X) \
    X(0, CLEAR) \
    X(1, SRC) \
    X(2, DST) \
    X(3, SRC_OVER) \
    X(4, DST_OVER) \
    X(5, SRC_IN) \
    X(6, DST_IN) \
    X(7, SRC_OUT) \
    X(8, DST_OUT) \
    X(9, SRC_ATOP) \
    X(10, DST_ATOP) \
    X(11, XOR) \
    X(12, MULTIPLY) \
    X(13, ADD) \
    X(14, SUBTRACT)

/* This operation was not enabled in the original RenderScript. We could
 * enable it.
 *
 *  X(15, DIFFERENCE) \
 */

/* For every blend operation supported, define a macro with just the arithmetic
 * component.  The rest can be handled later on.
 *
 * At entry v0-v3 contain the RGBA data from the destination buffer, and v8-v11
 * contain the data from the source buffer.  Both have already been split out
 * into one colour component per register (if necessary).  v3 and v11 contain
 * the alpha components.
 *
 * At the same time as defining the assembly macro, define a corresponding
 * preprocessor macro indicating any other requirements.
 *    zipped=0 -- The macro does not require the RGBA components to be
 *                separated.
 *    lddst=0  -- The macro does not require data from the destination buffer.
 *    ldsrc=0  -- The macro does not require data from the source buffer.
 *    nowrap=1 -- The macro requires no wrapper at all, and should simply be
 *                inserted without any surrounding load/store or loop code.
*/

/* Register convention shared by every kernel below: destination pixels are in
 * v0-v3 and source pixels in v8-v11; the result is left in v0-v3.  With
 * zipped=1 each register holds one colour component and v3/v11 are the alpha
 * components.
 *
 * Several kernels divide a 16-bit product p of two bytes by 255 with rounding,
 * using two rounding narrowing shifts:
 *
 *      div255(p) = (p + ((p + 128) >> 8) + 128) >> 8
 *
 * Low halves are processed in place in the 8h view of the operand register and
 * high halves in v12-v15; v4-v7 hold the intermediate (p + 128) >> 8 term.
 */

/* CLEAR: result = 0.  Needs neither source nor destination data. */
#define params_CLEAR zipped=0, lddst=0, ldsrc=0
.macro blend_kernel_CLEAR
        movi        v0.16b, #0
        movi        v1.16b, #0
        movi        v2.16b, #0
        movi        v3.16b, #0
.endm

/* SRC: result = src.  Destination data is not needed. */
#define params_SRC zipped=0, lddst=0
.macro blend_kernel_SRC
        mov         v0.16b, v8.16b
        mov         v1.16b, v9.16b
        mov         v2.16b, v10.16b
        mov         v3.16b, v11.16b
.endm

/* DST: destination is left untouched, so no code (and no wrapper) is emitted. */
#define params_DST nowrap=1
.macro blend_kernel_DST
        /* nop */
.endm

/* SRC_OVER: result = src + div255(dst * ~src.a), with a saturating final add.
 * Applied to all four components.  Clobbers v4-v7 and v12-v15.
 */
#define params_SRC_OVER zipped=1
.macro blend_kernel_SRC_OVER
        mvn         v7.16b, v11.16b

        umull2      v12.8h, v7.16b, v0.16b
        umull       v0.8h,  v7.8b,  v0.8b
        umull2      v13.8h, v7.16b, v1.16b
        umull       v1.8h,  v7.8b,  v1.8b
        umull2      v14.8h, v7.16b, v2.16b
        umull       v2.8h,  v7.8b,  v2.8b
        umull2      v15.8h, v7.16b, v3.16b
        umull       v3.8h,  v7.8b,  v3.8b

        rshrn       v4.8b,  v0.8h,  #8
        rshrn2      v4.16b, v12.8h, #8
        rshrn       v5.8b,  v1.8h,  #8
        rshrn2      v5.16b, v13.8h, #8
        rshrn       v6.8b,  v2.8h,  #8
        rshrn2      v6.16b, v14.8h, #8
        rshrn       v7.8b,  v3.8h,  #8
        rshrn2      v7.16b, v15.8h, #8

        uaddw       v0.8h,  v0.8h,  v4.8b
        uaddw2      v12.8h, v12.8h, v4.16b
        uaddw       v1.8h,  v1.8h,  v5.8b
        uaddw2      v13.8h, v13.8h, v5.16b
        uaddw       v2.8h,  v2.8h,  v6.8b
        uaddw2      v14.8h, v14.8h, v6.16b
        uaddw       v3.8h,  v3.8h,  v7.8b
        uaddw2      v15.8h, v15.8h, v7.16b

        rshrn       v0.8b,  v0.8h,  #8
        rshrn2      v0.16b, v12.8h, #8
        rshrn       v1.8b,  v1.8h,  #8
        rshrn2      v1.16b, v13.8h, #8
        rshrn       v2.8b,  v2.8h,  #8
        rshrn2      v2.16b, v14.8h, #8
        rshrn       v3.8b,  v3.8h,  #8
        rshrn2      v3.16b, v15.8h, #8

        uqadd       v0.16b, v0.16b, v8.16b
        uqadd       v1.16b, v1.16b, v9.16b
        uqadd       v2.16b, v2.16b, v10.16b
        uqadd       v3.16b, v3.16b, v11.16b
.endm

/* DST_OVER: result = dst + div255(src * ~dst.a), with a saturating final add.
 * The scaled source is built in place, so v8-v11 are overwritten as well as
 * v4-v7 and v12-v15.
 */
#define params_DST_OVER zipped=1
.macro blend_kernel_DST_OVER
        mvn         v7.16b, v3.16b

        umull2      v12.8h, v7.16b, v8.16b
        umull       v8.8h,  v7.8b,  v8.8b
        umull2      v13.8h, v7.16b, v9.16b
        umull       v9.8h,  v7.8b,  v9.8b
        umull2      v14.8h, v7.16b, v10.16b
        umull       v10.8h, v7.8b,  v10.8b
        umull2      v15.8h, v7.16b, v11.16b
        umull       v11.8h, v7.8b,  v11.8b

        rshrn       v4.8b,  v8.8h,  #8
        rshrn2      v4.16b, v12.8h, #8
        rshrn       v5.8b,  v9.8h,  #8
        rshrn2      v5.16b, v13.8h, #8
        rshrn       v6.8b,  v10.8h, #8
        rshrn2      v6.16b, v14.8h, #8
        rshrn       v7.8b,  v11.8h, #8
        rshrn2      v7.16b, v15.8h, #8

        uaddw       v8.8h,  v8.8h,  v4.8b
        uaddw2      v12.8h, v12.8h, v4.16b
        uaddw       v9.8h,  v9.8h,  v5.8b
        uaddw2      v13.8h, v13.8h, v5.16b
        uaddw       v10.8h, v10.8h, v6.8b
        uaddw2      v14.8h, v14.8h, v6.16b
        uaddw       v11.8h, v11.8h, v7.8b
        uaddw2      v15.8h, v15.8h, v7.16b

        rshrn       v8.8b,  v8.8h,  #8
        rshrn2      v8.16b, v12.8h, #8
        rshrn       v9.8b,  v9.8h,  #8
        rshrn2      v9.16b, v13.8h, #8
        rshrn       v10.8b, v10.8h, #8
        rshrn2      v10.16b, v14.8h, #8
        rshrn       v11.8b, v11.8h, #8
        rshrn2      v11.16b, v15.8h, #8

        uqadd       v0.16b, v0.16b, v8.16b
        uqadd       v1.16b, v1.16b, v9.16b
        uqadd       v2.16b, v2.16b, v10.16b
        uqadd       v3.16b, v3.16b, v11.16b
.endm

/* SRC_IN: result = div255(src * dst.a) for all four components.
 * v3 (dst.a) is the multiplier, so its own product is computed last, after
 * every other use of it.  Clobbers v4-v7 and v12-v15.
 */
#define params_SRC_IN zipped=1
.macro blend_kernel_SRC_IN
        umull2      v12.8h, v3.16b, v8.16b
        umull       v0.8h,  v3.8b,  v8.8b
        umull2      v13.8h, v3.16b, v9.16b
        umull       v1.8h,  v3.8b,  v9.8b
        umull2      v14.8h, v3.16b, v10.16b
        umull       v2.8h,  v3.8b,  v10.8b
        umull2      v15.8h, v3.16b, v11.16b
        umull       v3.8h,  v3.8b,  v11.8b

        rshrn       v4.8b,  v0.8h,  #8
        rshrn2      v4.16b, v12.8h, #8
        rshrn       v5.8b,  v1.8h,  #8
        rshrn2      v5.16b, v13.8h, #8
        rshrn       v6.8b,  v2.8h,  #8
        rshrn2      v6.16b, v14.8h, #8
        rshrn       v7.8b,  v3.8h,  #8
        rshrn2      v7.16b, v15.8h, #8

        uaddw       v0.8h,  v0.8h,  v4.8b
        uaddw2      v12.8h, v12.8h, v4.16b
        uaddw       v1.8h,  v1.8h,  v5.8b
        uaddw2      v13.8h, v13.8h, v5.16b
        uaddw       v2.8h,  v2.8h,  v6.8b
        uaddw2      v14.8h, v14.8h, v6.16b
        uaddw       v3.8h,  v3.8h,  v7.8b
        uaddw2      v15.8h, v15.8h, v7.16b

        rshrn       v0.8b,  v0.8h,  #8
        rshrn2      v0.16b, v12.8h, #8
        rshrn       v1.8b,  v1.8h,  #8
        rshrn2      v1.16b, v13.8h, #8
        rshrn       v2.8b,  v2.8h,  #8
        rshrn2      v2.16b, v14.8h, #8
        rshrn       v3.8b,  v3.8h,  #8
        rshrn2      v3.16b, v15.8h, #8
.endm

/* DST_IN: result = div255(dst * src.a) for all four components.
 * Clobbers v4-v7 and v12-v15.
 */
#define params_DST_IN zipped=1
.macro blend_kernel_DST_IN
        umull2      v12.8h, v0.16b, v11.16b
        umull       v0.8h,  v0.8b,  v11.8b
        umull2      v13.8h, v1.16b, v11.16b
        umull       v1.8h,  v1.8b,  v11.8b
        umull2      v14.8h, v2.16b, v11.16b
        umull       v2.8h,  v2.8b,  v11.8b
        umull2      v15.8h, v3.16b, v11.16b
        umull       v3.8h,  v3.8b,  v11.8b

        rshrn       v4.8b,  v0.8h,  #8
        rshrn2      v4.16b, v12.8h, #8
        rshrn       v5.8b,  v1.8h,  #8
        rshrn2      v5.16b, v13.8h, #8
        rshrn       v6.8b,  v2.8h,  #8
        rshrn2      v6.16b, v14.8h, #8
        rshrn       v7.8b,  v3.8h,  #8
        rshrn2      v7.16b, v15.8h, #8

        uaddw       v0.8h,  v0.8h,  v4.8b
        uaddw2      v12.8h, v12.8h, v4.16b
        uaddw       v1.8h,  v1.8h,  v5.8b
        uaddw2      v13.8h, v13.8h, v5.16b
        uaddw       v2.8h,  v2.8h,  v6.8b
        uaddw2      v14.8h, v14.8h, v6.16b
        uaddw       v3.8h,  v3.8h,  v7.8b
        uaddw2      v15.8h, v15.8h, v7.16b

        rshrn       v0.8b,  v0.8h,  #8
        rshrn2      v0.16b, v12.8h, #8
        rshrn       v1.8b,  v1.8h,  #8
        rshrn2      v1.16b, v13.8h, #8
        rshrn       v2.8b,  v2.8h,  #8
        rshrn2      v2.16b, v14.8h, #8
        rshrn       v3.8b,  v3.8h,  #8
        rshrn2      v3.16b, v15.8h, #8
.endm

/* SRC_OUT: result = div255(src * ~dst.a).  Inverts dst.a in v3 and reuses the
 * SRC_IN kernel.
 */
#define params_SRC_OUT zipped=1
.macro blend_kernel_SRC_OUT
        mvn         v3.16b, v3.16b
        blend_kernel_SRC_IN
.endm


/* DST_OUT: result = div255(dst * ~src.a).  Inverts src.a in v11 (left
 * inverted on exit) and reuses the DST_IN kernel.
 */
#define params_DST_OUT zipped=1
.macro blend_kernel_DST_OUT
        mvn         v11.16b, v11.16b
        blend_kernel_DST_IN
.endm

/* SRC_ATOP: for the three colour components,
 *      p      = dst * ~src.a + src * dst.a     (16-bit saturating add)
 *      result = (p + ((p + 128) >> 8) + 128) >> 8, saturated to 8 bits
 * v3 is not written, so the result alpha is the destination alpha.
 * v11 is left inverted; v4-v6, v8-v10 and v12-v14 are clobbered.
 */
#define params_SRC_ATOP zipped=1
.macro blend_kernel_SRC_ATOP
        mvn         v11.16b, v11.16b

        umull2      v12.8h, v11.16b, v0.16b
        umull       v0.8h,  v11.8b,  v0.8b
        umull2      v13.8h, v11.16b, v1.16b
        umull       v1.8h,  v11.8b,  v1.8b
        umull2      v14.8h, v11.16b, v2.16b
        umull       v2.8h,  v11.8b,  v2.8b

        umull2      v4.8h,  v3.16b, v8.16b
        umull       v8.8h,  v3.8b,  v8.8b
        umull2      v5.8h,  v3.16b, v9.16b
        umull       v9.8h,  v3.8b,  v9.8b
        umull2      v6.8h,  v3.16b, v10.16b
        umull       v10.8h, v3.8b,  v10.8b

        uqadd       v12.8h, v12.8h, v4.8h
        uqadd       v0.8h,  v0.8h,  v8.8h
        uqadd       v13.8h, v13.8h, v5.8h
        uqadd       v1.8h,  v1.8h,  v9.8h
        uqadd       v14.8h, v14.8h, v6.8h
        uqadd       v2.8h,  v2.8h,  v10.8h

        urshr       v8.8h,  v0.8h,  #8
        urshr       v4.8h,  v12.8h, #8
        urshr       v9.8h,  v1.8h,  #8
        urshr       v5.8h,  v13.8h, #8
        urshr       v10.8h, v2.8h,  #8
        urshr       v6.8h,  v14.8h, #8

        uqadd       v0.8h,  v0.8h,  v8.8h
        uqadd       v12.8h, v12.8h, v4.8h
        uqadd       v1.8h,  v1.8h,  v9.8h
        uqadd       v13.8h, v13.8h, v5.8h
        uqadd       v2.8h,  v2.8h,  v10.8h
        uqadd       v14.8h, v14.8h, v6.8h

        uqrshrn     v0.8b,  v0.8h,  #8
        uqrshrn2    v0.16b, v12.8h, #8
        uqrshrn     v1.8b,  v1.8h,  #8
        uqrshrn2    v1.16b, v13.8h, #8
        uqrshrn     v2.8b,  v2.8h,  #8
        uqrshrn2    v2.16b, v14.8h, #8
.endm

/* DST_ATOP: for the three colour components,
 *      p      = dst * src.a + src * ~dst.a     (16-bit saturating add)
 *      result = (p + ((p + 128) >> 8) + 128) >> 8, saturated to 8 bits
 * The result alpha is the source alpha, copied from v11 at the end.
 * v4-v6, v8-v10 and v12-v14 are clobbered.
 */
#define params_DST_ATOP zipped=1
.macro blend_kernel_DST_ATOP
        mvn         v3.16b, v3.16b

        umull2      v12.8h, v11.16b, v0.16b
        umull       v0.8h,  v11.8b,  v0.8b
        umull2      v13.8h, v11.16b, v1.16b
        umull       v1.8h,  v11.8b,  v1.8b
        umull2      v14.8h, v11.16b, v2.16b
        umull       v2.8h,  v11.8b,  v2.8b

        umull2      v4.8h,  v3.16b, v8.16b
        umull       v8.8h,  v3.8b,  v8.8b
        umull2      v5.8h,  v3.16b, v9.16b
        umull       v9.8h,  v3.8b,  v9.8b
        umull2      v6.8h,  v3.16b, v10.16b
        umull       v10.8h, v3.8b,  v10.8b

        uqadd       v12.8h, v12.8h, v4.8h
        uqadd       v0.8h,  v0.8h,  v8.8h
        uqadd       v13.8h, v13.8h, v5.8h
        uqadd       v1.8h,  v1.8h,  v9.8h
        uqadd       v14.8h, v14.8h, v6.8h
        uqadd       v2.8h,  v2.8h,  v10.8h

        urshr       v8.8h,  v0.8h,  #8
        urshr       v4.8h,  v12.8h, #8
        urshr       v9.8h,  v1.8h,  #8
        urshr       v5.8h,  v13.8h, #8
        urshr       v10.8h, v2.8h,  #8
        urshr       v6.8h,  v14.8h, #8

        uqadd       v0.8h,  v0.8h,  v8.8h
        uqadd       v12.8h, v12.8h, v4.8h
        uqadd       v1.8h,  v1.8h,  v9.8h
        uqadd       v13.8h, v13.8h, v5.8h
        uqadd       v2.8h,  v2.8h,  v10.8h
        uqadd       v14.8h, v14.8h, v6.8h

        uqrshrn     v0.8b,  v0.8h,  #8
        uqrshrn2    v0.16b, v12.8h, #8
        uqrshrn     v1.8b,  v1.8h,  #8
        uqrshrn2    v1.16b, v13.8h, #8
        uqrshrn     v2.8b,  v2.8h,  #8
        uqrshrn2    v2.16b, v14.8h, #8

        mov         v3.16b, v11.16b
.endm

/* MULTIPLY: result = div255(dst * src), byte by byte.  Every byte is treated
 * alike, so the components do not need to be separated (zipped=0).
 * Clobbers v4-v7 and v12-v15.
 */
#define params_MULTIPLY zipped=0
.macro blend_kernel_MULTIPLY
        umull2      v12.8h, v0.16b, v8.16b
        umull       v0.8h,  v0.8b,  v8.8b
        umull2      v13.8h, v1.16b, v9.16b
        umull       v1.8h,  v1.8b,  v9.8b
        umull2      v14.8h, v2.16b, v10.16b
        umull       v2.8h,  v2.8b,  v10.8b
        umull2      v15.8h, v3.16b, v11.16b
        umull       v3.8h,  v3.8b,  v11.8b

        rshrn       v4.8b,  v0.8h,  #8
        rshrn2      v4.16b, v12.8h, #8
        rshrn       v5.8b,  v1.8h,  #8
        rshrn2      v5.16b, v13.8h, #8
        rshrn       v6.8b,  v2.8h,  #8
        rshrn2      v6.16b, v14.8h, #8
        rshrn       v7.8b,  v3.8h,  #8
        rshrn2      v7.16b, v15.8h, #8

        uaddw       v0.8h,  v0.8h,  v4.8b
        uaddw2      v12.8h, v12.8h, v4.16b
        uaddw       v1.8h,  v1.8h,  v5.8b
        uaddw2      v13.8h, v13.8h, v5.16b
        uaddw       v2.8h,  v2.8h,  v6.8b
        uaddw2      v14.8h, v14.8h, v6.16b
        uaddw       v3.8h,  v3.8h,  v7.8b
        uaddw2      v15.8h, v15.8h, v7.16b

        rshrn       v0.8b,  v0.8h,  #8
        rshrn2      v0.16b, v12.8h, #8
        rshrn       v1.8b,  v1.8h,  #8
        rshrn2      v1.16b, v13.8h, #8
        rshrn       v2.8b,  v2.8h,  #8
        rshrn2      v2.16b, v14.8h, #8
        rshrn       v3.8b,  v3.8h,  #8
        rshrn2      v3.16b, v15.8h, #8
.endm

/* ADD: result = dst + src per byte, saturating at 255. */
#define params_ADD zipped=0
.macro blend_kernel_ADD
        uqadd       v0.16b, v0.16b, v8.16b
        uqadd       v1.16b, v1.16b, v9.16b
        uqadd       v2.16b, v2.16b, v10.16b
        uqadd       v3.16b, v3.16b, v11.16b
.endm

/* SUBTRACT: result = dst - src per byte, saturating at 0. */
#define params_SUBTRACT zipped=0
.macro blend_kernel_SUBTRACT
        uqsub       v0.16b, v0.16b, v8.16b
        uqsub       v1.16b, v1.16b, v9.16b
        uqsub       v2.16b, v2.16b, v10.16b
        uqsub       v3.16b, v3.16b, v11.16b
.endm

/* DIFFERENCE: result = |dst - src| per byte.  Defined here but not listed in
 * BLEND_LIST.
 */
#define params_DIFFERENCE zipped=0
.macro blend_kernel_DIFFERENCE
        uabd        v0.16b, v0.16b, v8.16b
        uabd        v1.16b, v1.16b, v9.16b
        uabd        v2.16b, v2.16b, v10.16b
        uabd        v3.16b, v3.16b, v11.16b
.endm

/* XOR: result = dst ^ src, a plain bitwise exclusive-or of the pixel bytes. */
#define params_XOR zipped=0
.macro blend_kernel_XOR
        eor         v0.16b, v0.16b, v8.16b
        eor         v1.16b, v1.16b, v9.16b
        eor         v2.16b, v2.16b, v10.16b
        eor         v3.16b, v3.16b, v11.16b
.endm


/* Define the wrapper code which will load and store the data, iterate the
 * correct number of times, and safely handle the remainder at the end of the
 * loop.  Various sections of assembly code are dropped or substituted for
 * simpler operations if they're not needed.
*/
/* wrap_line parameters (all flags are assemble-time constants):
 *   kernel  name of the macro holding the per-pixel arithmetic.  It is
 *           expanded with dst data in v0-v3 and src data in v8-v11, and
 *           whatever it leaves in v0-v3 is what gets stored to dst.
 *   nowrap  non-zero: emit only the kernel, with no loads, stores or loop.
 *   zipped  non-zero: use ld4/st4 so that each of v0-v3 (and v8-v11) holds
 *           one byte position of 16 four-byte pixels; zero: use ld1/st1
 *           and keep the data in memory order.
 *   lddst   non-zero: load the destination pixels into v0-v3.
 *   ldsrc   non-zero: load the source pixels into v8-v11.
 *   pld     selects the prefetch block, which is currently compiled out
 *           by the "#if 0" around it.
 *
 * Registers on entry: x0 = dst pointer, x1 = src pointer, x2 = byte count.
 * Every expansion ends with "mov x0, #0 ; ret".  The wrapper itself uses x3
 * and v0-v11 as scratch; d8-d15 are spilled below sp on entry and reloaded
 * before returning.
 */
.macro wrap_line kernel, nowrap=0, zipped=1, lddst=1, ldsrc=1, pld=1
.if \nowrap
        \kernel
.else
        /* Spill d8-d15 into 64 bytes of new stack: d8-d11 at [sp],
         * d12-d15 at [sp, #32] (x3 = old sp - 32 = new sp + 32).
         */
        sub     x3, sp, #32
        sub     sp, sp, #64
        st1     {v8.1d - v11.1d}, [sp]
        st1     {v12.1d - v15.1d}, [x3]

        /* x2 is kept biased by -64 while looping, so "bge" at label 2 means
         * that at least one more full 64-byte block remains.
         */
        subs    x2, x2, #64
        b       2f
.align 4
1:
  .if \lddst
    .if \zipped
        ld4     {v0.16b - v3.16b}, [x0]
    .else
        ld1     {v0.16b - v3.16b}, [x0]
    .endif
  .endif
  .if \ldsrc
    .if \zipped
        ld4     {v8.16b - v11.16b}, [x1], #64
    .else
        ld1     {v8.16b - v11.16b}, [x1], #64
    .endif
  .endif
  .if \pld
#if 0 /* TODO: test this on real hardware */
    .if \lddst ; prfm PLDL1STRM, [x0, #192] ; .endif
    .if \ldsrc ; prfm PLDL1STRM, [x1, #192] ; .endif
#endif
  .endif

        \kernel

        /* The flags set here are consumed by the "bge" at label 2; the
         * store in between does not modify them.
         */
        subs    x2, x2, #64
  .if \zipped
        st4     {v0.16b,v1.16b,v2.16b,v3.16b}, [x0], #64
  .else
        st1     {v0.16b,v1.16b,v2.16b,v3.16b}, [x0], #64
  .endif

2:      bge     1b
        /* Remove the bias: x2 is now the number of tail bytes (0..63). */
        adds    x2, x2, #64
        beq     2f

        /* To handle the tail portion of the data (something less than 64
         * bytes) load small power-of-two chunks into working registers.  It
         * doesn't matter where they end up in the register; the same process
         * will store them back out using the same positions and the operations
         * don't require data to interact with its neighbours.
         */
        movi    v0.16b, #0
        movi    v1.16b, #0
        movi    v2.16b, #0
        movi    v3.16b, #0

        movi    v8.16b, #0
        movi    v9.16b, #0
        movi    v10.16b, #0
        movi    v11.16b, #0

        /* Each set bit of x2 selects one chunk: 32, 16, 8, 4, 2, 1 bytes.
         * The chunks land in disjoint parts of v0-v3 (dst) and v8-v11 (src).
         */
        tbz     x2, #5, 1f
  .if \lddst ; ld1     {v2.16b,v3.16b}, [x0], #32   ; .endif
  .if \ldsrc ; ld1     {v10.16b,v11.16b}, [x1], #32 ; .endif
1:      tbz     x2, #4, 1f
  .if \lddst ; ld1     {v1.16b}, [x0], #16  ; .endif
  .if \ldsrc ; ld1     {v9.16b}, [x1], #16  ; .endif
1:      tbz     x2, #3, 1f
  .if \lddst ; ld1     {v0.d}[1], [x0], #8  ; .endif
  .if \ldsrc ; ld1     {v8.d}[1], [x1], #8  ; .endif
1:      tbz     x2, #2, 1f
  .if \lddst ; ld1     {v0.s}[1], [x0], #4  ; .endif
  .if \ldsrc ; ld1     {v8.s}[1], [x1], #4  ; .endif
1:      tbz     x2, #1, 1f
  .if \lddst ; ld1     {v0.h}[1], [x0], #2  ; .endif
  .if \ldsrc ; ld1     {v8.h}[1], [x1], #2  ; .endif
1:      tbz     x2, #0, 1f
  .if \lddst ; ld1     {v0.b}[1], [x0], #1  ; .endif
  .if \ldsrc ; ld1     {v8.b}[1], [x1], #1  ; .endif
1:
        /* The dst loads above advanced x0 by x2 bytes in total; rewind it so
         * that the tail stores below start from the same address.
         */
  .if \lddst ; sub     x0, x0, x2           ; .endif

.if \zipped
        /* One small impediment in the process above is that some of the load
         * operations can't perform byte-wise structure deinterleaving at the
         * same time as loading only part of a register.  So the data is loaded
         * linearly and unpacked manually at this point.
         *
         * Two rounds of uzp1/uzp2 leave byte (4*i + k) of the linear data in
         * element i of register v<k> (v<8+k> for src), i.e. the layout that
         * ld4 produces.  v4-v7 are used as temporaries.
         */
        uzp1    v4.16b, v0.16b, v1.16b
        uzp2    v5.16b, v0.16b, v1.16b
        uzp1    v6.16b, v2.16b, v3.16b
        uzp2    v7.16b, v2.16b, v3.16b
        uzp1    v0.16b, v4.16b, v6.16b
        uzp2    v2.16b, v4.16b, v6.16b
        uzp1    v1.16b, v5.16b, v7.16b
        uzp2    v3.16b, v5.16b, v7.16b

        uzp1    v4.16b, v8.16b, v9.16b
        uzp2    v5.16b, v8.16b, v9.16b
        uzp1    v6.16b, v10.16b, v11.16b
        uzp2    v7.16b, v10.16b, v11.16b
        uzp1    v8.16b, v4.16b, v6.16b
        uzp2    v10.16b, v4.16b, v6.16b
        uzp1    v9.16b, v5.16b, v7.16b
        uzp2    v11.16b, v5.16b, v7.16b

        \kernel

        /* Re-interleave the result back into linear memory order. */
        zip1    v4.16b, v0.16b, v2.16b
        zip2    v6.16b, v0.16b, v2.16b
        zip1    v5.16b, v1.16b, v3.16b
        zip2    v7.16b, v1.16b, v3.16b
        zip1    v0.16b, v4.16b, v5.16b
        zip2    v1.16b, v4.16b, v5.16b
        zip1    v2.16b, v6.16b, v7.16b
        zip2    v3.16b, v6.16b, v7.16b
  .else
        \kernel
  .endif

        /* Store the tail using the same chunk-to-register mapping as the
         * loads above.
         */
        tbz     x2, #5, 1f
        st1     {v2.16b,v3.16b}, [x0], #32
1:      tbz     x2, #4, 1f
        st1     {v1.16b}, [x0], #16
1:      tbz     x2, #3, 1f
        st1     {v0.d}[1], [x0], #8
1:      tbz     x2, #2, 1f
        st1     {v0.s}[1], [x0], #4
1:      tbz     x2, #1, 1f
        st1     {v0.h}[1], [x0], #2
1:      tbz     x2, #0, 2f
        st1     {v0.b}[1], [x0], #1
        /* Reload d8-d15 and release the 64 bytes of stack. */
2:      ld1     {v8.1d - v11.1d}, [sp], #32
        ld1     {v12.1d - v15.1d}, [sp], #32
.endif
        mov     x0, #0
        ret
.endm


/* produce list of blend_line_XX() functions; each function uses the wrap_line
 * macro, passing it the name of the operation macro it wants along with
 * optional parameters to remove unnecessary operations.
 */
#define BLEND_X(d, n) ENTRY(blend_line_##n) ; wrap_line blend_kernel_##n, params_##n ; END(blend_line_##n) ;
    BLEND_LIST(BLEND_X)
#undef BLEND_X

/* Each list entry overwrites tablesize, so after the expansion it holds the
 * last listed slot number plus one.
 */
#define BLEND_X(d, n) .set tablesize, d+1 ;
    BLEND_LIST(BLEND_X)
#undef BLEND_X

/*  int rsdIntrinsicBlend_K(
 *          uchar4 *out,        // x0
 *          uchar4 const *in,   // x1
 *          int slot,           // x2
 *          size_t xstart,      // x3
 *          size_t xend);       // x4
 *
 * Looks up slot in blendtable and jumps to the matching blend_line_XX
 * function with x0/x1 advanced by xstart pixels (4 bytes each) and x2 set
 * to the byte count (xend - xstart) * 4.  That function returns straight to
 * our caller with x0 = 0.  Returns -1 when slot is not below tablesize or
 * its table entry is zero.
 *
 * NOTE(review): only the low 32 bits of xstart and xend are used (w3/w4,
 * uxtw, 32-bit sub) although the prototype above says size_t; this appears
 * to be what the TODO below refers to.  Confirm the C declaration before
 * widening.
 */
ENTRY(rsdIntrinsicBlend_K)
        adrp    x5, blendtable
        add     x5, x5, :lo12:blendtable
        cmp     w2, tablesize
        bhs     1f                          /* unsigned: rejects negative slots too */
        ldrsh   x6, [x5, w2, uxtw #1]       /* signed 16-bit offset from label 2 */
        add     x0, x0, w3, uxtw #2
        add     x1, x1, w3, uxtw #2
        sub     w2, w4, w3
        ubfiz   x2, x2, #2, #32 /* TODO: fix */
        cbz     x6, 1f                      /* zero entry: slot has no function */
        adr     x5, 2f
        add     x6, x5, x6
2:      br      x6
1:      mov     x0, #-1
        ret

END(rsdIntrinsicBlend_K)

/* Dispatch table: one signed halfword per slot holding the distance from
 * label 2 in rsdIntrinsicBlend_K (the "br x6") to blend_line_XX.  Slot
 * numbers missing from BLEND_LIST are padded with zero halfwords.
 */
.section .rodata
.set off,0
blendtable:
#define BLEND_X(d, n) .rept d-off ; .hword 0 ; .endr ; .hword blend_line_##n - 2b ; .set off, d+1 ;
        BLEND_LIST(BLEND_X)
#undef BLEND_X