Lines Matching full:w
45 vreplgr2vr.w vr31, a3
57 vpermi.w vr25, vr24, 0x01
64 vsub.w vr24, vr24, vr31 //px
65 vsub.w vr25, vr25, vr31
67 vadd.w vr4, vr4, vr24 //diag[0][y+x]
68 vadd.w vr5, vr5, vr25
70 vpackev.w vr26, vr25, vr24
71 vpackod.w vr27, vr25, vr24
72 vpermi.w vr26, vr26, 0xd8 //px0246
73 vpermi.w vr27, vr27, 0xd8 //px1357
74 vadd.w vr12, vr12, vr26
75 vadd.w vr12, vr12, vr27 //alt[0][y+(x>>1)]
77 vhaddw.d.w vr28, vr24, vr24
80 vhaddw.d.w vr28, vr25, vr25
84 vinsgr2vr.w vr0, a3, 0 //hv[0][y]
86 vadd.w vr15, vr15, vr26
87 vadd.w vr15, vr15, vr27 //alt[1][3+y-(x>>1)]
88 vpermi.w vr15, vr15, 0x1b
90 vadd.w vr9, vr9, vr24
91 vadd.w vr8, vr8, vr25
92 vpermi.w vr8, vr8, 0x1b
93 vpermi.w vr9, vr9, 0x1b //diag[1][7+y-x]
97 vadd.w vr28, vr28, vr24
98 vadd.w vr29, vr29, vr25
99 vextrins.w vr18, vr28, 0x30
100 vshuf4i.w vr19, vr28, 0x39
101 vextrins.w vr19, vr29, 0x30
102 vshuf4i.w vr20, vr29, 0x39 //alt[2][3-(y>>1)+7]
103 vinsgr2vr.w vr20, zero, 3
105 vadd.w vr2, vr2, vr24
106 vadd.w vr3, vr3, vr25 //hv[1][x]
108 vadd.w vr21, vr21, vr24
109 vadd.w vr22, vr22, vr25 //alt[3][(y>>1)+x]
115 vpermi.w vr25, vr24, 0x01
122 vsub.w vr24, vr24, vr31 //px
123 vsub.w vr25, vr25, vr31
127 vextrins.w vr28, vr5, 0x30
128 vadd.w vr28, vr28, vr24 //diag[0][y+x]
129 vadd.w vr29, vr29, vr25
131 vextrins.w vr5, vr28, 0x03
132 vextrins.w vr6, vr29, 0x03
133 vextrins.w vr28, vr4, 0x30
134 vshuf4i.w vr4, vr28, 0x93
137 vextrins.w vr28, vr13, 0x30
138 vpackev.w vr26, vr25, vr24
139 vpackod.w vr27, vr25, vr24
140 vpermi.w vr26, vr26, 0xd8 //px0246
141 vpermi.w vr27, vr27, 0xd8 //px1357
142 vadd.w vr28, vr28, vr26
143 vadd.w vr28, vr28, vr27 //alt[0][y+(x>>1)]
144 vextrins.w vr13, vr28, 0x03
145 vextrins.w vr28, vr12, 0x30
146 vshuf4i.w vr12, vr28, 0x93
148 vhaddw.d.w vr28, vr24, vr24
151 vhaddw.d.w vr28, vr25, vr25
155 vinsgr2vr.w vr0, a3, 1 //hv[0][y]
158 vextrins.w vr28, vr16, 0x30
159 vpermi.w vr28, vr28, 0x1b
160 vadd.w vr28, vr28, vr26
161 vadd.w vr28, vr28, vr27 //alt[1][3+y-(x>>1)]
162 vextrins.w vr16, vr28, 0x00
163 vextrins.w vr28, vr15, 0x00
164 vshuf4i.w vr15, vr28, 0x6c
168 vextrins.w vr28, vr9, 0x30
169 vpermi.w vr28, vr28, 0x1b
170 vpermi.w vr29, vr29, 0x1b
171 vadd.w vr29, vr29, vr24
172 vadd.w vr28, vr28, vr25 //diag[1][7+y-x]
173 vextrins.w vr10, vr29, 0x00
174 vextrins.w vr29, vr28, 0x00
175 vshuf4i.w vr9, vr29, 0x6c
176 vextrins.w vr28, vr8, 0x00
177 vshuf4i.w vr8, vr28, 0x6c
180 vextrins.w vr28, vr18, 0x03
182 vextrins.w vr29, vr19, 0x03
183 vadd.w vr28, vr28, vr24
184 vadd.w vr29, vr29, vr25 //alt[2][3-(y>>1)+7]
185 vextrins.w vr18, vr28, 0x30
186 vextrins.w vr28, vr29, 0x00
187 vshuf4i.w vr19, vr28, 0x39
190 vadd.w vr2, vr2, vr24
191 vadd.w vr3, vr3, vr25 //hv[1][x]
193 vadd.w vr21, vr21, vr24
194 vadd.w vr22, vr22, vr25 //alt[3][(y>>1)+x]
200 vpermi.w vr25, vr24, 0x01
207 vsub.w vr24, vr24, vr31 //px
208 vsub.w vr25, vr25, vr31
214 vadd.w vr28, vr28, vr24 //diag[0][y+x]
215 vadd.w vr29, vr29, vr25
223 vpackev.w vr26, vr25, vr24
224 vpackod.w vr27, vr25, vr24
225 vpermi.w vr26, vr26, 0xd8 //px0246
226 vpermi.w vr27, vr27, 0xd8 //px1357
227 vadd.w vr28, vr28, vr26
228 vadd.w vr28, vr28, vr27 //alt[0][y+(x>>1)]
232 vhaddw.d.w vr28, vr24, vr24
235 vhaddw.d.w vr28, vr25, vr25
239 vinsgr2vr.w vr0, a3, 2 //hv[0][y]
243 vpermi.w vr28, vr28, 0x1b
244 vadd.w vr28, vr28, vr26
245 vadd.w vr28, vr28, vr27 //alt[1][3+y-(x>>1)]
246 vpermi.w vr28, vr28, 0x1b
254 vpermi.w vr28, vr28, 0x1b //5432
255 vpermi.w vr29, vr29, 0x1b //9876
256 vadd.w vr29, vr29, vr24
257 vadd.w vr28, vr28, vr25
258 vpermi.w vr28, vr28, 0x1b
259 vpermi.w vr29, vr29, 0x1b
269 vadd.w vr28, vr28, vr24
270 vadd.w vr29, vr29, vr25
276 vadd.w vr2, vr2, vr24
277 vadd.w vr3, vr3, vr25 //hv[1][x]
280 vextrins.w vr28, vr22, 0x30 //1234
282 vadd.w vr28, vr28, vr24
283 vadd.w vr29, vr29, vr25 //alt[3][(y>>1)+x]
284 vextrins.w vr23, vr29, 0x03
285 vextrins.w vr29, vr28, 0x33
286 vshuf4i.w vr22, vr29, 0x93
287 vextrins.w vr28, vr21, 0x30
288 vshuf4i.w vr21, vr28, 0x93
294 vpermi.w vr25, vr24, 0x01
301 vsub.w vr24, vr24, vr31 //px
302 vsub.w vr25, vr25, vr31
305 vextrins.w vr28, vr4, 0x03 //3456
307 vextrins.w vr29, vr5, 0x03 //78910
308 vadd.w vr28, vr28, vr24 //diag[0][y+x]
309 vadd.w vr29, vr29, vr25
310 vextrins.w vr4, vr28, 0x30
311 vextrins.w vr28, vr29, 0x00
312 vshuf4i.w vr5, vr28, 0x39
316 vextrins.w vr28, vr12, 0x03
317 vpackev.w vr26, vr25, vr24
318 vpackod.w vr27, vr25, vr24
319 vpermi.w vr26, vr26, 0xd8 //px0246
320 vpermi.w vr27, vr27, 0xd8 //px1357
321 vadd.w vr28, vr28, vr26
322 vadd.w vr28, vr28, vr27 //alt[0][y+(x>>1)]
323 vextrins.w vr12, vr28, 0x30
326 vhaddw.d.w vr28, vr24, vr24
329 vhaddw.d.w vr28, vr25, vr25
333 vinsgr2vr.w vr0, a3, 3 //hv[0][y]
336 vextrins.w vr28, vr15, 0x03
337 vpermi.w vr28, vr28, 0x1b //6543
338 vadd.w vr28, vr28, vr26
339 vadd.w vr28, vr28, vr27 //alt[1][3+y-(x>>1)]
340 vextrins.w vr15, vr28, 0x33
341 vshuf4i.w vr16, vr28, 0xc6
342 vinsgr2vr.w vr16, zero, 3
345 vextrins.w vr28, vr8, 0x03 //3456
347 vextrins.w vr29, vr9, 0x03 //78910
348 vpermi.w vr28, vr28, 0x1b //6543
349 vpermi.w vr29, vr29, 0x1b //10987
350 vadd.w vr29, vr29, vr24
351 vadd.w vr28, vr28, vr25 //diag[1][7+y-x]
352 vextrins.w vr8, vr28, 0x33
353 vextrins.w vr28, vr29, 0x33
354 vshuf4i.w vr9, vr28, 0xc6
355 vshuf4i.w vr10, vr29, 0xc6
356 vinsgr2vr.w vr10, zero, 3
362 vadd.w vr28, vr28, vr24
363 vadd.w vr29, vr29, vr25
369 vadd.w vr2, vr2, vr24
370 vadd.w vr3, vr3, vr25 //hv[1][x]
373 vextrins.w vr28, vr22, 0x30 //1234
375 vextrins.w vr29, vr23, 0x30
376 vadd.w vr28, vr28, vr24
377 vadd.w vr29, vr29, vr25 //alt[3][(y>>1)+x]
378 vextrins.w vr23, vr29, 0x03
379 vextrins.w vr29, vr28, 0x33
380 vshuf4i.w vr22, vr29, 0x93
381 vextrins.w vr28, vr21, 0x30
382 vshuf4i.w vr21, vr28, 0x93
388 vpermi.w vr25, vr24, 0x01
395 vsub.w vr24, vr24, vr31 //px
396 vsub.w vr25, vr25, vr31
398 vadd.w vr5, vr5, vr24 //diag[0][y+x]
399 vadd.w vr6, vr6, vr25
401 vpackev.w vr26, vr25, vr24
402 vpackod.w vr27, vr25, vr24
403 vpermi.w vr26, vr26, 0xd8 //px0246
404 vpermi.w vr27, vr27, 0xd8 //px1357
405 vadd.w vr13, vr13, vr26
406 vadd.w vr13, vr13, vr27 //alt[0][y+(x>>1)]
408 vhaddw.d.w vr28, vr24, vr24
411 vhaddw.d.w vr28, vr25, vr25
415 vinsgr2vr.w vr1, a3, 0 //hv[0][y]
417 vpermi.w vr16, vr16, 0x1b
418 vadd.w vr16, vr16, vr26
419 vadd.w vr16, vr16, vr27 //alt[1][3+y-(x>>1)]
420 vpermi.w vr16, vr16, 0x1b
422 vpermi.w vr9, vr9, 0x1b
423 vpermi.w vr10, vr10, 0x1b
424 vadd.w vr10, vr10, vr24
425 vadd.w vr9, vr9, vr25
426 vpermi.w vr9, vr9, 0x1b
427 vpermi.w vr10, vr10, 0x1b //diag[1][7+y-x]
430 vextrins.w vr28, vr19, 0x30 //1234
432 vextrins.w vr29, vr20, 0x30 //5678
433 vadd.w vr28, vr28, vr24
434 vadd.w vr29, vr29, vr25 //alt[2][3-(y>>1)+7]
435 vextrins.w vr20, vr29, 0x03
436 vextrins.w vr29, vr28, 0x33
437 vshuf4i.w vr19, vr29, 0x93
440 vadd.w vr2, vr2, vr24
441 vadd.w vr3, vr3, vr25 //hv[1][x]
447 vadd.w vr28, vr28, vr24
448 vadd.w vr29, vr29, vr25
458 vpermi.w vr25, vr24, 0x01
465 vsub.w vr24, vr24, vr31 //px
466 vsub.w vr25, vr25, vr31
470 vextrins.w vr28, vr6, 0x30
471 vadd.w vr28, vr28, vr24 //diag[0][y+x]
472 vadd.w vr29, vr29, vr25
473 vextrins.w vr7, vr29, 0x03
474 vextrins.w vr29, vr28, 0x33
475 vshuf4i.w vr6, vr29, 0x93
476 vextrins.w vr28, vr5, 0x30
477 vshuf4i.w vr5, vr28, 0x93
480 vextrins.w vr28, vr14, 0x30
481 vpackev.w vr26, vr25, vr24
482 vpackod.w vr27, vr25, vr24
483 vpermi.w vr26, vr26, 0xd8 //px0246
484 vpermi.w vr27, vr27, 0xd8 //px1357
485 vadd.w vr28, vr28, vr26
486 vadd.w vr28, vr28, vr27 //alt[0][y+(x>>1)]
487 vextrins.w vr14, vr28, 0x03
488 vextrins.w vr28, vr13, 0x30
489 vshuf4i.w vr13, vr28, 0x93
491 vhaddw.d.w vr28, vr24, vr24
494 vhaddw.d.w vr28, vr25, vr25
498 vinsgr2vr.w vr1, a3, 1 //hv[0][y]
501 vextrins.w vr28, vr17, 0x30
502 vpermi.w vr28, vr28, 0x1b
503 vadd.w vr28, vr28, vr26
504 vadd.w vr28, vr28, vr27 //alt[1][3+y-(x>>1)]
505 vextrins.w vr17, vr28, 0x00
506 vextrins.w vr28, vr16, 0x00
507 vshuf4i.w vr16, vr28, 0x6c
511 vextrins.w vr28, vr10, 0x30
512 vpermi.w vr28, vr28, 0x1b //8-5
513 vpermi.w vr29, vr29, 0x1b //12-9
514 vadd.w vr29, vr29, vr24
515 vadd.w vr28, vr28, vr25 //diag[1][7+y-x]
516 vextrins.w vr11, vr29, 0x00
517 vextrins.w vr29, vr28, 0x00
518 vshuf4i.w vr10, vr29, 0x6c
519 vextrins.w vr28, vr9, 0x00
520 vshuf4i.w vr9, vr28, 0x6c
523 vextrins.w vr28, vr19, 0x30 //1234
525 vextrins.w vr29, vr20, 0x30 //5678
526 vadd.w vr28, vr28, vr24
527 vadd.w vr29, vr29, vr25 //alt[2][3-(y>>1)+7]
528 vextrins.w vr20, vr29, 0x03
529 vextrins.w vr29, vr28, 0x33
530 vshuf4i.w vr19, vr29, 0x93
533 vadd.w vr2, vr2, vr24
534 vadd.w vr3, vr3, vr25 //hv[1][x]
540 vadd.w vr28, vr28, vr24
541 vadd.w vr29, vr29, vr25
551 vpermi.w vr25, vr24, 0x01
558 vsub.w vr24, vr24, vr31 //px
559 vsub.w vr25, vr25, vr31
565 vadd.w vr28, vr28, vr24 //diag[0][y+x]
566 vadd.w vr29, vr29, vr25
574 vpackev.w vr26, vr25, vr24
575 vpackod.w vr27, vr25, vr24
576 vpermi.w vr26, vr26, 0xd8 //px0246
577 vpermi.w vr27, vr27, 0xd8 //px1357
578 vadd.w vr28, vr28, vr26
579 vadd.w vr28, vr28, vr27 //alt[0][y+(x>>1)]
583 vhaddw.d.w vr28, vr24, vr24
586 vhaddw.d.w vr28, vr25, vr25
590 vinsgr2vr.w vr1, a3, 2 //hv[0][y]
594 vpermi.w vr28, vr28, 0x1b
595 vadd.w vr28, vr28, vr26
596 vadd.w vr28, vr28, vr27 //alt[1][3+y-(x>>1)]
597 vpermi.w vr28, vr28, 0x1b
605 vpermi.w vr28, vr28, 0x1b //9876
606 vpermi.w vr29, vr29, 0x1b //13-10
607 vadd.w vr29, vr29, vr24
608 vadd.w vr28, vr28, vr25
609 vpermi.w vr28, vr28, 0x1b
610 vpermi.w vr29, vr29, 0x1b
616 vadd.w vr18, vr18, vr24 //0123
617 vadd.w vr19, vr19, vr25 //4567 alt[2][3-(y>>1)+7]
619 vadd.w vr2, vr2, vr24
620 vadd.w vr3, vr3, vr25 //hv[1][x]
623 vextrins.w vr28, vr21, 0x03 //3456
625 vextrins.w vr29, vr22, 0x03 //78910
626 vadd.w vr28, vr28, vr24
627 vadd.w vr29, vr29, vr25 //alt[3][(y>>1)+x]
628 vextrins.w vr21, vr28, 0x30
629 vextrins.w vr28, vr29, 0x00
630 vshuf4i.w vr22, vr28, 0x39
637 vpermi.w vr25, vr24, 0x01
644 vsub.w vr24, vr24, vr31 //px
645 vsub.w vr25, vr25, vr31
648 vextrins.w vr28, vr5, 0x03 //78910
650 vextrins.w vr29, vr6, 0x03 //11-14
651 vadd.w vr28, vr28, vr24 //diag[0][y+x]
652 vadd.w vr29, vr29, vr25
653 vextrins.w vr5, vr28, 0x30
654 vextrins.w vr28, vr29, 0x00
655 vshuf4i.w vr6, vr28, 0x39
659 vextrins.w vr28, vr13, 0x03
660 vpackev.w vr26, vr25, vr24
661 vpackod.w vr27, vr25, vr24
662 vpermi.w vr26, vr26, 0xd8 //px0246
663 vpermi.w vr27, vr27, 0xd8 //px1357
664 vadd.w vr28, vr28, vr26
665 vadd.w vr28, vr28, vr27 //alt[0][y+(x>>1)]
666 vextrins.w vr13, vr28, 0x30
669 vhaddw.d.w vr28, vr24, vr24
672 vhaddw.d.w vr28, vr25, vr25
676 vinsgr2vr.w vr1, a3, 3 //hv[0][y]
679 vextrins.w vr28, vr16, 0x03
680 vpermi.w vr28, vr28, 0x1b //10987
681 vadd.w vr28, vr28, vr26
682 vadd.w vr28, vr28, vr27 //alt[1][3+y-(x>>1)]
683 vextrins.w vr16, vr28, 0x33
684 vshuf4i.w vr17, vr28, 0xc6
685 vinsgr2vr.w vr17, zero, 3
688 vextrins.w vr28, vr9, 0x03 //7-10
690 vextrins.w vr29, vr10, 0x03 //11-14
691 vpermi.w vr28, vr28, 0x1b //10-7
692 vpermi.w vr29, vr29, 0x1b //14-11
693 vadd.w vr29, vr29, vr24
694 vadd.w vr28, vr28, vr25 //diag[1][7+y-x]
695 vextrins.w vr9, vr28, 0x33
696 vextrins.w vr28, vr29, 0x33
697 vshuf4i.w vr10, vr28, 0xc6
698 vshuf4i.w vr11, vr29, 0xc6
699 vinsgr2vr.w vr11, zero, 3
701 vadd.w vr18, vr18, vr24 //0123
702 vadd.w vr19, vr19, vr25 //4567 alt[2][3-(y>>1)+7]
704 vadd.w vr2, vr2, vr24
705 vadd.w vr3, vr3, vr25 //hv[1][x]
708 vextrins.w vr28, vr21, 0x03 //3456
710 vextrins.w vr29, vr22, 0x03 //78910
711 vadd.w vr28, vr28, vr24
712 vadd.w vr29, vr29, vr25 //alt[3][(y>>1)+x]
713 vextrins.w vr21, vr28, 0x30
714 vextrins.w vr28, vr29, 0x00
715 vshuf4i.w vr22, vr28, 0x39
723 vmul.w vr26, vr0, vr0
724 vmul.w vr27, vr1, vr1
725 vhaddw.d.w vr28, vr26, vr26
728 vhaddw.d.w vr28, vr27, vr27
733 vmul.w vr26, vr2, vr2
734 vmul.w vr27, vr3, vr3
735 vhaddw.d.w vr28, vr26, vr26
738 vhaddw.d.w vr28, vr27, vr27
744 mul.w a3, a3, a6
745 mul.w a4, a4, a6
746 vinsgr2vr.w vr24, a3, 2
747 vinsgr2vr.w vr25, a4, 2
752 vinsgr2vr.w vr30, t0, 0
754 vinsgr2vr.w vr30, t0, 1
756 vinsgr2vr.w vr30, t0, 2
758 vinsgr2vr.w vr30, t0, 3
760 vinsgr2vr.w vr31, t0, 0
762 vinsgr2vr.w vr31, t0, 1
764 vinsgr2vr.w vr31, t0, 2
767 vextrins.w vr27, vr6, 0x03
768 vpermi.w vr27, vr27, 0x1b
769 vmul.w vr26, vr4, vr4
770 vmadd.w vr26, vr27, vr27
771 vmul.w vr26, vr26, vr30
772 vhaddw.d.w vr28, vr26, vr26
776 vpermi.w vr27, vr27, 0x1b
777 vmul.w vr26, vr5, vr5
778 vmadd.w vr26, vr27, vr27
779 vmul.w vr26, vr26, vr31
780 vextrins.w vr26, vr31, 0x33
781 vhaddw.d.w vr28, vr26, vr26
787 vextrins.w vr27, vr10, 0x03
788 vpermi.w vr27, vr27, 0x1b
789 vmul.w vr26, vr8, vr8
790 vmadd.w vr26, vr27, vr27
791 vmul.w vr26, vr26, vr30
792 vhaddw.d.w vr28, vr26, vr26
796 vpermi.w vr27, vr27, 0x1b
797 vmul.w vr26, vr9, vr9
798 vmadd.w vr26, vr27, vr27
799 vmul.w vr26, vr26, vr31
800 vextrins.w vr26, vr31, 0x33
801 vhaddw.d.w vr28, vr26, vr26
806 vpickve2gr.w a5, vr5, 3
807 mul.w a5, a5, a5
808 mul.w a5, a5, a6
809 add.w a3, a3, a5
810 vinsgr2vr.w vr24, a3, 0
811 vpickve2gr.w a5, vr9, 3
812 mul.w a5, a5, a5
813 mul.w a5, a5, a6
814 add.w a4, a4, a5
815 vinsgr2vr.w vr25, a4, 0
818 vpickve2gr.w a3, vr24, 1
819 vmul.w vr26, vr13, vr13
820 vhaddw.d.w vr28, vr26, vr26
823 vpickve2gr.w a5, vr12, 3
824 mul.w a5, a5, a5
827 mul.w a3, a3, a6 //*cost_ptr
829 vextrins.w vr29, vr30, 0x01
830 vextrins.w vr29, vr30, 0x13
831 vextrins.w vr29, vr31, 0x21
832 vextrins.w vr29, vr31, 0x33
834 vpermi.w vr27, vr27, 0x1b
835 vmul.w vr28, vr12, vr12
836 vextrins.w vr28, vr31, 0x33
837 vmadd.w vr28, vr27, vr27
838 vmul.w vr26, vr28, vr29
839 vhaddw.d.w vr28, vr26, vr26
843 vinsgr2vr.w vr24, a3, 1
846 vpickve2gr.w a3, vr24, 3
847 vmul.w vr26, vr16, vr16
848 vhaddw.d.w vr28, vr26, vr26
851 vpickve2gr.w a5, vr15, 3
852 mul.w a5, a5, a5
855 mul.w a3, a3, a6 //*cost_ptr
858 vpermi.w vr27, vr27, 0x1b
859 vmul.w vr28, vr15, vr15
860 vextrins.w vr28, vr31, 0x33
861 vmadd.w vr28, vr27, vr27
862 vmul.w vr26, vr28, vr29
863 vhaddw.d.w vr28, vr26, vr26
867 vinsgr2vr.w vr24, a3, 3
870 vpickve2gr.w a3, vr25, 1
871 vmul.w vr26, vr19, vr19
872 vhaddw.d.w vr28, vr26, vr26
875 vpickve2gr.w a5, vr18, 3
876 mul.w a5, a5, a5
879 mul.w a3, a3, a6 //*cost_ptr
882 vpermi.w vr27, vr27, 0x1b
883 vmul.w vr28, vr18, vr18
884 vextrins.w vr28, vr31, 0x33
885 vmadd.w vr28, vr27, vr27
886 vmul.w vr26, vr28, vr29
887 vhaddw.d.w vr28, vr26, vr26
891 vinsgr2vr.w vr25, a3, 1
894 vpickve2gr.w a3, vr25, 3
895 vmul.w vr26, vr22, vr22
896 vhaddw.d.w vr28, vr26, vr26
899 vpickve2gr.w a5, vr21, 3
900 mul.w a5, a5, a5
903 mul.w a3, a3, a6 //*cost_ptr
906 vpermi.w vr27, vr27, 0x1b
907 vmul.w vr28, vr21, vr21
908 vextrins.w vr28, vr31, 0x33
909 vmadd.w vr28, vr27, vr27
910 vmul.w vr26, vr28, vr29
911 vhaddw.d.w vr28, vr26, vr26
915 vinsgr2vr.w vr25, a3, 3
918 vpickve2gr.w a4, vr24, 0 //best_cost
920 vpickve2gr.w a5, vr24, 1
925 vpickve2gr.w a5, vr24, 2
930 vpickve2gr.w a5, vr24, 3
935 vpickve2gr.w a5, vr25, 0
940 vpickve2gr.w a5, vr25, 1
945 vpickve2gr.w a5, vr25, 2
950 vpickve2gr.w a5, vr25, 3
958 vreplve.w vr26, vr24, a5
961 vreplve.w vr26, vr25, a5
963 vpickve2gr.w a5, vr26, 0
964 sub.w a5, a4, a5
966 st.w a5, a2, 0
981 .macro cdef_fill tmp, stride, w, h
986 srai.d s6, \w, 3 //x
994 andi s6, \w, 4
999 andi s6, \w, 2
1004 andi s6, \w, 1
1006 li.w s6, -16384
1060 mul.w t2, t7, s5
1118 mul.w t3, t7, s5
1235 clz.w t3, a6
1236 sub.w t3, t2, t3
1237 sub.w t3, s7, t3 //sec_shift
1290 clz.w t3, a6
1291 li.w t2, 31
1292 sub.w t3, t2, t3
1293 sub.w t3, s7, t3 //sec_shift
1331 vpermi.w vr6, vr5, 0x44
1332 vpermi.w vr8, vr7, 0x44
1400 // const int dir, const int damping, const int w, int h,
1402 // w=4 h=4
1404 //sec_strength:a6, dir:a7, damping:s7, w:s0, h:s1, edges:s2
1406 ld.w t0, sp, 0
1407 ld.w t1, sp, 8
1418 li.w s0, 4 //w
1419 li.w s1, 4 //h
1430 li.w t2, 1
1435 li.w t0, -16384
1439 li.w t5, -2 //x_start
1441 li.w t7, -2 //y_start
1443 li.w t2, 2
1462 mul.w t3, s1, s5
1476 mul.w t3, t7, s5
1490 mul.w t3, t7, s5
1506 li.w t0, 4
1510 clz.w t1, a5
1512 sub.w t1, t2, t1
1513 sub.w t1, s7, t1
1530 vpermi.w vr0, vr0, 0x44
1573 vshuf4i.w vr5, vr1, 0x0e
1574 vshuf4i.w vr6, vr3, 0x0e
1575 vshuf4i.w vr7, vr2, 0x0e
1600 vpermi.w vr0, vr0, 0x44
1616 vshuf4i.w vr5, vr1, 0x0e
1638 vpermi.w vr0, vr0, 0x44
1664 vshuf4i.w vr5, vr1, 0x0e
1692 ld.w t0, sp, 0
1693 ld.w t1, sp, 8
1704 li.w s0, 4 //w
1705 li.w s1, 8 //h
1716 li.w t2, 1
1721 li.w t0, -16384
1725 li.w t5, -2 //x_start
1727 li.w t7, -2 //y_start
1729 li.w t2, 2
1748 mul.w t3, s1, s5
1762 mul.w t3, t7, s5
1776 mul.w t3, t7, s5
1792 li.w t0, 4
1796 clz.w t1, a5
1798 sub.w t1, t2, t1
1799 sub.w t1, s7, t1
1816 vpermi.w vr0, vr0, 0x44
1859 vshuf4i.w vr5, vr1, 0x0e
1860 vshuf4i.w vr6, vr3, 0x0e
1861 vshuf4i.w vr7, vr2, 0x0e
1886 vpermi.w vr0, vr0, 0x44
1902 vshuf4i.w vr5, vr1, 0x0e
1924 vpermi.w vr0, vr0, 0x44
1950 vshuf4i.w vr5, vr1, 0x0e
1978 ld.w t0, sp, 0
1979 ld.w t1, sp, 8
1990 li.w s0, 8 //w
1991 li.w s1, 8 //h
2003 li.w t2, 1
2008 li.w t0, -16384
2012 li.w t5, -2 //x_start
2014 li.w t7, -2 //y_start
2016 li.w t2, 2
2035 mul.w t3, s1, s5
2049 mul.w t3, t7, s5
2064 mul.w t3, t7, s5
2081 li.w t0, 4
2086 clz.w t1, a5
2088 sub.w t3, t2, t1
2089 sub.w t3, s7, t3