Lines Matching full:rsp

1114     mova    [rsp+gprsize+16*1], m6
1118 pmulhrsw m7, [rsp+gprsize+16*0]
1131 mova [rsp+gprsize+16*2], m6
1132 mova m6, [rsp+gprsize+16*1]
1142 mova [rsp+gprsize+16*0], m2
1147 mova m7, [rsp+gprsize+16*2]
1150 mova m7, [rsp+gprsize+16*0]
1162 mova [rsp+gprsize+16*1], m6
1166 pmulhrsw m7, [rsp+gprsize+16*0]
1167 mova [rsp+gprsize+16*2], m5
1168 mova [rsp+gprsize+16*0], m7
1173 WRITE_8X4 4, [rsp+gprsize+16*2], [rsp+gprsize+16*1], [rsp+gprsize+16*0], 5, 6, 7
1183 mova [rsp+gprsize*2+16*0], m7
1184 mova [rsp+gprsize*2+16*1], m3
1185 mova [rsp+gprsize*2+16*2], m1
1188 mova m3, [rsp+gprsize*2+16*2]
1189 mova [rsp+gprsize*2+16*2], m2
1190 mova m2, [rsp+gprsize*2+16*1]
1191 mova [rsp+gprsize*2+16*1], m4
1192 mova m4, [rsp+gprsize*2+16*0]
1193 mova [rsp+gprsize*2+16*0], m6
1195 mova m6, [rsp+gprsize*2+16*0]
1198 mova [rsp+gprsize*2+16*0], m7
1199 mova m1, [rsp+gprsize*2+16*2]
1202 mova m7, [rsp+gprsize*2+16*1]
1227 mova [rsp+gprsize+16*1], m6
1244 mova [rsp+gprsize+16*1], m6
1252 mova [rsp+gprsize*2+16*0], m7
1253 mova [rsp+gprsize*2+16*1], m3
1254 mova [rsp+gprsize*2+16*2], m4
1264 mova m6, [rsp+gprsize*2+16*2]
1265 mova [rsp+gprsize*2+16*2], m5
1266 mova m1, [rsp+gprsize*2+16*1]
1267 mova [rsp+gprsize*2+16*1], m2
1268 mova m5, [rsp+gprsize*2+16*0]
1269 mova [rsp+gprsize*2+16*0], m3
1278 mova m7, [rsp+gprsize*2+16*0]
1281 mova [rsp+gprsize*2+16*0], m1
1284 mova m6, [rsp+gprsize*2+16*2]
1285 mova m7, [rsp+gprsize*2+16*1]
1293 mova [rsp+gprsize*2+16*1], m1
1294 mova [rsp+gprsize*2+16*2], m6
1332 mova m1, [rsp+gprsize*2+16*1]
1333 mova m6, [rsp+gprsize*2+16*2]
1366 mova [rsp+gprsize+16*1], m1
1378 pmulhrsw m0, [rsp+gprsize+16*0]
1394 mova [rsp+gprsize+16*2], m2
1401 mova [rsp+gprsize+16*1], m1
1405 pmulhrsw m0, [rsp+gprsize+16*0]
1407 mova [rsp+gprsize+16*0], m7
1417 mova [rsp+gprsize+16*1], m6
1426 mova [rsp+gprsize+16*0], m7
1429 mova [rsp+gprsize+16*2], m5
1430 mova [rsp+gprsize+16*1], m6
2219 mov [rsp+gprsize+16*11], tx2q
2226 mov tx2q, [rsp+gprsize+16*11]
2248 SAVE_7ROWS rsp+gprsize+16*3, 16
2264 LOAD_8ROWS rsp+gprsize+16*3, 16
2265 mova [rsp+gprsize+16*0], m7
2288 mova [rsp+gprsize+16*7], m0
2289 mova [rsp+gprsize+16*8], m1
2290 mova [rsp+gprsize+16*5], m6
2291 mova [rsp+gprsize+16*6], m7
2302 mova [rsp+gprsize+16*3], m4
2303 mova [rsp+gprsize+16*4], m5
2304 mova [rsp+gprsize+16*9], m6
2305 mova [rsp+gprsize+32*5], m7
2319 LOAD_8ROWS rsp+gprsize+16*3, 16
2320 mova [rsp+gprsize+16*0], m7
2340 mova [rsp+gprsize+16*7], m0
2341 mova [rsp+gprsize+16*8], m1
2342 mova [rsp+gprsize+16*5], m6
2343 mova [rsp+gprsize+16*6], m7
2354 mova [rsp+gprsize+16*3], m4
2355 mova [rsp+gprsize+16*4], m5
2356 mova [rsp+gprsize+16*9], m6
2357 mova [rsp+gprsize+32*5], m7
2368 LOAD_8ROWS rsp+gprsize+16*3, 16
2369 mova [rsp+gprsize+16*0], m7
2384 mova [rsp+gprsize+16*1], m6
2391 mova [rsp+gprsize+16*1], m6
2398 mova [rsp+gprsize+16*0], m7
2399 mova [rsp+gprsize+16*1], m6
2402 mova m6, [rsp+gprsize+16*1]
2403 mova [rsp+gprsize+16*2], m5
2405 mova m5, [rsp+gprsize+16*0]
2409 pmulhrsw m7, [rsp+gprsize+16*2]
2410 mova [rsp+gprsize+16*0], m5
2411 mova [rsp+gprsize+16*1], m6
2412 mova [rsp+gprsize+16*2], m7
2446 SAVE_7ROWS rsp+gprsize+16*3, 16
2456 LOAD_8ROWS rsp+gprsize+16*3, 16
2457 mova [rsp+gprsize+16*0], m7
2475 mova [rsp+gprsize*2+16*1], m2
2476 mova [rsp+gprsize*2+16*2], m6
2477 mova [rsp+gprsize*2+32*5], m5
2487 mova m3, [rsp+gprsize*2+16*1]
2488 mova m5, [rsp+gprsize*2+32*5]
2489 mova [rsp+gprsize*2+16*1], m2
2490 mova [rsp+gprsize*2+32*5], m4
2491 mova m2, [rsp+gprsize*2+16*2]
2492 mova [rsp+gprsize*2+16*2], m7
2500 mova m7, [rsp+gprsize*2+32*5]
2505 mova m5, [rsp+gprsize*2+16*0]
2508 mova [rsp+gprsize*2+32*5], m0
2509 mova m5, [rsp+gprsize*2+16*9]
2512 mova [rsp+gprsize*2+16*0], m0
2513 mova [rsp+gprsize*2+16*9], m2
2514 mova m0, [rsp+gprsize*2+16*1]
2515 mova m2, [rsp+gprsize*2+16*2]
2516 mova [rsp+gprsize*2+16*1], m3
2522 mova [rsp+gprsize*2+16*2], m1
2525 mova m3, [rsp+gprsize*2+16*8]
2528 mova m5, [rsp+gprsize*2+16*7]
2529 mova [rsp+gprsize*2+16*8], m3
2532 mova m4, [rsp+gprsize*2+16*6]
2533 mova [rsp+gprsize*2+16*7], m5
2536 mova m6, [rsp+gprsize*2+16*5]
2537 mova [rsp+gprsize*2+16*6], m5
2540 mova m7, [rsp+gprsize*2+16*4]
2541 mova [rsp+gprsize*2+16*5], m6
2544 mova m1, [rsp+gprsize*2+16*2]
2545 mova m0, [rsp+gprsize*2+16*3]
2546 mova [rsp+gprsize*2+16*4], m7
2549 mova [rsp+gprsize*2+16*3], m0
2550 mova m1, [rsp+gprsize*2+16*0]
2551 mova m0, [rsp+gprsize*2+16*1]
2552 mova [rsp+gprsize*2+16*0], m7
2566 mova [rsp+gprsize+16*7], m0
2567 mova [rsp+gprsize+16*8], m1
2568 mova [rsp+gprsize+16*9], m2
2569 mova [rsp+gprsize+32*5], m3
2574 mova [rsp+gprsize+16*3], m2
2575 mova [rsp+gprsize+16*4], m3
2576 mova [rsp+gprsize+16*5], m0
2577 mova [rsp+gprsize+16*6], m1
2595 LOAD_8ROWS rsp+gprsize+16*3, 16
2596 mova [rsp+gprsize+16*0], m7
2613 mova [rsp+gprsize*2+16*0], m1
2614 mova [rsp+gprsize*2+16*1], m2
2615 mova [rsp+gprsize*2+16*2], m6
2625 mova m2, [rsp+gprsize*2+16*0] ;in3
2626 mova m7, [rsp+gprsize*2+16*1] ;in4
2627 mova [rsp+gprsize*2+16*0], m1 ;t11
2628 mova [rsp+gprsize*2+16*1], m4 ;t10
2629 mova m1, [rsp+gprsize*2+16*2] ;in12
2630 mova [rsp+gprsize*2+16*2], m0 ;t2a
2638 mova m2, [rsp+gprsize*2+16*8] ;in1
2639 mova m7, [rsp+gprsize*2+16*9] ;in14
2640 mova [rsp+gprsize*2+16*8], m4 ;t12
2641 mova [rsp+gprsize*2+16*9], m0 ;t13
2642 mova m4, [rsp+gprsize*2+16*4] ;in9
2643 mova m0, [rsp+gprsize*2+16*5] ;in6
2644 mova [rsp+gprsize*2+16*4], m1 ;t4a
2645 mova [rsp+gprsize*2+16*5], m5 ;t5a
2653 mova m2, [rsp+gprsize*2+16*2] ;t2a
2654 mova [rsp+gprsize*2+16*2], m5 ;t14
2660 mova m4, [rsp+gprsize*2+16*7] ;in0
2661 mova m5, [rsp+gprsize*2+32*5] ;in15
2662 mova [rsp+gprsize*2+16*7], m3 ;t3
2663 mova [rsp+gprsize*2+32*5], m1 ;t15
2664 mova m1, [rsp+gprsize*2+16*6] ;in7
2665 mova m3, [rsp+gprsize*2+16*3] ;in8
2666 mova [rsp+gprsize*2+16*6], m7 ;t7a
2667 mova [rsp+gprsize*2+16*3], m0 ;t6a
2675 mova m1, [rsp+gprsize*2+16*4] ;t4a
2676 mova m7, [rsp+gprsize*2+16*5] ;t5a
2677 mova [rsp+gprsize*2+16*4], m3 ;t8
2678 mova [rsp+gprsize*2+16*5], m0 ;t9
2684 mova m7, [rsp+gprsize*2+16*3] ;t6a
2687 mova [rsp+gprsize*2+16*3], m4 ;out0
2688 mova m4, [rsp+gprsize*2+16*6] ;t7a
2691 mova [rsp+gprsize*2+16*6], m3 ;-out3
2694 mova [rsp+gprsize*2+16*12], m3
2695 mova m3, [rsp+gprsize*2+16*7] ;t3
2696 mova [rsp+gprsize*2+16* 7], m2 ;out4
2699 mova [rsp+gprsize*2+16*11], m2
2700 mova m2, [rsp+gprsize*2+32*5] ;t15
2701 mova [rsp+gprsize*2+16*10], m1 ;-out7
2702 mova m1, [rsp+gprsize*2+16*0] ;t11
2703 mova [rsp+gprsize*2+16*0 ], m5 ;-out15
2704 mova m3, [rsp+gprsize*2+16*1] ;t10
2705 mova [rsp+gprsize*2+16*1 ], m4 ;-out11
2706 mova m4, [rsp+gprsize*2+16*2] ;t14
2707 mova [rsp+gprsize*2+16*2 ], m0 ;out12
2713 mova m2, [rsp+gprsize*2+16*4] ;t8
2714 mova m4, [rsp+gprsize*2+16*5] ;t9
2715 mova [rsp+gprsize*2+16*4], m3 ;t10a
2716 mova [rsp+gprsize*2+16*5], m1 ;t11a
2717 mova m3, [rsp+gprsize*2+16*8] ;t12
2718 mova m1, [rsp+gprsize*2+16*9] ;t13
2719 mova [rsp+gprsize*2+16*8], m5 ;t14
2720 mova [rsp+gprsize*2+16*9], m0 ;t15
2726 mova m6, [rsp+gprsize*2+16*4] ;t10a
2727 mova m1, [rsp+gprsize*2+16*5] ;t11a
2732 mova [rsp+gprsize*2+16*14], m4
2733 mova [rsp+gprsize*2+16* 4], m2 ;-out1
2734 mova m4, [rsp+gprsize*2+16*8] ;t14
2735 mova m2, [rsp+gprsize*2+16*9] ;t15
2736 mova [rsp+gprsize*2+16* 9], m3 ;out6
2741 mova [rsp+gprsize*2+16* 5], m0 ;out2
2745 mova m0, [rsp+gprsize*2+16*14]
2746 mova [rsp+gprsize*2+16*14], m5
2747 mova [rsp+gprsize*2+16*15], m6
2761 mova [rsp+gprsize*2+16* 8], m4
2762 mova m3, [rsp+gprsize*2+16* 9]
2773 mova [rsp+gprsize*2+16* 9], m0
2774 mova m0, [rsp+gprsize*2+16* 7]
2775 mova m4, [rsp+gprsize*2+16*12]
2786 mova [rsp+gprsize*2+16* 7], m4
2787 mova m4, [rsp+gprsize*2+16*10]
2788 mova m5, [rsp+gprsize*2+16*11]
2799 mova [rsp+gprsize*2+16*10], m5
2800 mova m4, [rsp+gprsize*2+16* 2] ;out12
2801 mova m5, [rsp+gprsize*2+16*14] ;-out13
2802 mova m6, [rsp+gprsize*2+16*15] ;out14
2807 mova m1, [rsp+gprsize*2+16* 9]
2808 mova m2, [rsp+gprsize*2+16*14]
2813 mova [rsp+gprsize*2+16* 9], m0
2818 mova [rsp+gprsize*2+16* 8], m3
2819 mova m3, [rsp+gprsize*2+16* 7]
2820 mova m4, [rsp+gprsize*2+16*12]
2825 mova [rsp+gprsize*2+16* 7], m0
2826 mova m0, [rsp+gprsize*2+16*10]
2827 paddsw m4, m0, [rsp+gprsize*2+16*11]
2828 psubsw m0, [rsp+gprsize*2+16*11]
2831 mova [rsp+gprsize*2+16*10], m4
2832 mova m4, [rsp+gprsize*2+16*2 ] ;out12
2846 mova [rsp+gprsize+16*7], m0
2847 mova [rsp+gprsize+16*8], m1
2848 mova [rsp+gprsize+16*9], m2
2849 mova [rsp+gprsize+32*5], m3
2854 mova [rsp+gprsize+16*3], m2
2855 mova [rsp+gprsize+16*4], m3
2856 mova [rsp+gprsize+16*5], m0
2857 mova [rsp+gprsize+16*6], m1
2870 mova m7, [rsp+gprsize+16*0]
2872 LOAD_8ROWS rsp+gprsize+16*3, 16
2873 mova [rsp+gprsize+16*0], m7
2881 mova [rsp+gprsize+16*0], m7
2930 mova [rsp+gprsize+16*0], m4
2935 mova [rsp+gprsize+16*1], m6
2946 mova m3, [rsp+gprsize+16*0]
3001 SAVE_7ROWS rsp+gprsize+16*3, 16
3011 LOAD_8ROWS rsp+gprsize+16*3, 16
3012 mova [rsp+gprsize+16*0], m7
3021 SAVE_7ROWS rsp+gprsize+16*3, 16
3030 LOAD_8ROWS rsp+gprsize+16*3, 16
3031 mova [rsp+gprsize+16*0], m7
3041 LOAD_8ROWS rsp+gprsize+16*3, 16
3042 mova [rsp+gprsize+16*0], m7
3072 mova [rsp+gprsize+16*7], m0
3073 mova [rsp+gprsize+16*8], m1
3074 mova [rsp+gprsize+16*9], m2
3075 mova [rsp+gprsize+32*5], m3
3080 mova [rsp+gprsize+16*3], m2
3081 mova [rsp+gprsize+16*4], m3
3082 mova [rsp+gprsize+16*5], m0
3083 mova [rsp+gprsize+16*6], m1
3099 mova [rsp+gprsize+16*7], m0
3100 mova [rsp+gprsize+16*8], m1
3101 mova [rsp+gprsize+16*9], m2
3102 mova [rsp+gprsize+32*5], m3
3107 mova [rsp+gprsize+16*3], m2
3108 mova [rsp+gprsize+16*4], m3
3109 mova [rsp+gprsize+16*5], m0
3110 mova [rsp+gprsize+16*6], m1
3137 LOAD_8ROWS rsp+gprsize+16*3, 16
3138 mova [rsp+gprsize+16*0], m7
3155 LOAD_8ROWS rsp+gprsize+16*3, 16
3156 mova [rsp+gprsize+16*0], m7
3166 LOAD_8ROWS rsp+gprsize+16*3, 16
3167 mova [rsp+gprsize+16*0], m7
3188 mova [rsp+gprsize+16*7], m4
3189 mova [rsp+gprsize+16*8], m5
3190 mova [rsp+gprsize+16*5], m6
3191 mova [rsp+gprsize+16*6], m7
3212 LOAD_8ROWS rsp+gprsize+16*3, 16
3213 mova [rsp+gprsize+16*0], m7
3224 mova m7, [rsp+gprsize+16*0]
3226 LOAD_8ROWS rsp+gprsize+16*3, 16
3227 mova [rsp+gprsize+16*0], m7
3235 mova [rsp+gprsize+16*0], m7
3246 LOAD_8ROWS rsp+gprsize+16*3, 16
3247 mova [rsp+gprsize+16*0], m7
3266 mova [rsp+gprsize+16*7], m4
3267 mova [rsp+gprsize+16*8], m5
3268 mova [rsp+gprsize+16*5], m6
3269 mova [rsp+gprsize+16*6], m7
3276 LOAD_8ROWS rsp+gprsize+16*3, 16
3277 mova [rsp+gprsize+16*0], m7
3307 mova [rsp+gprsize+16*1], m7
3336 mova [rsp+gprsize+16*0], m7
3337 mova [rsp+gprsize+16*1], m4
3343 mova [rsp+gprsize+16*2], m5
3344 mova m5, [rsp+gprsize+16*1]
3345 mova [rsp+gprsize+16*1], m6
3347 mova m6, [rsp+gprsize+16*0]
3351 mova [rsp+gprsize+16*0], m6
3417 mova [rsp+gprsize+16*9 ], m0 ;in24
3418 mova [rsp+gprsize+16*10], m4 ;in28
3419 mova [rsp+gprsize+16*17], m2 ;in26
3420 mova [rsp+gprsize+16*18], m6 ;in30
3421 mova [rsp+gprsize+16*31], m1 ;in25
3422 mova [rsp+gprsize+16*30], m3 ;in27
3423 mova [rsp+gprsize+16*27], m5 ;in29
3424 mova [rsp+gprsize+16*34], m7 ;in31
3432 mova [rsp+gprsize+16*7 ], m0 ;in16
3433 mova [rsp+gprsize+16*8 ], m4 ;in20
3434 mova [rsp+gprsize+16*15], m2 ;in18
3435 mova [rsp+gprsize+16*16], m6 ;in22
3436 mova [rsp+gprsize+16*33], m1 ;in17
3437 mova [rsp+gprsize+16*28], m3 ;in19
3438 mova [rsp+gprsize+16*29], m5 ;in21
3439 mova [rsp+gprsize+16*32], m7 ;in23
3449 mova [rsp+gprsize+16*5 ], m0 ;in8
3450 mova [rsp+gprsize+16*6 ], m4 ;in12
3451 mova [rsp+gprsize+16*13], m2 ;in10
3452 mova [rsp+gprsize+16*14], m6 ;in14
3453 mova [rsp+gprsize+16*21], m1 ;in9
3454 mova [rsp+gprsize+16*24], m3 ;in11
3455 mova [rsp+gprsize+16*25], m5 ;in13
3456 mova [rsp+gprsize+16*20], m7 ;in15
3464 mova [rsp+gprsize+16*11], m2 ;in2
3465 mova [rsp+gprsize+16*12], m6 ;in6
3466 mova [rsp+gprsize+16*19], m1 ;in1
3467 mova [rsp+gprsize+16*26], m3 ;in3
3468 mova [rsp+gprsize+16*23], m5 ;in5
3469 mova [rsp+gprsize+16*22], m7 ;in7
3471 mova m2, [rsp+gprsize+16*5 ] ;in8
3472 mova m3, [rsp+gprsize+16*6 ] ;in12
3480 SAVE_7ROWS rsp+gprsize+16*3 , 16
3481 mova m0, [rsp+gprsize+16*11]
3482 mova m1, [rsp+gprsize+16*12]
3483 mova m2, [rsp+gprsize+16*13]
3484 mova m3, [rsp+gprsize+16*14]
3488 mova m7, [rsp+gprsize+16*0]
3489 SAVE_8ROWS rsp+gprsize+16*11, 16
3495 mova m4, [rsp+gprsize+16*7 ] ;in16
3496 mova m5, [rsp+gprsize+16*8 ] ;in20
3497 mova m6, [rsp+gprsize+16*9 ] ;in24
3498 mova m7, [rsp+gprsize+16*10] ;in28
3500 SAVE_7ROWS rsp+gprsize+16*3 , 16
3501 LOAD_8ROWS rsp+gprsize+16*11, 16
3503 mova m7, [rsp+gprsize+16*0]
3504 SAVE_8ROWS rsp+gprsize+16*11, 16
3511 mova [rsp+gprsize+16*0 ], m7
3528 LOAD_8ROWS rsp+gprsize+16*11, 16
3529 mova [rsp+gprsize+16*0 ], m7
3535 LOAD_8ROWS rsp+gprsize+16*19, 16
3536 mova [rsp+gprsize+16*0 ], m7
3542 LOAD_8ROWS rsp+gprsize+16*27, 16
3543 mova [rsp+gprsize+16*0 ], m7
3553 mova m0, [rsp+gprsize*2+16*19] ;in1
3557 mova [rsp+gprsize*2+16*19], m0 ;t16
3558 mova [rsp+gprsize*2+16*34], m3 ;t31
3560 mova [rsp+gprsize*2+16*20], m3 ;t17a
3561 mova [rsp+gprsize*2+16*33], m0 ;t30a
3562 mova m1, [rsp+gprsize*2+16*22] ;in7
3565 mova [rsp+gprsize*2+16*22], m1 ;t19
3566 mova [rsp+gprsize*2+16*31], m2 ;t28
3568 mova [rsp+gprsize*2+16*21], m2 ;t18a
3569 mova [rsp+gprsize*2+16*32], m1 ;t29a
3570 mova m0, [rsp+gprsize*2+16*23] ;in5
3573 mova [rsp+gprsize*2+16*23], m0 ;t20
3574 mova [rsp+gprsize*2+16*30], m3 ;t27
3576 mova [rsp+gprsize*2+16*24], m3 ;t21a
3577 mova [rsp+gprsize*2+16*29], m0 ;t26a
3578 mova m2, [rsp+gprsize*2+16*26] ;in3
3587 mova m0, [rsp+gprsize*2+16*19] ;in1
3588 mova m1, [rsp+gprsize*2+16*20] ;in15
3599 mova [rsp+gprsize*2+16*19], m0 ;t16
3600 mova [rsp+gprsize*2+16*20], m5 ;t17a
3601 mova [rsp+gprsize*2+16*33], m4 ;t30a
3602 mova [rsp+gprsize*2+16*34], m3 ;t31
3603 mova m0, [rsp+gprsize*2+16*21] ;in9
3604 mova m1, [rsp+gprsize*2+16*22] ;in7
3614 mova [rsp+gprsize*2+16*21], m5 ;t18a
3615 mova [rsp+gprsize*2+16*22], m0 ;t19
3616 mova [rsp+gprsize*2+16*31], m3 ;t28
3617 mova [rsp+gprsize*2+16*32], m4 ;t29a
3618 mova m0, [rsp+gprsize*2+16*23] ;in5
3619 mova m1, [rsp+gprsize*2+16*24] ;in11
3629 mova [rsp+gprsize*2+16*23], m0 ;t20
3630 mova [rsp+gprsize*2+16*24], m5 ;t21a
3631 mova [rsp+gprsize*2+16*29], m4 ;t26a
3632 mova [rsp+gprsize*2+16*30], m3 ;t27
3633 mova m0, [rsp+gprsize*2+16*25] ;in13
3634 mova m2, [rsp+gprsize*2+16*26] ;in3
3644 mova m0, [rsp+gprsize*2+16*19] ;in1
3645 mova m1, [rsp+gprsize*2+16*20] ;in15
3646 mova m2, [rsp+gprsize*2+16*33] ;in17
3647 mova m3, [rsp+gprsize*2+16*34] ;in31
3655 mova [rsp+gprsize*2+16*19], m0 ;t16
3656 mova [rsp+gprsize*2+16*20], m5 ;t17a
3657 mova [rsp+gprsize*2+16*33], m4 ;t30a
3658 mova [rsp+gprsize*2+16*34], m3 ;t31
3659 mova m0, [rsp+gprsize*2+16*21] ;in9
3660 mova m1, [rsp+gprsize*2+16*22] ;in7
3661 mova m2, [rsp+gprsize*2+16*31] ;in25
3662 mova m3, [rsp+gprsize*2+16*32] ;in23
3670 mova [rsp+gprsize*2+16*21], m5 ;t18a
3671 mova [rsp+gprsize*2+16*22], m0 ;t19
3672 mova [rsp+gprsize*2+16*31], m3 ;t28
3673 mova [rsp+gprsize*2+16*32], m4 ;t29a
3674 mova m0, [rsp+gprsize*2+16*23] ;in5
3675 mova m1, [rsp+gprsize*2+16*24] ;in11
3676 mova m2, [rsp+gprsize*2+16*29] ;in21
3677 mova m3, [rsp+gprsize*2+16*30] ;in27
3685 mova [rsp+gprsize*2+16*23], m0 ;t20
3686 mova [rsp+gprsize*2+16*24], m5 ;t21a
3687 mova [rsp+gprsize*2+16*29], m4 ;t26a
3688 mova [rsp+gprsize*2+16*30], m3 ;t27
3689 mova m0, [rsp+gprsize*2+16*25] ;in13
3690 mova m1, [rsp+gprsize*2+16*26] ;in3
3691 mova m2, [rsp+gprsize*2+16*27] ;in29
3692 mova m3, [rsp+gprsize*2+16*28] ;in19
3702 mova m2, [rsp+gprsize*2+16*24] ;t21a
3705 mova [rsp+gprsize*2+16*25], m5 ;t22
3706 mova m2, [rsp+gprsize*2+16*29] ;t26a
3709 mova [rsp+gprsize*2+16*28], m4 ;t25
3711 mova [rsp+gprsize*2+16*24], m5 ;t21a
3712 mova [rsp+gprsize*2+16*29], m1 ;t26a
3714 mova m1, [rsp+gprsize*2+16*23] ;t20
3715 mova m5, [rsp+gprsize*2+16*30] ;t27
3721 mova [rsp+gprsize*2+16*26], m0 ;t23a
3722 mova [rsp+gprsize*2+16*27], m3 ;t24a
3723 mova [rsp+gprsize*2+16*30], m2 ;t27
3725 mova m0, [rsp+gprsize*2+16*20] ;t17a
3726 mova m1, [rsp+gprsize*2+16*21] ;t18a
3727 mova m2, [rsp+gprsize*2+16*32] ;t29a
3728 mova m3, [rsp+gprsize*2+16*33] ;t30a
3734 mova [rsp+gprsize*2+16*20], m0 ;t17
3735 mova [rsp+gprsize*2+16*21], m5 ;t18a
3736 mova [rsp+gprsize*2+16*32], m4 ;t29a
3737 mova [rsp+gprsize*2+16*33], m3 ;t30
3738 mova m0, [rsp+gprsize*2+16*19] ;t16
3739 mova m1, [rsp+gprsize*2+16*22] ;t19
3740 mova m2, [rsp+gprsize*2+16*31] ;t28
3741 mova m3, [rsp+gprsize*2+16*34] ;t31
3747 mova m2, [rsp+gprsize*2+16*15] ;tmp12
3752 mova m5, [rsp+gprsize*2+16*30] ;t27
3753 mova [rsp+gprsize*2+16*22], m6 ;out19
3754 mova [rsp+gprsize*2+16*15], m2 ;out12
3758 mova m2, [rsp+gprsize*2+16*6 ] ;tmp3
3761 mova m4, [rsp+gprsize*2+16*14] ;tmp11
3762 mova [rsp+gprsize*2+16*31], m5 ;out28
3763 mova [rsp+gprsize*2+16*6 ], m2 ;out3
3766 mova m2, [rsp+gprsize*2+16*7 ] ;tmp4
3767 mova [rsp+gprsize*2+16*23], m5 ;out20
3768 mova [rsp+gprsize*2+16*14], m4 ;out11
3771 mova m1, [rsp+gprsize*2+16*26] ;t23a
3772 mova m4, [rsp+gprsize*2+16*27] ;t24a
3773 mova [rsp+gprsize*2+16*30], m5 ;out27
3774 mova [rsp+gprsize*2+16*7 ], m2 ;out4
3780 mova m6, [rsp+gprsize*2+16*18] ;tmp15
3783 mova m0, [rsp+gprsize*2+16*3 ] ;tmp0
3784 mova m1, [rsp+gprsize*2+16*11] ;tmp8
3785 mova [rsp+gprsize*2+16*18], m6 ;out15
3786 mova [rsp+gprsize*2+16*19], m4 ;out16
3791 mova m3, [rsp+gprsize*2+16*10] ;tmp7
3792 mova [rsp+gprsize*2+16*34], m6 ;out31
3793 mova [rsp+gprsize*2+16*11], m1 ;out8
3794 mova [rsp+gprsize*2+16*26], m4 ;out23
3797 mova m1, [rsp+gprsize*2+16*20] ;t17
3798 mova m5, [rsp+gprsize*2+16*25] ;t22
3799 mova m2, [rsp+gprsize*2+16*17] ;tmp14
3800 mova [rsp+gprsize*2+16*27], m3 ;out24
3805 mova m5, [rsp+gprsize*2+16*28] ;t25
3806 mova m1, [rsp+gprsize*2+16*33] ;t30
3807 mova [rsp+gprsize*2+16*17], m2 ;out14
3808 mova [rsp+gprsize*2+16*20], m3 ;out17
3812 mova m5, [rsp+gprsize*2+16*4 ] ;tmp1
3815 mova m1, [rsp+gprsize*2+16*12] ;tmp9
3816 mova [rsp+gprsize*2+16*33], m3 ;out30
3817 mova [rsp+gprsize*2+16*4 ], m5 ;out1
3820 mova m5, [rsp+gprsize*2+16*9 ] ;tmp6
3821 mova [rsp+gprsize*2+16*25], m3 ;out22
3822 mova [rsp+gprsize*2+16*12], m1 ;out9
3825 mova m4, [rsp+gprsize*2+16*21] ;t18a
3826 mova m1, [rsp+gprsize*2+16*24] ;t21a
3827 mova m2, [rsp+gprsize*2+16*16] ;tmp13
3828 mova [rsp+gprsize*2+16*28], m3 ;out25
3829 mova [rsp+gprsize*2+16*9 ], m5 ;out6
3834 mova m1, [rsp+gprsize*2+16*29] ;t26a
3835 mova m3, [rsp+gprsize*2+16*32] ;t29a
3836 mova [rsp+gprsize*2+16*21], m5 ;out18
3837 mova [rsp+gprsize*2+16*16], m2 ;out13
3841 mova m2, [rsp+gprsize*2+16*5 ] ;tmp2
3844 mova m3, [rsp+gprsize*2+16*13] ;tmp10
3845 mova [rsp+gprsize*2+16*32], m1 ;out29
3848 mova m5, [rsp+gprsize*2+16*8 ] ;tmp5
3849 mova [rsp+gprsize*2+16*24], m7 ;out21
3850 mova [rsp+gprsize*2+16*13], m3 ;out10
3854 mova m3, [rsp+gprsize*2+16*6 ] ;out3
3855 mova m4, [rsp+gprsize*2+16*7 ] ;out4
3856 mova [rsp+gprsize*2+16*29], m1 ;out26
3857 mova m6, [rsp+gprsize*2+16*9 ] ;out6
3858 mova m1, [rsp+gprsize*2+16*4 ] ;out1
3915 SAVE_7ROWS rsp+gprsize+16*3, 16
3919 mova m7, [rsp+gprsize+16*0]
3920 SAVE_8ROWS rsp+gprsize+16*11, 16
3923 mova [rsp+gprsize+16*19], m0 ;in1
3924 mova [rsp+gprsize+16*26], m1 ;in3
3925 mova [rsp+gprsize+16*23], m2 ;in5
3926 mova [rsp+gprsize+16*22], m3 ;in7
3927 mova [rsp+gprsize+16*21], m4 ;in9
3928 mova [rsp+gprsize+16*24], m5 ;in11
3929 mova [rsp+gprsize+16*25], m6 ;in13
3930 mova [rsp+gprsize+16*20], m7 ;in15
3939 mova [rsp+gprsize+16*33], m0 ;in17
3940 mova [rsp+gprsize+16*28], m1 ;in19
3941 mova [rsp+gprsize+16*29], m2 ;in21
3942 mova [rsp+gprsize+16*32], m3 ;in23
3943 mova [rsp+gprsize+16*31], m4 ;in25
3944 mova [rsp+gprsize+16*30], m5 ;in27
3945 mova [rsp+gprsize+16*27], m6 ;in29
3946 mova [rsp+gprsize+16*34], m7 ;in31
3950 mova [rsp+gprsize+16*0 ], m7
3965 LOAD_8ROWS rsp+gprsize+16*11, 16
3966 mova [rsp+gprsize+16*0 ], m7
3978 LOAD_8ROWS rsp+gprsize+16*19, 16
3979 mova [rsp+gprsize+16*0 ], m7
3991 LOAD_8ROWS rsp+gprsize+16*27, 16
3992 mova [rsp+gprsize+16*0 ], m7
4019 mova [rsp+16*1], m6
4024 mova [rsp+16*2], m5
4025 mova [rsp+16*1], m6
4026 mova [rsp+16*0], m7
4049 mova [rsp+16*1], m6
4055 mov [rsp+16*3], dstq
4056 mova [rsp+16*2], m5
4057 mova [rsp+16*1], m6
4058 mova [rsp+16*0], m7
4063 mov dstq, [rsp+16*3]
4095 SAVE_7ROWS rsp+gprsize+16*3, 16
4103 LOAD_8ROWS rsp+gprsize+16*3, 16
4104 mova [rsp+gprsize+16*0], m7
4111 mova [rsp+gprsize+16*13], m2 ;in10
4112 mova [rsp+gprsize+16*14], m6 ;in14
4113 mova [rsp+gprsize+16*21], m1 ;in9
4114 mova [rsp+gprsize+16*24], m3 ;in11
4115 mova [rsp+gprsize+16*25], m5 ;in13
4116 mova [rsp+gprsize+16*20], m7 ;in15
4119 SAVE_7ROWS rsp+gprsize+16*3, 16
4127 LOAD_8ROWS rsp+gprsize+16*3, 16
4128 mova [rsp+gprsize+16*0], m7
4133 mova [rsp+gprsize+16*11], m2 ;in2
4134 mova [rsp+gprsize+16*12], m6 ;in6
4135 mova [rsp+gprsize+16*19], m1 ;in1
4136 mova [rsp+gprsize+16*26], m3 ;in3
4137 mova [rsp+gprsize+16*23], m5 ;in5
4138 mova [rsp+gprsize+16*22], m7 ;in7
4149 SAVE_7ROWS rsp+gprsize+16*3, 16
4150 mova m0, [rsp+gprsize+16*11] ;in2
4151 mova m1, [rsp+gprsize+16*12] ;in6
4152 mova m2, [rsp+gprsize+16*13] ;in10
4153 mova m3, [rsp+gprsize+16*14] ;in14
4157 mova m7, [rsp+gprsize+16*0]
4158 SAVE_8ROWS rsp+gprsize+16*11, 16
4169 SAVE_7ROWS rsp+gprsize+16*3, 16
4177 LOAD_8ROWS rsp+gprsize+16*3, 16
4178 mova [rsp+gprsize+16*0], m7
4185 mova [rsp+gprsize+16*15], m2 ;in18
4186 mova [rsp+gprsize+16*16], m6 ;in22
4187 mova [rsp+gprsize+16*33], m1 ;in17
4188 mova [rsp+gprsize+16*28], m3 ;in19
4189 mova [rsp+gprsize+16*29], m5 ;in21
4190 mova [rsp+gprsize+16*32], m7 ;in23
4194 SAVE_7ROWS rsp+gprsize+16*3, 16
4202 LOAD_8ROWS rsp+gprsize+16*3, 16
4203 mova [rsp+gprsize+16*0], m7
4208 mova [rsp+gprsize+16*17], m2 ;in26
4209 mova [rsp+gprsize+16*18], m6 ;in30
4210 mova [rsp+gprsize+16*31], m1 ;in25
4211 mova [rsp+gprsize+16*30], m3 ;in27
4212 mova [rsp+gprsize+16*27], m5 ;in29
4213 mova [rsp+gprsize+16*34], m7 ;in31
4224 SAVE_7ROWS rsp+gprsize+16*3 , 16
4225 LOAD_8ROWS rsp+gprsize+16*11, 16
4227 mova m7, [rsp+gprsize+16*0]
4228 SAVE_8ROWS rsp+gprsize+16*11, 16
4233 mov [rsp+gprsize*1+16*35], eobd
4235 mov [rsp+gprsize*2+16*35], r3
4240 mov dstq, [rsp+gprsize*2+16*35]
4241 mov eobd, [rsp+gprsize*1+16*35]
4253 mova [rsp+gprsize+16*19], m0 ;in1
4254 mova [rsp+gprsize+16*26], m1 ;in3
4255 mova [rsp+gprsize+16*23], m2 ;in5
4256 mova [rsp+gprsize+16*22], m3 ;in7
4257 mova [rsp+gprsize+16*21], m4 ;in9
4258 mova [rsp+gprsize+16*24], m5 ;in11
4259 mova [rsp+gprsize+16*25], m6 ;in13
4260 mova [rsp+gprsize+16*20], m7 ;in15
4273 SAVE_7ROWS rsp+gprsize+16*3, 16
4282 mova m7, [rsp+gprsize+16*0]
4283 SAVE_8ROWS rsp+gprsize+16*11, 16
4294 SAVE_7ROWS rsp+gprsize+16*3, 16
4305 mova m7, [rsp+gprsize+16*0]
4306 SAVE_8ROWS rsp+gprsize+16*11, 16
4317 mova [rsp+gprsize+16*33], m0 ;in17
4318 mova [rsp+gprsize+16*28], m1 ;in19
4319 mova [rsp+gprsize+16*29], m2 ;in21
4320 mova [rsp+gprsize+16*32], m3 ;in23
4321 mova [rsp+gprsize+16*31], m4 ;in25
4322 mova [rsp+gprsize+16*30], m5 ;in27
4323 mova [rsp+gprsize+16*27], m6 ;in29
4324 mova [rsp+gprsize+16*34], m7 ;in31
4342 LOAD_8ROWS rsp+16*11, 16
4343 mova [rsp+16*0], m7
4350 LOAD_8ROWS rsp+16*19, 16
4351 mova [rsp+16*0], m7
4358 LOAD_8ROWS rsp+16*27, 16
4359 mova [rsp+16*0], m7
4382 SAVE_7ROWS rsp+gprsize+16*3, 16
4386 mova m7, [rsp+gprsize+16*0]
4387 SAVE_8ROWS rsp+gprsize+16*11, 16
4390 mova [rsp+gprsize+16*19], m0 ;in1
4391 mova [rsp+gprsize+16*26], m1 ;in3
4392 mova [rsp+gprsize+16*23], m2 ;in5
4393 mova [rsp+gprsize+16*22], m3 ;in7
4394 mova [rsp+gprsize+16*21], m4 ;in9
4395 mova [rsp+gprsize+16*24], m5 ;in11
4396 mova [rsp+gprsize+16*25], m6 ;in13
4397 mova [rsp+gprsize+16*20], m7 ;in15
4400 mova [rsp+gprsize+16*33], m0 ;in17
4401 mova [rsp+gprsize+16*28], m1 ;in19
4402 mova [rsp+gprsize+16*29], m2 ;in21
4403 mova [rsp+gprsize+16*32], m3 ;in23
4404 mova [rsp+gprsize+16*31], m4 ;in25
4405 mova [rsp+gprsize+16*30], m5 ;in27
4406 mova [rsp+gprsize+16*27], m6 ;in29
4407 mova [rsp+gprsize+16*34], m7 ;in31
4411 mova [rsp+gprsize+16*0 ], m7
4417 LOAD_8ROWS rsp+gprsize+16*11, 16
4418 mova [rsp+gprsize+16*0 ], m7
4424 LOAD_8ROWS rsp+gprsize+16*19, 16
4425 mova [rsp+gprsize+16*0 ], m7
4431 LOAD_8ROWS rsp+gprsize+16*27, 16
4432 mova [rsp+gprsize+16*0 ], m7
4460 mov [rsp+16*3], r4
4461 mov [rsp+gprsize+16*3], r3d
4462 mov [rsp+gprsize*2+16*3], coeffq
4466 mova [rsp+16*1], m6
4471 mova [rsp+16*0], m2
4472 mova [rsp+16*1], m3
4473 mova [rsp+16*2], m4
4479 mova m2, [rsp+16*0]
4480 mova [rsp+16*0], m7
4482 mova m7, [rsp+16*2]
4483 mova [rsp+16*2], m5
4485 mova m5, [rsp+16*1]
4486 mova [rsp+16*1], m6
4499 mov coeffq, [rsp+gprsize*2+16*3]
4501 mov r3d, [rsp+gprsize+16*3]
4503 mov [rsp+gprsize+16*3], dstq
4504 mov dstq, [rsp+16*3]
4523 mov [rsp+16*3], r4
4528 mova [rsp+16*1], m6
4531 mova [rsp+16*1], m5
4532 mova [rsp+16*2], m6
4536 mova m5, [rsp+16*1]
4537 mova [rsp+16*0], m7
4539 mova m7, [rsp+16*2]
4543 mova [rsp+16*2], m5
4544 mova [rsp+16*1], m7
4559 mov dstq, [rsp+16*3]
4561 mov [rsp+16*3], r4
4591 mov [rsp+gprsize*1+16*35], eobd
4599 mov [rsp+gprsize*2+16*35], coeffq
4603 mova [rsp+gprsize+16*19], m0 ;in1
4604 mova [rsp+gprsize+16*26], m1 ;in3
4605 mova [rsp+gprsize+16*23], m2 ;in5
4606 mova [rsp+gprsize+16*22], m3 ;in7
4607 mova [rsp+gprsize+16*21], m4 ;in9
4608 mova [rsp+gprsize+16*24], m5 ;in11
4609 mova [rsp+gprsize+16*25], m6 ;in13
4610 mova [rsp+gprsize+16*20], m7 ;in15
4612 mov tx2d, [rsp+gprsize*1+16*35]
4619 SAVE_7ROWS rsp+gprsize+16*3, 16
4622 mova m7, [rsp+gprsize+16*0]
4623 SAVE_8ROWS rsp+gprsize+16*11, 16
4626 mova [rsp+gprsize+16*33], m0 ;in17
4627 mova [rsp+gprsize+16*28], m1 ;in19
4628 mova [rsp+gprsize+16*29], m2 ;in21
4629 mova [rsp+gprsize+16*32], m3 ;in23
4630 mova [rsp+gprsize+16*31], m4 ;in25
4631 mova [rsp+gprsize+16*30], m5 ;in27
4632 mova [rsp+gprsize+16*27], m6 ;in29
4633 mova [rsp+gprsize+16*34], m7 ;in31
4647 SAVE_7ROWS rsp+gprsize+16*3, 16
4655 mova m7, [rsp+gprsize+16*0]
4656 SAVE_8ROWS rsp+gprsize+16*11, 16
4661 mova [rsp+gprsize+16*0], m7
4668 LOAD_8ROWS rsp+gprsize+16*11, 16
4669 mova [rsp+gprsize+16*0], m7
4676 LOAD_8ROWS rsp+gprsize+16*19, 16
4677 mova [rsp+gprsize+16*0], m7
4684 LOAD_8ROWS rsp+gprsize+16*27, 16
4685 mova [rsp+gprsize+16*0], m7
4699 mov coeffq, [rsp+gprsize*2+16*35]
4704 mov [rsp+gprsize*3+16*35], r3d
4706 mov [rsp+gprsize*2+16*35], r3
4716 mova [rsp+gprsize+16*19], m0 ;in1
4717 mova [rsp+gprsize+16*26], m1 ;in3
4718 mova [rsp+gprsize+16*23], m2 ;in5
4719 mova [rsp+gprsize+16*22], m3 ;in7
4720 mova [rsp+gprsize+16*21], m4 ;in9
4721 mova [rsp+gprsize+16*24], m5 ;in11
4722 mova [rsp+gprsize+16*25], m6 ;in13
4723 mova [rsp+gprsize+16*20], m7 ;in15
4725 mov eobd, [rsp+gprsize*1+16*35]
4739 SAVE_7ROWS rsp+gprsize+16*3, 16
4750 mova m7, [rsp+gprsize+16*0]
4751 SAVE_8ROWS rsp+gprsize+16*11, 16
4761 mova [rsp+gprsize+16*33], m0 ;in17
4762 mova [rsp+gprsize+16*28], m1 ;in19
4763 mova [rsp+gprsize+16*29], m2 ;in21
4764 mova [rsp+gprsize+16*32], m3 ;in23
4765 mova [rsp+gprsize+16*31], m4 ;in25
4766 mova [rsp+gprsize+16*30], m5 ;in27
4767 mova [rsp+gprsize+16*27], m6 ;in29
4768 mova [rsp+gprsize+16*34], m7 ;in31
4781 SAVE_7ROWS rsp+gprsize+16*3, 16
4790 mova m7, [rsp+gprsize+16*0]
4791 SAVE_8ROWS rsp+gprsize+16*11, 16
4803 mov dstq, [rsp+gprsize*2+16*35]
4804 mov r3d, [rsp+gprsize*3+16*35]
4822 mov [rsp+gprsize*0+16*3], r4
4823 mov [rsp+gprsize*1+16*3], r3d
4824 mov [rsp+gprsize*2+16*3], r3d
4825 mov [rsp+gprsize*3+16*3], coeffq
4829 mova [rsp+16*1], m6
4833 mova [rsp+16*0], m7
4836 mova [rsp+16*1], m6
4837 mova [rsp+16*2], m5
4848 mov r4d, [rsp+gprsize*2+16*3]
4852 mov dstq, [rsp+gprsize*0+16*3]
4853 mov coeffq, [rsp+gprsize*3+16*3]
4854 mov [rsp+gprsize*2+16*3], r4
4857 mov [rsp+gprsize*0+16*3], r3
4858 mov r3d, [rsp+gprsize*1+16*3]
4859 mov [rsp+gprsize*3+16*3], coeffq
4889 mov [rsp+gprsize*1+16*67], eobd
4897 mov [rsp+gprsize*2+16*67], coeffq
4902 SAVE_7ROWS rsp+gprsize+16*3, 16
4911 LOAD_8ROWS rsp+gprsize+16*3, 16
4912 mova [rsp+gprsize+16*0], m7
4924 mov coeffq, [rsp+gprsize*2+16*67]
4927 mov [rsp+gprsize*2+16*67], r4
4931 mov [rsp+gprsize*3+16*67], r3d
4932 mov eobd, [rsp+gprsize*1+16*67]
4942 mova [rsp+gprsize+16*35], m0 ;in1
4943 mova [rsp+gprsize+16*49], m1 ;in3
4944 mova [rsp+gprsize+16*43], m2 ;in5
4945 mova [rsp+gprsize+16*41], m3 ;in7
4946 mova [rsp+gprsize+16*39], m4 ;in9
4947 mova [rsp+gprsize+16*45], m5 ;in11
4948 mova [rsp+gprsize+16*47], m6 ;in13
4949 mova [rsp+gprsize+16*37], m7 ;in15
4964 SAVE_7ROWS rsp+gprsize+16*3, 16
4974 mova m7, [rsp+gprsize+16*0]
4975 SAVE_8ROWS rsp+gprsize+16*11, 16
4985 mova [rsp+gprsize+16*19], m0
4986 mova [rsp+gprsize+16*26], m1
4987 mova [rsp+gprsize+16*23], m2
4988 mova [rsp+gprsize+16*22], m3
4989 mova [rsp+gprsize+16*21], m4
4990 mova [rsp+gprsize+16*24], m5
4991 mova [rsp+gprsize+16*25], m6
4992 mova [rsp+gprsize+16*20], m7
4995 SAVE_8ROWS rsp+gprsize+16*3, 16
5005 mova [rsp+gprsize+16*63], m0 ;in17
5006 mova [rsp+gprsize+16*53], m1 ;in19
5007 mova [rsp+gprsize+16*55], m2 ;in21
5008 mova [rsp+gprsize+16*61], m3 ;in23
5009 mova [rsp+gprsize+16*59], m4 ;in25
5010 mova [rsp+gprsize+16*57], m5 ;in27
5011 mova [rsp+gprsize+16*51], m6 ;in29
5012 mova [rsp+gprsize+16*65], m7 ;in31
5020 SAVE_7ROWS rsp+gprsize+16*3, 16
5028 mova m7, [rsp+gprsize+16*0]
5029 SAVE_8ROWS rsp+gprsize+16*11, 16
5035 mova [rsp+gprsize+16*19], m0 ;in1
5036 mova [rsp+gprsize+16*26], m1 ;in3
5037 mova [rsp+gprsize+16*23], m2 ;in5
5038 mova [rsp+gprsize+16*22], m3 ;in7
5041 SAVE_8ROWS rsp+gprsize+16*3, 16
5046 LOAD_8ROWS rsp+gprsize+16*3, 16
5047 mova [rsp+gprsize+16*0], m7
5052 LOAD_8ROWS rsp+gprsize+16*35, 16
5054 lea r3, [rsp+16*32+gprsize]
5056 mov dstq, [rsp+gprsize*2+16*67]
5057 mov r3d, [rsp+gprsize*3+16*67]
5059 mov [rsp+gprsize*2+16*67], r4
5099 mova m0, [rsp+gprsize*2+16*35] ;in1
5103 mova [rsp+gprsize*2+16*35], m0 ;t32
5104 mova [rsp+gprsize*2+16*66], m3 ;t63
5106 mova [rsp+gprsize*2+16*36], m3 ;t33a
5107 mova [rsp+gprsize*2+16*65], m0 ;t62a
5109 mova m1, [rsp+gprsize*2+16*37] ;in15
5112 mova [rsp+gprsize*2+16*38], m1 ;t35
5113 mova [rsp+gprsize*2+16*63], m2 ;t60
5115 mova [rsp+gprsize*2+16*37], m2 ;t34a
5116 mova [rsp+gprsize*2+16*64], m1 ;t61a
5118 mova m0, [rsp+gprsize*2+16*39] ;in9
5121 mova [rsp+gprsize*2+16*39], m0 ;t36
5122 mova [rsp+gprsize*2+16*62], m3 ;t59
5124 mova [rsp+gprsize*2+16*40], m3 ;t37a
5125 mova [rsp+gprsize*2+16*61], m0 ;t58a
5127 mova m1, [rsp+gprsize*2+16*41] ;in7
5130 mova [rsp+gprsize*2+16*42], m1 ;t39
5131 mova [rsp+gprsize*2+16*59], m2 ;t56
5133 mova [rsp+gprsize*2+16*41], m2 ;t38a
5134 mova [rsp+gprsize*2+16*60], m1 ;t57a
5136 mova m0, [rsp+gprsize*2+16*43] ;in5
5139 mova [rsp+gprsize*2+16*43], m0 ;t40
5140 mova [rsp+gprsize*2+16*58], m3 ;t55
5142 mova [rsp+gprsize*2+16*44], m3 ;t41a
5143 mova [rsp+gprsize*2+16*57], m0 ;t54a
5145 mova m1, [rsp+gprsize*2+16*45] ;in11
5148 mova [rsp+gprsize*2+16*46], m1 ;t43
5149 mova [rsp+gprsize*2+16*55], m2 ;t52
5151 mova [rsp+gprsize*2+16*45], m2 ;t42a
5152 mova [rsp+gprsize*2+16*56], m1 ;t53a
5154 mova m0, [rsp+gprsize*2+16*47] ;in13
5158 mova [rsp+gprsize*2+16*54], m3 ;t51
5160 mova [rsp+gprsize*2+16*48], m3 ;t45a
5161 mova [rsp+gprsize*2+16*53], m0 ;t50a
5163 mova m0, [rsp+gprsize*2+16*49] ;in3
5173 mova m0, [rsp+gprsize*2+16*35] ;in1
5174 mova m1, [rsp+gprsize*2+16*65] ;in31
5185 mova [rsp+gprsize*2+16*35], m0 ;t32
5186 mova [rsp+gprsize*2+16*36], m5 ;t33a
5187 mova [rsp+gprsize*2+16*65], m4 ;t62a
5188 mova [rsp+gprsize*2+16*66], m3 ;t63
5190 mova m0, [rsp+gprsize*2+16*63] ;in17
5191 mova m1, [rsp+gprsize*2+16*37] ;in15
5201 mova [rsp+gprsize*2+16*37], m5 ;t34a
5202 mova [rsp+gprsize*2+16*38], m0 ;t35
5203 mova [rsp+gprsize*2+16*63], m3 ;t60
5204 mova [rsp+gprsize*2+16*64], m4 ;t61a
5206 mova m0, [rsp+gprsize*2+16*39] ;in9
5207 mova m1, [rsp+gprsize*2+16*61] ;in23
5217 mova [rsp+gprsize*2+16*39], m0 ;t36
5218 mova [rsp+gprsize*2+16*40], m5 ;t37a
5219 mova [rsp+gprsize*2+16*61], m4 ;t58a
5220 mova [rsp+gprsize*2+16*62], m3 ;t59
5222 mova m0, [rsp+gprsize*2+16*59] ;in25
5223 mova m1, [rsp+gprsize*2+16*41] ;in7
5233 mova [rsp+gprsize*2+16*41], m5 ;t38a
5234 mova [rsp+gprsize*2+16*42], m0 ;t39
5235 mova [rsp+gprsize*2+16*59], m3 ;t56
5236 mova [rsp+gprsize*2+16*60], m4 ;t57a
5238 mova m0, [rsp+gprsize*2+16*43] ;in5
5239 mova m1, [rsp+gprsize*2+16*57] ;in27
5249 mova [rsp+gprsize*2+16*43], m0 ;t40
5250 mova [rsp+gprsize*2+16*44], m5 ;t41a
5251 mova [rsp+gprsize*2+16*57], m4 ;t54a
5252 mova [rsp+gprsize*2+16*58], m3 ;t55
5254 mova m0, [rsp+gprsize*2+16*55] ;in21
5255 mova m1, [rsp+gprsize*2+16*45] ;in11
5265 mova [rsp+gprsize*2+16*45], m5 ;t42a
5266 mova [rsp+gprsize*2+16*46], m0 ;t43
5267 mova [rsp+gprsize*2+16*55], m3 ;t52
5268 mova [rsp+gprsize*2+16*56], m4 ;t53a
5270 mova m0, [rsp+gprsize*2+16*47] ;in13
5271 mova m1, [rsp+gprsize*2+16*53] ;in19
5282 mova [rsp+gprsize*2+16*48], m5 ;t45a
5283 mova [rsp+gprsize*2+16*53], m4 ;t50a
5284 mova [rsp+gprsize*2+16*54], m3 ;t51
5286 mova m0, [rsp+gprsize*2+16*51] ;in29
5287 mova m1, [rsp+gprsize*2+16*49] ;in3
5300 mova m1, [rsp+gprsize*2+16*54] ;t51
5305 mova [rsp+gprsize*2+16*50], m0 ;t47a
5306 mova [rsp+gprsize*2+16*51], m3 ;t48a
5308 mova [rsp+gprsize*2+16*47], m6 ;t44
5309 mova [rsp+gprsize*2+16*54], m2 ;t51
5311 mova m0, [rsp+gprsize*2+16*48] ;t45a
5312 mova m3, [rsp+gprsize*2+16*53] ;t50a
5318 mova [rsp+gprsize*2+16*48], m6 ;t45a
5319 mova [rsp+gprsize*2+16*49], m4 ;t46
5320 mova [rsp+gprsize*2+16*52], m5 ;t49
5321 mova [rsp+gprsize*2+16*53], m2 ;t50a
5323 mova m0, [rsp+gprsize*2+16*43] ;t40
5324 mova m2, [rsp+gprsize*2+16*46] ;t43
5325 mova m3, [rsp+gprsize*2+16*55] ;t52
5326 mova m1, [rsp+gprsize*2+16*58] ;t55
5332 mova [rsp+gprsize*2+16*43], m0 ;t40a
5333 mova [rsp+gprsize*2+16*46], m5 ;t43
5334 mova [rsp+gprsize*2+16*55], m4 ;t52
5335 mova [rsp+gprsize*2+16*58], m1 ;t55a
5337 mova m0, [rsp+gprsize*2+16*44] ;t41a
5338 mova m2, [rsp+gprsize*2+16*45] ;t42a
5339 mova m3, [rsp+gprsize*2+16*56] ;t53a
5340 mova m1, [rsp+gprsize*2+16*57] ;t54a
5346 mova [rsp+gprsize*2+16*44], m0 ;t41
5347 mova [rsp+gprsize*2+16*45], m5 ;t42a
5348 mova [rsp+gprsize*2+16*56], m4 ;t53a
5349 mova [rsp+gprsize*2+16*57], m1 ;t54
5351 mova m0, [rsp+gprsize*2+16*41] ;t38a
5352 mova m2, [rsp+gprsize*2+16*40] ;t37a
5353 mova m3, [rsp+gprsize*2+16*61] ;t58a
5354 mova m1, [rsp+gprsize*2+16*60] ;t57a
5360 mova [rsp+gprsize*2+16*41], m0 ;t38
5361 mova [rsp+gprsize*2+16*40], m5 ;t37a
5362 mova [rsp+gprsize*2+16*61], m4 ;t58a
5363 mova [rsp+gprsize*2+16*60], m1 ;t57
5365 mova m0, [rsp+gprsize*2+16*42] ;t39
5366 mova m2, [rsp+gprsize*2+16*39] ;t36
5367 mova m3, [rsp+gprsize*2+16*62] ;t59
5368 mova m1, [rsp+gprsize*2+16*59] ;t56
5374 mova [rsp+gprsize*2+16*42], m0 ;t39a
5375 mova [rsp+gprsize*2+16*39], m5 ;t36
5376 mova [rsp+gprsize*2+16*62], m4 ;t59
5377 mova [rsp+gprsize*2+16*59], m1 ;t56a
5379 mova m0, [rsp+gprsize*2+16*35] ;t32
5380 mova m2, [rsp+gprsize*2+16*38] ;t35
5381 mova m3, [rsp+gprsize*2+16*63] ;t60
5382 mova m1, [rsp+gprsize*2+16*66] ;t63
5388 mova [rsp+gprsize*2+16*35], m0 ;t32a
5389 mova [rsp+gprsize*2+16*38], m5 ;t35
5390 mova [rsp+gprsize*2+16*63], m4 ;t60
5391 mova [rsp+gprsize*2+16*66], m1 ;t63a
5393 mova m0, [rsp+gprsize*2+16*36] ;t33a
5394 mova m2, [rsp+gprsize*2+16*37] ;t34a
5395 mova m3, [rsp+gprsize*2+16*64] ;t61a
5396 mova m1, [rsp+gprsize*2+16*65] ;t62a
5403 mova m2, [rsp+gprsize*2+16*41] ;t38
5404 mova m3, [rsp+gprsize*2+16*60] ;t57
5409 mova [rsp+gprsize*2+16*36], m0 ;t33a
5410 mova [rsp+gprsize*2+16*65], m1 ;t62a
5412 mova [rsp+gprsize*2+16*41], m2 ;t38
5413 mova [rsp+gprsize*2+16*60], m6 ;t57
5415 mova m2, [rsp+gprsize*2+16*40] ;t37
5416 mova m3, [rsp+gprsize*2+16*61] ;t58
5422 mova [rsp+gprsize*2+16*37], m5 ;t34
5423 mova [rsp+gprsize*2+16*64], m4 ;t61
5424 mova [rsp+gprsize*2+16*40], m1 ;t37a
5425 mova [rsp+gprsize*2+16*61], m0 ;t58a
5427 mova m0, [rsp+gprsize*2+16*38] ;t35
5428 mova m2, [rsp+gprsize*2+16*39] ;t36
5429 mova m3, [rsp+gprsize*2+16*62] ;t59
5430 mova m1, [rsp+gprsize*2+16*63] ;t60
5436 mova [rsp+gprsize*2+16*38], m0 ;t35a
5437 mova [rsp+gprsize*2+16*39], m5 ;t36
5438 mova [rsp+gprsize*2+16*62], m4 ;t59
5439 mova [rsp+gprsize*2+16*63], m1 ;t60a
5441 mova m0, [rsp+gprsize*2+16*35] ;t32a
5442 mova m2, [rsp+gprsize*2+16*42] ;t39a
5443 mova m3, [rsp+gprsize*2+16*59] ;t56a
5444 mova m1, [rsp+gprsize*2+16*66] ;t63a
5450 mova [rsp+gprsize*2+16*35], m0 ;t32
5451 mova [rsp+gprsize*2+16*42], m5 ;t39a
5452 mova [rsp+gprsize*2+16*59], m4 ;t56a
5453 mova [rsp+gprsize*2+16*66], m1 ;t63
5455 mova m0, [rsp+gprsize*2+16*50] ;t47a
5456 mova m2, [rsp+gprsize*2+16*43] ;t40a
5457 mova m3, [rsp+gprsize*2+16*58] ;t55a
5458 mova m1, [rsp+gprsize*2+16*51] ;t48a
5464 mova [rsp+gprsize*2+16*50], m0 ;t47
5465 mova [rsp+gprsize*2+16*43], m5 ;t40a
5466 mova [rsp+gprsize*2+16*58], m4 ;t55a
5467 mova [rsp+gprsize*2+16*51], m1 ;t48
5469 mova m0, [rsp+gprsize*2+16*49] ;t46
5470 mova m2, [rsp+gprsize*2+16*44] ;t41
5471 mova m3, [rsp+gprsize*2+16*57] ;t54
5472 mova m1, [rsp+gprsize*2+16*52] ;t49
5478 mova [rsp+gprsize*2+16*49], m0 ;t46a
5479 mova [rsp+gprsize*2+16*44], m5 ;t41
5480 mova [rsp+gprsize*2+16*57], m4 ;t54
5481 mova [rsp+gprsize*2+16*52], m1 ;t49a
5483 mova m0, [rsp+gprsize*2+16*48] ;t45a
5484 mova m2, [rsp+gprsize*2+16*45] ;t42a
5485 mova m3, [rsp+gprsize*2+16*56] ;t53a
5486 mova m1, [rsp+gprsize*2+16*53] ;t50a
5492 mova [rsp+gprsize*2+16*48], m0 ;t45
5493 mova [rsp+gprsize*2+16*45], m5 ;t42a
5494 mova [rsp+gprsize*2+16*56], m4 ;t53a
5495 mova [rsp+gprsize*2+16*53], m1 ;t50
5497 mova m0, [rsp+gprsize*2+16*47] ;t44
5498 mova m2, [rsp+gprsize*2+16*46] ;t43
5499 mova m3, [rsp+gprsize*2+16*55] ;t52
5500 mova m1, [rsp+gprsize*2+16*54] ;t51
5507 mova m2, [rsp+gprsize*2+16*38] ;t35a
5508 mova m3, [rsp+gprsize*2+16*31] ;tmp[28]
5513 mova m3, [rsp+gprsize*2+16*63] ;t60a
5514 mova [rsp+gprsize*2+16*38], m0 ;out35
5515 mova [rsp+gprsize*2+16*31], m2 ;out28
5519 mova m2, [rsp+gprsize*2+16*6 ] ;tmp[3]
5522 mova m3, [rsp+gprsize*2+16*22] ;tmp[19]
5523 mova [rsp+gprsize*2+16*63], m1 ;out60
5524 mova [rsp+gprsize*2+16*6 ], m2 ;out3
5527 mova m2, [rsp+gprsize*2+16*15] ;tmp[12]
5529 mova m0, [rsp+gprsize*2+16*39] ;t36
5530 mova [rsp+gprsize*2+16*47], m1 ;out44
5531 mova [rsp+gprsize*2+16*22], m3 ;out19
5532 mova m1, [rsp+gprsize*2+16*62] ;t59
5535 mova [rsp+gprsize*2+16*54], m3 ;out51
5536 mova [rsp+gprsize*2+16*15], m2 ;out12
5539 mova m5, [rsp+gprsize*2+16*30] ;tmp[27]
5543 mova m4, [rsp+gprsize*2+16*7 ] ;tmp[4 ]
5548 mova [rsp+gprsize*2+16*39], m6 ;out36
5549 mova [rsp+gprsize*2+16*30], m5 ;out27
5550 mova [rsp+gprsize*2+16*62], m0 ;out59
5551 mova [rsp+gprsize*2+16*7 ], m4 ;out4
5552 mova m0, [rsp+gprsize*2+16*23] ;tmp[20]
5553 mova m5, [rsp+gprsize*2+16*14] ;tmp[11]
5558 mova [rsp+gprsize*2+16*46], m4 ;out43
5559 mova [rsp+gprsize*2+16*23], m0 ;out20
5560 mova [rsp+gprsize*2+16*55], m6 ;out52
5561 mova [rsp+gprsize*2+16*14], m5 ;out11
5563 mova m0, [rsp+gprsize*2+16*40] ;t37a
5564 mova m5, [rsp+gprsize*2+16*45] ;t42a
5565 mova m3, [rsp+gprsize*2+16*56] ;t53a
5566 mova m1, [rsp+gprsize*2+16*61] ;t58a
5567 mova m2, [rsp+gprsize*2+16*29] ;tmp[26]
5573 mova m3, [rsp+gprsize*2+16*8 ] ;tmp[5 ]
5578 mova [rsp+gprsize*2+16*40], m6 ;out37
5579 mova [rsp+gprsize*2+16*29], m2 ;out26
5580 mova [rsp+gprsize*2+16*61], m0 ;out58
5581 mova [rsp+gprsize*2+16*8 ], m3 ;out5
5582 mova m0, [rsp+gprsize*2+16*24] ;tmp[21]
5583 mova m1, [rsp+gprsize*2+16*13] ;tmp[10]
5588 mova [rsp+gprsize*2+16*45], m2 ;out42
5589 mova [rsp+gprsize*2+16*24], m0 ;out21
5590 mova [rsp+gprsize*2+16*56], m3 ;out53
5591 mova [rsp+gprsize*2+16*13], m1 ;out10
5593 mova m0, [rsp+gprsize*2+16*41] ;t38
5594 mova m5, [rsp+gprsize*2+16*44] ;t41
5595 mova m3, [rsp+gprsize*2+16*57] ;t54
5596 mova m1, [rsp+gprsize*2+16*60] ;t57
5597 mova m2, [rsp+gprsize*2+16*28] ;tmp[25]
5603 mova m3, [rsp+gprsize*2+16*9 ] ;tmp[6 ]
5608 mova [rsp+gprsize*2+16*41], m6 ;out38
5609 mova [rsp+gprsize*2+16*28], m2 ;out25
5610 mova [rsp+gprsize*2+16*60], m0 ;out57
5611 mova [rsp+gprsize*2+16*9 ], m3 ;out6
5612 mova m0, [rsp+gprsize*2+16*25] ;tmp[22]
5613 mova m1, [rsp+gprsize*2+16*12] ;tmp[9 ]
5618 mova [rsp+gprsize*2+16*44], m2 ;out41
5619 mova [rsp+gprsize*2+16*25], m0 ;out22
5620 mova [rsp+gprsize*2+16*57], m3 ;out54
5621 mova [rsp+gprsize*2+16*12], m1 ;out9
5623 mova m0, [rsp+gprsize*2+16*42] ;t39a
5624 mova m5, [rsp+gprsize*2+16*43] ;t40a
5625 mova m3, [rsp+gprsize*2+16*58] ;t55a
5626 mova m1, [rsp+gprsize*2+16*59] ;t56a
5627 mova m2, [rsp+gprsize*2+16*27] ;tmp[24]
5633 mova m3, [rsp+gprsize*2+16*10] ;tmp[7 ]
5638 mova [rsp+gprsize*2+16*42], m6 ;out39
5639 mova [rsp+gprsize*2+16*27], m2 ;out24
5640 mova [rsp+gprsize*2+16*59], m0 ;out56
5641 mova [rsp+gprsize*2+16*10], m3 ;out7
5642 mova m0, [rsp+gprsize*2+16*26] ;tmp[23]
5643 mova m1, [rsp+gprsize*2+16*11] ;tmp[8 ]
5648 mova [rsp+gprsize*2+16*43], m2 ;out40
5649 mova [rsp+gprsize*2+16*26], m0 ;out23
5650 mova [rsp+gprsize*2+16*58], m3 ;out55
5651 mova [rsp+gprsize*2+16*11], m1 ;out8
5653 mova m0, [rsp+gprsize*2+16*37] ;t34
5654 mova m5, [rsp+gprsize*2+16*48] ;t45
5655 mova m3, [rsp+gprsize*2+16*53] ;t50
5656 mova m1, [rsp+gprsize*2+16*64] ;t61
5657 mova m2, [rsp+gprsize*2+16*32] ;tmp[29]
5663 mova m3, [rsp+gprsize*2+16*5 ] ;tmp[2 ]
5668 mova [rsp+gprsize*2+16*37], m6 ;out34
5669 mova [rsp+gprsize*2+16*32], m2 ;out29
5670 mova [rsp+gprsize*2+16*64], m0 ;out61
5671 mova [rsp+gprsize*2+16*5 ], m3 ;out2
5672 mova m0, [rsp+gprsize*2+16*21] ;tmp[18]
5673 mova m1, [rsp+gprsize*2+16*16] ;tmp[13]
5678 mova [rsp+gprsize*2+16*48], m2 ;out45
5679 mova [rsp+gprsize*2+16*21], m0 ;out18
5680 mova [rsp+gprsize*2+16*53], m3 ;out50
5681 mova [rsp+gprsize*2+16*16], m1 ;out13
5683 mova m0, [rsp+gprsize*2+16*36] ;t33a
5684 mova m5, [rsp+gprsize*2+16*49] ;t46a
5685 mova m3, [rsp+gprsize*2+16*52] ;t49a
5686 mova m1, [rsp+gprsize*2+16*65] ;t62a
5687 mova m2, [rsp+gprsize*2+16*33] ;tmp[30]
5693 mova m3, [rsp+gprsize*2+16*4 ] ;tmp[1 ]
5698 mova [rsp+gprsize*2+16*36], m6 ;out33
5699 mova [rsp+gprsize*2+16*33], m2 ;out30
5700 mova [rsp+gprsize*2+16*65], m0 ;out62
5701 mova [rsp+gprsize*2+16*4 ], m3 ;out1
5702 mova m0, [rsp+gprsize*2+16*20] ;tmp[17]
5703 mova m1, [rsp+gprsize*2+16*17] ;tmp[14]
5708 mova [rsp+gprsize*2+16*49], m2 ;out46
5709 mova [rsp+gprsize*2+16*20], m0 ;out17
5710 mova [rsp+gprsize*2+16*52], m3 ;out49
5711 mova [rsp+gprsize*2+16*17], m1 ;out14
5713 mova m0, [rsp+gprsize*2+16*35] ;t32
5714 mova m5, [rsp+gprsize*2+16*50] ;t47
5715 mova m3, [rsp+gprsize*2+16*51] ;t48
5716 mova m1, [rsp+gprsize*2+16*66] ;t63
5717 mova m2, [rsp+gprsize*2+16*34] ;tmp[31]
5723 mova m3, [rsp+gprsize*2+16*3 ] ;tmp[0 ]
5728 mova [rsp+gprsize*2+16*35], m6 ;out32
5729 mova [rsp+gprsize*2+16*34], m2 ;out31
5730 mova [rsp+gprsize*2+16*66], m0 ;out63
5731 mova [rsp+gprsize*2+16*3 ], m3 ;out0
5732 mova m0, [rsp+gprsize*2+16*19] ;tmp[16]
5733 mova m1, [rsp+gprsize*2+16*18] ;tmp[15]
5738 mova [rsp+gprsize*2+16*50], m2 ;out47
5739 mova [rsp+gprsize*2+16*19], m0 ;out16
5740 mova [rsp+gprsize*2+16*51], m3 ;out48
5741 mova [rsp+gprsize*2+16*18], m1 ;out15
5835 mov [rsp+gprsize*2+16*67], dstq
5836 lea dstq, [rsp+gprsize+16*68]
5843 SAVE_7ROWS rsp+gprsize+16*3, 16
5850 mova m7, [rsp+gprsize+16*0]
5851 SAVE_8ROWS rsp+gprsize+16*11, 16
5854 mova [rsp+gprsize+16*19], m0
5855 mova [rsp+gprsize+16*26], m1
5856 mova [rsp+gprsize+16*23], m2
5857 mova [rsp+gprsize+16*22], m3
5858 mova [rsp+gprsize+16*21], m4
5859 mova [rsp+gprsize+16*24], m5
5860 mova [rsp+gprsize+16*25], m6
5861 mova [rsp+gprsize+16*20], m7
5864 SAVE_8ROWS rsp+gprsize+16*3, 16
5867 mova [rsp+gprsize+16*35], m0 ;in1
5868 mova [rsp+gprsize+16*49], m1 ;in3
5869 mova [rsp+gprsize+16*43], m2 ;in5
5870 mova [rsp+gprsize+16*41], m3 ;in7
5871 mova [rsp+gprsize+16*39], m4 ;in9
5872 mova [rsp+gprsize+16*45], m5 ;in11
5873 mova [rsp+gprsize+16*47], m6 ;in13
5874 mova [rsp+gprsize+16*37], m7 ;in15
5877 mova [rsp+gprsize+16*63], m0 ;in17
5878 mova [rsp+gprsize+16*53], m1 ;in19
5879 mova [rsp+gprsize+16*55], m2 ;in21
5880 mova [rsp+gprsize+16*61], m3 ;in23
5881 mova [rsp+gprsize+16*59], m4 ;in25
5882 mova [rsp+gprsize+16*57], m5 ;in27
5883 mova [rsp+gprsize+16*51], m6 ;in29
5884 mova [rsp+gprsize+16*65], m7 ;in31
5888 LOAD_8ROWS rsp+gprsize+16*3, 16
5889 mova [rsp+gprsize+16*0], m7
5896 LOAD_8ROWS rsp+gprsize+16*11, 16
5897 mova [rsp+gprsize+16*0], m7
5904 LOAD_8ROWS rsp+gprsize+16*19, 16
5905 mova [rsp+gprsize+16*0], m7
5912 LOAD_8ROWS rsp+gprsize+16*27, 16
5913 mova [rsp+gprsize+16*0], m7
5920 LOAD_8ROWS rsp+gprsize+16*35, 16
5921 mova [rsp+gprsize+16*0], m7
5928 LOAD_8ROWS rsp+gprsize+16*43, 16
5929 mova [rsp+gprsize+16*0], m7
5936 LOAD_8ROWS rsp+gprsize+16*51, 16
5937 mova [rsp+gprsize+16*0], m7
5944 LOAD_8ROWS rsp+gprsize+16*59, 16
5945 mova [rsp+gprsize+16*0], m7
5959 mov dstq, [rsp+gprsize*2+16*67]
5964 mov [rsp+gprsize*1+16*67], r3d
5969 SAVE_7ROWS rsp+gprsize+16*3, 16
5980 LOAD_8ROWS rsp+gprsize+16*3, 16
5981 mova [rsp+gprsize+16*0], m7
5991 mov r3d, [rsp+gprsize*1+16*67]
5992 mov dstq, [rsp+gprsize*2+16*67]
5994 mov [rsp+gprsize*2+16*67], dstq
5999 lea coeffq, [rsp+gprsize+16*68]
6001 mov [rsp+gprsize*1+16*67], r3d
6006 SAVE_7ROWS rsp+gprsize+16*3, 16
6017 LOAD_8ROWS rsp+gprsize+16*3, 16
6018 mova [rsp+gprsize+16*0], m7
6026 mov r3d, [rsp+gprsize*1+16*67]
6027 mov dstq, [rsp+gprsize*2+16*67]
6029 mov [rsp+gprsize*2+16*67], dstq
6059 mov [rsp+gprsize*1+16*67], eobd
6067 mov [rsp+gprsize*2+16*67], coeffq
6071 mova [rsp+gprsize+16*19], m0 ;in1
6072 mova [rsp+gprsize+16*26], m1 ;in3
6073 mova [rsp+gprsize+16*23], m2 ;in5
6074 mova [rsp+gprsize+16*22], m3 ;in7
6075 mova [rsp+gprsize+16*21], m4 ;in9
6076 mova [rsp+gprsize+16*24], m5 ;in11
6077 mova [rsp+gprsize+16*25], m6 ;in13
6078 mova [rsp+gprsize+16*20], m7 ;in15
6080 mov tx2d, [rsp+gprsize*1+16*67]
6087 SAVE_7ROWS rsp+gprsize+16*3, 16
6090 mova m7, [rsp+gprsize+16*0]
6091 SAVE_8ROWS rsp+gprsize+16*11, 16
6094 mova [rsp+gprsize+16*33], m0 ;in17
6095 mova [rsp+gprsize+16*28], m1 ;in19
6096 mova [rsp+gprsize+16*29], m2 ;in21
6097 mova [rsp+gprsize+16*32], m3 ;in23
6098 mova [rsp+gprsize+16*31], m4 ;in25
6099 mova [rsp+gprsize+16*30], m5 ;in27
6100 mova [rsp+gprsize+16*27], m6 ;in29
6101 mova [rsp+gprsize+16*34], m7 ;in31
6112 SAVE_7ROWS rsp+gprsize+16*3, 16
6117 mova m7, [rsp+gprsize+16*0]
6118 SAVE_8ROWS rsp+gprsize+16*11, 16
6123 mova [rsp+gprsize+16*0], m7
6129 LOAD_8ROWS rsp+gprsize+16*11, 16
6130 mova [rsp+gprsize+16*0], m7
6136 LOAD_8ROWS rsp+gprsize+16*19, 16
6137 mova [rsp+gprsize+16*0], m7
6143 LOAD_8ROWS rsp+gprsize+16*27, 16
6144 mova [rsp+gprsize+16*0], m7
6156 mov coeffq, [rsp+gprsize*2+16*67]
6159 mov [rsp+gprsize*2+16*67], r4
6188 mov [rsp+gprsize*1+16*67], eobd
6196 mov [rsp+gprsize*2+16*67], coeffq
6197 mov [rsp+gprsize*3+16*67], dstq
6198 lea dstq, [rsp+gprsize+16*69]
6199 mov [rsp+gprsize*4+16*67], dstq
6206 SAVE_7ROWS rsp+gprsize+16*3, 16
6213 mova m7, [rsp+gprsize+16*0]
6214 SAVE_8ROWS rsp+gprsize+16*11, 16
6217 mova [rsp+gprsize+16*19], m0
6218 mova [rsp+gprsize+16*26], m1
6219 mova [rsp+gprsize+16*23], m2
6220 mova [rsp+gprsize+16*22], m3
6221 mova [rsp+gprsize+16*21], m4
6222 mova [rsp+gprsize+16*24], m5
6223 mova [rsp+gprsize+16*25], m6
6224 mova [rsp+gprsize+16*20], m7
6227 SAVE_8ROWS rsp+gprsize+16*3, 16
6230 mova [rsp+gprsize+16*35], m0 ;in1
6231 mova [rsp+gprsize+16*49], m1 ;in3
6232 mova [rsp+gprsize+16*43], m2 ;in5
6233 mova [rsp+gprsize+16*41], m3 ;in7
6234 mova [rsp+gprsize+16*39], m4 ;in9
6235 mova [rsp+gprsize+16*45], m5 ;in11
6236 mova [rsp+gprsize+16*47], m6 ;in13
6237 mova [rsp+gprsize+16*37], m7 ;in15
6240 mova [rsp+gprsize+16*63], m0 ;in17
6241 mova [rsp+gprsize+16*53], m1 ;in19
6242 mova [rsp+gprsize+16*55], m2 ;in21
6243 mova [rsp+gprsize+16*61], m3 ;in23
6244 mova [rsp+gprsize+16*59], m4 ;in25
6245 mova [rsp+gprsize+16*57], m5 ;in27
6246 mova [rsp+gprsize+16*51], m6 ;in29
6247 mova [rsp+gprsize+16*65], m7 ;in31
6251 LOAD_8ROWS rsp+gprsize+16*3, 16
6252 mova [rsp+gprsize+16*0], m7
6258 LOAD_8ROWS rsp+gprsize+16*11, 16
6259 mova [rsp+gprsize+16*0], m7
6265 LOAD_8ROWS rsp+gprsize+16*19, 16
6266 mova [rsp+gprsize+16*0], m7
6272 LOAD_8ROWS rsp+gprsize+16*27, 16
6273 mova [rsp+gprsize+16*0], m7
6279 LOAD_8ROWS rsp+gprsize+16*35, 16
6280 mova [rsp+gprsize+16*0], m7
6286 LOAD_8ROWS rsp+gprsize+16*43, 16
6287 mova [rsp+gprsize+16*0], m7
6293 LOAD_8ROWS rsp+gprsize+16*51, 16
6294 mova [rsp+gprsize+16*0], m7
6300 LOAD_8ROWS rsp+gprsize+16*59, 16
6301 mova [rsp+gprsize+16*0], m7
6314 mov coeffq, [rsp+gprsize*4+16*67]
6315 mov dstq, [rsp+gprsize*3+16*67]
6316 mov eobd, [rsp+gprsize*1+16*67]
6318 mov [rsp+gprsize*1+16*35], eobd
6324 mova [rsp+gprsize+16*0], m7
6331 mov dstq, [rsp+gprsize*2+16*35]
6332 mov r3d, [rsp+gprsize*3+16*35]
6337 mov dstq, [rsp+gprsize*3+16*67]
6338 mov coeffq, [rsp+gprsize*2+16*67]
6373 mov [rsp+gprsize*1+16*67], eobd
6375 mov [rsp+gprsize*4+16*67], coeffq
6376 mov [rsp+gprsize*3+16*67], dstq
6377 lea dstq, [rsp+gprsize+16*69]
6378 mov [rsp+gprsize*2+16*67], dstq
6385 SAVE_7ROWS rsp+gprsize+16*3, 16
6392 mova m7, [rsp+gprsize+16*0]
6393 SAVE_8ROWS rsp+gprsize+16*11, 16
6396 mova [rsp+gprsize+16*19], m0
6397 mova [rsp+gprsize+16*26], m1
6398 mova [rsp+gprsize+16*23], m2
6399 mova [rsp+gprsize+16*22], m3
6400 mova [rsp+gprsize+16*21], m4
6401 mova [rsp+gprsize+16*24], m5
6402 mova [rsp+gprsize+16*25], m6
6403 mova [rsp+gprsize+16*20], m7
6406 SAVE_8ROWS rsp+gprsize+16*3, 16
6409 mova [rsp+gprsize+16*35], m0 ;in1
6410 mova [rsp+gprsize+16*49], m1 ;in3
6411 mova [rsp+gprsize+16*43], m2 ;in5
6412 mova [rsp+gprsize+16*41], m3 ;in7
6413 mova [rsp+gprsize+16*39], m4 ;in9
6414 mova [rsp+gprsize+16*45], m5 ;in11
6415 mova [rsp+gprsize+16*47], m6 ;in13
6416 mova [rsp+gprsize+16*37], m7 ;in15
6419 mova [rsp+gprsize+16*63], m0 ;in17
6420 mova [rsp+gprsize+16*53], m1 ;in19
6421 mova [rsp+gprsize+16*55], m2 ;in21
6422 mova [rsp+gprsize+16*61], m3 ;in23
6423 mova [rsp+gprsize+16*59], m4 ;in25
6424 mova [rsp+gprsize+16*57], m5 ;in27
6425 mova [rsp+gprsize+16*51], m6 ;in29
6426 mova [rsp+gprsize+16*65], m7 ;in31
6430 LOAD_8ROWS rsp+gprsize+16*3, 16
6431 mova [rsp+gprsize+16*0], m7
6438 LOAD_8ROWS rsp+gprsize+16*11, 16
6439 mova [rsp+gprsize+16*0], m7
6446 LOAD_8ROWS rsp+gprsize+16*19, 16
6447 mova [rsp+gprsize+16*0], m7
6454 LOAD_8ROWS rsp+gprsize+16*27, 16
6455 mova [rsp+gprsize+16*0], m7
6462 LOAD_8ROWS rsp+gprsize+16*35, 16
6463 mova [rsp+gprsize+16*0], m7
6470 LOAD_8ROWS rsp+gprsize+16*43, 16
6471 mova [rsp+gprsize+16*0], m7
6478 LOAD_8ROWS rsp+gprsize+16*51, 16
6479 mova [rsp+gprsize+16*0], m7
6486 LOAD_8ROWS rsp+gprsize+16*59, 16
6487 mova [rsp+gprsize+16*0], m7
6501 mov dstq, [rsp+gprsize*3+16*67]
6502 mov coeffq, [rsp+gprsize*2+16*67]
6506 mov [rsp+gprsize*2+16*67], r4
6511 LOAD_8ROWS rsp+gprsize+16*35, 16
6513 lea r3, [rsp+16*32+gprsize]
6514 mova [rsp+gprsize+16*0], m7
6516 mov dstq, [rsp+gprsize*2+16*67]
6517 mov r3d, [rsp+gprsize*3+16*67]
6519 mov [rsp+gprsize*2+16*67], r4
6526 mov coeffq, [rsp+gprsize*4+16*67]
6527 mov dstq, [rsp+gprsize*2+16*67]
6531 mov [rsp+gprsize*2+16*67], r4