xref: /aosp_15_r20/external/libjpeg-turbo/simd/x86_64/jsimdcpu.asm (revision dfc6aa5c1cfd4bc4e2018dc74aa96e29ee49c6da)
;
; jsimdcpu.asm - SIMD instruction support check
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
17*dfc6aa5cSAndroid Build Coastguard Worker
18*dfc6aa5cSAndroid Build Coastguard Worker%include "jsimdext.inc"
19*dfc6aa5cSAndroid Build Coastguard Worker
; --------------------------------------------------------------------------
    SECTION     SEG_TEXT
    BITS        64
;
; Report which SIMD instruction sets may be used on this machine
;
; GLOBAL(unsigned int)
; jpeg_simd_cpu_support(void)
;
; Returns (in eax) a bitwise OR of JSIMD_* flags.  JSIMD_SSE and JSIMD_SSE2
; are always set.  JSIMD_AVX2 is added only when all of the following hold:
;   - the highest basic CPUID leaf is at least 07H
;   - CPUID leaf 07H (sub-leaf 0) reports AVX2 in ebx
;   - CPUID leaf 01H reports both OSXSAVE and AVX in ecx
;   - XCR0 has bits 1 and 2 set (XMM and YMM state managed via XSAVE)
;
; Register use:
;   edi  accumulated result flags; not modified by CPUID or XGETBV
;   rbx  saved and restored here because CPUID overwrites it
;   rdi  saved and restored here as well (presumably because rdi is
;        callee-saved under the Microsoft x64 convention - confirm against
;        the targets this file is built for)
;   eax, ecx, edx are overwritten by CPUID/XGETBV and are not preserved
;

    align       32
    GLOBAL_FUNCTION(jpeg_simd_cpu_support)

EXTN(jpeg_simd_cpu_support):
    push        rbx
    push        rdi

    ; All x86-64 processors are assumed to provide SSE and SSE2, so the
    ; result starts out with both of those flags set.
    mov         edi, JSIMD_SSE | JSIMD_SSE2

    ; CPUID leaf 0 returns the highest supported basic leaf in eax.
    ; Leaf 07H is needed for the AVX2 feature bit.
    xor         eax, eax
    cpuid
    cmp         eax, 7
    jb          short .return           ; maximum leaf < 07H

    ; CPUID leaf 07H, sub-leaf 0: ebx = extended feature flags
    mov         eax, 7
    xor         ecx, ecx
    cpuid
    test        ebx, 1 << 5             ; bit 5: AVX2
    jz          short .return           ; CPU lacks AVX2

    ; CPUID leaf 01H: ecx = feature flags
    ;   bit 27: OSXSAVE (O/S has enabled XSAVE, so XGETBV may be executed)
    ;   bit 28: AVX
    ; Both bits are required; bail out if either one is clear.
    mov         eax, 1
    xor         ecx, ecx
    cpuid
    and         ecx, (1 << 27) | (1 << 28)
    cmp         ecx, (1 << 27) | (1 << 28)
    jne         short .return

    ; XGETBV with ecx = 0 reads XCR0 into edx:eax.  Bits 1 and 2 must both
    ; be set, otherwise the O/S does not manage XMM/YMM state using XSAVE.
    xor         ecx, ecx
    xgetbv
    and         eax, 6
    cmp         eax, 6
    jne         short .return

    or          edi, JSIMD_AVX2

.return:
    mov         eax, edi                ; return value = accumulated flags

    pop         rdi
    pop         rbx
    ret

; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
    align       32
87