1 /*
2 * Copyright © 2018, VideoLAN and dav1d authors
3 * Copyright © 2018, Two Orioles, LLC
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this
10 * list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice,
13 * this list of conditions and the following disclaimer in the documentation
14 * and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include "config.h"
29
30 #include <stdint.h>
31 #include <string.h>
32
33 #include "common/attributes.h"
34
35 #include "src/cpu.h"
36 #include "src/x86/cpu.h"
37
/* Register values produced by one dav1d_cpu_cpuid() call.
 *
 * The members are deliberately ordered eax, ebx, edx, ecx (not the usual
 * a/b/c/d order): dav1d_get_cpu_flags_x86() overlays this struct with
 * { uint32_t max_leaf; char vendor[12]; } so that, for leaf 0, eax is read
 * as the maximum leaf and ebx/edx/ecx are read back-to-back as the 12-byte
 * vendor string. Do not reorder. */
typedef struct {
    uint32_t eax;
    uint32_t ebx;
    uint32_t edx;
    uint32_t ecx;
} CpuidRegisters;
41
/* Declared here only; no definition is visible in this file.
 * As used by dav1d_get_cpu_flags_x86(), the call fills *regs with the
 * register values for the requested leaf/subleaf (leaves 0, 1 and 7 are
 * queried, always with subleaf 0).
 * NOTE(review): presumably a thin wrapper around the CPUID instruction
 * implemented in assembly -- confirm against the definition. */
void dav1d_cpu_cpuid(CpuidRegisters *regs, unsigned leaf, unsigned subleaf);
/* Declared here only; no definition is visible in this file.
 * Returns a 64-bit value for the given xcr index. The only caller passes 0
 * and tests the XMM/YMM and ZMM/OPMASK state bits of the result, and only
 * calls it after CPUID has reported OSXSAVE.
 * NOTE(review): presumably wraps the XGETBV instruction -- confirm. */
uint64_t dav1d_cpu_xgetbv(unsigned xcr);
44
/* Evaluates to 1 when every bit set in `mask` is also set in `reg`
 * (an all-of test, not any-of), and to 0 otherwise.
 * `mask` is expanded twice, so pass only side-effect-free expressions. */
#define X(reg, mask) ((mask) == ((reg) & (mask)))
46
dav1d_get_cpu_flags_x86(void)47 COLD unsigned dav1d_get_cpu_flags_x86(void) {
48 union {
49 CpuidRegisters r;
50 struct {
51 uint32_t max_leaf;
52 char vendor[12];
53 };
54 } cpu;
55 dav1d_cpu_cpuid(&cpu.r, 0, 0);
56 unsigned flags = dav1d_get_default_cpu_flags();
57
58 if (cpu.max_leaf >= 1) {
59 CpuidRegisters r;
60 dav1d_cpu_cpuid(&r, 1, 0);
61 const unsigned family = ((r.eax >> 8) & 0x0f) + ((r.eax >> 20) & 0xff);
62
63 if (X(r.edx, 0x06008000)) /* CMOV/SSE/SSE2 */ {
64 flags |= DAV1D_X86_CPU_FLAG_SSE2;
65 if (X(r.ecx, 0x00000201)) /* SSE3/SSSE3 */ {
66 flags |= DAV1D_X86_CPU_FLAG_SSSE3;
67 if (X(r.ecx, 0x00080000)) /* SSE4.1 */
68 flags |= DAV1D_X86_CPU_FLAG_SSE41;
69 }
70 }
71 #if ARCH_X86_64
72 /* We only support >128-bit SIMD on x86-64. */
73 if (X(r.ecx, 0x18000000)) /* OSXSAVE/AVX */ {
74 const uint64_t xcr0 = dav1d_cpu_xgetbv(0);
75 if (X(xcr0, 0x00000006)) /* XMM/YMM */ {
76 if (cpu.max_leaf >= 7) {
77 dav1d_cpu_cpuid(&r, 7, 0);
78 if (X(r.ebx, 0x00000128)) /* BMI1/BMI2/AVX2 */ {
79 flags |= DAV1D_X86_CPU_FLAG_AVX2;
80 if (X(xcr0, 0x000000e0)) /* ZMM/OPMASK */ {
81 if (X(r.ebx, 0xd0230000) && X(r.ecx, 0x00005f42))
82 flags |= DAV1D_X86_CPU_FLAG_AVX512ICL;
83 }
84 }
85 }
86 }
87 }
88 #endif
89 if (!memcmp(cpu.vendor, "AuthenticAMD", sizeof(cpu.vendor))) {
90 if ((flags & DAV1D_X86_CPU_FLAG_AVX2) && family <= 0x19) {
91 /* Excavator, Zen, Zen+, Zen 2, Zen 3, Zen 3+, Zen 4 */
92 flags |= DAV1D_X86_CPU_FLAG_SLOW_GATHER;
93 }
94 }
95 }
96
97 return flags;
98 }
99