Skip to content

Commit 553aa7c

Browse files
committed
use enumeration for flags
1 parent 1f9dbb4 commit 553aa7c

2 files changed

Lines changed: 186 additions & 76 deletions

File tree

Include/internal/pycore_cpuinfo.h

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,184 @@ extern "C" {
2323

2424
#include "Python.h"
2525

26+
/*
27+
* The enumeration describes masks to apply on CPUID output registers.
28+
*
29+
* Member names are Py_CPUID_MASK_<REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>,
30+
* where <> (resp. []) denotes a required (resp. optional) group and:
31+
*
32+
* - REGISTER is EAX, EBX, ECX or EDX,
33+
* - LEAF is the initial value of the EAX register (1 or 7),
34+
* - SUBLEAF is the initial value of the ECX register (omitted if 0), and
35+
* - FEATURE is a SIMD feature (with one or more specialized instructions).
36+
*
37+
* For maintainability, the flags are ordered by registers, leaves, subleaves,
38+
* and bits. See https://en.wikipedia.org/wiki/CPUID for the values.
39+
*
40+
* Note 1: The LEAF is also called the 'page' or the 'level'.
41+
* Note 2: The SUBLEAF is also referred to as the 'count'.
42+
*
43+
* The LEAF value should only be 1 or 7, as other values may have different
44+
* meanings depending on the underlying architecture.
45+
*/
46+
// fmt: off
47+
typedef enum py_cpuid_feature_mask {
48+
/*[python input]
49+
# {(LEAF, SUBLEAF, REGISTRY): {FEATURE: BIT}}
50+
data = {
51+
(1, 0, 'ECX'): {
52+
'SSE3': 0,
53+
'PCLMULQDQ': 1,
54+
'SSSE3': 9,
55+
'FMA': 12,
56+
'SSE4_1': 19,
57+
'SSE4_2': 20,
58+
'POPCNT': 23,
59+
'XSAVE': 26,
60+
'OSXSAVE': 27,
61+
'AVX': 28,
62+
},
63+
(1, 0, 'EDX'): {
64+
'CMOV': 15,
65+
'SSE': 25,
66+
'SSE2': 26,
67+
},
68+
(7, 0, 'EBX'): {
69+
'AVX2': 5,
70+
'AVX512_F': 16,
71+
'AVX512_DQ': 17,
72+
'AVX512_IFMA': 21,
73+
'AVX512_PF': 26,
74+
'AVX512_ER': 27,
75+
'AVX512_CD': 28,
76+
'AVX512_BW': 30,
77+
'AVX512_VL': 31,
78+
},
79+
(7, 0, 'ECX'): {
80+
'AVX512_VBMI': 1,
81+
'AVX512_VBMI2': 6,
82+
'AVX512_VNNI': 11,
83+
'AVX512_BITALG': 12,
84+
'AVX512_VPOPCNTDQ': 14,
85+
},
86+
(7, 0, 'EDX'): {
87+
'AVX512_4VNNIW': 2,
88+
'AVX512_4FMAPS': 3,
89+
'AVX512_VP2INTERSECT': 8,
90+
},
91+
(7, 1, 'EAX'): {
92+
'AVX_VNNI': 4,
93+
'AVX_IFMA': 23,
94+
},
95+
(7, 1, 'EDX'): {
96+
'AVX_VNNI_INT8': 4,
97+
'AVX_NE_CONVERT': 5,
98+
'AVX_VNNI_INT16': 10,
99+
},
100+
}
101+
102+
def get_member_name(leaf, subleaf, registry, name):
103+
node = f'L{leaf}S{subleaf}' if subleaf else f'L{leaf}'
104+
return f'Py_CPUID_MASK_{registry}_{node}_{name}'
105+
106+
def get_member_mask(bit):
107+
val = format(1 << bit, '008x')
108+
return f'= 0x{val},'
109+
110+
# BUG(picnixz): Clinic does not like when commented lines have empty lines.
111+
# so we use '::' for now to indicate an empty line.
112+
# ::
113+
# The enumeration is rendered as follows:
114+
# ::
115+
# <INDENT><MEMBER_NAME> <TAB>= 0x<MASK>, <TAB>// bit = BIT
116+
# ^ ^ ^ ^ ^ ^ ^
117+
# ::
118+
# where ^ indicates a column that is a multiple of 4, <MASK> has
119+
# exactly 8 characters and <BIT> has at most 2 characters.
120+
121+
INDENT = ' ' * 4
122+
# BUG(picnixz): Clinic does not like when '/' and '*' are put together.
123+
COMMENT = '/' + '* '
124+
125+
def next_block(w):
126+
"""Compute the smallest multiple of 4 strictly larger than *w*."""
127+
return ((w + 3) & ~0x03) if (w % 4) else (w + 4)
128+
129+
NAMESIZE = next_block(max(
130+
len(get_member_name(*group, name))
131+
for group, values in data.items()
132+
for name in values
133+
))
134+
MASKSIZE = 8 + next_block(len('= 0x,'))
135+
136+
for group, values in data.items():
137+
title = 'CPUID (LEAF={}, SUBLEAF={}) [{}]'.format(*group)
138+
print(INDENT, *COMMENT, title, *COMMENT[::-1], sep='')
139+
for name, bit in values.items():
140+
assert name, f"invalid entry in {group}"
141+
key = get_member_name(*group, name)
142+
assert 0 <= bit < 32, f"invalid bit value for {name!r}"
143+
val = get_member_mask(bit)
144+
145+
member_name = key.ljust(NAMESIZE)
146+
member_mask = val.ljust(MASKSIZE)
147+
148+
print(INDENT, member_name, member_mask, f'// bit = {bit}', sep='')
149+
[python start generated code]*/
150+
/* CPUID (LEAF=1, SUBLEAF=0) [ECX] */
151+
Py_CPUID_MASK_ECX_L1_SSE3 = 0x00000001, // bit = 0
152+
Py_CPUID_MASK_ECX_L1_PCLMULQDQ = 0x00000002, // bit = 1
153+
Py_CPUID_MASK_ECX_L1_SSSE3 = 0x00000200, // bit = 9
154+
Py_CPUID_MASK_ECX_L1_FMA = 0x00001000, // bit = 12
155+
Py_CPUID_MASK_ECX_L1_SSE4_1 = 0x00080000, // bit = 19
156+
Py_CPUID_MASK_ECX_L1_SSE4_2 = 0x00100000, // bit = 20
157+
Py_CPUID_MASK_ECX_L1_POPCNT = 0x00800000, // bit = 23
158+
Py_CPUID_MASK_ECX_L1_XSAVE = 0x04000000, // bit = 26
159+
Py_CPUID_MASK_ECX_L1_OSXSAVE = 0x08000000, // bit = 27
160+
Py_CPUID_MASK_ECX_L1_AVX = 0x10000000, // bit = 28
161+
/* CPUID (LEAF=1, SUBLEAF=0) [EDX] */
162+
Py_CPUID_MASK_EDX_L1_CMOV = 0x00008000, // bit = 15
163+
Py_CPUID_MASK_EDX_L1_SSE = 0x02000000, // bit = 25
164+
Py_CPUID_MASK_EDX_L1_SSE2 = 0x04000000, // bit = 26
165+
/* CPUID (LEAF=7, SUBLEAF=0) [EBX] */
166+
Py_CPUID_MASK_EBX_L7_AVX2 = 0x00000020, // bit = 5
167+
Py_CPUID_MASK_EBX_L7_AVX512_F = 0x00010000, // bit = 16
168+
Py_CPUID_MASK_EBX_L7_AVX512_DQ = 0x00020000, // bit = 17
169+
Py_CPUID_MASK_EBX_L7_AVX512_IFMA = 0x00200000, // bit = 21
170+
Py_CPUID_MASK_EBX_L7_AVX512_PF = 0x04000000, // bit = 26
171+
Py_CPUID_MASK_EBX_L7_AVX512_ER = 0x08000000, // bit = 27
172+
Py_CPUID_MASK_EBX_L7_AVX512_CD = 0x10000000, // bit = 28
173+
Py_CPUID_MASK_EBX_L7_AVX512_BW = 0x40000000, // bit = 30
174+
Py_CPUID_MASK_EBX_L7_AVX512_VL = 0x80000000, // bit = 31
175+
/* CPUID (LEAF=7, SUBLEAF=0) [ECX] */
176+
Py_CPUID_MASK_ECX_L7_AVX512_VBMI = 0x00000002, // bit = 1
177+
Py_CPUID_MASK_ECX_L7_AVX512_VBMI2 = 0x00000040, // bit = 6
178+
Py_CPUID_MASK_ECX_L7_AVX512_VNNI = 0x00000800, // bit = 11
179+
Py_CPUID_MASK_ECX_L7_AVX512_BITALG = 0x00001000, // bit = 12
180+
Py_CPUID_MASK_ECX_L7_AVX512_VPOPCNTDQ = 0x00004000, // bit = 14
181+
/* CPUID (LEAF=7, SUBLEAF=0) [EDX] */
182+
Py_CPUID_MASK_EDX_L7_AVX512_4VNNIW = 0x00000004, // bit = 2
183+
Py_CPUID_MASK_EDX_L7_AVX512_4FMAPS = 0x00000008, // bit = 3
184+
Py_CPUID_MASK_EDX_L7_AVX512_VP2INTERSECT = 0x00000100, // bit = 8
185+
/* CPUID (LEAF=7, SUBLEAF=1) [EAX] */
186+
Py_CPUID_MASK_EAX_L7S1_AVX_VNNI = 0x00000010, // bit = 4
187+
Py_CPUID_MASK_EAX_L7S1_AVX_IFMA = 0x00800000, // bit = 23
188+
/* CPUID (LEAF=7, SUBLEAF=1) [EDX] */
189+
Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT8 = 0x00000010, // bit = 4
190+
Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT = 0x00000020, // bit = 5
191+
Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 = 0x00000400, // bit = 10
192+
/*[python end generated code: output=e53c5376296af250 input=46c9e43c1f6f5cf9]*/
193+
} py_cpuid_feature_mask;
194+
// fmt: on
195+
196+
/* XSAVE state components (XCR0 control register) */
197+
typedef enum py_xsave_feature_mask {
198+
Py_XSAVE_MASK_XCR0_SSE = 0x00000002, // bit = 1
199+
Py_XSAVE_MASK_XCR0_AVX = 0x00000004, // bit = 2
200+
Py_XSAVE_MASK_XCR0_AVX512_OPMASK = 0x00000020, // bit = 5
201+
Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 = 0x00000040, // bit = 6
202+
Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM = 0x00000080, // bit = 7
203+
} py_xsave_feature_mask;
26204

27205
typedef struct py_cpuid_features {
28206
uint32_t maxleaf;

Python/cpuinfo.c

Lines changed: 8 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
#include "pycore_cpuinfo.h"
22

3-
#include <stdint.h> // UINT32_C()
4-
53
/* CPUID input and output registers are 32-bit unsigned integers */
64
#define CPUID_REG uint32_t
75
/* Check one or more CPUID register bits. */
8-
#define CPUID_CHECK_REG(REG, MASK) ((((REG) & (MASK)) == (MASK)) ? 0 : 1)
6+
/*
 * Evaluate to 1 when every bit of MASK is set in REG, and to 0 otherwise.
 *
 * The result is stored into feature flags that are later used as truth
 * values (e.g. `flags->osxsave ? get_xgetbv(0) : 0`), so a detected feature
 * must map to 1 and a register value of 0 must map to 0.
 *
 * MASK is evaluated twice: do not pass an expression with side effects.
 */
#define CHECK_REG(REG, MASK) ((((REG) & (MASK)) == (MASK)) ? 1 : 0)
7+
/*
 * Apply CHECK_REG() to REG using the Py_CPUID_MASK_<FEAT> enumerator.
 * FEAT is token-pasted onto the 'Py_CPUID_MASK_' prefix, so callers pass
 * only the suffix (e.g. ECX_L1_SSE3 selects Py_CPUID_MASK_ECX_L1_SSE3).
 */
#define CPUID_CHECK_REG(REG, FEAT) CHECK_REG(REG, (Py_CPUID_MASK_ ## FEAT))
8+
/*
 * Apply CHECK_REG() to REG using the Py_XSAVE_MASK_<FEAT> enumerator.
 * FEAT is token-pasted onto the 'Py_XSAVE_MASK_' prefix, so callers pass
 * only the suffix (e.g. XCR0_SSE selects Py_XSAVE_MASK_XCR0_SSE).
 */
#define XSAVE_CHECK_REG(REG, FEAT) CHECK_REG(REG, (Py_XSAVE_MASK_ ## FEAT))
99

1010
// For now, we only try to enable SIMD instructions for x86-64 Intel CPUs.
1111
// In the future, we should carefully enable support for ARM NEON and POWER
@@ -113,67 +113,6 @@
113113
# define SHOULD_PARSE_CPUID_L7S1
114114
#endif
115115

116-
/*
117-
* The macros below describe masks to apply on CPUID output registers.
118-
*
119-
* Each macro is of the form <REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>,
120-
* where <> (resp. []) denotes a required (resp. optional) group and:
121-
*
122-
* - REGISTER is EAX, EBX, ECX or EDX,
123-
* - LEAF is the initial value of the EAX register (1 or 7),
124-
* - SUBLEAF is the initial value of the ECX register (omitted if 0), and
125-
* - FEATURE is a SIMD feature (with one or more specialized instructions).
126-
*
127-
* For maintainability, the flags are ordered by registers, leafs, subleafs,
128-
* and bits. See https://en.wikipedia.org/wiki/CPUID for the values.
129-
*
130-
* Note 1: The LEAF is also called the 'page' or the 'level'.
131-
* Note 2: The SUBLEAF is also referred to as the 'count'.
132-
*/
133-
134-
/* CPUID (LEAF=1, SUBLEAF=0) [ECX] */
135-
#define ECX_L1_SSE3 (UINT32_C(1) << 0) // 0x00000001
136-
#define ECX_L1_PCLMULQDQ (UINT32_C(1) << 1) // 0x00000002
137-
#define ECX_L1_SSSE3 (UINT32_C(1) << 9) // 0x00000200
138-
#define ECX_L1_FMA (UINT32_C(1) << 12) // 0x00001000
139-
#define ECX_L1_SSE4_1 (UINT32_C(1) << 19) // 0x00080000
140-
#define ECX_L1_SSE4_2 (UINT32_C(1) << 20) // 0x00100000
141-
#define ECX_L1_POPCNT (UINT32_C(1) << 23) // 0x00800000
142-
#define ECX_L1_XSAVE (UINT32_C(1) << 26) // 0x04000000
143-
#define ECX_L1_OSXSAVE (UINT32_C(1) << 27) // 0x08000000
144-
#define ECX_L1_AVX (UINT32_C(1) << 28) // 0x10000000
145-
/* CPUID (LEAF=1, SUBLEAF=0) [EDX] */
146-
#define EDX_L1_CMOV (UINT32_C(1) << 15) // 0x00008000
147-
#define EDX_L1_SSE (UINT32_C(1) << 25) // 0x02000000
148-
#define EDX_L1_SSE2 (UINT32_C(1) << 26) // 0x04000000
149-
/* CPUID (LEAF=7, SUBLEAF=0) [EBX] */
150-
#define EBX_L7_AVX2 (UINT32_C(1) << 5) // 0x00000020
151-
#define EBX_L7_AVX512_F (UINT32_C(1) << 16) // 0x00010000
152-
#define EBX_L7_AVX512_DQ (UINT32_C(1) << 17) // 0x00020000
153-
#define EBX_L7_AVX512_IFMA (UINT32_C(1) << 21) // 0x00200000
154-
#define EBX_L7_AVX512_PF (UINT32_C(1) << 26) // 0x04000000
155-
#define EBX_L7_AVX512_ER (UINT32_C(1) << 27) // 0x08000000
156-
#define EBX_L7_AVX512_CD (UINT32_C(1) << 28) // 0x10000000
157-
#define EBX_L7_AVX512_BW (UINT32_C(1) << 30) // 0x40000000
158-
#define EBX_L7_AVX512_VL (UINT32_C(1) << 31) // 0x80000000
159-
/* CPUID (LEAF=7, SUBLEAF=0) [ECX] */
160-
#define ECX_L7_AVX512_VBMI (UINT32_C(1) << 1) // 0x00000002
161-
#define ECX_L7_AVX512_VBMI2 (UINT32_C(1) << 6) // 0x00000040
162-
#define ECX_L7_AVX512_VNNI (UINT32_C(1) << 11) // 0x00000800
163-
#define ECX_L7_AVX512_BITALG (UINT32_C(1) << 12) // 0x00001000
164-
#define ECX_L7_AVX512_VPOPCNTDQ (UINT32_C(1) << 14) // 0x00004000
165-
/* CPUID (LEAF=7, SUBLEAF=0) [EDX] */
166-
#define EDX_L7_AVX512_4VNNIW (UINT32_C(1) << 2) // 0x00000004
167-
#define EDX_L7_AVX512_4FMAPS (UINT32_C(1) << 3) // 0x00000008
168-
#define EDX_L7_AVX512_VP2INTERSECT (UINT32_C(1) << 8) // 0x00000100
169-
/* CPUID (LEAF=7, SUBLEAF=1) [EAX] */
170-
#define EAX_L7S1_AVX_VNNI (UINT32_C(1) << 4) // 0x00000010
171-
#define EAX_L7S1_AVX_IFMA (UINT32_C(1) << 23) // 0x00800000
172-
/* CPUID (LEAF=7, SUBLEAF=1) [EDX] */
173-
#define EDX_L7S1_AVX_VNNI_INT8 (UINT32_C(1) << 4) // 0x00000010
174-
#define EDX_L7S1_AVX_NE_CONVERT (UINT32_C(1) << 5) // 0x00000020
175-
#define EDX_L7S1_AVX_VNNI_INT16 (UINT32_C(1) << 10) // 0x00000400
176-
177116
/*
178117
* Call __cpuid_count() or equivalent and get
179118
* its EAX, EBX, ECX and EDX output registers.
@@ -195,13 +134,6 @@ get_cpuid_info(uint32_t level /* input eax */,
195134
#endif
196135
}
197136

198-
/* XSAVE state components (XCR0 control register) */
199-
#define XCR0_SSE (UINT32_C(1) << 1) // 0x00000002
200-
#define XCR0_AVX (UINT32_C(1) << 2) // 0x00000004
201-
#define XCR0_AVX512_OPMASK (UINT32_C(1) << 5) // 0x00000020
202-
#define XCR0_AVX512_ZMM_HI256 (UINT32_C(1) << 6) // 0x00000040
203-
#define XCR0_AVX512_HI16_ZMM (UINT32_C(1) << 7) // 0x00000080
204-
205137
static inline uint64_t
206138
get_xgetbv(uint32_t index)
207139
{
@@ -380,11 +312,11 @@ detect_cpuid_xsave_state(py_cpuid_features *flags)
380312
// Keep the ordering and newlines as they are declared in the structure.
381313
#ifdef HAS_XGETBV_SUPPORT
382314
uint64_t xcr0 = flags->osxsave ? get_xgetbv(0) : 0;
383-
flags->xcr0_sse = CPUID_CHECK_REG(xcr0, XCR0_SSE);
384-
flags->xcr0_avx = CPUID_CHECK_REG(xcr0, XCR0_AVX);
385-
flags->xcr0_avx512_opmask = CPUID_CHECK_REG(xcr0, XCR0_AVX512_OPMASK);
386-
flags->xcr0_avx512_zmm_hi256 = CPUID_CHECK_REG(xcr0, XCR0_AVX512_ZMM_HI256);
387-
flags->xcr0_avx512_hi16_zmm = CPUID_CHECK_REG(xcr0, XCR0_AVX512_HI16_ZMM);
315+
flags->xcr0_sse = XSAVE_CHECK_REG(xcr0, XCR0_SSE);
316+
flags->xcr0_avx = XSAVE_CHECK_REG(xcr0, XCR0_AVX);
317+
flags->xcr0_avx512_opmask = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_OPMASK);
318+
flags->xcr0_avx512_zmm_hi256 = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_ZMM_HI256);
319+
flags->xcr0_avx512_hi16_zmm = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_HI16_ZMM);
388320
#endif
389321
}
390322

0 commit comments

Comments
 (0)