@@ -23,6 +23,184 @@ extern "C" {
2323
2424#include "Python.h"
2525
26+ /*
27+ * The enumeration describes masks to apply on CPUID output registers.
28+ *
29+ * Member names are Py_CPUID_MASK_<REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>,
30+ * where <> (resp. []) denotes a required (resp. optional) group and:
31+ *
32+ * - REGISTER is EAX, EBX, ECX or EDX,
33+ * - LEAF is the initial value of the EAX register (1 or 7),
34+ * - SUBLEAF is the initial value of the ECX register (omitted if 0), and
35+ * - FEATURE is a SIMD feature (with one or more specialized instructions).
36+ *
37+ * For maintainability, the flags are ordered by registers, leafs, subleafs,
38+ * and bits. See https://en.wikipedia.org/wiki/CPUID for the values.
39+ *
40+ * Note 1: The LEAF is also called the 'page' or the 'level'.
41+ * Note 2: The SUBLEAF is also referred to as the 'count'.
42+ *
43+ * The LEAF value should only be 1 or 7, as other values may have different
44+ * meanings depending on the underlying architecture.
45+ */
46+ // fmt: off
47+ typedef enum py_cpuid_feature_mask {
    /*
     * Single-bit masks for CPUID feature flags: every member has exactly
     * one bit set, and the trailing comment on the member gives its index.
     *
     * The members are not written by hand. The Python script embedded
     * below prints them from its `data` table, which maps each
     * (LEAF, SUBLEAF, register) group to {FEATURE: BIT}; a member's mask
     * is 1 << BIT rendered as 8 hexadecimal digits. To add or move a flag,
     * edit `data` and regenerate instead of editing the members directly.
     * The end marker of the generated region records output= and input=
     * hashes (presumably checked by the generator tool -- confirm).
     *
     * NOTE(review): Py_CPUID_MASK_EBX_L7_AVX512_VL is 0x80000000 (bit 31),
     * which does not fit in a 32-bit int. ISO C before C23 requires
     * enumerator values to be representable as int, so this member
     * presumably relies on a compiler extension -- confirm that every
     * supported compiler accepts it.
     */
48+ /*[python input]
49+ # {(LEAF, SUBLEAF, REGISTRY): {FEATURE: BIT}}
50+ data = {
51+ (1, 0, 'ECX'): {
52+ 'SSE3': 0,
53+ 'PCLMULQDQ': 1,
54+ 'SSSE3': 9,
55+ 'FMA': 12,
56+ 'SSE4_1': 19,
57+ 'SSE4_2': 20,
58+ 'POPCNT': 23,
59+ 'XSAVE': 26,
60+ 'OSXSAVE': 27,
61+ 'AVX': 28,
62+ },
63+ (1, 0, 'EDX'): {
64+ 'CMOV': 15,
65+ 'SSE': 25,
66+ 'SSE2': 26,
67+ },
68+ (7, 0, 'EBX'): {
69+ 'AVX2': 5,
70+ 'AVX512_F': 16,
71+ 'AVX512_DQ': 17,
72+ 'AVX512_IFMA': 21,
73+ 'AVX512_PF': 26,
74+ 'AVX512_ER': 27,
75+ 'AVX512_CD': 28,
76+ 'AVX512_BW': 30,
77+ 'AVX512_VL': 31,
78+ },
79+ (7, 0, 'ECX'): {
80+ 'AVX512_VBMI': 1,
81+ 'AVX512_VBMI2': 6,
82+ 'AVX512_VNNI': 11,
83+ 'AVX512_BITALG': 12,
84+ 'AVX512_VPOPCNTDQ': 14,
85+ },
86+ (7, 0, 'EDX'): {
87+ 'AVX512_4VNNIW': 2,
88+ 'AVX512_4FMAPS': 3,
89+ 'AVX512_VP2INTERSECT': 8,
90+ },
91+ (7, 1, 'EAX'): {
92+ 'AVX_VNNI': 4,
93+ 'AVX_IFMA': 23,
94+ },
95+ (7, 1, 'EDX'): {
96+ 'AVX_VNNI_INT8': 4,
97+ 'AVX_NE_CONVERT': 5,
98+ 'AVX_VNNI_INT16': 10,
99+ },
100+ }
101+
102+ def get_member_name(leaf, subleaf, registry, name):
103+ node = f'L{leaf}S{subleaf}' if subleaf else f'L{leaf}'
104+ return f'Py_CPUID_MASK_{registry}_{node}_{name}'
105+
106+ def get_member_mask(bit):
107+ val = format(1 << bit, '008x')
108+ return f'= 0x{val},'
109+
110+ # BUG(picnixz): Clinic does not like when commented lines have empty lines.
111+ # so we use '::' for now to indicate an empty line.
112+ # ::
113+ # The enumeration is rendered as follows:
114+ # ::
115+ # <INDENT><MEMBER_NAME> <TAB>= 0x<MASK>, <TAB>// bit = BIT
116+ # ^ ^ ^ ^ ^ ^ ^
117+ # ::
118+ # where ^ indicates a column that is a multiple of 4, <MASK> has
119+ # exactly 8 characters and <BIT> has at most 2 characters.
120+
121+ INDENT = ' ' * 4
122+ # BUG(picnixz): Clinic does not like when '/' and '*' are put together.
123+ COMMENT = '/' + '* '
124+
125+ def next_block(w):
126+ """Compute the smallest multiple of 4 strictly larger than *w*."""
127+ return ((w + 3) & ~0x03) if (w % 4) else (w + 4)
128+
129+ NAMESIZE = next_block(max(
130+ len(get_member_name(*group, name))
131+ for group, values in data.items()
132+ for name in values
133+ ))
134+ MASKSIZE = 8 + next_block(len('= 0x,'))
135+
136+ for group, values in data.items():
137+ title = 'CPUID (LEAF={}, SUBLEAF={}) [{}]'.format(*group)
138+ print(INDENT, *COMMENT, title, *COMMENT[::-1], sep='')
139+ for name, bit in values.items():
140+ assert name, f"invalid entry in {group}"
141+ key = get_member_name(*group, name)
142+ assert 0 <= bit < 32, f"invalid bit value for {name!r}"
143+ val = get_member_mask(bit)
144+
145+ member_name = key.ljust(NAMESIZE)
146+ member_mask = val.ljust(MASKSIZE)
147+
148+ print(INDENT, member_name, member_mask, f'// bit = {bit}', sep='')
149+ [python start generated code]*/
150+ /* CPUID (LEAF=1, SUBLEAF=0) [ECX] */
151+ Py_CPUID_MASK_ECX_L1_SSE3 = 0x00000001 , // bit = 0
152+ Py_CPUID_MASK_ECX_L1_PCLMULQDQ = 0x00000002 , // bit = 1
153+ Py_CPUID_MASK_ECX_L1_SSSE3 = 0x00000200 , // bit = 9
154+ Py_CPUID_MASK_ECX_L1_FMA = 0x00001000 , // bit = 12
155+ Py_CPUID_MASK_ECX_L1_SSE4_1 = 0x00080000 , // bit = 19
156+ Py_CPUID_MASK_ECX_L1_SSE4_2 = 0x00100000 , // bit = 20
157+ Py_CPUID_MASK_ECX_L1_POPCNT = 0x00800000 , // bit = 23
158+ Py_CPUID_MASK_ECX_L1_XSAVE = 0x04000000 , // bit = 26
159+ Py_CPUID_MASK_ECX_L1_OSXSAVE = 0x08000000 , // bit = 27
160+ Py_CPUID_MASK_ECX_L1_AVX = 0x10000000 , // bit = 28
161+ /* CPUID (LEAF=1, SUBLEAF=0) [EDX] */
162+ Py_CPUID_MASK_EDX_L1_CMOV = 0x00008000 , // bit = 15
163+ Py_CPUID_MASK_EDX_L1_SSE = 0x02000000 , // bit = 25
164+ Py_CPUID_MASK_EDX_L1_SSE2 = 0x04000000 , // bit = 26
165+ /* CPUID (LEAF=7, SUBLEAF=0) [EBX] */
166+ Py_CPUID_MASK_EBX_L7_AVX2 = 0x00000020 , // bit = 5
167+ Py_CPUID_MASK_EBX_L7_AVX512_F = 0x00010000 , // bit = 16
168+ Py_CPUID_MASK_EBX_L7_AVX512_DQ = 0x00020000 , // bit = 17
169+ Py_CPUID_MASK_EBX_L7_AVX512_IFMA = 0x00200000 , // bit = 21
170+ Py_CPUID_MASK_EBX_L7_AVX512_PF = 0x04000000 , // bit = 26
171+ Py_CPUID_MASK_EBX_L7_AVX512_ER = 0x08000000 , // bit = 27
172+ Py_CPUID_MASK_EBX_L7_AVX512_CD = 0x10000000 , // bit = 28
173+ Py_CPUID_MASK_EBX_L7_AVX512_BW = 0x40000000 , // bit = 30
174+ Py_CPUID_MASK_EBX_L7_AVX512_VL = 0x80000000 , // bit = 31
175+ /* CPUID (LEAF=7, SUBLEAF=0) [ECX] */
176+ Py_CPUID_MASK_ECX_L7_AVX512_VBMI = 0x00000002 , // bit = 1
177+ Py_CPUID_MASK_ECX_L7_AVX512_VBMI2 = 0x00000040 , // bit = 6
178+ Py_CPUID_MASK_ECX_L7_AVX512_VNNI = 0x00000800 , // bit = 11
179+ Py_CPUID_MASK_ECX_L7_AVX512_BITALG = 0x00001000 , // bit = 12
180+ Py_CPUID_MASK_ECX_L7_AVX512_VPOPCNTDQ = 0x00004000 , // bit = 14
181+ /* CPUID (LEAF=7, SUBLEAF=0) [EDX] */
182+ Py_CPUID_MASK_EDX_L7_AVX512_4VNNIW = 0x00000004 , // bit = 2
183+ Py_CPUID_MASK_EDX_L7_AVX512_4FMAPS = 0x00000008 , // bit = 3
184+ Py_CPUID_MASK_EDX_L7_AVX512_VP2INTERSECT = 0x00000100 , // bit = 8
185+ /* CPUID (LEAF=7, SUBLEAF=1) [EAX] */
186+ Py_CPUID_MASK_EAX_L7S1_AVX_VNNI = 0x00000010 , // bit = 4
187+ Py_CPUID_MASK_EAX_L7S1_AVX_IFMA = 0x00800000 , // bit = 23
188+ /* CPUID (LEAF=7, SUBLEAF=1) [EDX] */
189+ Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT8 = 0x00000010 , // bit = 4
190+ Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT = 0x00000020 , // bit = 5
191+ Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 = 0x00000400 , // bit = 10
192+ /*[python end generated code: output=e53c5376296af250 input=46c9e43c1f6f5cf9]*/
193+ } py_cpuid_feature_mask ;
194+ // fmt: on
195+
196+ /* XSAVE state components (XCR0 control register) */
197+ typedef enum py_xsave_feature_mask {
    /*
     * Each member is a single-bit mask; the trailing comment gives the
     * index of the bit that is set. Member names follow the pattern
     * Py_XSAVE_MASK_XCR0_<COMPONENT>.
     *
     * NOTE(review): these are presumably ANDed with the value read from
     * the XCR0 register to check which state components are enabled --
     * confirm against the code that consumes them.
     */
198+ Py_XSAVE_MASK_XCR0_SSE = 0x00000002 , // bit = 1
199+ Py_XSAVE_MASK_XCR0_AVX = 0x00000004 , // bit = 2
200+ Py_XSAVE_MASK_XCR0_AVX512_OPMASK = 0x00000020 , // bit = 5
201+ Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 = 0x00000040 , // bit = 6
202+ Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM = 0x00000080 , // bit = 7
203+ } py_xsave_feature_mask ;
26204
27205typedef struct py_cpuid_features {
28206 uint32_t maxleaf ;
0 commit comments