Skip to content

Commit 5b472b6

Browse files
author
Peter Zijlstra
committed
x86_64/bug: Implement __WARN_printf()
The basic idea is to have __WARN_printf() be a vararg function such that the compiler can do the optimal calling convention for us. This function body will be a #UD and then set up a va_list in the exception from pt_regs.

But because the trap will be in a called function, the bug_entry must be passed in. Have that be the first argument, with the format tucked away inside the bug_entry.

The comments should clarify the real fun details.

The big downside is that all WARNs will now show:

    RIP: 0010:__WARN_trap:+0

One possible solution is to simply discard the top frame when unwinding. A follow up patch takes care of this slightly differently by abusing the x86 static_call implementation.

This changes (with the next patches):

    WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET,
              "corrupted preempt_count: %s/%d/0x%x\n",

from:

    cmpl $2, %ecx #, _7
    jne .L1472
    ...

    .L1472:
    cmpb $0, __already_done.11(%rip)
    je .L1513
    ...

    .L1513
    movb $1, __already_done.11(%rip)
    movl 1424(%r14), %edx # _15->pid, _15->pid
    leaq 1912(%r14), %rsi #, _17
    movq $.LC43, %rdi #,
    call __warn_printk #
    ud2
    .pushsection __bug_table,"aw"
    2:
    .long 1b - . # bug_entry::bug_addr
    .long .LC1 - . # bug_entry::file
    .word 5093 # bug_entry::line
    .word 2313 # bug_entry::flags
    .org 2b + 12
    .popsection
    .pushsection .discard.annotate_insn,"M", @progbits, 8
    .long 1b - .
    .long 8 # ANNOTYPE_REACHABLE
    .popsection

into:

    cmpl $2, %ecx #, _7
    jne .L1442 #,
    ...

    .L1442:
    lea (2f)(%rip), %rdi
    1:
    .pushsection __bug_table,"aw"
    2:
    .long 1b - . # bug_entry::bug_addr
    .long .LC43 - . # bug_entry::format
    .long .LC1 - . # bug_entry::file
    .word 5093 # bug_entry::line
    .word 2323 # bug_entry::flags
    .org 2b + 16
    .popsection
    movl 1424(%r14), %edx # _19->pid, _19->pid
    leaq 1912(%r14), %rsi #, _13
    ud1 (%edx), %rdi

Notably, by pushing everything into the exception handler it can take care of the ONCE thing.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251110115758.213813530@infradead.org
1 parent 4f1b701 commit 5b472b6

3 files changed

Lines changed: 170 additions & 15 deletions

File tree

arch/x86/entry/entry.S

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,14 @@ SYM_FUNC_END(write_ibpb)
3232
/* For KVM */
3333
EXPORT_SYMBOL_GPL(write_ibpb);
3434

35+
SYM_FUNC_START(__WARN_trap)
36+
ANNOTATE_NOENDBR
37+
ANNOTATE_REACHABLE
38+
ud1 (%edx), %_ASM_ARG1
39+
RET
40+
SYM_FUNC_END(__WARN_trap)
41+
EXPORT_SYMBOL(__WARN_trap)
42+
3543
.popsection
3644

3745
/*

arch/x86/include/asm/bug.h

Lines changed: 58 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@
77
#include <linux/objtool.h>
88
#include <asm/asm.h>
99

10+
#ifndef __ASSEMBLY__
11+
struct bug_entry;
12+
extern void __WARN_trap(struct bug_entry *bug, ...);
13+
#endif
14+
1015
/*
1116
* Despite that some emulators terminate on UD2, we use it for WARN().
1217
*/
@@ -31,6 +36,7 @@
3136
#define BUG_UD2 0xfffe
3237
#define BUG_UD1 0xfffd
3338
#define BUG_UD1_UBSAN 0xfffc
39+
#define BUG_UD1_WARN 0xfffb
3440
#define BUG_UDB 0xffd6
3541
#define BUG_LOCK 0xfff0
3642

@@ -58,14 +64,17 @@
5864
#define __BUG_ENTRY_FORMAT(format)
5965
#endif
6066

67+
#ifdef CONFIG_X86_64
68+
#define HAVE_ARCH_BUG_FORMAT_ARGS
69+
#endif
70+
6171
#define __BUG_ENTRY(format, file, line, flags) \
6272
__BUG_REL("1b") "\t# bug_entry::bug_addr\n" \
6373
__BUG_ENTRY_FORMAT(format) \
6474
__BUG_ENTRY_VERBOSE(file, line) \
6575
"\t.word " flags "\t# bug_entry::flags\n"
6676

67-
#define _BUG_FLAGS_ASM(ins, format, file, line, flags, size, extra) \
68-
"1:\t" ins "\n" \
77+
#define _BUG_FLAGS_ASM(format, file, line, flags, size, extra) \
6978
".pushsection __bug_table,\"aw\"\n\t" \
7079
ANNOTATE_DATA_SPECIAL \
7180
"2:\n\t" \
@@ -82,7 +91,8 @@
8291

8392
#define _BUG_FLAGS(cond_str, ins, flags, extra) \
8493
do { \
85-
asm_inline volatile(_BUG_FLAGS_ASM(ins, "%c[fmt]", "%c[file]", \
94+
asm_inline volatile("1:\t" ins "\n" \
95+
_BUG_FLAGS_ASM("%c[fmt]", "%c[file]", \
8696
"%c[line]", "%c[fl]", \
8797
"%c[size]", extra) \
8898
: : [fmt] "i" (WARN_CONDITION_STR(cond_str)), \
@@ -93,7 +103,8 @@ do { \
93103
} while (0)
94104

95105
#define ARCH_WARN_ASM(file, line, flags, size) \
96-
_BUG_FLAGS_ASM(ASM_UD2, "0", file, line, flags, size, "")
106+
"1:\t " ASM_UD2 "\n" \
107+
_BUG_FLAGS_ASM("0", file, line, flags, size, "")
97108

98109
#else
99110

@@ -126,6 +137,49 @@ do { \
126137
instrumentation_end(); \
127138
} while (0)
128139

140+
#ifdef HAVE_ARCH_BUG_FORMAT_ARGS
141+
142+
#ifndef __ASSEMBLY__
143+
struct pt_regs;
144+
struct sysv_va_list { /* from AMD64 System V ABI */
145+
unsigned int gp_offset;
146+
unsigned int fp_offset;
147+
void *overflow_arg_area;
148+
void *reg_save_area;
149+
};
150+
struct arch_va_list {
151+
unsigned long regs[6];
152+
struct sysv_va_list args;
153+
};
154+
extern void *__warn_args(struct arch_va_list *args, struct pt_regs *regs);
155+
#endif /* __ASSEMBLY__ */
156+
157+
#define __WARN_bug_entry(flags, format) ({ \
158+
struct bug_entry *bug; \
159+
asm_inline volatile("lea (2f)(%%rip), %[addr]\n1:\n" \
160+
_BUG_FLAGS_ASM("%c[fmt]", "%c[file]", \
161+
"%c[line]", "%c[fl]", \
162+
"%c[size]", "") \
163+
: [addr] "=r" (bug) \
164+
: [fmt] "i" (format), \
165+
[file] "i" (__FILE__), \
166+
[line] "i" (__LINE__), \
167+
[fl] "i" (flags), \
168+
[size] "i" (sizeof(struct bug_entry))); \
169+
bug; })
170+
171+
#define __WARN_print_arg(flags, format, arg...) \
172+
do { \
173+
int __flags = (flags) | BUGFLAG_WARNING | BUGFLAG_ARGS ; \
174+
__WARN_trap(__WARN_bug_entry(__flags, format), ## arg); \
175+
asm (""); /* inhibit tail-call optimization */ \
176+
} while (0)
177+
178+
#define __WARN_printf(taint, fmt, arg...) \
179+
__WARN_print_arg(BUGFLAG_TAINT(taint), fmt, ## arg)
180+
181+
#endif /* HAVE_ARCH_BUG_FORMAT_ARGS */
182+
129183
#include <asm-generic/bug.h>
130184

131185
#endif /* _ASM_X86_BUG_H */

arch/x86/kernel/traps.c

Lines changed: 104 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -102,25 +102,37 @@ __always_inline int is_valid_bugaddr(unsigned long addr)
102102
* UBSan{0}: 67 0f b9 00 ud1 (%eax),%eax
103103
* UBSan{10}: 67 0f b9 40 10 ud1 0x10(%eax),%eax
104104
* static_call: 0f b9 cc ud1 %esp,%ecx
105+
* __WARN_trap: 67 48 0f b9 3a ud1 (%edx),%reg
105106
*
106-
* Notably UBSAN uses EAX, static_call uses ECX.
107+
* Notably, since __WARN_trap can use all registers, the distinction between
108+
* UD1 users is through R/M.
107109
*/
108110
__always_inline int decode_bug(unsigned long addr, s32 *imm, int *len)
109111
{
110112
unsigned long start = addr;
113+
u8 v, reg, rm, rex = 0;
114+
int type = BUG_UD1;
111115
bool lock = false;
112-
u8 v;
113116

114117
if (addr < TASK_SIZE_MAX)
115118
return BUG_NONE;
116119

117-
v = *(u8 *)(addr++);
118-
if (v == INSN_ASOP)
120+
for (;;) {
119121
v = *(u8 *)(addr++);
122+
if (v == INSN_ASOP)
123+
continue;
120124

121-
if (v == INSN_LOCK) {
122-
lock = true;
123-
v = *(u8 *)(addr++);
125+
if (v == INSN_LOCK) {
126+
lock = true;
127+
continue;
128+
}
129+
130+
if ((v & 0xf0) == 0x40) {
131+
rex = v;
132+
continue;
133+
}
134+
135+
break;
124136
}
125137

126138
switch (v) {
@@ -156,18 +168,33 @@ __always_inline int decode_bug(unsigned long addr, s32 *imm, int *len)
156168
if (X86_MODRM_MOD(v) != 3 && X86_MODRM_RM(v) == 4)
157169
addr++; /* SIB */
158170

171+
reg = X86_MODRM_REG(v) + 8*!!X86_REX_R(rex);
172+
rm = X86_MODRM_RM(v) + 8*!!X86_REX_B(rex);
173+
159174
/* Decode immediate, if present */
160175
switch (X86_MODRM_MOD(v)) {
161176
case 0: if (X86_MODRM_RM(v) == 5)
162-
addr += 4; /* RIP + disp32 */
177+
addr += 4; /* RIP + disp32 */
178+
179+
if (rm == 0) /* (%eax) */
180+
type = BUG_UD1_UBSAN;
181+
182+
if (rm == 2) { /* (%edx) */
183+
*imm = reg;
184+
type = BUG_UD1_WARN;
185+
}
163186
break;
164187

165188
case 1: *imm = *(s8 *)addr;
166189
addr += 1;
190+
if (rm == 0) /* (%eax) */
191+
type = BUG_UD1_UBSAN;
167192
break;
168193

169194
case 2: *imm = *(s32 *)addr;
170195
addr += 4;
196+
if (rm == 0) /* (%eax) */
197+
type = BUG_UD1_UBSAN;
171198
break;
172199

173200
case 3: break;
@@ -176,12 +203,73 @@ __always_inline int decode_bug(unsigned long addr, s32 *imm, int *len)
176203
/* record instruction length */
177204
*len = addr - start;
178205

179-
if (X86_MODRM_REG(v) == 0) /* EAX */
180-
return BUG_UD1_UBSAN;
206+
return type;
207+
}
181208

182-
return BUG_UD1;
209+
static inline unsigned long pt_regs_val(struct pt_regs *regs, int nr)
210+
{
211+
int offset = pt_regs_offset(regs, nr);
212+
if (WARN_ON_ONCE(offset < -0))
213+
return 0;
214+
return *((unsigned long *)((void *)regs + offset));
183215
}
184216

217+
#ifdef HAVE_ARCH_BUG_FORMAT_ARGS
218+
/*
219+
* Create a va_list from an exception context.
220+
*/
221+
void *__warn_args(struct arch_va_list *args, struct pt_regs *regs)
222+
{
223+
/*
224+
* Register save area; populate with function call argument registers
225+
*/
226+
args->regs[0] = regs->di;
227+
args->regs[1] = regs->si;
228+
args->regs[2] = regs->dx;
229+
args->regs[3] = regs->cx;
230+
args->regs[4] = regs->r8;
231+
args->regs[5] = regs->r9;
232+
233+
/*
234+
* From the ABI document:
235+
*
236+
* @gp_offset - the element holds the offset in bytes from
237+
* reg_save_area to the place where the next available general purpose
238+
* argument register is saved. In case all argument registers have
239+
* been exhausted, it is set to the value 48 (6*8).
240+
*
241+
* @fp_offset - the element holds the offset in bytes from
242+
* reg_save_area to the place where the next available floating point
243+
* argument is saved. In case all argument registers have been
244+
* exhausted, it is set to the value 176 (6*8 + 8*16)
245+
*
246+
* @overflow_arg_area - this pointer is used to fetch arguments passed
247+
* on the stack. It is initialized with the address of the first
248+
* argument passed on the stack, if any, and then always updated to
249+
* point to the start of the next argument on the stack.
250+
*
251+
* @reg_save_area - the element points to the start of the register
252+
* save area.
253+
*
254+
* Notably the vararg starts with the second argument and there are no
255+
* floating point arguments in the kernel.
256+
*/
257+
args->args.gp_offset = 1*8;
258+
args->args.fp_offset = 6*8 + 8*16;
259+
args->args.reg_save_area = &args->regs;
260+
args->args.overflow_arg_area = (void *)regs->sp;
261+
262+
/*
263+
* If the exception came from __WARN_trap, there is a return
264+
* address on the stack, skip that. This is why any __WARN_trap()
265+
* caller must inhibit tail-call optimization.
266+
*/
267+
if ((void *)regs->ip == &__WARN_trap)
268+
args->args.overflow_arg_area += 8;
269+
270+
return &args->args;
271+
}
272+
#endif /* HAVE_ARCH_BUG_FORMAT_ARGS */
185273

186274
static nokprobe_inline int
187275
do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str,
@@ -334,6 +422,11 @@ static noinstr bool handle_bug(struct pt_regs *regs)
334422
raw_local_irq_enable();
335423

336424
switch (ud_type) {
425+
case BUG_UD1_WARN:
426+
if (report_bug_entry((void *)pt_regs_val(regs, ud_imm), regs) == BUG_TRAP_TYPE_WARN)
427+
handled = true;
428+
break;
429+
337430
case BUG_UD2:
338431
if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) {
339432
handled = true;

0 commit comments

Comments
 (0)