Skip to content

Commit e722ae6

Browse files
authored
Merge pull request #1628 from CathalMullan/graviola
Extend x86 inline asm to support `ymm` and `zmm` vector registers
2 parents 890434a + 8c93b88 commit e722ae6

2 files changed

Lines changed: 106 additions & 29 deletions

File tree

example/std_example.rs

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
)]
1010
#![allow(internal_features)]
1111

12+
#[cfg(target_arch = "x86_64")]
13+
use std::arch::asm;
1214
#[cfg(target_arch = "x86_64")]
1315
use std::arch::x86_64::*;
1416
use std::hint::black_box;
@@ -279,6 +281,17 @@ unsafe fn test_simd() {
279281

280282
#[cfg(not(jit))]
281283
test_crc32();
284+
285+
#[cfg(not(jit))]
286+
test_xmm_roundtrip();
287+
#[cfg(not(jit))]
288+
if is_x86_feature_detected!("avx") {
289+
test_ymm_roundtrip();
290+
}
291+
#[cfg(not(jit))]
292+
if is_x86_feature_detected!("avx512f") {
293+
test_zmm_roundtrip();
294+
}
282295
}
283296
}
284297

@@ -576,6 +589,65 @@ unsafe fn test_mm_cvtps_ph() {
576589
assert_eq_m128i(r, e);
577590
}
578591

592+
// Round-trips 16 bytes through a register of the `xmm_reg` class using inline asm
// and asserts that the bytes stored to `output` equal the bytes loaded from `input`.
// Only compiled for x86_64 and when the `jit` cfg is not set.
#[cfg(target_arch = "x86_64")]
593+
#[cfg(not(jit))]
594+
unsafe fn test_xmm_roundtrip() {
595+
unsafe {
596+
let input = [1u8; 16];
597+
let mut output = [0u8; 16];
598+
599+
// Template line 1 loads from `[input]` into the register, line 2 stores the
// register to `[output]`. Both pointers are passed in general-purpose registers;
// `out(xmm_reg) _` lets the compiler pick the vector register and discards its
// final value.
asm!(
600+
"movups {xmm}, [{input}]",
601+
"movups [{output}], {xmm}",
602+
input = in(reg) input.as_ptr(),
603+
output = in(reg) output.as_mut_ptr(),
604+
xmm = out(xmm_reg) _,
605+
);
606+
607+
// `output` started zeroed, so equality shows the asm actually copied the data.
assert_eq!(input, output);
608+
}
609+
}
610+
611+
// Round-trips 32 bytes through a register of the `ymm_reg` class using inline asm
// and asserts that the bytes stored to `output` equal the bytes loaded from `input`.
// Only compiled for x86_64 and when the `jit` cfg is not set. The function is
// compiled with the `avx` target feature enabled, so it should only be called
// when AVX is available.
#[cfg(target_arch = "x86_64")]
612+
#[target_feature(enable = "avx")]
613+
#[cfg(not(jit))]
614+
unsafe fn test_ymm_roundtrip() {
615+
unsafe {
616+
let input = [1u8; 32];
617+
let mut output = [0u8; 32];
618+
619+
// Template line 1 loads from `[input]` into the register, line 2 stores the
// register to `[output]`. Both pointers are passed in general-purpose registers;
// `out(ymm_reg) _` lets the compiler pick the vector register and discards its
// final value.
asm!(
620+
"vmovups {ymm}, [{input}]",
621+
"vmovups [{output}], {ymm}",
622+
input = in(reg) input.as_ptr(),
623+
output = in(reg) output.as_mut_ptr(),
624+
ymm = out(ymm_reg) _,
625+
);
626+
627+
// `output` started zeroed, so equality shows the asm actually copied the data.
assert_eq!(input, output);
628+
}
629+
}
630+
631+
// Round-trips 64 bytes through a register of the `zmm_reg` class using inline asm
// and asserts that the bytes stored to `output` equal the bytes loaded from `input`.
// Only compiled for x86_64 and when the `jit` cfg is not set. The function is
// compiled with the `avx512f` target feature enabled, so it should only be called
// when AVX-512F is available.
#[cfg(target_arch = "x86_64")]
632+
#[target_feature(enable = "avx512f")]
633+
#[cfg(not(jit))]
634+
unsafe fn test_zmm_roundtrip() {
635+
unsafe {
636+
let input = [1u8; 64];
637+
let mut output = [0u8; 64];
638+
639+
// Template line 1 loads from `[input]` into the register, line 2 stores the
// register to `[output]`. Both pointers are passed in general-purpose registers;
// `out(zmm_reg) _` lets the compiler pick the vector register and discards its
// final value.
asm!(
640+
"vmovups {zmm}, [{input}]",
641+
"vmovups [{output}], {zmm}",
642+
input = in(reg) input.as_ptr(),
643+
output = in(reg) output.as_mut_ptr(),
644+
zmm = out(zmm_reg) _,
645+
);
646+
647+
// `output` started zeroed, so equality shows the asm actually copied the data.
assert_eq!(input, output);
648+
}
649+
}
650+
579651
fn test_checked_mul() {
580652
let u: Option<u8> = u8::from_str_radix("1000", 10).ok();
581653
assert_eq!(u, None);

src/inline_asm.rs

Lines changed: 34 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -548,22 +548,21 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
548548
match self.arch {
549549
InlineAsmArch::X86_64 => match reg {
550550
InlineAsmReg::X86(reg)
551-
if reg as u32 >= X86InlineAsmReg::xmm0 as u32
552-
&& reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
551+
if matches!(
552+
reg.reg_class(),
553+
X86InlineAsmRegClass::xmm_reg
554+
| X86InlineAsmRegClass::ymm_reg
555+
| X86InlineAsmRegClass::zmm_reg
556+
) =>
553557
{
554-
// rustc emits x0 rather than xmm0
555-
let class = match *modifier {
556-
None | Some('x') => "xmm",
557-
Some('y') => "ymm",
558-
Some('z') => "zmm",
559-
_ => unreachable!(),
560-
};
561-
write!(
562-
generated_asm,
563-
"{class}{}",
564-
reg as u32 - X86InlineAsmReg::xmm0 as u32
565-
)
566-
.unwrap();
558+
// rustc emits x0/y0/z0 rather than xmm0/ymm0/zmm0
559+
let name = reg.name();
560+
if let Some(prefix) = modifier {
561+
let index = &name[3..];
562+
write!(generated_asm, "{prefix}mm{index}").unwrap();
563+
} else {
564+
write!(generated_asm, "{name}").unwrap();
565+
}
567566
}
568567
_ => reg
569568
.emit(&mut generated_asm, InlineAsmArch::X86_64, *modifier)
@@ -716,12 +715,17 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
716715
InlineAsmArch::X86_64 => {
717716
match reg {
718717
InlineAsmReg::X86(reg)
719-
if reg as u32 >= X86InlineAsmReg::xmm0 as u32
720-
&& reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
718+
if matches!(
719+
reg.reg_class(),
720+
X86InlineAsmRegClass::xmm_reg
721+
| X86InlineAsmRegClass::ymm_reg
722+
| X86InlineAsmRegClass::zmm_reg
723+
) =>
721724
{
722-
// rustc emits x0 rather than xmm0
723-
write!(generated_asm, " movups [rbx+0x{:x}], ", offset.bytes()).unwrap();
724-
write!(generated_asm, "xmm{}", reg as u32 - X86InlineAsmReg::xmm0 as u32)
725+
// rustc emits x0/y0/z0 rather than xmm0/ymm0/zmm0
726+
let name = reg.name();
727+
let mov = if name.starts_with("xmm") { "movups" } else { "vmovups" };
728+
write!(generated_asm, " {mov} [rbx+0x{:x}], {name}", offset.bytes())
725729
.unwrap();
726730
}
727731
_ => {
@@ -761,16 +765,17 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
761765
InlineAsmArch::X86_64 => {
762766
match reg {
763767
InlineAsmReg::X86(reg)
764-
if reg as u32 >= X86InlineAsmReg::xmm0 as u32
765-
&& reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
768+
if matches!(
769+
reg.reg_class(),
770+
X86InlineAsmRegClass::xmm_reg
771+
| X86InlineAsmRegClass::ymm_reg
772+
| X86InlineAsmRegClass::zmm_reg
773+
) =>
766774
{
767-
// rustc emits x0 rather than xmm0
768-
write!(
769-
generated_asm,
770-
" movups xmm{}",
771-
reg as u32 - X86InlineAsmReg::xmm0 as u32
772-
)
773-
.unwrap();
775+
// rustc emits x0/y0/z0 rather than xmm0/ymm0/zmm0
776+
let name = reg.name();
777+
let mov = if name.starts_with("xmm") { "movups" } else { "vmovups" };
778+
write!(generated_asm, " {mov} {name}").unwrap();
774779
}
775780
_ => {
776781
generated_asm.push_str(" mov ");

0 commit comments

Comments
 (0)