Skip to content

Commit cecf564

Browse files
authored
GH-146128: Fix AArch64 multi-instruction constants and relocations (GH-148598)
Fix AArch64 multi-instruction constants and relocations * Eliminates redundant orr xN, xN, 0xffff after 16- or 32-bit loads * Merges adrp (21rx) and ldr (12) relocations into a single 33rx relocation, when safe to do so.
1 parent 600f4db commit cecf564

File tree

4 files changed

+331
-107
lines changed

4 files changed

+331
-107
lines changed

Python/jit.c

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,14 @@ patch_aarch64_12(unsigned char *location, uint64_t value)
355355
set_bits(loc32, 10, value, shift, 12);
356356
}
357357

358+
// "Relaxable" flavour of the 12-bit low-part patch for an absolute address.
// It normally appears together with a patch_aarch64_21rx. On its own there is
// nothing to relax, so this simply forwards to patch_aarch64_12.
void
patch_aarch64_12x(unsigned char *location, uint64_t value)
{
    patch_aarch64_12(location, value);
}
365+
358366
// 16-bit low part of an absolute address.
359367
void
360368
patch_aarch64_16a(unsigned char *location, uint64_t value)
@@ -415,6 +423,14 @@ patch_aarch64_21r(unsigned char *location, uint64_t value)
415423
set_bits(loc32, 5, value, 2, 19);
416424
}
417425

426+
// "Relaxable" flavour of the 21-bit page-count patch (number of pages between
// this instruction's page and the page of an absolute address). It normally
// appears together with a patch_aarch64_12x. On its own there is nothing to
// relax, so this simply forwards to patch_aarch64_21r.
void
patch_aarch64_21rx(unsigned char *location, uint64_t value)
{
    patch_aarch64_21r(location, value);
}
433+
418434
// 21-bit relative branch.
419435
void
420436
patch_aarch64_19r(unsigned char *location, uint64_t value)
@@ -445,6 +461,56 @@ patch_aarch64_26r(unsigned char *location, uint64_t value)
445461
set_bits(loc32, 0, value, 2, 26);
446462
}
447463

464+
// A pair of patch_aarch64_21rx and patch_aarch64_12x, applied to an adrp/ldr
// sequence and relaxed to something cheaper whenever possible.
//
//   location_a: the adrp instruction.
//   location_b: the ldr instruction that consumes adrp's result.
//   value:      address of the 64-bit slot the pair loads from. The slot is
//               dereferenced here, i.e. it is read at patch time.
//
// Relaxations are attempted in this order; the first that fits wins:
//   1. loaded value < 2^16                    -> movz; nop
//   2. loaded value < 2^32                    -> movz; movk
//   3. loaded value's page reachable by adrp  -> adrp; add
//   4. slot close enough to location_a        -> ldr (literal); nop
// Otherwise both instructions are patched normally.
void
patch_aarch64_33rx(unsigned char *location_a, unsigned char *location_b, uint64_t value)
{
    uint32_t *loc32_a = (uint32_t *)location_a;
    uint32_t *loc32_b = (uint32_t *)location_b;
    // Try to relax the pair of GOT loads into an immediate value:
    assert(IS_AARCH64_ADRP(*loc32_a));
    assert(IS_AARCH64_LDR_OR_STR(*loc32_b));
    // adrp's destination register:
    unsigned char reg = get_bits(*loc32_a, 0, 5);
    // There should be only one register involved. Every relaxation below
    // rewrites both instructions purely in terms of reg, so the ldr must both
    // read from and write to adrp's destination register:
    assert(reg == get_bits(*loc32_b, 0, 5));  // ldr's output register.
    assert(reg == get_bits(*loc32_b, 5, 5));  // ldr's input register.
    // The value that the original adrp/ldr pair would have loaded:
    uint64_t relaxed = *(uint64_t *)value;
    if (relaxed < (1UL << 16)) {
        // adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; nop
        *loc32_a = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | reg;
        *loc32_b = 0xD503201F;
        return;
    }
    if (relaxed < (1ULL << 32)) {
        // adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; movk reg, YYY
        *loc32_a = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | reg;
        *loc32_b = 0xF2A00000 | (get_bits(relaxed, 16, 16) << 5) | reg;
        return;
    }
    // Signed number of pages between location_a and the loaded value:
    int64_t page_delta = (relaxed >> 12) - ((uintptr_t)location_a >> 12);
    if (page_delta >= -(1L << 20) &&
        page_delta < (1L << 20))
    {
        // adrp reg, AAA; ldr reg, [reg + BBB] -> adrp reg, AAA; add reg, reg, BBB
        patch_aarch64_21rx(location_a, relaxed);
        *loc32_b = 0x91000000 | get_bits(relaxed, 0, 12) << 10 | reg << 5 | reg;
        return;
    }
    // From here on, work with the byte offset from location_a to the slot
    // itself (not to the loaded value):
    relaxed = value - (uintptr_t)location_a;
    if ((relaxed & 0x3) == 0 &&
        (int64_t)relaxed >= -(1L << 19) &&
        (int64_t)relaxed < (1L << 19))
    {
        // adrp reg, AAA; ldr reg, [reg + BBB] -> ldr reg, XXX; nop
        *loc32_a = 0x58000000 | (get_bits(relaxed, 2, 19) << 5) | reg;
        *loc32_b = 0xD503201F;
        return;
    }
    // Couldn't do it. Just patch the two instructions normally:
    patch_aarch64_21rx(location_a, value);
    patch_aarch64_12x(location_b, value);
}
513+
448514
// Relaxable 32-bit relative address.
449515
void
450516
patch_x86_64_32rx(unsigned char *location, uint64_t value)

0 commit comments

Comments
 (0)