Skip to content

Commit 0fd0843

Browse files
agatti authored and dpgeorge committed
tools/mpy_ld.py: Optimise MPY trampoline sizes if possible.
This commit changes the way the trampoline code sequence of native modules is emitted, generating an optimised code sequence to jump to the entry symbol. It turns out that the address of the entry point is known even before the segments are built, and that it does not change while the module is being processed on any architecture except Xtensa. This means that the jump trampoline doesn't have to be a fixed-size dummy block to be filled in later; it can be the final trampoline used in the module. On Xtensa the address of the symbol is offset by the length of the literals pool, but since the trampoline generated there is always the shortest one, that platform is left alone (handling distances greater than 128KiB would require more extensive changes). Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
1 parent 21e4c03 commit 0fd0843

1 file changed

Lines changed: 80 additions & 39 deletions

File tree

tools/mpy_ld.py

Lines changed: 80 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -131,73 +131,101 @@
131131
# Architecture configuration
132132

133133

134+
def fit_signed(bits, value):
135+
return (value >> bits) == 0 or (value >> bits) == -1
136+
137+
138+
# Note: all trampoline jump function arguments are raw offsets calculated from
139+
# the start of the text segment with no relocation applied beforehand.
140+
141+
134142
def asm_jump_x86(entry):
135-
return struct.pack("<BI", 0xE9, entry - 5)
143+
return struct.pack("<BI", 0xE9, entry)
136144

137145

138146
def asm_jump_thumb(entry):
139-
# This function must return the same number of bytes for the encoding of the jump
140-
# regardless of the value of `entry`.
141-
b_off = entry - 4
142-
if b_off >> 11 == 0 or b_off >> 11 == -1:
147+
if fit_signed(11, entry):
143148
# Signed value fits in 12 bits.
144-
b0 = 0xE000 | (b_off >> 1 & 0x07FF)
145-
b1 = 0
146-
b2 = 0
147-
b3 = 0
149+
b0 = 0xE000 | ((entry >> 1) & 0x07FF)
150+
return struct.pack("<H", b0)
148151
else:
149152
# Use bl to do a large jump/call:
150153
# push {r0, lr}
151154
# bl <dest>
152155
# pop {r0, pc}
153-
b_off -= 2 # skip "push {r0, lr}"
156+
entry += 2 # skip "push {r0, lr}"
154157
b0 = 0xB400 | 0x0100 | 0x0001 # push, lr, r0
155-
b1 = 0xF000 | (((b_off) >> 12) & 0x07FF)
156-
b2 = 0xF800 | (((b_off) >> 1) & 0x07FF)
158+
b1 = 0xF000 | ((entry >> 12) & 0x07FF)
159+
b2 = 0xF800 | ((entry >> 1) & 0x07FF)
157160
b3 = 0xBC00 | 0x0100 | 0x0001 # pop, pc, r0
158-
return struct.pack("<HHHH", b0, b1, b2, b3)
161+
return struct.pack("<HHHH", b0, b1, b2, b3)
159162

160163

161164
def asm_jump_thumb2(entry):
162-
b_off = entry - 4
163-
if b_off >> 11 == 0 or b_off >> 11 == -1:
165+
if fit_signed(11, entry):
164166
# Signed value fits in 12 bits
165-
b0 = 0xE000 | (b_off >> 1 & 0x07FF)
166-
b1 = 0
167+
b0 = 0xE000 | ((entry >> 1) & 0x07FF)
168+
return struct.pack("<H", b0)
167169
else:
168170
# Use large jump
169-
b0 = 0xF000 | (b_off >> 12 & 0x07FF)
170-
b1 = 0xB800 | (b_off >> 1 & 0x7FF)
171-
return struct.pack("<HH", b0, b1)
171+
b0 = 0xF000 | ((entry >> 12) & 0x07FF)
172+
b1 = 0xB800 | ((entry >> 1) & 0x07FF)
173+
return struct.pack("<HH", b0, b1)
172174

173175

174176
def asm_jump_xtensa(entry):
175-
jump_offset = entry - 4
176-
jump_op = jump_offset << 6 | 6
177-
return struct.pack("<BH", jump_op & 0xFF, jump_op >> 8)
177+
if fit_signed(17, entry):
178+
jump_op = (entry - 4) << 6 | 6
179+
return struct.pack("<BH", jump_op & 0xFF, jump_op >> 8)
180+
else:
181+
raise LinkError("Large jumps are not yet supported on Xtensa")
178182

179183

180184
def asm_jump_riscv(entry):
181-
# This could be 6 bytes shorter, but the code currently cannot
182-
# support a trampoline with varying length depending on the offset.
183-
184-
# auipc t6, HI(entry)
185-
# jalr zero, t6, LO(entry)
186-
upper, lower = split_riscv_address(entry)
187-
return struct.pack(
188-
"<II", (upper | 0x00000F97) & 0xFFFFFFFF, ((lower << 20) | 0x000F8067) & 0xFFFFFFFF
189-
)
185+
if fit_signed(11, entry):
186+
entry += 2
187+
# c.j entry
188+
return struct.pack(
189+
"<H",
190+
0xA001
191+
| ((entry & 0x0E) << 2)
192+
| ((entry & 0x300) << 1)
193+
| ((entry & 0x800) << 1)
194+
| ((entry & 0x400) >> 2)
195+
| ((entry & 0x80) >> 1)
196+
| ((entry & 0x40) << 1)
197+
| ((entry & 0x20) >> 3)
198+
| ((entry & 0x10) << 7),
199+
)
200+
else:
201+
# auipc t6, HI(entry)
202+
# jalr zero, t6, LO(entry)
203+
upper, lower = split_riscv_address(entry + 8)
204+
return struct.pack(
205+
"<II", (upper | 0x00000F97) & 0xFFFFFFFF, ((lower << 20) | 0x000F8067) & 0xFFFFFFFF
206+
)
190207

191208

192209
class ArchData:
193-
def __init__(self, name, mpy_feature, word_size, arch_got, asm_jump, *, separate_rodata=False):
210+
def __init__(
211+
self,
212+
name,
213+
mpy_feature,
214+
word_size,
215+
arch_got,
216+
asm_jump,
217+
*,
218+
separate_rodata=False,
219+
delayed_entry_offset=False,
220+
):
194221
self.name = name
195222
self.mpy_feature = mpy_feature
196223
self.qstr_entry_size = 2
197224
self.word_size = word_size
198225
self.arch_got = arch_got
199226
self.asm_jump = asm_jump
200227
self.separate_rodata = separate_rodata
228+
self.delayed_entry_offset = delayed_entry_offset
201229

202230

203231
ARCH_DATA = {
@@ -249,6 +277,7 @@ def __init__(self, name, mpy_feature, word_size, arch_got, asm_jump, *, separate
249277
4,
250278
(R_XTENSA_32, R_XTENSA_PLT),
251279
asm_jump_xtensa,
280+
delayed_entry_offset=True,
252281
),
253282
"xtensawin": ArchData(
254283
"EM_XTENSA",
@@ -257,6 +286,7 @@ def __init__(self, name, mpy_feature, word_size, arch_got, asm_jump, *, separate
257286
(R_XTENSA_32, R_XTENSA_PLT),
258287
asm_jump_xtensa,
259288
separate_rodata=True,
289+
delayed_entry_offset=True,
260290
),
261291
"rv32imc": ArchData(
262292
"EM_RISCV",
@@ -428,6 +458,9 @@ def find_addr(self, name):
428458
return s.section.addr + s["st_value"]
429459
raise LinkError("unknown symbol: {}".format(name))
430460

461+
def find_entry_addr(self):
462+
return self.find_addr("mpy_init")
463+
431464

432465
def build_got_generic(env):
433466
env.got_entries = {}
@@ -1248,8 +1281,13 @@ def link_objects(env, native_qstr_vals_len):
12481281
if undef_errors:
12491282
raise LinkError("\n".join(undef_errors))
12501283

1284+
# Generate the entry trampoline assuming the offset is already known.
1285+
env.entry_point = env.find_entry_addr()
1286+
jump = env.arch.asm_jump(env.entry_point)
1287+
env.entry_trampoline_len = len(jump)
1288+
12511289
# Align sections, assign their addresses, and create full_text
1252-
env.full_text = bytearray(env.arch.asm_jump(8)) # dummy, to be filled in later
1290+
env.full_text = bytearray(jump)
12531291
env.full_rodata = bytearray(0)
12541292
env.full_bss = bytearray(0)
12551293
for sec in env.sections:
@@ -1341,10 +1379,13 @@ def write_reloc(self, base, offset, dest, n):
13411379
self.write_uint(n)
13421380

13431381

1344-
def build_mpy(env, entry_offset, fmpy, native_qstr_vals):
1345-
# Write jump instruction to start of text
1346-
jump = env.arch.asm_jump(entry_offset)
1347-
env.full_text[: len(jump)] = jump
1382+
def build_mpy(env, fmpy, native_qstr_vals):
1383+
# Rewrite the entry trampoline if the proper value isn't known earlier, and
1384+
# ensure the trampoline size remains the same.
1385+
if env.arch.delayed_entry_offset:
1386+
jump = env.arch.asm_jump(env.find_entry_addr())
1387+
env.full_text[: len(jump)] = jump
1388+
assert len(jump) == env.entry_trampoline_len
13481389

13491390
log(LOG_LEVEL_1, "arch: {}".format(env.arch.name))
13501391
log(LOG_LEVEL_1, "text size: {}".format(len(env.full_text)))
@@ -1512,7 +1553,7 @@ def do_link(args):
15121553
load_object_file(env, f, obj_name)
15131554

15141555
link_objects(env, len(native_qstr_vals))
1515-
build_mpy(env, env.find_addr("mpy_init"), args.output, native_qstr_vals)
1556+
build_mpy(env, args.output, native_qstr_vals)
15161557
except LinkError as er:
15171558
print("LinkError:", er.args[0])
15181559
sys.exit(1)

0 commit comments

Comments
 (0)