Skip to content

Commit 8f06188

Browse files
committed
more efficient save/restore of registers
1 parent 4120fb6 commit 8f06188

File tree

2 files changed

+107
-22
lines changed

2 files changed

+107
-22
lines changed

src/vm.rs

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// Register-based virtual machine for when we can't JIT,
22
// like on iOS.
33

4+
use std::convert::TryInto;
45
use std::fmt;
56

67
/// Register index (0-255)
@@ -285,6 +286,15 @@ pub enum Opcode {
285286
/// Zero memory region
286287
MemZero { dst: Reg, size: u32 },
287288

289+
// ============ Register Save/Restore ============
290+
291+
/// Save registers start_reg..start_reg+count (end exclusive) to locals, starting at byte offset `slot` from the frame's locals base.
292+
/// Each register is 8 bytes.
293+
SaveRegs { start_reg: Reg, count: u8, slot: u32 },
294+
295+
/// Restore registers start_reg..start_reg+count (end exclusive) from locals, starting at byte offset `slot` from the frame's locals base.
296+
RestoreRegs { start_reg: Reg, count: u8, slot: u32 },
297+
288298
// ============ Debugging ============
289299

290300
/// Print integer value (for debugging)
@@ -394,9 +404,6 @@ struct CallFrame {
394404

395405
/// Register index used for return value handling (a single `Reg`, not a snapshot of register contents)
396406
return_reg: Reg,
397-
398-
/// Saved registers - stored as boxed array to avoid stack overflow
399-
saved_registers: Box<[u64; 256]>,
400407
}
401408

402409
/// The virtual machine state
@@ -888,13 +895,12 @@ impl VM {
888895
}
889896

890897
Opcode::Call { func, args_start, arg_count } => {
891-
// Save current frame with registers
898+
// Save current frame
892899
let frame = CallFrame {
893900
func_idx: self.current_func,
894901
ip: self.ip,
895902
locals_base: self.locals_base,
896903
return_reg: 0,
897-
saved_registers: Box::new(self.registers),
898904
};
899905
self.call_stack.push(frame);
900906

@@ -925,13 +931,12 @@ impl VM {
925931
Opcode::CallIndirect { func_reg, args_start, arg_count } => {
926932
let func_idx = self.get_u64(func_reg) as FuncIdx;
927933

928-
// Save current frame with registers
934+
// Save current frame
929935
let frame = CallFrame {
930936
func_idx: self.current_func,
931937
ip: self.ip,
932938
locals_base: self.locals_base,
933939
return_reg: 0,
934-
saved_registers: Box::new(self.registers),
935940
};
936941
self.call_stack.push(frame);
937942

@@ -953,36 +958,26 @@ impl VM {
953958
}
954959

955960
Opcode::Return => {
956-
// Save return value (implicitly in r0) before restoring registers
957-
let return_value = self.registers[0];
958-
959961
if self.call_stack.is_empty() {
960962
return self.get_i64(0);
961963
}
962964
let frame = self.call_stack.pop().unwrap();
963965
self.current_func = frame.func_idx;
964966
self.ip = frame.ip;
965967
self.locals_base = frame.locals_base;
966-
// Restore caller's registers and put return value in r0
967-
self.registers = *frame.saved_registers;
968-
self.registers[0] = return_value;
969968
}
970969

971970
Opcode::ReturnReg { src } => {
972-
// Save return value before restoring registers
973-
let return_value = self.registers[src as usize];
971+
// Move return value to r0
972+
self.registers[0] = self.registers[src as usize];
974973

975974
if self.call_stack.is_empty() {
976-
self.registers[0] = return_value;
977975
return self.get_i64(0);
978976
}
979977
let frame = self.call_stack.pop().unwrap();
980978
self.current_func = frame.func_idx;
981979
self.ip = frame.ip;
982980
self.locals_base = frame.locals_base;
983-
// Restore caller's registers and put return value in r0
984-
self.registers = *frame.saved_registers;
985-
self.registers[0] = return_value;
986981
}
987982

988983
Opcode::AllocLocals { size } => {
@@ -1011,6 +1006,24 @@ impl VM {
10111006
}
10121007
}
10131008

1009+
Opcode::SaveRegs { start_reg, count, slot } => {
1010+
let base = self.locals_base + slot as usize;
1011+
for i in 0..count as usize {
1012+
let reg_val = self.registers[start_reg as usize + i];
1013+
let offset = base + i * 8;
1014+
self.locals[offset..offset + 8].copy_from_slice(&reg_val.to_le_bytes());
1015+
}
1016+
}
1017+
1018+
Opcode::RestoreRegs { start_reg, count, slot } => {
1019+
let base = self.locals_base + slot as usize;
1020+
for i in 0..count as usize {
1021+
let offset = base + i * 8;
1022+
let bytes: [u8; 8] = self.locals[offset..offset + 8].try_into().unwrap();
1023+
self.registers[start_reg as usize + i] = u64::from_le_bytes(bytes);
1024+
}
1025+
}
1026+
10141027
// Debugging
10151028
Opcode::PrintI32 { src } => {
10161029
println!("{}", self.get_i64(src) as i32);

src/vm_codegen.rs

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,9 @@ struct FunctionTranslator<'a> {
217217

218218
/// Tracks if a return has been emitted (so we don't emit epilogue).
219219
has_returned: bool,
220+
221+
/// Byte offset in locals where registers are saved.
222+
save_regs_offset: u32,
220223
}
221224

222225
/// Check if a type should be returned via output pointer.
@@ -246,6 +249,7 @@ impl<'a> FunctionTranslator<'a> {
246249
globals,
247250
output_ptr: None,
248251
has_returned: false,
252+
save_regs_offset: 0,
249253
}
250254
}
251255

@@ -263,6 +267,17 @@ impl<'a> FunctionTranslator<'a> {
263267
self.alloc_reg();
264268
}
265269

270+
// Reserve space for saving registers. We'll patch this after translation
271+
// when we know how many registers are used.
272+
// Reserve 256 * 8 = 2048 bytes maximum, but we'll only use what we need.
273+
let save_regs_slot = self.alloc_local(256 * 8);
274+
self.save_regs_offset = (save_regs_slot as u32) * 8; // Convert slot to byte offset
275+
func.emit(Opcode::SaveRegs {
276+
start_reg: 0,
277+
count: 0, // Placeholder - will be patched
278+
slot: self.save_regs_offset,
279+
});
280+
266281
// Save parameters to local slots so they persist across recursive calls.
267282
// Registers are shared across all call frames, but locals are per-frame.
268283
for (i, param) in self.decl.params.iter().enumerate() {
@@ -295,17 +310,58 @@ impl<'a> FunctionTranslator<'a> {
295310
src: result_reg,
296311
size,
297312
});
313+
// Restore all registers before return.
314+
func.emit(Opcode::RestoreRegs {
315+
start_reg: 0,
316+
count: 0, // Placeholder - will be patched
317+
slot: self.save_regs_offset,
318+
});
298319
func.emit(Opcode::Return);
299-
} else if result_reg != 0 {
300-
func.emit(Opcode::ReturnReg { src: result_reg });
301320
} else {
321+
// Move result to r0, restore r1..N, return
322+
if result_reg != 0 {
323+
func.emit(Opcode::Move { dst: 0, src: result_reg });
324+
}
325+
func.emit(Opcode::RestoreRegs {
326+
start_reg: 1, // Skip r0 which has the return value
327+
count: 0, // Placeholder - will be patched
328+
slot: self.save_regs_offset + 8,
329+
});
302330
func.emit(Opcode::Return);
303331
}
304332
}
305333
} else {
334+
// No body - restore all registers and return
335+
func.emit(Opcode::RestoreRegs {
336+
start_reg: 0,
337+
count: 0, // Placeholder - will be patched
338+
slot: self.save_regs_offset,
339+
});
306340
func.emit(Opcode::Return);
307341
}
308342

343+
// Patch SaveRegs/RestoreRegs instructions with actual register count.
344+
let reg_count = self.next_reg;
345+
for op in &mut func.code {
346+
match op {
347+
Opcode::SaveRegs { count, .. } => {
348+
*count = reg_count;
349+
}
350+
Opcode::RestoreRegs { start_reg, count, .. } => {
351+
// If start_reg is 1, we're skipping r0 (for returns with value)
352+
if *start_reg == 1 {
353+
*count = if reg_count > 1 { reg_count - 1 } else { 0 };
354+
} else {
355+
*count = reg_count;
356+
}
357+
}
358+
_ => {}
359+
}
360+
}
361+
362+
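// Overwrite locals_size with the end of the register save area. NOTE(review): any locals allocated after the save area (e.g. parameter slots) would fall outside this size — confirm.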
363+
self.locals_size = self.save_regs_offset + (reg_count as u32) * 8;
364+
309365
// Set function metadata.
310366
func.locals_size = self.locals_size;
311367
func.local_slots = self.next_slot;
@@ -539,9 +595,25 @@ impl<'a> FunctionTranslator<'a> {
539595
src: result,
540596
size,
541597
});
598+
// Restore caller's registers before returning.
599+
func.emit(Opcode::RestoreRegs {
600+
start_reg: 0,
601+
count: 0, // Will be patched
602+
slot: self.save_regs_offset,
603+
});
542604
func.emit(Opcode::Return);
543605
} else {
544-
func.emit(Opcode::ReturnReg { src: result });
606+
// Move result to r0 before restoring (it will be preserved).
607+
if result != 0 {
608+
func.emit(Opcode::Move { dst: 0, src: result });
609+
}
610+
// Restore caller's registers, but skip r0 which has return value.
611+
func.emit(Opcode::RestoreRegs {
612+
start_reg: 1, // Skip r0
613+
count: 0, // Will be patched to next_reg - 1
614+
slot: self.save_regs_offset + 8, // Skip first 8 bytes (r0's slot)
615+
});
616+
func.emit(Opcode::Return);
545617
}
546618
self.has_returned = true;
547619
result

0 commit comments

Comments
 (0)