Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 155 additions & 8 deletions asm/assemble.c
Original file line number Diff line number Diff line change
Expand Up @@ -710,12 +710,118 @@ static void no_match_error(enum match_result m, const insn *ins)
}
}

/*
 * Relaxable jcc/jmp bookkeeping, indexed by the order in which
 * jmp_match() encounters each instance within a pass.
 *
 * For every instance we remember whether it was recorded as NEAR
 * (nonzero) or SHORT (zero), both for the pass in progress and for the
 * pass before it. jmp_match() consults the previous-pass value to
 * account for self-shrink when deciding whether a forward jcc/jmp can
 * become SHORT.
 *
 * A new pass is recognised by comparing against _passn (defined in
 * asm/nasm.c).
 */
static struct {
    int64_t nalloc;          /* capacity, in entries, of each array below */
    int64_t nused;           /* slots handed out so far in this pass */
    int64_t pass_seen;       /* _passn value at the last rotation; -1 = none yet */
    uint8_t *cur_was_near;   /* NEAR/SHORT flags being recorded this pass */
    uint8_t *prev_was_near;  /* NEAR/SHORT flags recorded by the previous pass */
    uint8_t *spec_used;      /* persistent: speculated short at least once */
} jmp_track = {
    .nalloc        = 0,
    .nused         = 0,
    .pass_seen     = -1,
    .cur_was_near  = NULL,
    .prev_was_near = NULL,
    .spec_used     = NULL
};

/*
 * Hand out the next slot index for the current pass.
 *
 * If the index lies beyond the current capacity, all three tracking
 * arrays are grown (capacity doubles, starting from 256, until the
 * index fits) and the newly added tail of each array is zero-filled.
 *
 * Returns the slot index that was just claimed.
 */
static int64_t jmp_track_alloc(void)
{
    const int64_t slot = jmp_track.nused;
    const int64_t old_cap = jmp_track.nalloc;

    jmp_track.nused = slot + 1;

    /* Common case: the slot already exists. */
    if (slot < old_cap)
        return slot;

    int64_t cap = old_cap ? old_cap * 2 : 256;
    while (cap <= slot)
        cap *= 2;

    uint8_t ** const arrays[] = {
        &jmp_track.prev_was_near,
        &jmp_track.cur_was_near,
        &jmp_track.spec_used
    };
    const size_t grown = (size_t)(cap - old_cap);
    size_t k;

    for (k = 0; k < sizeof arrays / sizeof arrays[0]; k++) {
        uint8_t *p = nasm_realloc(*arrays[k], (size_t)cap);

        /* Only the freshly added tail needs clearing. */
        memset(p + old_cap, 0, grown);
        *arrays[k] = p;
    }

    jmp_track.nalloc = cap;
    return slot;
}

/*
 * Rotate the per-pass buffers if _passn has changed since the last call.
 *
 * On a pass change the flags recorded during the pass that just ended
 * become the "previous" set, the old "previous" buffer is recycled as
 * the (zeroed) "current" set, and slot numbering restarts from 0.
 * Does nothing when called again within the same pass.
 */
static void jmp_track_check_new_pass(void)
{
    if (jmp_track.pass_seen != _passn) {
        uint8_t * const recycled = jmp_track.prev_was_near;

        jmp_track.prev_was_near = jmp_track.cur_was_near;
        jmp_track.cur_was_near = recycled;

        /* Buffers may still be unallocated on the very first rotation. */
        if (recycled && jmp_track.nalloc > 0)
            memset(recycled, 0, (size_t)jmp_track.nalloc);

        jmp_track.nused = 0;
        jmp_track.pass_seen = _passn;
    }
}

/*
 * Release all jump-tracking storage and return the tracker to its
 * initial (empty, no pass seen) state.
 */
void jmp_track_cleanup(void)
{
    nasm_free(jmp_track.cur_was_near);
    jmp_track.cur_was_near = NULL;

    nasm_free(jmp_track.prev_was_near);
    jmp_track.prev_was_near = NULL;

    nasm_free(jmp_track.spec_used);
    jmp_track.spec_used = NULL;

    /* Same values as the static initializer. */
    jmp_track.pass_seen = -1;
    jmp_track.nused = 0;
    jmp_track.nalloc = 0;
}

/*
 * Report whether slot i was recorded as NEAR in the previous pass.
 * Returns false when there is no previous-pass buffer or when i is at
 * or beyond the allocated capacity.
 */
static bool jmp_track_prev_near(int64_t i)
{
    if (!jmp_track.prev_was_near)
        return false;
    if (i >= jmp_track.nalloc)
        return false;

    return jmp_track.prev_was_near[i] != 0;
}

/*
 * Store the NEAR (true) / SHORT (false) outcome for slot i in the
 * current-pass buffer. Silently ignored when the buffer is missing or
 * i is at or beyond the allocated capacity.
 */
static void jmp_track_record(int64_t i, bool was_near)
{
    if (!jmp_track.cur_was_near || i >= jmp_track.nalloc)
        return;

    jmp_track.cur_was_near[i] = was_near;
}

/*
* Bounded-speculation latch. The forward self-shrink re-check predicts
* the post-shrink displacement assuming the span between the jump and
* its target shifts rigidly. That holds for plain code, but an "align"
* (or any position-dependent padding) between the jump and the target
* makes the prediction wrong: shrinking the jump changes the padding,
* so the speculative SHORT does not actually fit. Left unchecked the
* jump oscillates NEAR<->SHORT forever and the relaxation never
* converges.
*
* To guarantee termination we let each jump speculate at most once for
* the whole assembly. After its single attempt it falls back to the
* baseline current-layout test, which is convergent. If the speculative
* SHORT was a true fixed point (rigid span) the baseline keeps it short;
* if not, the baseline grows it back to NEAR and -- speculation now
* spent -- it stays there. This bit therefore persists across passes
* and is never cleared at pass boundaries.
*/
static bool jmp_track_spec_used(int64_t i)
{
    /* No latch storage, or slot out of range: nothing has been spent. */
    if (!jmp_track.spec_used || i >= jmp_track.nalloc)
        return false;

    return jmp_track.spec_used[i] != 0;
}

static void jmp_track_mark_spec_used(int64_t i)
{
    /* Nothing to latch if storage is missing or the slot is out of range. */
    if (!jmp_track.spec_used || i >= jmp_track.nalloc)
        return;

    /* Set once; this flag is not reset by the per-pass buffer rotation. */
    jmp_track.spec_used[i] = 1;
}

/* This is a real hack. The jcc8 or jmp8 byte code must come first. */
static enum match_result
jmp_match(const insn *ins, const struct itemplate *temp)
{
const struct operand * const op0 = get_operand_const(ins, 0);
int64_t delta;
int64_t idx;
bool prev_near;

if (op0->type & STRICT)
return MERR_INVALOP;
Expand All @@ -736,13 +842,19 @@ jmp_match(const insn *ins, const struct itemplate *temp)
}
}

jmp_track_check_new_pass();
idx = jmp_track_alloc();
prev_near = jmp_track_prev_near(idx);

if (op0->opflags & OPFLAG_UNKNOWN) {
/* Be optimistic in pass 1 */
jmp_track_record(idx, false);
return MOK_GOOD;
}

if (op0->segment != ins->loc.segment) {
/* Cross-segment jump */
jmp_track_record(idx, true);
return MERR_INVALOP;
}

Expand All @@ -759,19 +871,54 @@ jmp_match(const insn *ins, const struct itemplate *temp)
delta = op0->offset - ins->loc.offset;
if (delta < -128 + 2 || delta > 127 + 15) {
/* This cannot be a byte-sized jump */
jmp_track_record(idx, true);
return MERR_INVALOP;
} else if (delta >= -128 + 15 && delta <= 127 + 2) {
/* It is guaranteed to be a valid byte-sized jump, no need to test */
} else {
}
if (delta < -128 + 15 || delta > 127 + 2) {
/* Borderline: need to do this the hard way... */
int64_t isize = calcsize_speculative(ins, temp);
if (isize < 0)
return MERR_INVALOP;
delta -= isize;
if ((int8_t)delta != delta)
int64_t post_delta;
if (isize < 0) {
jmp_track_record(idx, true);
return MERR_INVALOP;
}
post_delta = delta - isize;
if ((int8_t)post_delta != post_delta) {
/*
* Doesn't fit at the current short encoding. Forward
* jcc/jmp self-shrink fallback: if the previous pass
* emitted this instance as NEAR, switching to SHORT will
* shift the (forward) target near_size - short_size bytes
* closer in the current layout, so the real post-shrink
* rel8 is (delta - near_size), not (delta - short_size).
* Re-check against the post-shrink displacement before
* rejecting.
*
* Restricted to (prev_near && post_delta > 0): backward
* jumps and already-short jumps don't self-shrink. Without
* the prev_near guard this would accept SHORT for jumps
* stably short in the previous pass that have since grown
* past 127 -- which then emit an out-of-range rel8.
*/
if (!prev_near || post_delta <= 0 || jmp_track_spec_used(idx)) {
jmp_track_record(idx, true);
return MERR_INVALOP;
}
const uint8_t c = temp->code[0];
int rel_bytes = (isize == 2)
? (ins->bits == 16 ? 2 : 4)
: (ins->bits == 16 ? 4 : 2);
int savings = (c == 0370) ? rel_bytes : (rel_bytes - 1);
int64_t post_shrink = post_delta - savings;
if (post_shrink < -128 || post_shrink > 127) {
jmp_track_record(idx, true);
return MERR_INVALOP;
}
/* Speculative SHORT accepted; spend this jump's one attempt. */
jmp_track_mark_spec_used(idx);
}
}

jmp_track_record(idx, false);
return MOK_GOOD;
}

Expand Down
1 change: 1 addition & 0 deletions asm/assemble.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ extern bool in_absolute; /* Are we in an absolute segment? */
extern struct location absolute;

int64_t increment_offset(int64_t delta);
void jmp_track_cleanup(void);
void process_insn(insn *instruction);

bool directive_valid(const char *);
Expand Down
1 change: 1 addition & 0 deletions asm/nasm.c
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,7 @@ int main(int argc, char **argv)
raa_free(offsets);
saa_free(forwrefs);
eval_cleanup();
jmp_track_cleanup();
stdscan_cleanup();
src_free();
strlist_free(&include_path);
Expand Down
15 changes: 15 additions & 0 deletions travis/test/jccshrink.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
; Regression test: forward jcc/jmp self-shrink in jmp_match.
;
; Without the self-shrink-aware rel8 check, NASM keeps `jmp B` at
; offset 0x1d9 as a 5-byte near jump (e9 7f 00 00 00). With the fix it
; emits a 2-byte short jump (eb 7f, rel8 = 127), saving 3 bytes.
; Total binary: 605 bytes stock vs 602 bytes patched.

bits 32
times 43 jmp A
times 256 db 0
je A
A:
jmp B
times 127 db 0
B:
Binary file added travis/test/jccshrink.bin.t
Binary file not shown.
12 changes: 12 additions & 0 deletions travis/test/jccshrink.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[
{
"description": "Forward jcc/jmp self-shrink in jmp_match",
"id": "jccshrink",
"format": "bin",
"source": "jccshrink.asm",
"option": "-Ox",
"target": [
{ "output": "jccshrink.bin" }
]
}
]
20 changes: 20 additions & 0 deletions travis/test/jccshrinkalign.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
; Convergence regression: a forward jcc/jmp self-shrink candidate with a
; position-dependent "align" between the jump and its target.
;
; The naive self-shrink re-check in jmp_match() predicts the post-shrink
; displacement assuming the span shifts rigidly. The "align 2" below
; breaks that assumption: shrinking `jmp B` changes the alignment
; padding, so the speculative SHORT does not fit. Unbounded, the jump
; oscillates NEAR<->SHORT and assembly stalls ("unable to find valid
; values for all labels ... giving up"). With bounded speculation the
; jump speculates once, reverts to NEAR, and assembly converges.

bits 32
times 43 jmp A
times 256 db 0
je A
A:
jmp B
align 2
times 127 db 0
B:
Binary file added travis/test/jccshrinkalign.bin.t
Binary file not shown.
12 changes: 12 additions & 0 deletions travis/test/jccshrinkalign.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[
{
"description": "Forward jcc/jmp self-shrink convergence with align in span",
"id": "jccshrinkalign",
"format": "bin",
"source": "jccshrinkalign.asm",
"option": "-Ox",
"target": [
{ "output": "jccshrinkalign.bin" }
]
}
]