@@ -2179,49 +2179,93 @@ pub(crate) async fn join_ring_request(
21792179 }
21802180
21812181 let own = op_manager. ring . connection_manager . own_location ( ) ;
2182- // Use the joiner's own location as the desired_location for the connect request.
2183- //
2184- // History: PR #2907 fixed a critical issue where using `gateway.location()` caused
2185- // 99.7% of join requests to converge on a single acceptor. That PR intended to use
2186- // the joiner's own location but implemented `Location::random()` instead, which
2187- // broke simulation determinism for certain seeds (issues #3028, #3030).
2188- //
2189- // Using `own.location()` is correct because:
2190- // - Each joiner has a unique location → requests spread across gateway neighbors
2191- // - The gateway routes toward the joiner's ring neighborhood → better initial placement
2192- // - Deterministic in simulation tests (same seed → same location → same topology)
2193- // - Falls back to random only if own address is unknown (NAT before ObservedAddress)
21942182 let base_location = own. location ( ) . unwrap_or_else ( Location :: random) ;
21952183
21962184 // Capture now once so the jitter failure counter and any other time-sensitive
21972185 // calls in this function share a consistent timestamp.
21982186 let now = Instant :: now ( ) ;
21992187
2200- // Apply location jitter on retry to explore different ring regions.
2188+ let current_connections = op_manager. ring . connection_manager . connection_count ( ) ;
2189+
2190+ // Helper: apply jitter to base_location for NAT-compatible acceptor exploration.
22012191 // After consecutive hole-punch failures, the same location routes to the same
2202- // acceptors. Jittering the target location causes routing to converge on
2203- // different ring peers, increasing the chance of finding a NAT-compatible acceptor.
2204- let failures = op_manager
2205- . ring
2206- . connection_manager
2207- . increment_connect_jitter_failures ( now) ;
2208- let desired_location = if failures > 1 {
2209- // Jitter magnitude: 0.05 * 2^(failures-2), capped at 0.25
2210- let magnitude = ( 0.05 * ( 1u32 << ( failures - 2 ) . min ( 3 ) ) as f64 ) . min ( 0.25 ) ;
2211- // Generate uniform random offset in [-magnitude, +magnitude]
2212- let uniform_01 = ( GlobalRng :: random_u64 ( ) as f64 ) / ( u64:: MAX as f64 ) ;
2213- let offset = magnitude * ( 2.0 * uniform_01 - 1.0 ) ;
2214- let jittered = ( base_location. as_f64 ( ) + offset) . rem_euclid ( 1.0 ) ;
2215- tracing:: info!(
2216- failures,
2217- magnitude,
2218- base = %base_location,
2219- jittered,
2220- "connect: applying location jitter after consecutive failures"
2221- ) ;
2222- Location :: new ( jittered)
2192+ // acceptors. Jittering explores different ring regions.
2193+ let apply_bootstrap_jitter = |base : Location | -> Location {
2194+ let failures = op_manager
2195+ . ring
2196+ . connection_manager
2197+ . increment_connect_jitter_failures ( now) ;
2198+ if failures > 1 {
2199+ // Jitter magnitude: 0.05 * 2^(failures-2), capped at 0.25
2200+ let magnitude = ( 0.05 * ( 1u32 << ( failures - 2 ) . min ( 3 ) ) as f64 ) . min ( 0.25 ) ;
2201+ let uniform_01 = ( GlobalRng :: random_u64 ( ) as f64 ) / ( u64:: MAX as f64 ) ;
2202+ let offset = magnitude * ( 2.0 * uniform_01 - 1.0 ) ;
2203+ let jittered = ( base. as_f64 ( ) + offset) . rem_euclid ( 1.0 ) ;
2204+ tracing:: info!(
2205+ failures,
2206+ magnitude,
2207+ base = %base,
2208+ jittered,
2209+ "connect: applying location jitter after consecutive failures"
2210+ ) ;
2211+ Location :: new ( jittered)
2212+ } else {
2213+ base
2214+ }
2215+ } ;
2216+
2217+ // Choose desired_location based on whether we have enough connections for
2218+ // gap-based targeting (Kleinberg-optimal) or need bootstrap targeting.
2219+ //
2220+ // With connections: use gap_target to find the largest gap in our connection
2221+ // distribution in log-distance space and target its midpoint. This produces
2222+ // Kleinberg-optimal 1/d-distributed connections.
2223+ //
2224+ // Without connections (true bootstrap): target own location with jitter.
2225+ // Own-location targeting spreads requests across gateway neighbors (each
2226+ // joiner has a unique location). Jitter after failures explores different
2227+ // ring regions to find NAT-compatible acceptors.
2228+ // Minimum connections needed for meaningful gap analysis. Independent of
2229+ // min_connections because this is about statistical sample size for the 1/d
2230+ // distribution, not connection targets. 3 points are sufficient to identify
2231+ // the largest gap in log-distance space.
2232+ const GAP_TARGET_THRESHOLD : usize = 3 ;
2233+ let desired_location = if current_connections >= GAP_TARGET_THRESHOLD {
2234+ // Use gap_target: find the largest gap in our current connections
2235+ // and target its midpoint for Kleinberg-optimal placement.
2236+ if let Some ( my_loc) = own. location ( ) {
2237+ let neighbor_distances: Vec < f64 > = op_manager
2238+ . ring
2239+ . connection_manager
2240+ . location_for_all_peers ( )
2241+ . into_iter ( )
2242+ . map ( |peer_loc| my_loc. distance ( peer_loc) . as_f64 ( ) )
2243+ . collect ( ) ;
2244+ if neighbor_distances. len ( ) >= GAP_TARGET_THRESHOLD {
2245+ let target = crate :: topology:: small_world_rand:: gap_target (
2246+ my_loc,
2247+ neighbor_distances. into_iter ( ) ,
2248+ ) ;
2249+ tracing:: info!(
2250+ current_connections,
2251+ own_location = %my_loc,
2252+ target = %target,
2253+ distance = my_loc. distance( target) . as_f64( ) ,
2254+ "connect: using gap_target for Kleinberg-optimal placement"
2255+ ) ;
2256+ target
2257+ } else {
2258+ // Fewer unique neighbor locations than connections (e.g. sybil
2259+ // masking collapses peers to same ring location). Fall back to
2260+ // jitter-based exploration to avoid retrying the same acceptors.
2261+ apply_bootstrap_jitter ( base_location)
2262+ }
2263+ } else {
2264+ apply_bootstrap_jitter ( base_location)
2265+ }
22232266 } else {
2224- base_location
2267+ // Bootstrap mode: target own location with jitter after failures.
2268+ apply_bootstrap_jitter ( base_location)
22252269 } ;
22262270 let ttl = op_manager
22272271 . ring
@@ -4389,4 +4433,69 @@ mod tests {
43894433 "unconnected peer should not be in the visited filter"
43904434 ) ;
43914435 }
4436+
4437+ /// Verify that gap_target produces shorter connections than own-location+jitter.
4438+ ///
4439+ /// This is a regression test for the bootstrap topology bug: before the fix,
4440+ /// join_ring_request always targeted the joiner's own location with up to ±0.25
4441+ /// jitter, producing median connection distance ~0.12 instead of the Kleinberg-
4442+ /// optimal ~0.02. Now, with ≥3 existing connections, gap_target is used instead.
4443+ #[ test]
4444+ fn test_gap_target_produces_shorter_connections_than_jitter ( ) {
4445+ use crate :: topology:: small_world_rand:: gap_target;
4446+
4447+ let _guard = GlobalRng :: seed_guard ( 42 ) ;
4448+
4449+ let my_location = Location :: new ( 0.5 ) ;
4450+
4451+ // Simulate a peer with 5 existing connections at various distances
4452+ let existing_connections = vec ! [
4453+ Location :: new( 0.501 ) , // very close
4454+ Location :: new( 0.51 ) , // close
4455+ Location :: new( 0.55 ) , // nearby
4456+ Location :: new( 0.7 ) , // medium
4457+ Location :: new( 0.9 ) , // far
4458+ ] ;
4459+
4460+ let distances: Vec < f64 > = existing_connections
4461+ . iter ( )
4462+ . map ( |loc| my_location. distance ( * loc) . as_f64 ( ) )
4463+ . collect ( ) ;
4464+
4465+ // Generate 100 gap_target samples and 100 jitter samples
4466+ let mut gap_target_distances = Vec :: new ( ) ;
4467+ let mut jitter_distances = Vec :: new ( ) ;
4468+
4469+ for _ in 0 ..100 {
4470+ let target = gap_target ( my_location, distances. iter ( ) . copied ( ) ) ;
4471+ gap_target_distances. push ( my_location. distance ( target) . as_f64 ( ) ) ;
4472+
4473+ // Simulate jitter with failures=3 (magnitude=0.10)
4474+ let magnitude = 0.10 ;
4475+ let uniform_01 = ( GlobalRng :: random_u64 ( ) as f64 ) / ( u64:: MAX as f64 ) ;
4476+ let offset = magnitude * ( 2.0 * uniform_01 - 1.0 ) ;
4477+ let jittered = ( my_location. as_f64 ( ) + offset) . rem_euclid ( 1.0 ) ;
4478+ jitter_distances. push ( my_location. distance ( Location :: new ( jittered) ) . as_f64 ( ) ) ;
4479+ }
4480+
4481+ gap_target_distances. sort_by ( |a, b| a. partial_cmp ( b) . unwrap ( ) ) ;
4482+ jitter_distances. sort_by ( |a, b| a. partial_cmp ( b) . unwrap ( ) ) ;
4483+
4484+ let gap_median = gap_target_distances[ 50 ] ;
4485+ let jitter_median = jitter_distances[ 50 ] ;
4486+
4487+ // gap_target should produce significantly shorter median distances
4488+ // because it targets gaps in the 1/d distribution (Kleinberg-optimal),
4489+ // while jitter produces roughly uniform distances around the peer.
4490+ assert ! (
4491+ gap_median < jitter_median,
4492+ "gap_target median ({gap_median:.4}) should be shorter than jitter median ({jitter_median:.4})"
4493+ ) ;
4494+
4495+ // gap_target median should be well under 0.05 (typically ~0.001-0.01)
4496+ assert ! (
4497+ gap_median < 0.05 ,
4498+ "gap_target median ({gap_median:.4}) should be < 0.05 (Kleinberg-optimal produces short connections)"
4499+ ) ;
4500+ }
43924501}
0 commit comments