freenet
diff --git a/‎.github/workflows/benchmarks-extended.yml‎
Lines changed: 43 additions & 8 deletions b/‎.github/workflows/benchmarks-extended.yml‎
Lines changed: 43 additions & 8 deletions
diff --git a/‎.github/workflows/benchmarks.yml‎
Lines changed: 4 additions & 2 deletions b/‎.github/workflows/benchmarks.yml‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎crates/core/Cargo.toml‎
Lines changed: 8 additions & 1 deletion b/‎crates/core/Cargo.toml‎
Lines changed: 8 additions & 1 deletion
diff --git a/‎crates/core/benches/transport/blackbox.rs‎
Lines changed: 37 additions & 12 deletions b/‎crates/core/benches/transport/blackbox.rs‎
Lines changed: 37 additions & 12 deletions
diff --git a/‎crates/core/benches/transport/common.rs‎
Lines changed: 35 additions & 12 deletions b/‎crates/core/benches/transport/common.rs‎
Lines changed: 35 additions & 12 deletions
@@ -32,8 +32,8 @@ jobs:
     name: Extended Benchmarks
     runs-on: ubuntu-latest
     timeout-minutes: 60  # Extended benchmarks take longer
-    # Don't fail workflow if regressions detected
-    continue-on-error: true
+    # Job will fail on severe regressions (>25%) for nightly/main builds
+    # PRs only get informational comments, not failures
 
     env:
       CARGO_TARGET_DIR: ${{ github.workspace }}/target
@@ -120,26 +120,43 @@ jobs:
           echo "" >> $GITHUB_STEP_SUMMARY
 
           # Run extended benchmark suite
-          cargo bench --bench transport_extended --features bench 2>&1 | tee bench_output.txt || true
+          # Use --color=never to avoid ANSI escape codes in output that break markdown
+          cargo bench --bench transport_extended --features bench --color=never 2>&1 | tee bench_output.txt || true
 
       # Parse results with structured script
       - name: Parse Benchmark Results
         id: parse_results
         run: |
           python3 --version
 
-          if python3 scripts/parse_bench_output.py bench_output.txt > parsed_output.txt 2>&1; then
+          # Run the parser and capture its exit code
+          # Exit codes: 0=ok, 1=regressions, 2=severe regressions
+          set +e
+          python3 scripts/parse_bench_output.py bench_output.txt > parsed_output.txt 2>&1
+          PARSE_EXIT=$?
+          set -e
+
+          if [ $PARSE_EXIT -eq 0 ]; then
             echo "regression_detected=false" >> $GITHUB_OUTPUT
-          else
+            echo "severe_regression=false" >> $GITHUB_OUTPUT
+          elif [ $PARSE_EXIT -eq 1 ]; then
+            echo "regression_detected=true" >> $GITHUB_OUTPUT
+            echo "severe_regression=false" >> $GITHUB_OUTPUT
+          elif [ $PARSE_EXIT -eq 2 ]; then
             echo "regression_detected=true" >> $GITHUB_OUTPUT
+            echo "severe_regression=true" >> $GITHUB_OUTPUT
+          else
+            echo "regression_detected=false" >> $GITHUB_OUTPUT
+            echo "severe_regression=false" >> $GITHUB_OUTPUT
           fi
 
           # Append parsed results
           echo "" >> $GITHUB_STEP_SUMMARY
           cat bench_summary.md >> $GITHUB_STEP_SUMMARY 2>/dev/null || {
             echo "⚠️ Failed to parse results" >> $GITHUB_STEP_SUMMARY
             echo '```' >> $GITHUB_STEP_SUMMARY
-            tail -100 bench_output.txt >> $GITHUB_STEP_SUMMARY
+            # Strip ANSI escape codes for clean markdown output
+            tail -100 bench_output.txt | sed 's/\x1b\[[0-9;]*m//g' >> $GITHUB_STEP_SUMMARY
             echo '```' >> $GITHUB_STEP_SUMMARY
           }
 
@@ -211,7 +228,20 @@ jobs:
             target/criterion/**/report/index.html
           retention-days: 90  # Keep extended results longer
 
-  # Summary job that always succeeds
+      # Fail nightly/main builds on severe throughput regressions (>25%)
+      # PRs only get informational comments, not failures
+      - name: Fail on Severe Regressions
+        if: steps.parse_results.outputs.severe_regression == 'true' && github.event_name != 'pull_request'
+        run: |
+          echo "🚨 SEVERE THROUGHPUT REGRESSION DETECTED" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "One or more throughput benchmarks regressed by more than 25%." >> $GITHUB_STEP_SUMMARY
+          echo "This indicates a significant performance problem that should be investigated." >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "See the benchmark results above for details." >> $GITHUB_STEP_SUMMARY
+          exit 1
+
+  # Summary job - reports status but doesn't block PRs
   benchmark-summary:
     name: Extended Benchmark Summary
     runs-on: ubuntu-latest
@@ -223,7 +253,12 @@ jobs:
       - name: Check Status
         run: |
           if [ "${{ needs.extended-benchmark.result }}" == "failure" ]; then
-            echo "⚠️ Extended benchmarks detected regressions (non-blocking)"
+            if [ "${{ github.event_name }}" == "pull_request" ]; then
+              echo "⚠️ Extended benchmarks detected regressions (informational, not blocking PR)"
+            else
+              echo "🚨 Extended benchmarks failed - severe throughput regression detected (>25%)"
+              echo "This indicates a significant performance problem that should be investigated."
+            fi
           elif [ "${{ needs.extended-benchmark.result }}" == "cancelled" ]; then
             echo "⚠️ Extended benchmarks were cancelled"
           else
 
@@ -135,7 +135,8 @@ jobs:
 
           # Run transport benchmarks (requires bench feature)
           # Uses transport_ci benchmark suite - fast, deterministic subset for CI
-          cargo bench --bench transport_ci --features bench 2>&1 | tee bench_output.txt
+          # Use --color=never to avoid ANSI escape codes in output that break markdown
+          cargo bench --bench transport_ci --features bench --color=never 2>&1 | tee bench_output.txt
 
       # Parse benchmark results with structured script
       - name: Parse Benchmark Results
@@ -159,7 +160,8 @@ jobs:
           cat bench_summary.md >> $GITHUB_STEP_SUMMARY 2>/dev/null || {
             echo "⚠️ Failed to parse benchmark results" >> $GITHUB_STEP_SUMMARY
             echo '```' >> $GITHUB_STEP_SUMMARY
-            tail -50 bench_output.txt >> $GITHUB_STEP_SUMMARY
+            # Strip ANSI escape codes for clean markdown output
+            tail -50 bench_output.txt | sed 's/\x1b\[[0-9;]*m//g' >> $GITHUB_STEP_SUMMARY
             echo '```' >> $GITHUB_STEP_SUMMARY
           }
 
 
@@ -127,12 +127,19 @@ ureq = { version = "3.1", features = ["json"] }
 which = "8.0"
 regex = "1"
 
-# CI-optimized benchmark suite (~8 min, deterministic)
+# CI-optimized benchmark suite (~5 min, deterministic)
 [[bench]]
 name = "transport_ci"
 harness = false
 required-features = ["bench"]
 
+# Extended benchmark suite (~30-45 min, nightly)
+# Includes high-latency paths, packet loss, large transfers, and micro-benchmarks
+[[bench]]
+name = "transport_extended"
+harness = false
+required-features = ["bench"]
+
 # LEDBAT validation benchmarks (manual, ~3-5 min)
 [[bench]]
 name = "transport_ledbat"
 
@@ -91,28 +91,53 @@ pub fn bench_message_throughput(c: &mut Criterion) {
                     (Arc::new(DashMap::new()) as Channels, message)
                 },
                 |(channels, message)| async move {
-                    // Create peers
-                    let (peer_a_pub, mut peer_a, peer_a_addr) =
-                        create_mock_peer(PacketDropPolicy::ReceiveAll, channels.clone())
-                            .await
-                            .unwrap();
+                    // Create peers - handle errors gracefully
+                    let (peer_a_pub, mut peer_a, peer_a_addr) = match create_mock_peer(
+                        PacketDropPolicy::ReceiveAll,
+                        channels.clone(),
+                    )
+                    .await
+                    {
+                        Ok(p) => p,
+                        Err(e) => {
+                            eprintln!("throughput peer_a creation failed: {:?}", e);
+                            return;
+                        }
+                    };
                     let (peer_b_pub, mut peer_b, peer_b_addr) =
-                        create_mock_peer(PacketDropPolicy::ReceiveAll, channels)
-                            .await
-                            .unwrap();
+                        match create_mock_peer(PacketDropPolicy::ReceiveAll, channels).await {
+                            Ok(p) => p,
+                            Err(e) => {
+                                eprintln!("throughput peer_b creation failed: {:?}", e);
+                                return;
+                            }
+                        };
 
                     // Connect
                     let (conn_a_inner, conn_b_inner) = futures::join!(
                         peer_a.connect(peer_b_pub, peer_b_addr),
                         peer_b.connect(peer_a_pub, peer_a_addr),
                     );
                     let (conn_a, conn_b) = futures::join!(conn_a_inner, conn_b_inner);
-                    let (mut conn_a, mut conn_b) = (conn_a.unwrap(), conn_b.unwrap());
+                    let (mut conn_a, mut conn_b) = match (conn_a, conn_b) {
+                        (Ok(a), Ok(b)) => (a, b),
+                        (Err(e), _) | (_, Err(e)) => {
+                            eprintln!("throughput connection failed: {:?}", e);
+                            return;
+                        }
+                    };
 
                     // Send and receive message (this is what we're measuring)
-                    conn_a.send(message).await.unwrap();
-                    let received: Vec<u8> = conn_b.recv().await.unwrap();
-                    std_black_box(received);
+                    if let Err(e) = conn_a.send(message).await {
+                        eprintln!("throughput send failed: {:?}", e);
+                        return;
+                    }
+                    match conn_b.recv().await {
+                        Ok(received) => {
+                            std_black_box(received);
+                        }
+                        Err(e) => eprintln!("throughput recv failed: {:?}", e),
+                    }
                 },
                 BatchSize::SmallInput,
             );
 
@@ -58,13 +58,16 @@ pub struct PeerPair {
     pub peer_b_pub: TransportPublicKey,
     pub peer_b: OutboundConnectionHandler<MockSocket>,
     pub peer_b_addr: SocketAddr,
+    /// Keep channels alive - dropping this closes MockSocket inbound channels
+    #[allow(dead_code)]
+    channels: Channels,
 }
 
 /// Connected peer pair ready for data transfer
 ///
-/// Encapsulates both connections and their peer handlers. The peer handlers
-/// MUST be kept alive - dropping them closes the `inbound_packet_sender`
-/// channel which causes `ConnectionClosed` errors.
+/// Encapsulates both connections and their peer handlers. The channels map
+/// MUST be kept alive - dropping it closes the MockSocket inbound channels,
+/// which makes the listener tasks exit and leads to `ConnectionClosed` errors.
 ///
 /// ## Example
 ///
@@ -76,12 +79,15 @@ pub struct PeerPair {
 pub struct ConnectedPeerPair {
     pub conn_a: PeerConnection<MockSocket>,
     pub conn_b: PeerConnection<MockSocket>,
-    /// Must keep peer_a alive - it holds the inbound_packet_sender channel
+    /// Must keep peer_a alive for send_queue channel
     #[allow(dead_code)]
     peer_a: OutboundConnectionHandler<MockSocket>,
-    /// Must keep peer_b alive - it holds the inbound_packet_sender channel
+    /// Must keep peer_b alive for send_queue channel
     #[allow(dead_code)]
     peer_b: OutboundConnectionHandler<MockSocket>,
+    /// Keep channels alive - dropping this closes MockSocket inbound channels
+    #[allow(dead_code)]
+    channels: Channels,
 }
 
 impl PeerPair {
@@ -102,6 +108,7 @@ impl PeerPair {
             conn_b,
             peer_a: self.peer_a,
             peer_b: self.peer_b,
+            channels: self.channels,
         }
     }
 }
@@ -112,18 +119,32 @@ impl ConnectedPeerPair {
     /// Performs `iterations` send/receive cycles to allow LEDBAT congestion
     /// control to reach steady state before measurements begin.
     ///
+    /// Returns the number of successful warmup iterations. If warmup fails
+    /// early, benchmarks can still proceed (the connection may be less stable).
+    ///
     /// ## Example
     ///
     /// ```rust,ignore
     /// let mut peers = create_connected_peers().await;
-    /// peers.warmup(5, 65536).await; // 5 x 64KB warmup transfers
+    /// let completed = peers.warmup(5, 65536).await; // 5 x 64KB warmup transfers
     /// ```
-    pub async fn warmup(&mut self, iterations: usize, message_size: usize) {
-        for _ in 0..iterations {
+    pub async fn warmup(&mut self, iterations: usize, message_size: usize) -> usize {
+        let mut completed = 0;
+        for i in 0..iterations {
             let msg = vec![0xABu8; message_size];
-            self.conn_a.send(msg).await.expect("warmup send");
-            let _: Vec<u8> = self.conn_b.recv().await.expect("warmup recv");
+            if let Err(e) = self.conn_a.send(msg).await {
+                eprintln!("Warmup send {} failed: {:?}", i, e);
+                break;
+            }
+            match self.conn_b.recv().await {
+                Ok(_) => completed += 1,
+                Err(e) => {
+                    eprintln!("Warmup recv {} failed: {:?}", i, e);
+                    break;
+                }
+            }
         }
+        completed
     }
 
     /// Get mutable references to both connections
@@ -147,7 +168,7 @@ pub async fn create_peer_pair(channels: Channels) -> PeerPair {
             .expect("create peer A");
 
     let (peer_b_pub, peer_b, peer_b_addr) =
-        create_mock_peer(PacketDropPolicy::ReceiveAll, channels)
+        create_mock_peer(PacketDropPolicy::ReceiveAll, channels.clone())
             .await
             .expect("create peer B");
 
@@ -158,6 +179,7 @@ pub async fn create_peer_pair(channels: Channels) -> PeerPair {
         peer_b_pub,
         peer_b,
         peer_b_addr,
+        channels,
     }
 }
 
@@ -174,7 +196,7 @@ pub async fn create_peer_pair_with_delay(channels: Channels, delay: Duration) ->
     let (peer_b_pub, peer_b, peer_b_addr) = create_mock_peer_with_delay(
         PacketDropPolicy::ReceiveAll,
         PacketDelayPolicy::Fixed(delay),
-        channels,
+        channels.clone(),
     )
     .await
     .expect("create peer B");
@@ -186,6 +208,7 @@ pub async fn create_peer_pair_with_delay(channels: Channels, delay: Duration) ->
         peer_b_pub,
         peer_b,
         peer_b_addr,
+        channels,
     }
 }