diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index 5f7599c..94a542b 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -52,7 +52,13 @@ async fn replication_withheld_set( return (false, None); } let withheld = match rules { - Some(rules) if rules.is_empty() => Some(std::collections::HashSet::new()), + // No path-scoped rule can withhold anything (covers the empty-rules and + // root-only-rules cases), so skip the full withheld_blob_oids walk and + // withhold nothing. The predicate's safety-invariant test guards that + // this short-circuit matches what the walk would have returned. + Some(rules) if !visibility_pack::has_path_scoped_rule(&rules) => { + Some(std::collections::HashSet::new()) + } // withheld_blob_oids walks every ref with blocking `git ls-tree`; keep // that off the async worker thread. Some(rules) => { @@ -499,33 +505,40 @@ pub async fn git_upload_pack( .map_err(|e| AppError::Git(e.to_string()))?; let body_len = body.len(); - // withheld_blob_oids walks every ref with blocking `git ls-tree`; keep that - // off the async worker thread. - let withheld = { - let path = disk_path.clone(); - let rules = rules.clone(); - let owner_did = record.owner_did.clone(); - let caller_owned = caller.map(str::to_string); - let is_public = record.is_public; - tokio::task::spawn_blocking(move || { - visibility_pack::withheld_blob_oids( - &path, - &rules, - is_public, - &owner_did, - caller_owned.as_deref(), - ) - }) - .await - .map_err(|e| AppError::Git(e.to_string()))? - .map_err(|e| AppError::Git(e.to_string()))? - }; - - let resp = if withheld.is_empty() { + // No path-scoped rule can withhold an individual blob, and the whole-repo + // "/" gate above already enforced repo-level access. Skip the per-blob + // withheld walk and serve the pack directly. + let resp = if !visibility_pack::has_path_scoped_rule(&rules) { smart_http::upload_pack(&disk_path, body).await } else { - tracing::info!(repo = %name, caller = ?caller, withheld = withheld.len(), "serving filtered pack"); - smart_http::upload_pack_excluding(&disk_path, body, &withheld).await + // withheld_blob_oids walks every ref with blocking `git ls-tree`; keep + // that off the async worker thread. + let withheld = { + let path = disk_path.clone(); + let rules = rules.clone(); + let owner_did = record.owner_did.clone(); + let caller_owned = caller.map(str::to_string); + let is_public = record.is_public; + tokio::task::spawn_blocking(move || { + visibility_pack::withheld_blob_oids( + &path, + &rules, + is_public, + &owner_did, + caller_owned.as_deref(), + ) + }) + .await + .map_err(|e| AppError::Git(e.to_string()))? + .map_err(|e| AppError::Git(e.to_string()))? + }; + + if withheld.is_empty() { + smart_http::upload_pack(&disk_path, body).await + } else { + tracing::info!(repo = %name, caller = ?caller, withheld = withheld.len(), "serving filtered pack"); + smart_http::upload_pack_excluding(&disk_path, body, &withheld).await + } } .map_err(|e| { let msg = e.to_string(); @@ -825,7 +838,11 @@ pub async fn git_receive_pack( // Option B1: encrypt-then-pin the withheld blobs so authorized // readers can recover them when the origin cannot serve them. - if let Some(rules) = rules_for_enc.filter(|r| !r.is_empty()) { + // No path-scoped rule can withhold a blob, so withheld_blob_recipients + // would return an empty map after a full per-ref walk; skip it. Mirrors + // the has_path_scoped_rule gate on the other two withheld-walk sites. + if let Some(rules) = rules_for_enc.filter(|r| visibility_pack::has_path_scoped_rule(r)) + { let p = repo_path_clone.clone(); let owner = owner_did.clone(); let recip = tokio::task::spawn_blocking(move || { diff --git a/crates/gitlawb-node/src/git/visibility_pack.rs b/crates/gitlawb-node/src/git/visibility_pack.rs index 90ca772..6431300 100644 --- a/crates/gitlawb-node/src/git/visibility_pack.rs +++ b/crates/gitlawb-node/src/git/visibility_pack.rs @@ -77,6 +77,25 @@ pub fn withheld_blob_oids( Ok(denied.difference(&allowed).cloned().collect()) } +/// True if any rule scopes a sub-path of the repo (i.e. is not the whole-repo +/// "/" rule). When this returns `false`, no rule can withhold an individual +/// blob: the only rules present are whole-repo "/" rules, which are already +/// resolved by the "/" gate the caller runs *before* reaching the serve / +/// replication walk (a denying "/" rule 404s the caller; see +/// `withheld_blob_oids` above). For any caller that has passed that gate, +/// `withheld_blob_oids` therefore returns an empty set, so such callers may +/// skip the (potentially expensive) per-blob walk. Do not skip the walk on this +/// predicate without the "/" gate having run first. +/// +/// Validator dependency: this predicate treats `path_glob == "/"` as the only +/// whole-repo scope. That holds because `validate_path_glob` +/// (crates/gitlawb-node/src/api/visibility.rs) rejects `/**`, the only other +/// glob whose prefix collapses to `/` and would therefore match every path. If +/// glob syntax is ever extended, revisit this predicate. +pub fn has_path_scoped_rule(rules: &[VisibilityRule]) -> bool { + rules.iter().any(|r| r.path_glob != "/") +} + /// Objects that may replicate to the public: everything not in `withheld`. /// Order-preserving. The single seam every replication site (IPFS, Pinata) /// passes its object list through; option B would later reroute the withheld @@ -251,6 +270,122 @@ mod tests { ); } + #[test] + fn has_path_scoped_rule_empty_is_false() { + assert!(!has_path_scoped_rule(&[])); + } + + #[test] + fn has_path_scoped_rule_single_root_is_false() { + assert!(!has_path_scoped_rule(&[rule("/", &[])])); + } + + #[test] + fn has_path_scoped_rule_single_scoped_is_true() { + assert!(has_path_scoped_rule(&[rule("/secret/**", &[])])); + } + + #[test] + fn has_path_scoped_rule_mixed_is_true() { + assert!(has_path_scoped_rule(&[ + rule("/", &[]), + rule("/secret/**", &[]), + ])); + } + + #[test] + fn has_path_scoped_rule_multiple_root_is_false() { + assert!(!has_path_scoped_rule(&[rule("/", &[]), rule("/", &[])])); + } + + #[test] + fn has_path_scoped_rule_safety_invariant_matches_withheld_walk() { + // Pin the claim the predicate's docs make, with its real precondition: + // when no rule is path-scoped, then *for any caller that has passed the + // whole-repo "/" gate*, withheld_blob_oids returns an empty set, so the + // walk is safe to skip. The "/" gate (resolved before the serve / + // replication call sites) is what excludes the denying-root caller; this + // function does not re-check it, so the test models only gate-passing + // callers — matching how U2/U3 consult the predicate. + let (_td, bare, _secret, _public) = fixture(); + // (rules, caller) pairs where the caller is Allowed at "/": + // - public repo, no rules, anonymous: "/" allows (is_public). + // - root-only allow-rule, the listed reader: "/" allows them. + // - root-only deny-all rule, the owner: owner bypasses every rule. + let cases: [(Vec, Option<&str>); 3] = [ + (Vec::new(), None), + ( + vec![rule("/", &["did:key:zFriend"])], + Some("did:key:zFriend"), + ), + (vec![rule("/", &[])], Some(OWNER)), + ]; + for (rules, caller) in cases { + assert!(!has_path_scoped_rule(&rules)); + let withheld = withheld_blob_oids(&bare, &rules, true, OWNER, caller).unwrap(); + assert!( + withheld.is_empty(), + "no path-scoped rule must withhold nothing for a gate-passing caller (caller={caller:?})" + ); + } + } + + #[test] + fn serve_decision_skips_walk_for_root_only_and_withholds_for_path_scoped() { + // Drive the git_upload_pack serve decision over a real bare repo, both + // branches the has_path_scoped_rule gate selects, for the INV-2 caller: + // a reader allowed at whole-repo "/" but denied a path-scoped subtree. + // `replicable_objects` is the seam the serve path filters through, so the + // returned set models exactly what the served pack would carry. + let (_td, bare, secret, public) = fixture(); + let reader = Some("did:key:zReader"); + let all = vec![secret.clone(), public.clone()]; + + // Branch A — predicate false: skip the walk and serve the full pack. The + // skip is only sound if the walk would have withheld nothing, so assert + // the walk is empty and the served set is complete. + let root_only = vec![rule("/", &["did:key:zReader"])]; + assert!(!has_path_scoped_rule(&root_only)); + let withheld_a = withheld_blob_oids(&bare, &root_only, true, OWNER, reader).unwrap(); + assert!( + withheld_a.is_empty(), + "root-only rules withhold nothing for a gate-passing reader; the skip is safe" + ); + let served_a = replicable_objects(all.clone(), &withheld_a); + assert!( + served_a.contains(&secret) && served_a.contains(&public), + "the full pack is served when no rule is path-scoped" + ); + + // Branch B — predicate true: run the walk and serve the filtered pack. + // /secret/** is scoped to a different DID, so the reader (allowed at "/") + // is denied /secret and the secret blob must be excluded. + let scoped = vec![ + rule("/", &["did:key:zReader"]), + rule("/secret/**", &["did:key:zOther"]), + ]; + assert!(has_path_scoped_rule(&scoped)); + let withheld_b = withheld_blob_oids(&bare, &scoped, true, OWNER, reader).unwrap(); + let served_b = replicable_objects(all, &withheld_b); + assert!( + !served_b.contains(&secret), + "a reader denied /secret must not be served the secret blob" + ); + assert!( + served_b.contains(&public), + "the public blob the reader may see stays in the served pack" + ); + + // Branch C — same path-scoped rules, but the caller is the owner. The + // owner bypasses every rule, so the walk withholds nothing and the full + // pack (secret included) is served even though a path-scoped rule exists. + let withheld_c = withheld_blob_oids(&bare, &scoped, true, OWNER, Some(OWNER)).unwrap(); + assert!( + withheld_c.is_empty(), + "the owner bypasses path-scoped rules and is served everything" + ); + } + #[test] fn replicable_objects_drops_withheld_keeps_rest() { let all = vec!["aaa".to_string(), "bbb".to_string(), "ccc".to_string()];