Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 44 additions & 27 deletions crates/gitlawb-node/src/api/repos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,13 @@ async fn replication_withheld_set(
return (false, None);
}
let withheld = match rules {
Some(rules) if rules.is_empty() => Some(std::collections::HashSet::new()),
// No path-scoped rule can withhold anything (covers the empty-rules and
// root-only-rules cases), so skip the full withheld_blob_oids walk and
// withhold nothing. The predicate's safety-invariant test guards that
// this short-circuit matches what the walk would have returned.
Some(rules) if !visibility_pack::has_path_scoped_rule(&rules) => {
Some(std::collections::HashSet::new())
}
// withheld_blob_oids walks every ref with blocking `git ls-tree`; keep
// that off the async worker thread.
Some(rules) => {
Expand Down Expand Up @@ -499,33 +505,40 @@ pub async fn git_upload_pack(
.map_err(|e| AppError::Git(e.to_string()))?;
let body_len = body.len();

// withheld_blob_oids walks every ref with blocking `git ls-tree`; keep that
// off the async worker thread.
let withheld = {
let path = disk_path.clone();
let rules = rules.clone();
let owner_did = record.owner_did.clone();
let caller_owned = caller.map(str::to_string);
let is_public = record.is_public;
tokio::task::spawn_blocking(move || {
visibility_pack::withheld_blob_oids(
&path,
&rules,
is_public,
&owner_did,
caller_owned.as_deref(),
)
})
.await
.map_err(|e| AppError::Git(e.to_string()))?
.map_err(|e| AppError::Git(e.to_string()))?
};

let resp = if withheld.is_empty() {
// No path-scoped rule can withhold an individual blob, and the whole-repo
// "/" gate above already enforced repo-level access. Skip the per-blob
// withheld walk and serve the pack directly.
let resp = if !visibility_pack::has_path_scoped_rule(&rules) {
smart_http::upload_pack(&disk_path, body).await
} else {
tracing::info!(repo = %name, caller = ?caller, withheld = withheld.len(), "serving filtered pack");
smart_http::upload_pack_excluding(&disk_path, body, &withheld).await
// withheld_blob_oids walks every ref with blocking `git ls-tree`; keep
// that off the async worker thread.
let withheld = {
let path = disk_path.clone();
let rules = rules.clone();
let owner_did = record.owner_did.clone();
let caller_owned = caller.map(str::to_string);
let is_public = record.is_public;
tokio::task::spawn_blocking(move || {
visibility_pack::withheld_blob_oids(
&path,
&rules,
is_public,
&owner_did,
caller_owned.as_deref(),
)
})
.await
.map_err(|e| AppError::Git(e.to_string()))?
.map_err(|e| AppError::Git(e.to_string()))?
};

if withheld.is_empty() {
smart_http::upload_pack(&disk_path, body).await
} else {
tracing::info!(repo = %name, caller = ?caller, withheld = withheld.len(), "serving filtered pack");
smart_http::upload_pack_excluding(&disk_path, body, &withheld).await
}
}
.map_err(|e| {
let msg = e.to_string();
Expand Down Expand Up @@ -825,7 +838,11 @@ pub async fn git_receive_pack(

// Option B1: encrypt-then-pin the withheld blobs so authorized
// readers can recover them when the origin cannot serve them.
if let Some(rules) = rules_for_enc.filter(|r| !r.is_empty()) {
// No path-scoped rule can withhold a blob, so withheld_blob_recipients
// would return an empty map after a full per-ref walk; skip it. Mirrors
// the has_path_scoped_rule gate on the other two withheld-walk sites.
if let Some(rules) = rules_for_enc.filter(|r| visibility_pack::has_path_scoped_rule(r))
{
let p = repo_path_clone.clone();
let owner = owner_did.clone();
let recip = tokio::task::spawn_blocking(move || {
Expand Down
135 changes: 135 additions & 0 deletions crates/gitlawb-node/src/git/visibility_pack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,25 @@ pub fn withheld_blob_oids(
Ok(denied.difference(&allowed).cloned().collect())
}

/// True if any rule scopes a sub-path of the repo (i.e. is not the whole-repo
/// "/" rule). When this returns `false`, no rule can withhold an individual
/// blob: the only rules present are whole-repo "/" rules, which are already
/// resolved by the "/" gate the caller runs *before* reaching the serve /
/// replication walk (a denying "/" rule 404s the caller; see
/// `withheld_blob_oids` above). For any caller that has passed that gate,
/// `withheld_blob_oids` therefore returns an empty set, so such callers may
/// skip the (potentially expensive) per-blob walk. Do not skip the walk on this
/// predicate without the "/" gate having run first.
///
/// Validator dependency: this predicate treats `path_glob == "/"` as the only
/// whole-repo scope. That holds because `validate_path_glob`
/// (crates/gitlawb-node/src/api/visibility.rs) rejects `/**`, the only other
/// glob whose prefix collapses to `/` and would therefore match every path. If
/// glob syntax is ever extended, revisit this predicate.
pub fn has_path_scoped_rule(rules: &[VisibilityRule]) -> bool {
rules.iter().any(|r| r.path_glob != "/")
}

/// Objects that may replicate to the public: everything not in `withheld`.
/// Order-preserving. The single seam every replication site (IPFS, Pinata)
/// passes its object list through; option B would later reroute the withheld
Expand Down Expand Up @@ -251,6 +270,122 @@ mod tests {
);
}

#[test]
fn has_path_scoped_rule_empty_is_false() {
assert!(!has_path_scoped_rule(&[]));
}

#[test]
fn has_path_scoped_rule_single_root_is_false() {
assert!(!has_path_scoped_rule(&[rule("/", &[])]));
}

#[test]
fn has_path_scoped_rule_single_scoped_is_true() {
assert!(has_path_scoped_rule(&[rule("/secret/**", &[])]));
}

#[test]
fn has_path_scoped_rule_mixed_is_true() {
assert!(has_path_scoped_rule(&[
rule("/", &[]),
rule("/secret/**", &[]),
]));
}

#[test]
fn has_path_scoped_rule_multiple_root_is_false() {
assert!(!has_path_scoped_rule(&[rule("/", &[]), rule("/", &[])]));
}

#[test]
fn has_path_scoped_rule_safety_invariant_matches_withheld_walk() {
// Pin the claim the predicate's docs make, with its real precondition:
// when no rule is path-scoped, then *for any caller that has passed the
// whole-repo "/" gate*, withheld_blob_oids returns an empty set, so the
// walk is safe to skip. The "/" gate (resolved before the serve /
// replication call sites) is what excludes the denying-root caller; this
// function does not re-check it, so the test models only gate-passing
// callers — matching how U2/U3 consult the predicate.
let (_td, bare, _secret, _public) = fixture();
// (rules, caller) pairs where the caller is Allowed at "/":
// - public repo, no rules, anonymous: "/" allows (is_public).
// - root-only allow-rule, the listed reader: "/" allows them.
// - root-only deny-all rule, the owner: owner bypasses every rule.
let cases: [(Vec<VisibilityRule>, Option<&str>); 3] = [
(Vec::new(), None),
(
vec![rule("/", &["did:key:zFriend"])],
Some("did:key:zFriend"),
),
(vec![rule("/", &[])], Some(OWNER)),
];
for (rules, caller) in cases {
assert!(!has_path_scoped_rule(&rules));
let withheld = withheld_blob_oids(&bare, &rules, true, OWNER, caller).unwrap();
assert!(
withheld.is_empty(),
"no path-scoped rule must withhold nothing for a gate-passing caller (caller={caller:?})"
);
}
}

#[test]
fn serve_decision_skips_walk_for_root_only_and_withholds_for_path_scoped() {
// Drive the git_upload_pack serve decision over a real bare repo, both
// branches the has_path_scoped_rule gate selects, for the INV-2 caller:
// a reader allowed at whole-repo "/" but denied a path-scoped subtree.
// `replicable_objects` is the seam the serve path filters through, so the
// returned set models exactly what the served pack would carry.
let (_td, bare, secret, public) = fixture();
let reader = Some("did:key:zReader");
let all = vec![secret.clone(), public.clone()];

// Branch A — predicate false: skip the walk and serve the full pack. The
// skip is only sound if the walk would have withheld nothing, so assert
// the walk is empty and the served set is complete.
let root_only = vec![rule("/", &["did:key:zReader"])];
assert!(!has_path_scoped_rule(&root_only));
let withheld_a = withheld_blob_oids(&bare, &root_only, true, OWNER, reader).unwrap();
assert!(
withheld_a.is_empty(),
"root-only rules withhold nothing for a gate-passing reader; the skip is safe"
);
let served_a = replicable_objects(all.clone(), &withheld_a);
assert!(
served_a.contains(&secret) && served_a.contains(&public),
"the full pack is served when no rule is path-scoped"
);

// Branch B — predicate true: run the walk and serve the filtered pack.
// /secret/** is scoped to a different DID, so the reader (allowed at "/")
// is denied /secret and the secret blob must be excluded.
let scoped = vec![
rule("/", &["did:key:zReader"]),
rule("/secret/**", &["did:key:zOther"]),
];
assert!(has_path_scoped_rule(&scoped));
let withheld_b = withheld_blob_oids(&bare, &scoped, true, OWNER, reader).unwrap();
let served_b = replicable_objects(all, &withheld_b);
assert!(
!served_b.contains(&secret),
"a reader denied /secret must not be served the secret blob"
);
assert!(
served_b.contains(&public),
"the public blob the reader may see stays in the served pack"
);

// Branch C — same path-scoped rules, but the caller is the owner. The
// owner bypasses every rule, so the walk withholds nothing and the full
// pack (secret included) is served even though a path-scoped rule exists.
let withheld_c = withheld_blob_oids(&bare, &scoped, true, OWNER, Some(OWNER)).unwrap();
assert!(
withheld_c.is_empty(),
"the owner bypasses path-scoped rules and is served everything"
);
}

#[test]
fn replicable_objects_drops_withheld_keeps_rest() {
let all = vec!["aaa".to_string(), "bbb".to_string(), "ccc".to_string()];
Expand Down
Loading