Skip to content

Commit 4894aa1

Browse files
yarikoptic and claude committed
feat: add --max-per-group truncation to dandi validate
Limit how many results are shown per leaf group (or in the flat list when no grouping is used). Excess results are replaced by a TruncationNotice placeholder — a distinct dataclass (not a ValidationResult), so consumers can tell the two apart with an isinstance() check.

- TruncationNotice dataclass + LeafItem/TruncatedResults type aliases
- _truncate_leaves() walks the grouped tree and caps leaf lists
- Human output: "... and N more issues" in cyan
- Structured output: {"_truncated": true, "omitted_count": N} sentinel
- Headers show original counts, including omitted items
- Works without grouping (flat list) and with multi-level grouping

Co-Authored-By: Claude Code 2.1.63 / Claude Opus 4.6 <noreply@anthropic.com>
1 parent 1100de5 commit 4894aa1

File tree

2 files changed

+605
-39
lines changed

2 files changed

+605
-39
lines changed

dandi/cli/cmd_validate.py

Lines changed: 215 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
from __future__ import annotations
22

3+
from collections import OrderedDict
4+
import dataclasses
5+
import json as json_mod
36
import logging
47
import os
58
import re
69
import sys
7-
from typing import IO, cast
10+
from typing import IO, Union, cast
811
import warnings
912

1013
import click
@@ -18,6 +21,14 @@
1821

1922
lgr = logging.getLogger(__name__)
2023

24+
25+
@dataclasses.dataclass(frozen=True)
class TruncationNotice:
    """Placeholder indicating omitted results in truncated output.

    This is deliberately its own type rather than a ``ValidationResult`` so
    that code walking a truncated leaf list can single it out with
    ``isinstance()``.  Instances are immutable (and therefore hashable): the
    count is fixed at the moment the notice is created.
    """

    # Number of results that were dropped and are stood in for by this notice.
    omitted_count: int
30+
31+
2132
STRUCTURED_FORMATS = ("json", "json_pp", "json_lines", "yaml")
2233

2334
_EXT_TO_FORMAT = {
@@ -131,7 +142,9 @@ def validate_bids(
131142
"`dandi validate` instead. Proceeding to parse the call to `dandi validate` now.",
132143
DeprecationWarning,
133144
)
134-
ctx.invoke(validate, paths=paths, grouping=grouping)
145+
ctx.invoke(
146+
validate, paths=paths, grouping=(grouping,) if grouping != "none" else ()
147+
)
135148

136149

137150
@click.command()
@@ -145,12 +158,13 @@ def validate_bids(
145158
@click.option(
146159
"--grouping",
147160
"-g",
148-
help="How to group error/warning reporting.",
161+
help="How to group output. Repeat for hierarchical nesting, e.g. -g severity -g id.",
149162
type=click.Choice(
150163
["none", "path", "severity", "id", "validator", "standard", "dandiset"],
151164
case_sensitive=False,
152165
),
153-
default="none",
166+
multiple=True,
167+
default=(),
154168
)
155169
@click.option("--ignore", metavar="REGEX", help="Regex matching error IDs to ignore")
156170
@click.option(
@@ -181,6 +195,13 @@ def validate_bids(
181195
help="Show summary statistics.",
182196
default=False,
183197
)
198+
@click.option(
199+
"--max-per-group",
200+
type=int,
201+
default=None,
202+
help="Limit results per group (or total if ungrouped). "
203+
"Excess results are replaced by a count of omitted items.",
204+
)
184205
@click.option(
185206
"--load",
186207
help="Load validation results from JSONL file(s) instead of running validation.",
@@ -196,11 +217,12 @@ def validate(
196217
ctx: click.Context,
197218
paths: tuple[str, ...],
198219
ignore: str | None,
199-
grouping: str,
220+
grouping: tuple[str, ...],
200221
min_severity: str,
201222
output_format: str = "human",
202223
output_file: str | None = None,
203224
summary: bool = False,
225+
max_per_group: int | None = None,
204226
load: tuple[str, ...] = (),
205227
schema: str | None = None,
206228
devel_debug: bool = False,
@@ -210,6 +232,9 @@ def validate(
210232
211233
Exits with non-0 exit code if any file is not compliant.
212234
"""
235+
# Normalize grouping: strip "none" values
236+
grouping = tuple(g for g in grouping if g != "none")
237+
213238
# Auto-detect format from output file extension when --format not given
214239
if output_file is not None and output_format == "human":
215240
detected = _format_from_ext(output_file)
@@ -221,6 +246,13 @@ def validate(
221246
)
222247
output_format = detected
223248

249+
# JSONL is incompatible with grouping (flat format, no nesting)
250+
if grouping and output_format == "json_lines":
251+
raise click.UsageError(
252+
"--grouping is incompatible with json_lines format "
253+
"(JSONL is a flat format that cannot represent nested groups)."
254+
)
255+
224256
if load and paths:
225257
raise click.UsageError("--load and positional paths are mutually exclusive.")
226258

@@ -234,19 +266,31 @@ def validate(
234266
filtered = _filter_results(results, min_severity, ignore)
235267

236268
if output_format == "human":
237-
_render_human(filtered, grouping)
269+
_render_human(filtered, grouping, max_per_group=max_per_group)
238270
if summary:
239271
_print_summary(filtered, sys.stdout)
240272
_exit_if_errors(filtered)
241273
elif output_file is not None:
242274
with open(output_file, "w") as fh:
243-
_render_structured(filtered, output_format, fh)
275+
_render_structured(
276+
filtered,
277+
output_format,
278+
fh,
279+
grouping,
280+
max_per_group=max_per_group,
281+
)
244282
lgr.info("Validation output written to %s", output_file)
245283
if summary:
246284
_print_summary(filtered, sys.stderr)
247285
_exit_if_errors(filtered)
248286
else:
249-
_render_structured(filtered, output_format, sys.stdout)
287+
_render_structured(
288+
filtered,
289+
output_format,
290+
sys.stdout,
291+
grouping,
292+
max_per_group=max_per_group,
293+
)
250294
if summary:
251295
_print_summary(filtered, sys.stderr)
252296
# Auto-save sidecar next to logfile (skip when loading)
@@ -316,12 +360,39 @@ def _render_structured(
316360
results: list[ValidationResult],
317361
output_format: str,
318362
out: IO[str],
363+
grouping: tuple[str, ...] = (),
364+
max_per_group: int | None = None,
319365
) -> None:
320366
"""Render validation results in a structured format."""
321-
formatter = _get_formatter(output_format, out=out)
322-
with formatter:
323-
for r in results:
324-
formatter(r.model_dump(mode="json"))
367+
if grouping:
368+
# Grouped output: build nested dict, serialize directly
369+
grouped: GroupedResults | TruncatedResults = _group_results(results, grouping)
370+
if max_per_group is not None:
371+
grouped = _truncate_leaves(grouped, max_per_group)
372+
data = _serialize_grouped(grouped)
373+
if output_format in ("json", "json_pp"):
374+
indent = 2 if output_format == "json_pp" else None
375+
json_mod.dump(data, out, indent=indent, sort_keys=True, default=str)
376+
out.write("\n")
377+
elif output_format == "yaml":
378+
import ruamel.yaml
379+
380+
yaml = ruamel.yaml.YAML(typ="safe")
381+
yaml.default_flow_style = False
382+
yaml.dump(data, out)
383+
else:
384+
raise ValueError(f"Unsupported format for grouped output: {output_format}")
385+
else:
386+
items: list[dict] = [r.model_dump(mode="json") for r in results]
387+
if max_per_group is not None and len(items) > max_per_group:
388+
items = items[:max_per_group]
389+
items.append(
390+
{"_truncated": True, "omitted_count": len(results) - max_per_group}
391+
)
392+
formatter = _get_formatter(output_format, out=out)
393+
with formatter:
394+
for item in items:
395+
formatter(item)
325396

326397

327398
def _exit_if_errors(results: list[ValidationResult]) -> None:
@@ -348,20 +419,86 @@ def _group_key(issue: ValidationResult, grouping: str) -> str:
348419
raise NotImplementedError(f"Unsupported grouping: {grouping}")
349420

350421

422+
# Tree produced by grouping: each node is either a mapping from a group key
# to a child node, or (at the bottom) a plain list of results.
GroupedResults = Union[
    "OrderedDict[str, GroupedResults]",
    list[ValidationResult],
]

# What a leaf list may hold once truncation has been applied.
LeafItem = Union[ValidationResult, TruncationNotice]
# Same tree shape as GroupedResults, but with leaves that may carry a notice.
TruncatedResults = Union[
    "OrderedDict[str, TruncatedResults]",
    list[LeafItem],
]
428+
429+
430+
def _group_results(
    results: list[ValidationResult],
    levels: tuple[str, ...],
) -> GroupedResults:
    """Bucket *results* by each grouping level in *levels*, outermost first.

    An empty *levels* hands back *results* itself.  Otherwise the outcome is
    an OrderedDict keyed by ``_group_key`` for the first level, in order of
    first appearance; each value is either the list of matching results (when
    that was the last level) or the same structure built from the remaining
    levels.
    """
    if not levels:
        return results

    current, deeper = levels[0], levels[1:]
    buckets: OrderedDict[str, list[ValidationResult]] = OrderedDict()
    for entry in results:
        buckets.setdefault(_group_key(entry, current), []).append(entry)

    if not deeper:
        # A mapping of plain lists already is a valid GroupedResults; the
        # cast only exists because the type checker cannot follow the
        # recursive alias.
        return cast("GroupedResults", buckets)

    nested: OrderedDict[str, GroupedResults] = OrderedDict()
    for label, members in buckets.items():
        nested[label] = _group_results(members, deeper)
    return nested
452+
453+
454+
def _truncate_leaves(grouped: GroupedResults, max_per_group: int) -> TruncatedResults:
    """Truncate leaf lists to *max_per_group* items, appending a TruncationNotice.

    Nested mappings are rebuilt level by level with their leaves capped.  A
    leaf list that already fits within the limit is returned as-is (the same
    list object); a longer one is replaced by a new list holding the first
    *max_per_group* items followed by one ``TruncationNotice`` whose
    ``omitted_count`` is the number of items left out.

    A negative *max_per_group* is treated as 0.
    """
    # Without the clamp a negative limit would make the slice below count
    # from the END of the list and make ``len(grouped) - limit`` exceed the
    # list length, so the reported totals would no longer add up.
    limit = max(max_per_group, 0)
    if isinstance(grouped, list):
        if len(grouped) <= limit:
            return grouped
        kept: list[LeafItem] = list(grouped[:limit])
        kept.append(TruncationNotice(len(grouped) - limit))
        return kept
    return OrderedDict((k, _truncate_leaves(v, limit)) for k, v in grouped.items())
465+
466+
467+
def _serialize_grouped(grouped: GroupedResults | TruncatedResults) -> dict | list:
    """Turn a (possibly truncated) grouped tree into plain dicts and lists.

    Mappings become regular dicts with every value converted in turn.  In a
    leaf list each result is dumped via ``model_dump(mode="json")`` and each
    ``TruncationNotice`` becomes a
    ``{"_truncated": True, "omitted_count": N}`` sentinel.
    """
    if not isinstance(grouped, list):
        return {label: _serialize_grouped(child) for label, child in grouped.items()}
    return [
        (
            {"_truncated": True, "omitted_count": entry.omitted_count}
            if isinstance(entry, TruncationNotice)
            else entry.model_dump(mode="json")
        )
        for entry in grouped
    ]
478+
479+
351480
def _render_human(
352481
issues: list[ValidationResult],
353-
grouping: str,
482+
grouping: tuple[str, ...],
483+
max_per_group: int | None = None,
354484
) -> None:
355485
"""Render validation results in human-readable colored format."""
356-
if grouping == "none":
357-
purviews = [i.purview for i in issues]
486+
if not grouping:
487+
shown = issues
488+
omitted = 0
489+
if max_per_group is not None and len(issues) > max_per_group:
490+
shown = issues[:max_per_group]
491+
omitted = len(issues) - max_per_group
492+
purviews = [i.purview for i in shown]
358493
display_errors(
359494
purviews,
360-
[i.id for i in issues],
361-
cast("list[Severity]", [i.severity for i in issues]),
362-
[i.message for i in issues],
495+
[i.id for i in shown],
496+
cast("list[Severity]", [i.severity for i in shown]),
497+
[i.message for i in shown],
363498
)
364-
elif grouping == "path":
499+
if omitted:
500+
click.secho(f"... and {pluralize(omitted, 'more issue')}", fg="cyan")
501+
elif grouping == ("path",):
365502
# Legacy path grouping: de-duplicate purviews, show per-path
366503
purviews = list(set(i.purview for i in issues))
367504
for purview in purviews:
@@ -373,39 +510,80 @@ def _render_human(
373510
[i.message for i in applies_to],
374511
)
375512
else:
376-
# Generic grouped rendering with section headers
377-
from collections import OrderedDict
513+
grouped: GroupedResults | TruncatedResults = _group_results(issues, grouping)
514+
if max_per_group is not None:
515+
grouped = _truncate_leaves(grouped, max_per_group)
516+
_render_human_grouped(grouped, depth=0)
517+
518+
if not any(r.severity is not None and r.severity >= Severity.ERROR for r in issues):
519+
click.secho("No errors found.", fg="green")
520+
378521

379-
groups: OrderedDict[str, list[ValidationResult]] = OrderedDict()
380-
for issue in issues:
381-
key = _group_key(issue, grouping)
382-
groups.setdefault(key, []).append(issue)
522+
def _count_leaves(grouped: GroupedResults | TruncatedResults) -> int:
    """Return how many results a grouped structure stands for.

    Each regular leaf item counts as one; a ``TruncationNotice`` counts as
    the number of items it says were omitted.
    """
    if not isinstance(grouped, list):
        return sum(map(_count_leaves, grouped.values()))
    total = 0
    for entry in grouped:
        total += entry.omitted_count if isinstance(entry, TruncationNotice) else 1
    return total
383530

384-
for key, group_issues in groups.items():
385-
header = f"=== {key} ({pluralize(len(group_issues), 'issue')}) ==="
531+
532+
def _render_human_grouped(
    grouped: GroupedResults | TruncatedResults,
    depth: int,
) -> None:
    """Print a grouped tree, one header per group, indenting by *depth*.

    For a mapping, each key gets a bold ``=== key (N issues) ===`` header
    coloured by the severities of the results found beneath it, followed by
    its subtree rendered one level deeper.  For a leaf list, each result is
    printed on its own line in its severity colour, and a
    ``TruncationNotice`` is printed as a cyan "... and N more" line.
    """
    pad = " " * depth

    if not isinstance(grouped, list):
        for label, subtree in grouped.items():
            title = f"{pad}=== {label} ({pluralize(_count_leaves(subtree), 'issue')}) ==="
            # The header colour comes from every result present in the subtree.
            severities = [
                found.severity
                for found in _collect_all_issues(subtree)
                if found.severity is not None
            ]
            colour = _get_severity_color(cast("list[Severity]", severities))
            click.secho(title, fg=colour, bold=True)
            _render_human_grouped(subtree, depth + 1)
        return

    # Leaf level: one line per entry.
    for entry in grouped:
        if isinstance(entry, TruncationNotice):
            click.secho(
                f"{pad}... and {pluralize(entry.omitted_count, 'more issue')}",
                fg="cyan",
            )
        else:
            own_severity = [] if entry.severity is None else [entry.severity]
            click.secho(
                f"{pad}[{entry.id}] {entry.purview}{entry.message}",
                fg=_get_severity_color(own_severity),
            )
399566

400-
if not any(r.severity is not None and r.severity >= Severity.ERROR for r in issues):
401-
click.secho("No errors found.", fg="green")
567+
568+
def _collect_all_issues(
    grouped: GroupedResults | TruncatedResults,
) -> list[ValidationResult]:
    """Gather every ValidationResult held anywhere in a grouped structure.

    Anything in a leaf list that is not a ``ValidationResult`` (such as a
    ``TruncationNotice``) is left out.  Results come back in traversal order.
    """
    if not isinstance(grouped, list):
        return [
            found
            for subtree in grouped.values()
            for found in _collect_all_issues(subtree)
        ]
    return [entry for entry in grouped if isinstance(entry, ValidationResult)]
402578

403579

404580
def _process_issues(
    issues: list[ValidationResult],
    grouping: str | tuple[str, ...],
) -> None:
    """Legacy wrapper: render human output and exit if errors.

    *grouping* may be a single grouping name (the historical form) or a tuple
    of names.  Either way, ``"none"`` entries are dropped before rendering, so
    ``"none"``, ``("none",)`` and ``()`` all mean "no grouping".
    """
    levels = (grouping,) if isinstance(grouping, str) else tuple(grouping)
    # Strip "none" from tuples as well as from the plain-string form;
    # otherwise a tuple such as ("none",) is non-empty and would be handed on
    # as if "none" were a real grouping level.
    levels = tuple(level for level in levels if level != "none")
    _render_human(issues, levels)
    _exit_if_errors(issues)
411589

0 commit comments

Comments
 (0)