11from __future__ import annotations
22
3+ from collections import OrderedDict
4+ import dataclasses
5+ import json as json_mod
36import logging
47import os
58import re
69import sys
7- from typing import IO , cast
10+ from typing import IO , Union , cast
811import warnings
912
1013import click
1821
1922lgr = logging .getLogger (__name__ )
2023
24+
@dataclasses.dataclass
class TruncationNotice:
    """Placeholder indicating omitted results in truncated output.

    A notice takes the place of the results that were cut from a list, so
    that renderers and serializers can still report how many were left out.
    """

    # Number of results that were dropped and are represented by this notice.
    omitted_count: int
30+
31+
2132STRUCTURED_FORMATS = ("json" , "json_pp" , "json_lines" , "yaml" )
2233
2334_EXT_TO_FORMAT = {
@@ -131,7 +142,9 @@ def validate_bids(
131142 "`dandi validate` instead. Proceeding to parse the call to `dandi validate` now." ,
132143 DeprecationWarning ,
133144 )
134- ctx .invoke (validate , paths = paths , grouping = grouping )
145+ ctx .invoke (
146+ validate , paths = paths , grouping = (grouping ,) if grouping != "none" else ()
147+ )
135148
136149
137150@click .command ()
@@ -145,12 +158,13 @@ def validate_bids(
145158@click .option (
146159 "--grouping" ,
147160 "-g" ,
148- help = "How to group error/warning reporting ." ,
161+ help = "How to group output. Repeat for hierarchical nesting, e.g. -g severity -g id ." ,
149162 type = click .Choice (
150163 ["none" , "path" , "severity" , "id" , "validator" , "standard" , "dandiset" ],
151164 case_sensitive = False ,
152165 ),
153- default = "none" ,
166+ multiple = True ,
167+ default = (),
154168)
155169@click .option ("--ignore" , metavar = "REGEX" , help = "Regex matching error IDs to ignore" )
156170@click .option (
@@ -181,6 +195,13 @@ def validate_bids(
181195 help = "Show summary statistics." ,
182196 default = False ,
183197)
198+ @click .option (
199+ "--max-per-group" ,
200+ type = int ,
201+ default = None ,
202+ help = "Limit results per group (or total if ungrouped). "
203+ "Excess results are replaced by a count of omitted items." ,
204+ )
184205@click .option (
185206 "--load" ,
186207 help = "Load validation results from JSONL file(s) instead of running validation." ,
@@ -196,11 +217,12 @@ def validate(
196217 ctx : click .Context ,
197218 paths : tuple [str , ...],
198219 ignore : str | None ,
199- grouping : str ,
220+ grouping : tuple [ str , ...] ,
200221 min_severity : str ,
201222 output_format : str = "human" ,
202223 output_file : str | None = None ,
203224 summary : bool = False ,
225+ max_per_group : int | None = None ,
204226 load : tuple [str , ...] = (),
205227 schema : str | None = None ,
206228 devel_debug : bool = False ,
@@ -210,6 +232,9 @@ def validate(
210232
211233 Exits with non-0 exit code if any file is not compliant.
212234 """
235+ # Normalize grouping: strip "none" values
236+ grouping = tuple (g for g in grouping if g != "none" )
237+
213238 # Auto-detect format from output file extension when --format not given
214239 if output_file is not None and output_format == "human" :
215240 detected = _format_from_ext (output_file )
@@ -221,6 +246,13 @@ def validate(
221246 )
222247 output_format = detected
223248
249+ # JSONL is incompatible with grouping (flat format, no nesting)
250+ if grouping and output_format == "json_lines" :
251+ raise click .UsageError (
252+ "--grouping is incompatible with json_lines format "
253+ "(JSONL is a flat format that cannot represent nested groups)."
254+ )
255+
224256 if load and paths :
225257 raise click .UsageError ("--load and positional paths are mutually exclusive." )
226258
@@ -234,19 +266,31 @@ def validate(
234266 filtered = _filter_results (results , min_severity , ignore )
235267
236268 if output_format == "human" :
237- _render_human (filtered , grouping )
269+ _render_human (filtered , grouping , max_per_group = max_per_group )
238270 if summary :
239271 _print_summary (filtered , sys .stdout )
240272 _exit_if_errors (filtered )
241273 elif output_file is not None :
242274 with open (output_file , "w" ) as fh :
243- _render_structured (filtered , output_format , fh )
275+ _render_structured (
276+ filtered ,
277+ output_format ,
278+ fh ,
279+ grouping ,
280+ max_per_group = max_per_group ,
281+ )
244282 lgr .info ("Validation output written to %s" , output_file )
245283 if summary :
246284 _print_summary (filtered , sys .stderr )
247285 _exit_if_errors (filtered )
248286 else :
249- _render_structured (filtered , output_format , sys .stdout )
287+ _render_structured (
288+ filtered ,
289+ output_format ,
290+ sys .stdout ,
291+ grouping ,
292+ max_per_group = max_per_group ,
293+ )
250294 if summary :
251295 _print_summary (filtered , sys .stderr )
252296 # Auto-save sidecar next to logfile (skip when loading)
@@ -316,12 +360,39 @@ def _render_structured(
316360 results : list [ValidationResult ],
317361 output_format : str ,
318362 out : IO [str ],
363+ grouping : tuple [str , ...] = (),
364+ max_per_group : int | None = None ,
319365) -> None :
320366 """Render validation results in a structured format."""
321- formatter = _get_formatter (output_format , out = out )
322- with formatter :
323- for r in results :
324- formatter (r .model_dump (mode = "json" ))
367+ if grouping :
368+ # Grouped output: build nested dict, serialize directly
369+ grouped : GroupedResults | TruncatedResults = _group_results (results , grouping )
370+ if max_per_group is not None :
371+ grouped = _truncate_leaves (grouped , max_per_group )
372+ data = _serialize_grouped (grouped )
373+ if output_format in ("json" , "json_pp" ):
374+ indent = 2 if output_format == "json_pp" else None
375+ json_mod .dump (data , out , indent = indent , sort_keys = True , default = str )
376+ out .write ("\n " )
377+ elif output_format == "yaml" :
378+ import ruamel .yaml
379+
380+ yaml = ruamel .yaml .YAML (typ = "safe" )
381+ yaml .default_flow_style = False
382+ yaml .dump (data , out )
383+ else :
384+ raise ValueError (f"Unsupported format for grouped output: { output_format } " )
385+ else :
386+ items : list [dict ] = [r .model_dump (mode = "json" ) for r in results ]
387+ if max_per_group is not None and len (items ) > max_per_group :
388+ items = items [:max_per_group ]
389+ items .append (
390+ {"_truncated" : True , "omitted_count" : len (results ) - max_per_group }
391+ )
392+ formatter = _get_formatter (output_format , out = out )
393+ with formatter :
394+ for item in items :
395+ formatter (item )
325396
326397
327398def _exit_if_errors (results : list [ValidationResult ]) -> None :
@@ -348,20 +419,86 @@ def _group_key(issue: ValidationResult, grouping: str) -> str:
348419 raise NotImplementedError (f"Unsupported grouping: { grouping } " )
349420
350421
# Recursive grouped type: either a nested OrderedDict (one level per grouping
# key) or, at the bottom, a leaf list of results
GroupedResults = Union["OrderedDict[str, GroupedResults]", list[ValidationResult]]

# Leaf items after possible truncation: a result, or a notice standing in for
# the results that were omitted
LeafItem = Union[ValidationResult, TruncationNotice]
TruncatedResults = Union["OrderedDict[str, TruncatedResults]", list[LeafItem]]
428+
429+
430+ def _group_results (
431+ results : list [ValidationResult ],
432+ levels : tuple [str , ...],
433+ ) -> GroupedResults :
434+ """Group results recursively by the given hierarchy of grouping levels.
435+
436+ Returns a nested OrderedDict with leaf values as lists of ValidationResult.
437+ With zero levels, returns the flat list unchanged.
438+ """
439+ if not levels :
440+ return results
441+ key_fn = levels [0 ]
442+ remaining = levels [1 :]
443+ groups : OrderedDict [str , list [ValidationResult ]] = OrderedDict ()
444+ for r in results :
445+ k = _group_key (r , key_fn )
446+ groups .setdefault (k , []).append (r )
447+ if remaining :
448+ return OrderedDict ((k , _group_results (v , remaining )) for k , v in groups .items ())
449+ # mypy can't resolve the recursive type alias, but this is correct:
450+ # OrderedDict[str, list[VR]] is a valid GroupedResults
451+ return cast ("GroupedResults" , groups )
452+
453+
454+ def _truncate_leaves (grouped : GroupedResults , max_per_group : int ) -> TruncatedResults :
455+ """Truncate leaf lists to *max_per_group* items, appending a TruncationNotice."""
456+ if isinstance (grouped , list ):
457+ if len (grouped ) > max_per_group :
458+ kept : list [LeafItem ] = list (grouped [:max_per_group ])
459+ kept .append (TruncationNotice (len (grouped ) - max_per_group ))
460+ return kept
461+ return grouped
462+ return OrderedDict (
463+ (k , _truncate_leaves (v , max_per_group )) for k , v in grouped .items ()
464+ )
465+
466+
467+ def _serialize_grouped (grouped : GroupedResults | TruncatedResults ) -> dict | list :
468+ """Convert grouped results to a JSON-serializable nested dict/list."""
469+ if isinstance (grouped , list ):
470+ result : list [dict ] = []
471+ for item in grouped :
472+ if isinstance (item , TruncationNotice ):
473+ result .append ({"_truncated" : True , "omitted_count" : item .omitted_count })
474+ else :
475+ result .append (item .model_dump (mode = "json" ))
476+ return result
477+ return {k : _serialize_grouped (v ) for k , v in grouped .items ()}
478+
479+
351480def _render_human (
352481 issues : list [ValidationResult ],
353- grouping : str ,
482+ grouping : tuple [str , ...],
483+ max_per_group : int | None = None ,
354484) -> None :
355485 """Render validation results in human-readable colored format."""
356- if grouping == "none" :
357- purviews = [i .purview for i in issues ]
486+ if not grouping :
487+ shown = issues
488+ omitted = 0
489+ if max_per_group is not None and len (issues ) > max_per_group :
490+ shown = issues [:max_per_group ]
491+ omitted = len (issues ) - max_per_group
492+ purviews = [i .purview for i in shown ]
358493 display_errors (
359494 purviews ,
360- [i .id for i in issues ],
361- cast ("list[Severity]" , [i .severity for i in issues ]),
362- [i .message for i in issues ],
495+ [i .id for i in shown ],
496+ cast ("list[Severity]" , [i .severity for i in shown ]),
497+ [i .message for i in shown ],
363498 )
364- elif grouping == "path" :
499+ if omitted :
500+ click .secho (f"... and { pluralize (omitted , 'more issue' )} " , fg = "cyan" )
501+ elif grouping == ("path" ,):
365502 # Legacy path grouping: de-duplicate purviews, show per-path
366503 purviews = list (set (i .purview for i in issues ))
367504 for purview in purviews :
@@ -373,39 +510,80 @@ def _render_human(
373510 [i .message for i in applies_to ],
374511 )
375512 else :
376- # Generic grouped rendering with section headers
377- from collections import OrderedDict
513+ grouped : GroupedResults | TruncatedResults = _group_results (issues , grouping )
514+ if max_per_group is not None :
515+ grouped = _truncate_leaves (grouped , max_per_group )
516+ _render_human_grouped (grouped , depth = 0 )
517+
518+ if not any (r .severity is not None and r .severity >= Severity .ERROR for r in issues ):
519+ click .secho ("No errors found." , fg = "green" )
520+
378521
379- groups : OrderedDict [str , list [ValidationResult ]] = OrderedDict ()
380- for issue in issues :
381- key = _group_key (issue , grouping )
382- groups .setdefault (key , []).append (issue )
522+ def _count_leaves (grouped : GroupedResults | TruncatedResults ) -> int :
523+ """Count total items in a grouped structure (including omitted counts)."""
524+ if isinstance (grouped , list ):
525+ return sum (
526+ item .omitted_count if isinstance (item , TruncationNotice ) else 1
527+ for item in grouped
528+ )
529+ return sum (_count_leaves (v ) for v in grouped .values ())
383530
384- for key , group_issues in groups .items ():
385- header = f"=== { key } ({ pluralize (len (group_issues ), 'issue' )} ) ==="
531+
532+ def _render_human_grouped (
533+ grouped : GroupedResults | TruncatedResults ,
534+ depth : int ,
535+ ) -> None :
536+ """Recursively render grouped results with nested indented section headers."""
537+ indent = " " * depth
538+ if isinstance (grouped , list ):
539+ # Leaf level: render individual issues
540+ for issue in grouped :
541+ if isinstance (issue , TruncationNotice ):
542+ click .secho (
543+ f"{ indent } ... and { pluralize (issue .omitted_count , 'more issue' )} " ,
544+ fg = "cyan" ,
545+ )
546+ continue
547+ msg = f"{ indent } [{ issue .id } ] { issue .purview } — { issue .message } "
548+ fg = _get_severity_color (
549+ [issue .severity ] if issue .severity is not None else []
550+ )
551+ click .secho (msg , fg = fg )
552+ else :
553+ for key , value in grouped .items ():
554+ count = _count_leaves (value )
555+ header = f"{ indent } === { key } ({ pluralize (count , 'issue' )} ) ==="
556+ # Determine color from all issues in this group
557+ all_issues = _collect_all_issues (value )
386558 fg = _get_severity_color (
387559 cast (
388560 "list[Severity]" ,
389- [i .severity for i in group_issues if i .severity is not None ],
561+ [i .severity for i in all_issues if i .severity is not None ],
390562 )
391563 )
392564 click .secho (header , fg = fg , bold = True )
393- for issue in group_issues :
394- msg = f" [{ issue .id } ] { issue .purview } — { issue .message } "
395- ifg = _get_severity_color (
396- [issue .severity ] if issue .severity is not None else []
397- )
398- click .secho (msg , fg = ifg )
565+ _render_human_grouped (value , depth + 1 )
399566
400- if not any (r .severity is not None and r .severity >= Severity .ERROR for r in issues ):
401- click .secho ("No errors found." , fg = "green" )
567+
568+ def _collect_all_issues (
569+ grouped : GroupedResults | TruncatedResults ,
570+ ) -> list [ValidationResult ]:
571+ """Flatten a grouped structure into a list of all ValidationResults."""
572+ if isinstance (grouped , list ):
573+ return [item for item in grouped if isinstance (item , ValidationResult )]
574+ result : list [ValidationResult ] = []
575+ for v in grouped .values ():
576+ result .extend (_collect_all_issues (v ))
577+ return result
402578
403579
def _process_issues(
    issues: list[ValidationResult],
    grouping: str | tuple[str, ...],
) -> None:
    """Legacy wrapper: render human output and exit if errors.

    *grouping* may be a single grouping name (the legacy form) or a tuple of
    names.  In both forms ``"none"`` means "no grouping" and is dropped before
    rendering, the same normalization ``validate`` applies to its own
    ``grouping`` argument.
    """
    levels = (grouping,) if isinstance(grouping, str) else tuple(grouping)
    # Strip "none" from tuples as well, so ("none",) renders ungrouped instead
    # of being passed on as a grouping level.
    levels = tuple(g for g in levels if g != "none")
    _render_human(issues, levels)
    _exit_if_errors(issues)
411589
0 commit comments