55
66import pystac
77
8- ## TODO : add fAIr specific validation rules here , mainly for existence of those keys which are required to integrate
9- # basemodels
# TODO: extend the validation with the complete set of requirements based on the production STAC;
# currently only a handful of checks are in place.
1010
1111
1212def validate_mlm_schema (item : pystac .Item ) -> list [str ]:
@@ -23,6 +23,85 @@ def _load_keywords_schema() -> dict:
2323 return json .loads (ref .read_text (encoding = "utf-8" ))
2424
2525
def _load_base_model_requirements() -> dict:
    """Read and parse the ``base_model_requirements.json`` resource.

    The file is looked up inside the ``fair.schemas`` package and decoded as
    UTF-8 before being handed to the JSON parser.

    Returns:
        The parsed JSON document.
    """
    schema_pkg = importlib.resources.files("fair.schemas")
    raw_text = schema_pkg.joinpath("base_model_requirements.json").read_text(encoding="utf-8")
    return json.loads(raw_text)
29+
30+
31+ def _check_processing_fn (fn : object , path : str , required_fields : list [str ], errors : list [str ]) -> None :
32+ if not isinstance (fn , dict ):
33+ errors .append (f"{ path } must be an object" )
34+ return
35+ for field in required_fields :
36+ if field not in fn :
37+ errors .append (f"{ path } missing field: { field } " )
38+
39+
def _check_mlm_entries(
    entries: list,
    label: str,
    required_fields: list[str],
    fn_field: str,
    proc_fields: list[str],
    errors: list[str],
) -> None:
    """Check every entry of an ``mlm:input`` / ``mlm:output`` list for its required fields.

    Args:
        entries: The list of entries to inspect.
        label: Property name used as the prefix of every message (e.g. ``mlm:input``).
        required_fields: Keys each entry must contain.
        fn_field: The key whose value is additionally checked with ``_check_processing_fn``.
        proc_fields: Keys required inside the value stored under *fn_field*.
        errors: List that messages are appended to (mutated in place).
    """
    for i, entry in enumerate(entries):
        where = f"{label}[{i}]"
        if not isinstance(entry, dict):
            # `in` on a str is a substring test and on a number raises TypeError,
            # so a non-dict entry is reported instead of being probed for keys.
            errors.append(f"{where} must be an object")
            continue
        for field in required_fields:
            if field not in entry:
                errors.append(f"{where} missing: {field}")
            elif field == fn_field:
                _check_processing_fn(entry[field], f"{where}.{field}", proc_fields, errors)


def validate_base_model_item(item: pystac.Item) -> list[str]:
    """Validate a base-model STAC item against fAIr requirements from base_model_requirements.json.

    The checks run in this order and every failure adds one message:
    required extensions, required properties, non-empty list properties,
    keywords, allowed property values, ``mlm:input`` entries,
    ``mlm:output`` entries and required assets.

    Args:
        item: The item to validate. Its ``stac_extensions``, ``properties``
            and ``assets`` are read; nothing on the item is modified.

    Returns:
        The collected error messages; the list is empty when no check failed.
    """
    reqs = _load_base_model_requirements()
    kw_schema = _load_keywords_schema()
    errors: list[str] = []

    declared = set(item.stac_extensions)
    for ext in reqs["required_extensions"]:
        if ext not in declared:
            errors.append(f"Missing extension: {ext}")

    props = item.properties
    for prop in reqs["required_properties"]:
        # A key that is present but null counts as missing.
        if props.get(prop) is None:
            errors.append(f"Missing property: {prop}")

    for prop in reqs["non_empty_list_properties"]:
        val = props.get(prop)
        # Only an empty list is reported here; other value types pass this check.
        if isinstance(val, list) and not val:
            errors.append(f"Property must be non-empty list: {prop}")

    allowed_kw = (
        set(kw_schema["allowed_keywords"])
        | set(kw_schema["allowed_tasks"])
        | set(kw_schema.get("allowed_geometry_types", []))
    )
    # `or []` also covers a key that is present but null; `.get(key, [])` would
    # hand None to set() and raise TypeError.
    unknown_kw = set(props.get("keywords") or []) - allowed_kw
    if unknown_kw:
        errors.append(f"Unknown keywords: {unknown_kw}")

    for prop, allowed in reqs.get("allowed_values", {}).items():
        val = props.get(prop)
        if val is None:
            continue
        # A scalar is checked the same way as a one-element list.
        items = val if isinstance(val, list) else [val]
        invalid = set(items) - set(allowed)
        if invalid:
            errors.append(f"Invalid {prop} values: {invalid}. Allowed: {allowed}")

    proc_fields = reqs["processing_function_fields"]
    _check_mlm_entries(
        props.get("mlm:input") or [],
        "mlm:input",
        reqs["input_required_fields"],
        "pre_processing_function",
        proc_fields,
        errors,
    )
    _check_mlm_entries(
        props.get("mlm:output") or [],
        "mlm:output",
        reqs["output_required_fields"],
        "post_processing_function",
        proc_fields,
        errors,
    )

    for asset_key, required_fields in reqs["required_assets"].items():
        asset = item.assets.get(asset_key)
        if asset is None:
            errors.append(f"Missing asset: {asset_key}")
            continue
        for field in required_fields:
            if field not in asset.extra_fields:
                errors.append(f"Asset '{asset_key}' missing field: {field}")

    return errors
103+
104+
26105def validate_compatibility (
27106 base_model_item : pystac .Item ,
28107 dataset_item : pystac .Item ,
0 commit comments