1+ from .jsonpath_ng import Fields , DatumInContext , auto_id_field , AutoIdForDatum , NOT_SET , Child
2+ from .jsonpath_ng .ext .parser import ExtentedJsonPathParser
3+ from .jsonpath_ng .ext .string import DefinitionInvalid
4+ import re
5+ from itertools import chain
6+
7+
class AttributedFields(Fields):
    """
    Support for Fields with additional (metadata) attributes in a dict-like structure.

    A plain field name is looked up with ``datum.value.get(name)``. A field name with a leading
    underscore (``"_name"``) is looked up, without the underscore, in the mapping found on
    ``datum.value`` under the attribute named by ``attribute``.

    Parameters
    ----------
    *fields: str
        The fields to consider. If "*" is passed, all fields at this specific level are used.
    attribute: str
        Additional attribute of the objects at this level to also consider (default: 'attrs'). HDF-Files
        usually have metadata attached at each group or dataset, which can be used for queries this way.
    """
    def __init__(self, *fields, attribute='attrs'):
        super().__init__(*fields)
        self.attribute = attribute

    def get_field_datum(self, datum, field, create):
        """
        Resolve a single field name against ``datum``.

        Parameters
        ----------
        datum: DatumInContext
            The datum whose ``value`` is searched.
        field: str
            Field name; a leading underscore selects the metadata mapping instead of the value itself.
        create: bool
            If true, a missing entry is created as an empty dict in the container it was looked up in.

        Returns
        -------
        DatumInContext, AutoIdForDatum or None
            ``None`` if the entry is missing (and ``create`` is false), if the metadata mapping does
            not exist, or if the lookup raised ``TypeError``/``AttributeError``.
        """
        if field == auto_id_field:
            return AutoIdForDatum(datum)
        try:
            if field.startswith("_"):
                return self._get_attribute_datum(datum, field[1:], create)

            field_value = datum.value.get(field, NOT_SET)
            if field_value is NOT_SET:
                if not create:
                    return None
                datum.value[field] = field_value = {}
            return DatumInContext(field_value, path=Fields(field), context=datum)
        except (TypeError, AttributeError):
            # Values that are not dict-like (or non-string field names) simply yield no match.
            return None

    def _get_attribute_datum(self, datum, name, create):
        """Look up (or create) ``name`` in the metadata mapping of ``datum.value``."""
        try:
            attrs = getattr(datum.value, self.attribute)
            field_value = attrs.get(name, NOT_SET)
        except AttributeError:
            # No metadata mapping on this object: nothing to find and nowhere to create.
            return None

        if field_value is NOT_SET:
            if not create:
                return None
            # Create inside the metadata mapping, not inside the data container itself.
            attrs[name] = field_value = {}
        # NOTE(review): the path carries the name without its leading underscore as a plain
        # Fields node, so it does not distinguish an attribute from a regular field - confirm
        # whether this is intended.
        return DatumInContext(field_value, path=Fields(name), context=datum)

    def reified_fields(self, datum):
        """
        Return the concrete field names this node stands for at ``datum``.

        Without a "*" among the configured fields these are the configured fields themselves.
        With "*" they are all keys of ``datum.value``, followed by the keys of its metadata mapping
        (each prefixed with "_") and, if set, ``auto_id_field``. An empty tuple is returned when
        ``datum.value`` has no ``keys()``.
        """
        if '*' not in self.fields:
            return self.fields

        try:
            names = list(datum.value.keys())
        except AttributeError:
            return ()

        try:
            attrs = getattr(datum.value, self.attribute)
            names.extend("_" + key for key in attrs.keys())
        except AttributeError:
            # Objects without a metadata mapping only contribute their regular keys.
            pass

        fields = tuple(names)
        return fields if auto_id_field is None else fields + (auto_id_field,)
63+
64+
# Matches the named operator 'regex(<expr>)' and captures <expr>.
# Raw string: "\(" is not a valid escape sequence in a regular string literal.
REGEX = re.compile(r"regex\((.*)\)")
66+
67+
class Regex(AttributedFields):
    """
    Only consider fields that match the given regular expression. Different from the Fields-class only
    one expression is allowed here.

    Parameters
    ----------
    method: str
        String containing a regular expression in the form: 'regex(<regex>)'.
        Backslashes and other regex-specific characters have to be escaped properly.
    attribute: str
        Name of the metadata attribute that is searched in addition to the regular keys
        (default: 'attrs').

    Raises
    ------
    DefinitionInvalid
        If ``method`` is not a string of the form 'regex(<regex>)'.
    re.error
        If the enclosed expression is not a valid regular expression.
    """
    def __init__(self, method=None, attribute='attrs'):
        # Reject a missing/non-string definition explicitly instead of failing inside re.
        if not isinstance(method, str):
            raise DefinitionInvalid("%s is not valid" % (method,))
        m = REGEX.match(method)
        if m is None:
            raise DefinitionInvalid("%s is not valid" % method)
        expr = m.group(1).strip()
        self.regex = re.compile(expr)
        # "*" makes the parent enumerate every key (and metadata key) before filtering.
        super().__init__("*", attribute=attribute)

    def reified_fields(self, datum):
        """Return the field names available at ``datum`` that fully match the expression."""
        candidates = super().reified_fields(datum)
        # Non-string keys cannot be matched by a regular expression and are skipped.
        return tuple(
            field for field in candidates
            if isinstance(field, str) and self.regex.fullmatch(field)
        )

    def __str__(self):
        return f'regex({self.regex.pattern})'

    def __repr__(self):
        return f'{self.__class__.__name__}({self.regex.pattern})'

    def __eq__(self, other):
        return isinstance(other, Regex) and self.regex == other.regex

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable; hash the compared object.
        return hash(self.regex)
99+
100+
class HDFPathParser(ExtentedJsonPathParser):
    """
    Custom LALR-parser for HDF5 files based on JsonPath.

    Field expressions produce ``AttributedFields`` nodes and the named operator ``regex(...)``
    produces a ``Regex`` node; all other named operators are delegated to the parent parser.

    Parameters
    ----------
    metadata_attribute: str
        Name of the attribute holding the metadata mapping of an object (default: 'attrs').
    debug: bool
        Passed on to the parent parser.
    lexer_class: class or None
        Passed on to the parent parser.
    """
    def __init__(self, metadata_attribute='attrs', debug=False, lexer_class=None):
        super().__init__(debug=debug, lexer_class=lexer_class)
        self.metadata_attribute = metadata_attribute

    # NOTE: the string opening each p_* method below is presumably consumed by the parent
    # parser as the grammar rule - keep these strings byte-identical.

    def p_jsonpath_named_operator(self, p):
        "jsonpath : NAMED_OPERATOR"
        if p[1].startswith("regex("):
            node = Regex(p[1])
            # Search the same metadata attribute as the field nodes built by this parser.
            node.attribute = self.metadata_attribute
            p[0] = node
        else:
            super().p_jsonpath_named_operator(p)

    def p_jsonpath_fields(self, p):
        "jsonpath : fields_or_any"
        p[0] = AttributedFields(*p[1], attribute=self.metadata_attribute)

    def p_jsonpath_fieldbrackets(self, p):
        "jsonpath : '[' fields ']'"
        p[0] = AttributedFields(*p[2], attribute=self.metadata_attribute)

    def p_jsonpath_child_fieldbrackets(self, p):
        "jsonpath : jsonpath '[' fields ']'"
        p[0] = Child(p[1], AttributedFields(*p[3], attribute=self.metadata_attribute))
125+
126+
def parse(path, metadata_attribute='attrs', debug=False):
    """
    Parse a path expression with a freshly created ``HDFPathParser``.

    Parameters
    ----------
    path: str
        The path expression to parse.
    metadata_attribute: str
        Forwarded to ``HDFPathParser`` (default: 'attrs').
    debug: bool
        Forwarded to ``HDFPathParser``.

    Returns
    -------
    The result of ``HDFPathParser.parse(path)``.
    """
    parser = HDFPathParser(metadata_attribute=metadata_attribute, debug=debug)
    return parser.parse(path)
0 commit comments