Skip to content

Commit fb52d23

Browse files
committed
Run TD API validation even when there are other errors
1 parent 96002cb commit fb52d23

File tree

4 files changed

+96
-111
lines changed

4 files changed

+96
-111
lines changed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
name: production
2+
keys:
3+
- name: td_client_id
4+
valid_regexp: "[0-9a-fA-F]{8}-..."
5+
invalid_texts: ['']
6+
- name: td_global_id
7+
valid_regexp: "[0-9a-fA-F]{8}-..."
8+
invalid_texts: ['', '0000000000-...']
9+
- name: email
10+
valid_regexp: ".*@.*"
11+
12+
tables:
13+
- database: prod #wei_js
14+
table: pageviews
15+
incremental_columns: [updated_at, user_id]
16+
key_columns:
17+
- {column: td_client_id, key: td_client_id}
18+
- database: prod2 # weichen
19+
table: pageviews
20+
as: brand2_pageviews
21+
key_columns:
22+
- {column: td_client_id, key: td_client_id}
23+
- {column: td_global_id, key: td_global_id}
24+
- {column: email, key: email}
25+
- database: wei_audience
26+
table: users
27+
as: contacts
28+
key_columns:
29+
- {column: email, key: email}
30+
31+
canonical_ids:
32+
- name: browser_id
33+
merge_by_keys: [td_client_id, td_global_id]
34+
35+
master_tables:
36+
- name: marketing_master
37+
canonical_id: browser_id
38+
39+
attributes:
40+
- name: first_name
41+
source_columns:
42+
- {table: contacts, column: first_name, priority: 1}
43+
- {table: pageviews, column: first_name, priority: 2}
44+
45+
- name: first_name
46+
source_columns:
47+
- {table: contacts, column: last_name}
48+
49+
- name: email
50+
source_columns:
51+
- {table: contacts, column: email}

tool-box/unification-validator/unification.yml

Lines changed: 0 additions & 47 deletions
This file was deleted.

tool-box/unification-validator/unification_fail.yml

Lines changed: 0 additions & 51 deletions
This file was deleted.

tool-box/unification-validator/validate.py

Lines changed: 45 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -166,32 +166,34 @@ def validate_yaml_file(file_path: Path, td_api_key: str = None) -> tuple[bool, s
166166

167167
# Even if Pydantic validation passes, check for additional issues
168168
additional_errors = collect_additional_errors(data)
169-
169+
170170
# Perform API-based table schema validation if API key is provided
171+
# This should run regardless of other validation errors
171172
api_errors = []
172173
if td_api_key:
173174
api_errors = validate_table_schemas_via_api(data, td_api_key)
174-
175+
176+
# Combine all errors
175177
all_errors = additional_errors + api_errors
176178
if all_errors:
177-
# If there are additional errors, format them
179+
# If there are any errors, format them
178180
error_msg = schema_tip + f"Validation failed for '{file_path}':\n"
179181
error_msg += "─" * 50 + "\n"
180-
182+
181183
# Separate API errors from other errors
182184
non_api_errors = [e for e in all_errors if e.get('type', '').startswith(('value_', 'structure_'))]
183185
schema_errors = [e for e in all_errors if e.get('type', '').startswith(('schema_', 'api_', 'auth_', 'table_', 'network_', 'parse_', 'unexpected_'))]
184-
186+
185187
if non_api_errors:
186188
error_msg += "VALIDATION WARNINGS:\n"
187189
error_msg += format_validation_errors(non_api_errors, data)
188-
190+
189191
if schema_errors:
190192
if non_api_errors:
191193
error_msg += "\n" + "─" * 50 + "\n"
192194
error_msg += "TABLE SCHEMA VALIDATION (via TD API):\n"
193195
error_msg += format_validation_errors(schema_errors, data)
194-
196+
195197
return False, error_msg
196198

197199
success_msg = schema_tip + f"✅ Validation successful! File '{file_path}' is valid."
@@ -231,22 +233,22 @@ def validate_yaml_file(file_path: Path, td_api_key: str = None) -> tuple[bool, s
231233
# Try to collect additional validation errors
232234
try:
233235
partial_validation_errors = collect_additional_errors(data)
234-
236+
235237
# Only add errors that aren't already in the list
236238
existing_error_sigs = set()
237239
for err in all_errors:
238240
msg = err['msg']
239241
if msg.startswith("Value error, "):
240242
msg = msg[13:]
241243
existing_error_sigs.add((err['loc'], msg))
242-
244+
243245
for new_error in partial_validation_errors:
244246
error_sig = (new_error['loc'], new_error['msg'])
245247
if error_sig not in existing_error_sigs:
246248
# Categorize additional errors
247249
loc = new_error['loc']
248250
msg = new_error['msg']
249-
251+
250252
if (len(loc) == 1 and loc[0] in ['name', 'keys', 'tables', 'canonical_ids']) or \
251253
(len(loc) == 0 and "either canonical_ids or persistent_ids must have at least one item" in msg) or \
252254
("Table definitions may be missing" in msg):
@@ -255,16 +257,22 @@ def validate_yaml_file(file_path: Path, td_api_key: str = None) -> tuple[bool, s
255257
detailed_errors.append(new_error)
256258
except Exception:
257259
pass
260+
261+
# Perform API-based table schema validation if API key is provided
262+
# This should run even when there are Pydantic validation errors
263+
api_errors = []
264+
if td_api_key:
265+
api_errors = validate_table_schemas_via_api(data, td_api_key)
258266

259267
# Format the error message
260268
error_msg = schema_tip + f"Validation failed for '{file_path}':\n"
261269
error_msg += "─" * 50 + "\n"
262-
270+
263271
# Show structural errors first
264272
if structural_errors:
265273
error_msg += "🔴 STRUCTURAL ISSUES (fix these first):\n"
266274
error_msg += format_validation_errors(structural_errors, data) + "\n"
267-
275+
268276
# Show detailed errors with warning if structural issues exist
269277
if detailed_errors:
270278
if structural_errors:
@@ -273,7 +281,18 @@ def validate_yaml_file(file_path: Path, td_api_key: str = None) -> tuple[bool, s
273281
else:
274282
error_msg += "VALIDATION ERRORS:\n"
275283
error_msg += format_validation_errors(detailed_errors, data)
276-
284+
285+
# Show API schema validation errors
286+
if api_errors:
287+
# Keep only the errors whose type prefix marks them as schema/API-related
288+
schema_errors = [e for e in api_errors if e.get('type', '').startswith(('schema_', 'api_', 'auth_', 'table_', 'network_', 'parse_', 'unexpected_'))]
289+
290+
if schema_errors:
291+
if structural_errors or detailed_errors:
292+
error_msg += "\n" + "─" * 50 + "\n"
293+
error_msg += "TABLE SCHEMA VALIDATION (via TD API):\n"
294+
error_msg += format_validation_errors(schema_errors, data)
295+
277296
return False, error_msg
278297

279298
except Exception as e:
@@ -655,6 +674,19 @@ def validate_table_schemas_via_api(data: dict, api_key: str) -> list:
655674
'msg': f'table "{database}.{table_name}" (as: {table_as}): column "{column_name}" not found in schema',
656675
'input': table
657676
})
677+
678+
# Check if incremental_columns exist in schema
679+
incremental_columns = table.get('incremental_columns', [])
680+
if incremental_columns:
681+
for inc_col in incremental_columns:
682+
if inc_col and inc_col not in schema_columns:
683+
table_as = table.get('as_name', table_name)
684+
api_errors.append({
685+
'type': 'schema_error',
686+
'loc': ('tables', i),
687+
'msg': f'table "{database}.{table_name}" (as: {table_as}): incremental_column "{inc_col}" not found in schema',
688+
'input': table
689+
})
658690

659691
except urllib.error.HTTPError as e:
660692
if e.code == 401:

0 commit comments

Comments
 (0)