Skip to content

Commit 58d2665

Browse files
committed
Create Iceberg Biglake scenario
Signed-off-by: Dominik Dębowczyk <dominik.debowczyk@getindata.com>
1 parent b029e28 commit 58d2665

18 files changed

+534
-1
lines changed
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
{
2+
"patterns": [
3+
"writing_to_big_query.adaptive_spark_plan._spark-bigquery-application_"
4+
],
5+
"tests": [
6+
{
7+
"name": "run_event_test",
8+
"path": "events/run_event_test.json",
9+
"tags": {
10+
"facets": [
11+
"run_event"
12+
]
13+
}
14+
},
15+
{
16+
"name": "parent_test",
17+
"path": "events/parent_test.json",
18+
"tags": {
19+
"facets": [
20+
"parent"
21+
]
22+
}
23+
},
24+
{
25+
"name": "spark_properties_test",
26+
"path": "events/spark_properties_test.json",
27+
"tags": {
28+
"facets": [
29+
"spark_properties"
30+
]
31+
}
32+
},
33+
{
34+
"name": "processing_engine_test",
35+
"path": "events/processing_engine_test.json",
36+
"tags": {
37+
"facets": [
38+
"processing_engine"
39+
]
40+
}
41+
},
42+
{
43+
"name": "gcp_dataproc_test",
44+
"path": "events/gcp_dataproc_test.json",
45+
"tags": {
46+
"facets": [
47+
"gcp_dataproc"
48+
],
49+
"min_version": "1.24.0"
50+
}
51+
},
52+
{
53+
"name": "jobType_test",
54+
"path": "events/jobType_test.json",
55+
"tags": {
56+
"facets": [
57+
"jobType"
58+
]
59+
}
60+
},
61+
{
62+
"name": "gcp_lineage_test",
63+
"path": "events/gcp_lineage_test.json",
64+
"tags": {
65+
"facets": [
66+
"gcp_lineage"
67+
]
68+
}
69+
},
70+
{
71+
"name": "dataSource_test",
72+
"path": "events/dataSource_test.json",
73+
"tags": {
74+
"facets": [
75+
"dataSource"
76+
]
77+
}
78+
},
79+
{
80+
"name": "schema_test",
81+
"path": "events/schema_test.json",
82+
"tags": {
83+
"facets": [
84+
"schema"
85+
],
86+
"lineage_level": {
87+
"bigquery": [
88+
"dataset"
89+
]
90+
}
91+
}
92+
},
93+
{
94+
"name": "columnLineage_test",
95+
"path": "events/columnLineage_test.json",
96+
"tags": {
97+
"facets": [
98+
"columnLineage"
99+
],
100+
"lineage_level": {
101+
"bigquery": [
102+
"dataset",
103+
"column",
104+
"transformation"
105+
]
106+
}
107+
}
108+
},
109+
{
110+
"name": "storage_test",
111+
"path": "events/storage_test.json",
112+
"tags": {
113+
"facets": [
114+
"storage"
115+
]
116+
}
117+
}
118+
]
119+
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
{
2+
"eventType": "COMPLETE",
3+
"job": {
4+
"namespace": "default",
5+
"name": "big_lake_to_iceberg.atomic_replace_table_as_select.e2e_dataset_e2e_another_table"
6+
},
7+
"outputs": [
8+
{
9+
"namespace": "gs://open-lineage-e2e",
10+
"name": "data/bigquery_metastore/e2e_dataset.db/e2e_another_table",
11+
"facets": {
12+
"columnLineage": {
13+
"fields": {
14+
"word": {
15+
"inputFields": [
16+
{
17+
"namespace": "bigquery",
18+
"name": "gcp-open-lineage-testing.e2e_dataset.iceberg_biglake",
19+
"field": "word",
20+
"transformations": [
21+
{
22+
"type": "DIRECT",
23+
"subtype": "IDENTITY",
24+
"description": "",
25+
26+
"masking": false
27+
},
28+
{
29+
"type": "INDIRECT",
30+
"subtype": "GROUP_BY",
31+
"description": "",
32+
"masking": false
33+
}
34+
]
35+
}
36+
]
37+
},
38+
"word_count": {
39+
"inputFields": [
40+
{
41+
"namespace": "bigquery",
42+
"name": "gcp-open-lineage-testing.e2e_dataset.iceberg_biglake",
43+
"field": "word",
44+
"transformations": [
45+
{
46+
"type": "INDIRECT",
47+
"subtype": "GROUP_BY",
48+
"description": "",
49+
"masking": false
50+
}
51+
]
52+
},
53+
{
54+
"namespace": "bigquery",
55+
"name": "gcp-open-lineage-testing.e2e_dataset.iceberg_biglake",
56+
"field": "word_count",
57+
"transformations": [
58+
{
59+
"type": "DIRECT",
60+
"subtype": "AGGREGATION",
61+
"description": "",
62+
"masking": false
63+
}
64+
]
65+
}
66+
]
67+
}
68+
}
69+
}
70+
}
71+
}
72+
]
73+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{
2+
"eventType": "COMPLETE",
3+
"job": {
4+
"namespace": "default",
5+
"name": "big_lake_to_iceberg.atomic_replace_table_as_select.e2e_dataset_e2e_another_table"
6+
},
7+
"inputs": [
8+
{
9+
"namespace": "bigquery",
10+
"name": "gcp-open-lineage-testing.e2e_dataset.iceberg_biglake",
11+
"facets": {
12+
"dataSource": {
13+
"name": "bigquery",
14+
"uri": "bigquery"
15+
}
16+
}
17+
}
18+
],
19+
"outputs": [
20+
{
21+
"namespace": "gs://open-lineage-e2e",
22+
"name": "data/bigquery_metastore/e2e_dataset.db/e2e_another_table",
23+
"facets": {
24+
"dataSource": {
25+
"name": "gs://open-lineage-e2e",
26+
"uri": "gs://open-lineage-e2e"
27+
}
28+
}
29+
}
30+
]
31+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
{
2+
"eventType": "COMPLETE",
3+
"job": {
4+
"namespace": "default",
5+
"name": "big_lake_to_iceberg.atomic_replace_table_as_select.e2e_dataset_e2e_another_table"
6+
},
7+
"run": {
8+
"facets": {
9+
"gcp_dataproc": {
10+
"clusterUuid": "{{ is_uuid(result) }}",
11+
"jobId": "{{ any(result) }}",
12+
"jobUuid": "{{ is_uuid(result) }}",
13+
"queryNodeName": "append_data",
14+
"appName": "BigQuery to Iceberg with BigQueryMetastoreCatalog",
15+
"clusterName": "{{ match(result, 'dataproc-producer-test-.+') }}",
16+
"appId": "{{ match(result, 'application_.+') }}",
17+
"projectId": "gcp-open-lineage-testing"
18+
}
19+
}
20+
}
21+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"eventType": "COMPLETE",
3+
"job": {
4+
"namespace": "default",
5+
"name": "big_lake_to_iceberg.atomic_replace_table_as_select.e2e_dataset_e2e_another_table",
6+
"facets": {
7+
"gcp_lineage": {
8+
"origin": {
9+
"sourceType": "DATAPROC",
10+
"name": "{{ match(result, 'projects/gcp-open-lineage-testing/regions/us-west1/clusters/dataproc-producer-test-.*') }}"
11+
}
12+
}
13+
}
14+
}
15+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"eventType": "COMPLETE",
3+
"job": {
4+
"namespace": "default",
5+
"name": "big_lake_to_iceberg.atomic_replace_table_as_select.e2e_dataset_e2e_another_table",
6+
"facets": {
7+
"jobType": {
8+
"processingType": "BATCH",
9+
"integration": "SPARK",
10+
"jobType": "SQL_JOB"
11+
}
12+
}
13+
}
14+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"eventType": "COMPLETE",
3+
"job": {
4+
"namespace": "default",
5+
"name": "big_lake_to_iceberg.atomic_replace_table_as_select.e2e_dataset_e2e_another_table"
6+
},
7+
"outputs": [
8+
{
9+
"namespace": "gs://open-lineage-e2e",
10+
"name": "data/bigquery_metastore/e2e_dataset.db/e2e_another_table",
11+
"outputFacets": {
12+
"outputStatistics": {
13+
"rowCount": "{{ any(result) }}",
14+
"size": "{{ any(result) }}",
15+
"fileCount": "{{ any(result) }}"
16+
}
17+
}
18+
}
19+
]
20+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"eventType": "COMPLETE",
3+
"job": {
4+
"namespace": "default",
5+
"name": "big_lake_to_iceberg.atomic_replace_table_as_select.e2e_dataset_e2e_another_table"
6+
},
7+
"run": {
8+
"facets": {
9+
"parent": {
10+
"run": {
11+
"runId": "{{ is_uuid(result) }}"
12+
},
13+
"job": {
14+
"namespace": "default",
15+
"name": "big_lake_to_iceberg"
16+
}
17+
}
18+
}
19+
}
20+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"eventType": "COMPLETE",
3+
"job": {
4+
"namespace": "default",
5+
"name": "big_lake_to_iceberg.atomic_replace_table_as_select.e2e_dataset_e2e_another_table"
6+
},
7+
"run": {
8+
"facets": {
9+
"processing_engine": {
10+
"version": "3.5.3",
11+
"name": "spark",
12+
"openlineageAdapterVersion": "{{ any(result) }}"
13+
}
14+
}
15+
}
16+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
{
2+
"eventTime": "{{ is_datetime(result) }}",
3+
"eventType": "COMPLETE",
4+
"run": {
5+
"runId": "{{ is_uuid(result) }}",
6+
"facets": {}
7+
},
8+
"job": {
9+
"namespace": "default",
10+
"name": "big_lake_to_iceberg.atomic_replace_table_as_select.e2e_dataset_e2e_another_table",
11+
"facets": {}
12+
},
13+
"inputs": [
14+
{
15+
"namespace": "bigquery",
16+
"name": "gcp-open-lineage-testing.e2e_dataset.iceberg_biglake",
17+
"facets": {},
18+
"inputFacets": {}
19+
}
20+
],
21+
"outputs": [
22+
{
23+
"namespace": "gs://open-lineage-e2e",
24+
"name": "data/bigquery_metastore/e2e_dataset.db/e2e_another_table",
25+
"facets": {},
26+
"outputFacets": {}
27+
}
28+
]
29+
}

0 commit comments

Comments
 (0)