Skip to content

Commit a4f49da

Browse files
Merge pull request #15 from deepesdl/tejas-xxx-support-multi-publication-mode
support multi publication mode
2 parents 6229b88 + bd6bcd6 commit a4f49da

File tree

12 files changed

+1204
-960
lines changed

12 files changed

+1204
-960
lines changed

CHANGES.md

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,24 @@
3333
- Introduced build_link_to_jnb method for creating STAC-compatible notebook links with
3434
metadata on kernel, environment, and containerization.
3535
- Added originating application platform metadata to generated OGC API records for
36-
DeepESDL experiments and workflows.
36+
DeepESDL experiments and workflows.
37+
38+
## Changes in 0.1.6
39+
40+
- Publisher now supports a `mode` parameter. This allows more flexible publishing:
41+
- `"dataset"` → publish dataset only
42+
- `"workflow"` → publish workflow only
43+
- `"all"` → publish both (default)
44+
45+
- CLI: the `publish` command now auto-detects dataset vs workflow configs and also accepts
46+
--dataset-config / --workflow-config; single-file calls use -m to disambiguate
47+
(e.g., deep-code publish workflow.yaml -m workflow).
48+
49+
- Contacts in OGC API records no longer include default or empty fields, only
50+
properties explicitly defined in the workflow configuration will now be generated.
51+
52+
- Enhanced GitHub automation to automatically synchronize the fork with upstream before
53+
committing and opening a PR to ensure branches are always up-to-date.
54+
55+
- Prevented duplicate item and self links when updating base catalogs of workflows and
56+
experiments.

README.md

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,8 @@ catalog
9393

9494
### Usage
9595
```
96-
deep-code publish DATASET_CONFIG WORKFLOW_CONFIG [--environment ENVIRONMENT]
96+
deep-code publish DATASET_CONFIG WORKFLOW_CONFIG [--environment ENVIRONMENT] [--mode
97+
all|dataset|workflow]
9798
```
9899

99100
#### Arguments
@@ -104,8 +105,12 @@ deep-code publish DATASET_CONFIG WORKFLOW_CONFIG [--environment ENVIRONMENT]
104105
(e.g., workflow-config.yaml)
105106

106107
#### Options
108+
--dataset-config, - Explicit path to dataset config
109+
--workflow-config, - Explicit path to workflow config
107110
--environment, -e - Target catalog environment:
108111
production (default) | staging | testing
112+
--mode, -m Publishing mode:
113+
all (default) | dataset | workflow
109114

110115
#### Examples:
111116
1. Publish to staging catalog
@@ -120,6 +125,18 @@ deep-code publish dataset-config.yaml workflow-config.yaml -e testing
120125
```
121126
deep-code publish dataset-config.yaml workflow-config.yaml
122127
```
128+
4. Publish Dataset only
129+
```
130+
deep-code publish dataset-config.yaml -m dataset
131+
132+
deep-code publish --dataset-config dataset.yaml -m dataset
133+
```
134+
5. Publish Workflow only
135+
```
136+
deep-code publish workflow-config.yaml -m workflow
137+
138+
deep-code publish --workflow-config workflow.yaml -m workflow
139+
```
123140
#### dataset-config.yaml example
124141

125142
```

deep_code/cli/publish.py

Lines changed: 241 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,28 +4,261 @@
44
# Permissions are hereby granted under the terms of the MIT License:
55
# https://opensource.org/licenses/MIT.
66

7+
from pathlib import Path
8+
from typing import Literal
9+
710
import click
11+
import yaml
812

913
from deep_code.tools.publish import Publisher
1014

15+
# Allowed values for the CLI `--mode` option / `Publisher.publish(mode=...)`.
Mode = Literal["all", "dataset", "workflow"]

# Top-level YAML keys that hint a config file describes a dataset
# (STAC-collection-like fields). Used by `_detect_config_type` scoring.
DATASET_MARKERS = {
    "stac_version",
    "extent",
    "license",
    "summaries",
    "assets",
    "providers",
    "collection",
    "collection_id",
    "id",
}
# Top-level YAML keys that hint a config file describes a workflow/experiment.
# Used by `_detect_config_type` scoring.
WORKFLOW_MARKERS = {
    "workflow",
    "workflow_id",
    "workflow_title",
    "experiment",
    "jupyter_notebook_url",
    "notebook",
    "parameters",
    "input_datasets",
}
38+
39+
40+
def _validate_inputs(
41+
dataset_config: str | None, workflow_config: str | None, mode: str
42+
):
43+
mode = mode.lower()
44+
45+
def ensure_file(path: str | None, label: str):
46+
if path is None:
47+
raise click.UsageError(f"{label} is required but was not provided.")
48+
if not Path(path).is_file():
49+
raise click.UsageError(f"{label} not found: {path} is not a file")
50+
51+
if mode == "dataset":
52+
ensure_file(dataset_config, "DATASET_CONFIG")
53+
if workflow_config is not None:
54+
click.echo("Ignoring WORKFLOW_CONFIG since mode=dataset.", err=True)
55+
56+
elif mode == "workflow":
57+
ensure_file(workflow_config, "WORKFLOW_CONFIG")
58+
59+
elif mode == "all":
60+
ensure_file(dataset_config, "DATASET_CONFIG")
61+
ensure_file(workflow_config, "WORKFLOW_CONFIG")
62+
63+
else:
64+
raise click.UsageError("Invalid mode. Choose one of: all, dataset, workflow.")
65+
66+
67+
def _detect_config_type(path: Path) -> Literal["dataset", "workflow"]:
68+
"""Detect config type via filename hints and YAML top-level keys."""
69+
name = path.name.lower()
70+
if "workflow" in name or "experiment" in name:
71+
return "workflow"
72+
if "dataset" in name or "collection" in name:
73+
return "dataset"
74+
75+
try:
76+
data = yaml.safe_load(path.read_text(encoding="utf-8"))
77+
except Exception as e:
78+
raise ValueError(f"Cannot read YAML from {path}: {e}")
79+
80+
if not isinstance(data, dict):
81+
raise ValueError(f"YAML in {path} must be a mapping/object at the top level.")
82+
83+
keys = set(data.keys())
84+
ds_score = len(keys & DATASET_MARKERS)
85+
wf_score = len(keys & WORKFLOW_MARKERS)
86+
87+
if ds_score > wf_score:
88+
return "dataset"
89+
if wf_score > ds_score:
90+
return "workflow"
91+
92+
raise ValueError(
93+
f"Ambiguous config type for {path}. "
94+
"Rename to include 'dataset' or 'workflow', or pass the missing file explicitly."
95+
)
96+
97+
98+
def _assign_configs(
99+
pos_first: str | None,
100+
pos_second: str | None,
101+
mode: Mode,
102+
explicit_dataset: str | None,
103+
explicit_workflow: str | None,
104+
) -> tuple[str | None, str | None]:
105+
"""
106+
Decide which file is dataset vs workflow.
107+
Precedence: explicit flags > positional + detection.
108+
Returns (dataset_config, workflow_config).
109+
"""
110+
ds = explicit_dataset
111+
wf = explicit_workflow
112+
113+
# If both explicit provided, we're done; warn if extra positionals are passed.
114+
pos_args = [p for p in (pos_first, pos_second) if p]
115+
if ds and wf:
116+
if pos_args:
117+
click.echo(
118+
"Positional config paths ignored because explicit flags were provided.",
119+
err=True,
120+
)
121+
return ds, wf
122+
123+
# Helper to assign a single positional file to the missing slot
124+
def _assign_single(p: str) -> tuple[str | None, str | None]:
125+
nonlocal ds, wf
126+
if ds and wf:
127+
raise click.UsageError(
128+
"Both dataset and workflow configs already provided; remove extra positional files."
129+
)
130+
# Use mode as a strong hint when only one is missing
131+
if not ds and mode == "dataset":
132+
ds = p
133+
return
134+
if not wf and mode == "workflow":
135+
wf = p
136+
return
137+
# Otherwise detect
138+
kind = _detect_config_type(Path(p))
139+
if kind == "dataset":
140+
if ds and Path(ds).resolve() != Path(p).resolve():
141+
raise click.UsageError(
142+
f"Multiple dataset configs supplied: {ds} and {p}"
143+
)
144+
ds = p
145+
else:
146+
if wf and Path(wf).resolve() != Path(p).resolve():
147+
raise click.UsageError(
148+
f"Multiple workflow configs supplied: {wf} and {p}"
149+
)
150+
wf = p
151+
152+
# If exactly one explicit provided, try to fill the other via positionals
153+
if ds and not wf:
154+
if len(pos_args) > 1:
155+
raise click.UsageError(
156+
"Provide at most one positional file when using --dataset-config."
157+
)
158+
if pos_args:
159+
_assign_single(pos_args[0])
160+
return ds, wf
161+
162+
if wf and not ds:
163+
if len(pos_args) > 1:
164+
raise click.UsageError(
165+
"Provide at most one positional file when using --workflow-config."
166+
)
167+
if pos_args:
168+
_assign_single(pos_args[0])
169+
return ds, wf
170+
171+
# No explicit flags: rely on positionals + detection
172+
if not pos_args:
173+
return None, None
174+
if len(pos_args) == 1:
175+
p = pos_args[0]
176+
if mode == "dataset":
177+
return p, None
178+
if mode == "workflow":
179+
return None, p
180+
# mode == "all": detect and require the other later in validation
181+
kind = _detect_config_type(Path(p))
182+
return (p, None) if kind == "dataset" else (None, p)
183+
184+
# Two positionals: detect both and assign
185+
p1, p2 = pos_args[0], pos_args[1]
186+
k1 = _detect_config_type(Path(p1))
187+
k2 = _detect_config_type(Path(p2))
188+
if k1 == k2:
189+
raise click.UsageError(
190+
f"Both files look like '{k1}' configs: {p1} and {p2}. "
191+
"Please rename one or use --dataset-config/--workflow-config."
192+
)
193+
ds = p1 if k1 == "dataset" else p2
194+
wf = p1 if k1 == "workflow" else p2
195+
return ds, wf
196+
11197

12198
@click.command(name="publish")
# Positional paths are optional; when given, their role (dataset vs.
# workflow) is resolved by _assign_configs via hints and detection.
@click.argument("dataset_config", type=click.Path(exists=True), required=False)
@click.argument("workflow_config", type=click.Path(exists=True), required=False)
@click.option(
    "--dataset-config",
    "dataset_config_opt",
    type=click.Path(exists=True),
    help="Explicit path to dataset config (overrides positional detection).",
)
@click.option(
    "--workflow-config",
    "workflow_config_opt",
    type=click.Path(exists=True),
    help="Explicit path to workflow config (overrides positional detection).",
)
@click.option(
    "--environment",
    "-e",
    type=click.Choice(["production", "staging", "testing"], case_sensitive=False),
    default="production",
    help="Target environment for publishing (production, staging, testing)",
)
@click.option(
    "--mode",
    "-m",
    type=click.Choice(["all", "dataset", "workflow"], case_sensitive=False),
    default="all",
    help="Publishing mode: dataset only, workflow only, or both",
)
def publish(
    dataset_config,
    workflow_config,
    dataset_config_opt,
    workflow_config_opt,
    environment,
    mode,
):
    """
    Publish dataset and/or workflow/experiment metadata.

    Examples:
        deep-code publish workflow.yaml -e staging -m workflow
        deep-code publish dataset.yaml -e staging -m dataset
        deep-code publish dataset.yaml workflow.yaml -m all
        deep-code publish --dataset-config dataset.yaml --workflow-config wf.yaml -m all
        deep-code publish --dataset-config dataset.yaml -m dataset
        deep-code publish --workflow-config wf.yaml -m workflow
    """
    mode = mode.lower()
    # Resolve which supplied paths are the dataset vs. workflow config
    # (explicit flags take precedence over positional detection).
    ds_path, wf_path = _assign_configs(
        dataset_config,
        workflow_config,
        mode,  # type: ignore[arg-type]
        dataset_config_opt,
        workflow_config_opt,
    )

    # Raises click.UsageError when a config required by `mode` is missing.
    _validate_inputs(ds_path, wf_path, mode)

    publisher = Publisher(
        dataset_config_path=ds_path,
        workflow_config_path=wf_path,
        environment=environment.lower(),
    )
    result = publisher.publish(mode=mode)

    # A string result is echoed as-is (presumably a URL/summary from the
    # publisher — confirm against Publisher.publish); anything else means
    # output was written locally.
    click.echo(result if isinstance(result, str) else "Wrote files locally.")

0 commit comments

Comments
 (0)