4 | 4 | # Permissions are hereby granted under the terms of the MIT License: |
5 | 5 | # https://opensource.org/licenses/MIT. |
6 | 6 | |
| 7 | +from pathlib import Path |
| 8 | +from typing import Literal |
| 9 | + |
7 | 10 | import click |
| 11 | +import yaml |
8 | 12 | |
9 | 13 | from deep_code.tools.publish import Publisher |
10 | 14 | |
| 15 | +Mode = Literal["all", "dataset", "workflow"] |
| 16 | + |
| 17 | +DATASET_MARKERS = { |
| 18 | + "stac_version", |
| 19 | + "extent", |
| 20 | + "license", |
| 21 | + "summaries", |
| 22 | + "assets", |
| 23 | + "providers", |
| 24 | + "collection", |
| 25 | + "collection_id", |
| 26 | + "id", |
| 27 | +} |
| 28 | +WORKFLOW_MARKERS = { |
| 29 | + "workflow", |
| 30 | + "workflow_id", |
| 31 | + "workflow_title", |
| 32 | + "experiment", |
| 33 | + "jupyter_notebook_url", |
| 34 | + "notebook", |
| 35 | + "parameters", |
| 36 | + "input_datasets", |
| 37 | +} |
| 38 | + |
| 39 | + |
| 40 | +def _validate_inputs( |
| 41 | + dataset_config: str | None, workflow_config: str | None, mode: str |
| 42 | +): |
| 43 | + mode = mode.lower() |
| 44 | + |
| 45 | + def ensure_file(path: str | None, label: str): |
| 46 | + if path is None: |
| 47 | + raise click.UsageError(f"{label} is required but was not provided.") |
| 48 | + if not Path(path).is_file(): |
| 49 | + raise click.UsageError(f"{label} not found: {path} is not a file") |
| 50 | + |
| 51 | + if mode == "dataset": |
| 52 | + ensure_file(dataset_config, "DATASET_CONFIG") |
| 53 | + if workflow_config is not None: |
| 54 | + click.echo("Ignoring WORKFLOW_CONFIG since mode=dataset.", err=True) |
| 55 | + |
| 56 | + elif mode == "workflow": |
| 57 | + ensure_file(workflow_config, "WORKFLOW_CONFIG") |
| 58 | + |
| 59 | + elif mode == "all": |
| 60 | + ensure_file(dataset_config, "DATASET_CONFIG") |
| 61 | + ensure_file(workflow_config, "WORKFLOW_CONFIG") |
| 62 | + |
| 63 | + else: |
| 64 | + raise click.UsageError("Invalid mode. Choose one of: all, dataset, workflow.") |
| 65 | + |
| 66 | + |
| 67 | +def _detect_config_type(path: Path) -> Literal["dataset", "workflow"]: |
| 68 | + """Detect config type via filename hints and YAML top-level keys.""" |
| 69 | + name = path.name.lower() |
| 70 | + if "workflow" in name or "experiment" in name: |
| 71 | + return "workflow" |
| 72 | + if "dataset" in name or "collection" in name: |
| 73 | + return "dataset" |
| 74 | + |
| 75 | + try: |
| 76 | + data = yaml.safe_load(path.read_text(encoding="utf-8")) |
| 77 | + except Exception as e: |
| 78 | + raise ValueError(f"Cannot read YAML from {path}: {e}") from e |
| 79 | + |
| 80 | + if not isinstance(data, dict): |
| 81 | + raise ValueError(f"YAML in {path} must be a mapping/object at the top level.") |
| 82 | + |
| 83 | + keys = set(data.keys()) |
| 84 | + ds_score = len(keys & DATASET_MARKERS) |
| 85 | + wf_score = len(keys & WORKFLOW_MARKERS) |
| 86 | + |
| 87 | + if ds_score > wf_score: |
| 88 | + return "dataset" |
| 89 | + if wf_score > ds_score: |
| 90 | + return "workflow" |
| 91 | + |
| 92 | + raise ValueError( |
| 93 | + f"Ambiguous config type for {path}. " |
| 94 | + "Rename to include 'dataset' or 'workflow', or pass the missing file explicitly." |
| 95 | + ) |
| 96 | + |
| 97 | + |
| 98 | +def _assign_configs( |
| 99 | + pos_first: str | None, |
| 100 | + pos_second: str | None, |
| 101 | + mode: Mode, |
| 102 | + explicit_dataset: str | None, |
| 103 | + explicit_workflow: str | None, |
| 104 | +) -> tuple[str | None, str | None]: |
| 105 | + """ |
| 106 | + Decide which file is the dataset config and which the workflow config. |
| 107 | + Precedence: explicit flags > positional + detection. |
| 108 | + Returns (dataset_config, workflow_config). |
| 109 | + """ |
| 110 | + ds = explicit_dataset |
| 111 | + wf = explicit_workflow |
| 112 | + |
| 113 | + # If both explicit flags were provided, we're done; warn if extra positionals were passed. |
| 114 | + pos_args = [p for p in (pos_first, pos_second) if p] |
| 115 | + if ds and wf: |
| 116 | + if pos_args: |
| 117 | + click.echo( |
| 118 | + "Positional config paths ignored because explicit flags were provided.", |
| 119 | + err=True, |
| 120 | + ) |
| 121 | + return ds, wf |
| 122 | + |
| 123 | + # Helper to assign a single positional file to the missing slot |
| 124 | + def _assign_single(p: str) -> None: |
| 125 | + nonlocal ds, wf |
| 126 | + if ds and wf: |
| 127 | + raise click.UsageError( |
| 128 | + "Both dataset and workflow configs already provided; remove extra positional files." |
| 129 | + ) |
| 130 | + # Use mode as a strong hint when only one is missing |
| 131 | + if not ds and mode == "dataset": |
| 132 | + ds = p |
| 133 | + return |
| 134 | + if not wf and mode == "workflow": |
| 135 | + wf = p |
| 136 | + return |
| 137 | + # Otherwise detect |
| 138 | + kind = _detect_config_type(Path(p)) |
| 139 | + if kind == "dataset": |
| 140 | + if ds and Path(ds).resolve() != Path(p).resolve(): |
| 141 | + raise click.UsageError( |
| 142 | + f"Multiple dataset configs supplied: {ds} and {p}" |
| 143 | + ) |
| 144 | + ds = p |
| 145 | + else: |
| 146 | + if wf and Path(wf).resolve() != Path(p).resolve(): |
| 147 | + raise click.UsageError( |
| 148 | + f"Multiple workflow configs supplied: {wf} and {p}" |
| 149 | + ) |
| 150 | + wf = p |
| 151 | + |
| 152 | + # If exactly one explicit flag was provided, try to fill the other slot via positionals |
| 153 | + if ds and not wf: |
| 154 | + if len(pos_args) > 1: |
| 155 | + raise click.UsageError( |
| 156 | + "Provide at most one positional file when using --dataset-config." |
| 157 | + ) |
| 158 | + if pos_args: |
| 159 | + _assign_single(pos_args[0]) |
| 160 | + return ds, wf |
| 161 | + |
| 162 | + if wf and not ds: |
| 163 | + if len(pos_args) > 1: |
| 164 | + raise click.UsageError( |
| 165 | + "Provide at most one positional file when using --workflow-config." |
| 166 | + ) |
| 167 | + if pos_args: |
| 168 | + _assign_single(pos_args[0]) |
| 169 | + return ds, wf |
| 170 | + |
| 171 | + # No explicit flags: rely on positionals + detection |
| 172 | + if not pos_args: |
| 173 | + return None, None |
| 174 | + if len(pos_args) == 1: |
| 175 | + p = pos_args[0] |
| 176 | + if mode == "dataset": |
| 177 | + return p, None |
| 178 | + if mode == "workflow": |
| 179 | + return None, p |
| 180 | + # mode == "all": detect and require the other later in validation |
| 181 | + kind = _detect_config_type(Path(p)) |
| 182 | + return (p, None) if kind == "dataset" else (None, p) |
| 183 | + |
| 184 | + # Two positionals: detect both and assign |
| 185 | + p1, p2 = pos_args[0], pos_args[1] |
| 186 | + k1 = _detect_config_type(Path(p1)) |
| 187 | + k2 = _detect_config_type(Path(p2)) |
| 188 | + if k1 == k2: |
| 189 | + raise click.UsageError( |
| 190 | + f"Both files look like '{k1}' configs: {p1} and {p2}. " |
| 191 | + "Please rename one or use --dataset-config/--workflow-config." |
| 192 | + ) |
| 193 | + ds = p1 if k1 == "dataset" else p2 |
| 194 | + wf = p1 if k1 == "workflow" else p2 |
| 195 | + return ds, wf |
| 196 | + |
11 | 197 | |
12 | 198 | @click.command(name="publish") |
13 | | -@click.argument("dataset_config", type=click.Path(exists=True)) |
14 | | -@click.argument("workflow_config", type=click.Path(exists=True)) |
| 199 | +@click.argument("dataset_config", type=click.Path(exists=True), required=False) |
| 200 | +@click.argument("workflow_config", type=click.Path(exists=True), required=False) |
| 201 | +@click.option( |
| 202 | + "--dataset-config", |
| 203 | + "dataset_config_opt", |
| 204 | + type=click.Path(exists=True), |
| 205 | + help="Explicit path to dataset config (overrides positional detection).", |
| 206 | +) |
| 207 | +@click.option( |
| 208 | + "--workflow-config", |
| 209 | + "workflow_config_opt", |
| 210 | + type=click.Path(exists=True), |
| 211 | + help="Explicit path to workflow config (overrides positional detection).", |
| 212 | +) |
15 | 213 | @click.option( |
16 | 214 | "--environment", |
17 | 215 | "-e", |
18 | 216 | type=click.Choice(["production", "staging", "testing"], case_sensitive=False), |
19 | 217 | default="production", |
20 | 218 | help="Target environment for publishing (production, staging, testing)", |
21 | 219 | ) |
22 | | -def publish(dataset_config, workflow_config, environment): |
23 | | - """Request publishing a dataset along with experiment and workflow metadata to the |
24 | | - open science catalogue. |
| 220 | +@click.option( |
| 221 | + "--mode", |
| 222 | + "-m", |
| 223 | + type=click.Choice(["all", "dataset", "workflow"], case_sensitive=False), |
| 224 | + default="all", |
| 225 | + help="Publishing mode: dataset only, workflow only, or both", |
| 226 | +) |
| 227 | +def publish( |
| 228 | + dataset_config, |
| 229 | + workflow_config, |
| 230 | + dataset_config_opt, |
| 231 | + workflow_config_opt, |
| 232 | + environment, |
| 233 | + mode, |
| 234 | +): |
| 235 | + """ |
| 236 | + Publish dataset and/or workflow/experiment metadata. |
| 237 | + |
| 238 | + Examples: |
| 239 | + deep-code publish workflow.yaml -e staging -m workflow |
| 240 | + deep-code publish dataset.yaml -e staging -m dataset |
| 241 | + deep-code publish dataset.yaml workflow.yaml -m all |
| 242 | + deep-code publish --dataset-config dataset.yaml --workflow-config wf.yaml -m all |
| 243 | + deep-code publish --dataset-config dataset.yaml -m dataset |
| 244 | + deep-code publish --workflow-config wf.yaml -m workflow |
25 | 245 | """ |
| 246 | + mode = mode.lower() |
| 247 | + ds_path, wf_path = _assign_configs( |
| 248 | + dataset_config, |
| 249 | + workflow_config, |
| 250 | + mode, # type: ignore[arg-type] |
| 251 | + dataset_config_opt, |
| 252 | + workflow_config_opt, |
| 253 | + ) |
| 254 | + |
| 255 | + _validate_inputs(ds_path, wf_path, mode) |
| 256 | + |
26 | 257 | publisher = Publisher( |
27 | | - dataset_config_path=dataset_config, |
28 | | - workflow_config_path=workflow_config, |
| 258 | + dataset_config_path=ds_path, |
| 259 | + workflow_config_path=wf_path, |
29 | 260 | environment=environment.lower(), |
30 | 261 | ) |
31 | | - publisher.publish_all() |
| 262 | + result = publisher.publish(mode=mode) |
| 263 | + |
| 264 | + click.echo(result if isinstance(result, str) else "Wrote files locally.") |
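Not part of the diff — a quick smoke-test sketch for the new argument handling. It assumes the command module is importable as `deep_code.cli.publish` (adjust to the actual package layout) and stubs `Publisher` so nothing is actually published:

```python
# Sketch only: the deep_code.cli.publish import path and the FakePublisher
# stub are assumptions; adapt them to the real package layout.
from pathlib import Path

import pytest
from click.testing import CliRunner

from deep_code.cli.publish import _detect_config_type, publish  # hypothetical path


def test_detect_workflow_by_top_level_keys(tmp_path: Path):
    # No filename hint in "config.yaml", so detection falls back to YAML keys.
    cfg = tmp_path / "config.yaml"
    cfg.write_text("workflow_id: wf-1\njupyter_notebook_url: https://example.org\n")
    assert _detect_config_type(cfg) == "workflow"


def test_publish_workflow_only(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
    # With -m workflow, a single positional file is taken as the workflow config.
    wf = tmp_path / "my-workflow.yaml"
    wf.write_text("workflow_id: wf-1\n")
    seen = {}

    class FakePublisher:
        # Stub standing in for deep_code.tools.publish.Publisher.
        def __init__(self, **kwargs):
            seen.update(kwargs)

        def publish(self, mode):
            seen["mode"] = mode
            return "ok"

    monkeypatch.setattr("deep_code.cli.publish.Publisher", FakePublisher)
    result = CliRunner().invoke(publish, [str(wf), "-m", "workflow"])
    assert result.exit_code == 0, result.output
    assert seen["mode"] == "workflow"
    assert seen["workflow_config_path"] == str(wf)
```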