Skip to content

Commit 394daed

Browse files
authored
feat(prepro): add nextclade_dataset_tag_map (#5072)
Required for GenSpectrum/servers#294. Testing on the GenSpectrum servers shows this is working as intended (WAI).

### Screenshot

### PR Checklist

- ~[ ] All necessary documentation has been adapted.~
- ~[ ] The implemented feature is covered by appropriate, automated tests.~
- [x] Any manual testing that has been done is documented (i.e. what exactly was tested?)

🚀 Preview: Add `preview` label to enable
1 parent 0fada5c commit 394daed

File tree

2 files changed

+10
-2
lines changed

2 files changed

+10
-2
lines changed

preprocessing/nextclade/src/loculus_preprocessing/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ class Config:
6060
nextclade_dataset_name: str | None = None
6161
nextclade_dataset_name_map: dict[str, str] | None = None
6262
nextclade_dataset_tag: str | None = None
63+
nextclade_dataset_tag_map: dict[str, str] | None = None
6364
nextclade_dataset_server: str = "https://data.clades.nextstrain.org/v3"
6465
nextclade_dataset_server_map: dict[str, str] | None = None
6566

preprocessing/nextclade/src/loculus_preprocessing/prepro.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -838,10 +838,17 @@ def get_nextclade_dataset_server(config: Config, segment: SegmentName) -> str:
838838
return config.nextclade_dataset_server
839839

840840

841+
def get_nextclade_dataset_tag(config: Config, segment: SegmentName) -> str | None:
842+
if config.nextclade_dataset_tag_map and segment in config.nextclade_dataset_tag_map:
843+
return config.nextclade_dataset_tag_map[segment]
844+
return config.nextclade_dataset_tag
845+
846+
841847
def download_nextclade_dataset(dataset_dir: str, config: Config) -> None:
842848
for segment in config.nucleotideSequences:
843849
nextclade_dataset_name = get_nextclade_dataset_name(config, segment)
844850
nextclade_dataset_server = get_nextclade_dataset_server(config, segment)
851+
nextclade_dataset_tag = get_nextclade_dataset_tag(config, segment)
845852

846853
dataset_dir_seg = dataset_dir if segment == "main" else dataset_dir + "/" + segment
847854
dataset_download_command = [
@@ -853,8 +860,8 @@ def download_nextclade_dataset(dataset_dir: str, config: Config) -> None:
853860
f"--output-dir={dataset_dir_seg}",
854861
]
855862

856-
if config.nextclade_dataset_tag is not None:
857-
dataset_download_command.append(f"--tag={config.nextclade_dataset_tag}")
863+
if nextclade_dataset_tag is not None:
864+
dataset_download_command.append(f"--tag={nextclade_dataset_tag}")
858865

859866
logger.info("Downloading Nextclade dataset: %s", dataset_download_command)
860867
if subprocess.run(dataset_download_command, check=False).returncode != 0: # noqa: S603

0 commit comments

Comments
 (0)