From 222b4027987414deb3b214f4116e425493958b4e Mon Sep 17 00:00:00 2001 From: pdimens Date: Fri, 17 Oct 2025 16:26:07 -0400 Subject: [PATCH 01/33] try a pixi-based container --- harpy/commands/environments.py | 39 ++------------- harpy/common/create_pixi.py | 89 ++++++++++++++++++++++++++++++++++ resources/Dockerfile | 7 +++ 3 files changed, 100 insertions(+), 35 deletions(-) create mode 100755 harpy/common/create_pixi.py create mode 100644 resources/Dockerfile diff --git a/harpy/commands/environments.py b/harpy/commands/environments.py index e9f770610..dbdd78334 100644 --- a/harpy/commands/environments.py +++ b/harpy/commands/environments.py @@ -2,53 +2,22 @@ import os import shutil -import subprocess +#import subprocess import rich_click as click from harpy.common.conda import create_conda_recipes +from harpy.common.create_pixi import create_pixi_toml from harpy.common.workflow import Workflow @click.command(hidden = True) def containerize(): """ - Configure conda and docker environments + Configure the harpy container **INTERNAL USE ONLY**. Used to recreate all the conda environments required by the workflows and build a dockerfile from that. 
""" - workflow = Workflow("container", "environments.smk", "container", 1) - workflow.fetch_snakefile() - create_conda_recipes("container") - - with open("container/Dockerfile", "w", encoding = "utf-8") as dockerraw: - _module = subprocess.run( - 'snakemake -s container/workflow/workflow.smk --containerize --directory container'.split(), - stdout = dockerraw - ) + create_pixi_toml() - #with open("Dockerfile.raw", "r") as dockerraw, open("Dockerfile", "w") as dockerfile: - # # copy over the first three lines - # dockerfile.write(dockerraw.readline()) - # dockerfile.write(dockerraw.readline()) - # dockerfile.write(dockerraw.readline()) - # dockerfile.write("\nCOPY container/workflow/envs/*.yaml /\n") - # env_hash = {} - # for line in dockerraw: - # if line.startswith("#"): - # continue - # if line.startswith("COPY"): - # dockercmd, env, hashname = line.split() - # env = Path(env).stem - # hashname = hashname.split("/")[-2] - # env_hash[env] = hashname - # runcmds = [] - # for env, _hash in env_hash.items(): - # runcmds.append(f"conda env create --prefix /conda-envs/{_hash} --file /{env}.yaml && \\") - # runcmds.append("conda clean --all -y") - # dockerfile.write("\nRUN ") - # dockerfile.write( - # "\n\t".join(runcmds) - # ) - #os.remove("Dockerfile.raw") @click.group(options_metavar='') def deps(): diff --git a/harpy/common/create_pixi.py b/harpy/common/create_pixi.py new file mode 100755 index 000000000..755717f5e --- /dev/null +++ b/harpy/common/create_pixi.py @@ -0,0 +1,89 @@ +#! 
/usr/bin/env python + +import os +import shutil +import subprocess + +def create_pixi_toml(): + environ = { + "align" : [ + "bwa-mem2", + "bwa", + "samtools==1.22", + "seqtk", + "strobealign", + "tabix" + ], + "assembly" : [ + "arcs", + "bwa", + "spades", + "cloudspades", + "links", + "quast", + "busco", + "samtools", + "tigmint" + ], + "deconvolution" : [ + "quickdeconvolution" + ], + + "demultiplex": [ + "dmox>=0.2" + ], + "metassembly": [ + "athena_meta==1.2" + ], + "phase" : [ + "hapcut2", + "whatshap" + ], + "qc" : [ + "click==8.2.1", + "falco==1.2.5", + "fastp", + "multiqc==1.30", + "pysam==0.23" + ], + "report" : [ + "quarto", + "r-dt", + "r-dplyr", + "r-highcharter", + "r-magrittr", + "r-plotly", + "r-scales", + "r-tidyr", + "r-viridislite", + "r-xml2", + "r-biocircos" + ], + "simulations" : [ + "mimick>=2.3", + "simug>=1.0.1", + ], + "stitch" : [ + "r-stitch>=1.8.4" + ], + "variants" : [ + "bcftools==1.22", + "freebayes==1.3.9", + "leviathan", + "naibr-plus", + "setuptools" + ] + } + + for env,deps in environ.items(): + if env == "report": + channels = ["conda-forge", "r"] + else: + channels = ["conda-forge", "bioconda"] + _chan = " ".join([f"--channel {i}" for i in channels]).split() + + if env == "deconvolve": + subprocess.run(["pixi", "global", "install", *_chan, "--environment", env, "--expose", "quickdeconvolve", *deps]) + else: + subprocess.run(["pixi", "global", "install", *_chan, "--environment", env, *deps]) + shutil.copy2(os.path.expanduser("~/.pixi/manifests/pixi-global.toml"), "resources/pixi.toml") diff --git a/resources/Dockerfile b/resources/Dockerfile new file mode 100644 index 000000000..2f34c5953 --- /dev/null +++ b/resources/Dockerfile @@ -0,0 +1,7 @@ +FROM ghcr.io/prefix-dev/pixi:bookworm-slim + +WORKDIR /app + +COPY ./pixi.toml . 
+ +RUN pixi global install && rm -rf ~/.cache/rattler \ No newline at end of file From 6ecb9a1c7857f660420fd90fe1098c86a6fb57c2 Mon Sep 17 00:00:00 2001 From: pdimens Date: Mon, 20 Oct 2025 13:25:21 -0400 Subject: [PATCH 02/33] use smaller tagged and versioned pixi containers --- .github/workflows/createrelease.yml | 77 ++++--- .github/workflows/tests.yml | 65 +----- harpy/commands/environments.py | 7 +- harpy/common/conda.py | 6 +- harpy/common/create_pixi.py | 233 ++++++++++++++------- harpy/common/workflow.py | 2 +- harpy/snakefiles/align_bwa.smk | 28 +-- harpy/snakefiles/align_strobe.smk | 28 +-- harpy/snakefiles/assembly.smk | 16 +- harpy/snakefiles/deconvolve.smk | 6 +- harpy/snakefiles/demultiplex_meier2021.smk | 8 +- harpy/snakefiles/environments.smk | 2 +- harpy/snakefiles/impute.smk | 1 - harpy/snakefiles/metassembly.smk | 10 +- harpy/snakefiles/phase.smk | 26 +-- harpy/snakefiles/qc.smk | 10 +- harpy/snakefiles/simulate_snpindel.smk | 12 +- harpy/snakefiles/simulate_variants.smk | 10 +- harpy/snakefiles/snp_freebayes.smk | 20 +- harpy/snakefiles/snp_mpileup.smk | 20 +- harpy/snakefiles/sv_leviathan.smk | 14 +- harpy/snakefiles/sv_leviathan_pop.smk | 18 +- harpy/snakefiles/sv_naibr.smk | 12 +- harpy/snakefiles/sv_naibr_phase.smk | 22 +- harpy/snakefiles/sv_naibr_pop.smk | 14 +- harpy/snakefiles/sv_naibr_pop_phase.smk | 26 +-- harpy/snakefiles/validate_bam.smk | 8 +- harpy/snakefiles/validate_fastq.smk | 10 +- 28 files changed, 325 insertions(+), 386 deletions(-) diff --git a/.github/workflows/createrelease.yml b/.github/workflows/createrelease.yml index 9e50b4663..06115eed8 100644 --- a/.github/workflows/createrelease.yml +++ b/.github/workflows/createrelease.yml @@ -6,12 +6,46 @@ on: - '*' # Push events of any tag created jobs: - build_versioned_container: + build_tarball: + name: Upload Release Tarball + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + repository-projects: write + steps: + - name: Checkout code + uses: 
actions/checkout@v4 + - name: Version the Container + # this removes the :*_latest tag and replaces with versioned container + run: | + for i in harpy/snakefiles/*.smk; do + sed -i "s/_latest/_${{ github.ref_name }}/g" $i + done + - name: Bump Harpy Version + # this removes the :latest tag and replaces with versioned container + run: | + sed -i "s/0\.0\.0/${{ github.ref_name }}/g" harpy/__main__.py + sed -i "s/0\.0\.0/${{ github.ref_name }}/g" pyproject.toml + - name: Build project + # This builds the release tarball, stripped of unneccessary things + run: | + mkdir artifacts + tar --exclude="test" --exclude=".deprecated" --exclude="resources" --exclude="artifacts" --exclude=".git" --exclude=".github" -zcvf artifacts/harpy.${{ github.ref_name }}.tar.gz . + - name: Create Release with Assets + uses: softprops/action-gh-release@v2 + with: + files: ./artifacts/harpy.${{ github.ref_name }}.tar.gz + + build_versioned_containers: name: Build and Push versioned container runs-on: ubuntu-latest permissions: contents: write pull-requests: write + strategy: + matrix: + env: [align, assembly, deconvolution, demultiplex, metassembly, phase, qc, report, simulations, stitch, variants] steps: - name: Checkout uses: actions/checkout@v4 @@ -34,6 +68,12 @@ jobs: - name: Clear space run: rm -rf /opt/hostedtoolcache - name: Recreate container + - uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.57.0 + cache: true + auth-host: prefix.dev + auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} shell: micromamba-shell {0} run: harpy containerize - name: Set up Docker Buildx @@ -46,36 +86,7 @@ jobs: - name: Build and push uses: docker/build-push-action@v6 with: - context: ./container + context: ./${{ matrix.env }} push: true - tags: pdimens/harpy:${{ github.ref_name }} - build_tarball: - name: Upload Release Tarball - runs-on: ubuntu-latest - permissions: - contents: write - pull-requests: write - repository-projects: write - steps: - - name: Checkout code - uses: actions/checkout@v4 - 
- name: Version the Container - # this removes the :latest tag and replaces with versioned container - run: | - for i in harpy/snakefiles/*.smk; do - sed -i "s/harpy\:latest/harpy\:${{ github.ref_name }}/g" $i - done - - name: Bump Harpy Version - # this removes the :latest tag and replaces with versioned container - run: | - sed -i "s/0\.0\.0/${{ github.ref_name }}/g" harpy/__main__.py - sed -i "s/0\.0\.0/${{ github.ref_name }}/g" pyproject.toml - - name: Build project - # This builds the release tarball, stripped of unneccessary things - run: | - mkdir artifacts - tar --exclude="test" --exclude=".deprecated" --exclude="resources" --exclude="artifacts" --exclude=".git" --exclude=".github" -zcvf artifacts/harpy.${{ github.ref_name }}.tar.gz . - - name: Create Release with Assets - uses: softprops/action-gh-release@v2 - with: - files: ./artifacts/harpy.${{ github.ref_name }}.tar.gz + tags: pdimens/harpy:$${{ matrix.version }}_{{ github.ref_name }} + diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 5d1a00e18..bafb3616d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -76,68 +76,6 @@ jobs: run: | python3 -m pip install --upgrade build && python3 -m build && \ pip install --no-deps dist/*.whl - container: - needs: [changes, build] - if: ${{ needs.changes.outputs.environments == 'true' }} - name: Rebuild Container - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - - name: Install Harpy - id: harpybuild - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl - - name: Clear Space - uses: jlumbroso/free-disk-space@main - - name: Rebuild Dockerfile - id: rebuild - shell: 
micromamba-shell {0} - run: harpy containerize - - name: Set up Docker Buildx - id: buildx - if: ${{ steps.rebuild.outcome == 'success' }} - uses: docker/setup-buildx-action@v3 - - name: Login to Docker Hub - id: dockerhub - if: ${{ steps.buildx.outcome == 'success' }} - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Build and Push to Dockerhub - if: ${{ steps.dockerhub.outcome == 'success' }} - uses: docker/build-push-action@v6 - with: - context: ./container - push: true - tags: pdimens/harpy:latest -# - name: Pull Image Locally -# id: singularity -# shell: micromamba-shell {0} -# if: ${{ needs.changes.outputs.modules == 'true' }} -# run: | -# export APPTAINER_TMPDIR=$PWD/test/ -# harpy qc --skip-reports --quiet 2 test/fastq/sample1.*.fq.gz -# - name: Create Singularity Artifact -# if: ${{ steps.singularity.outcome == 'success' }} -# uses: actions/upload-artifact@v4 -# with: -# name: deps-image -# path: .snakemake/singularity/*.simg -# retention-days: 1 dmux_meier2021: needs: [changes, build] @@ -673,7 +611,8 @@ jobs: run: | python3 -m pip install --upgrade build && python3 -m build pip install --no-deps dist/*.whl - + - name: Clear Space + uses: jlumbroso/free-disk-space@main - name: test assembly shell: micromamba-shell {0} run: | diff --git a/harpy/commands/environments.py b/harpy/commands/environments.py index dbdd78334..6896595ae 100644 --- a/harpy/commands/environments.py +++ b/harpy/commands/environments.py @@ -2,10 +2,9 @@ import os import shutil -#import subprocess import rich_click as click from harpy.common.conda import create_conda_recipes -from harpy.common.create_pixi import create_pixi_toml +from harpy.common.create_pixi import create_pixi_dockerfiles, create_pixi_toml from harpy.common.workflow import Workflow @click.command(hidden = True) @@ -16,8 +15,8 @@ def containerize(): **INTERNAL USE ONLY**. 
Used to recreate all the conda environments required by the workflows and build a dockerfile from that. """ - create_pixi_toml() - + create_pixi_dockerfiles() + #create_pixi_toml() @click.group(options_metavar='') def deps(): diff --git a/harpy/common/conda.py b/harpy/common/conda.py index ec0482a0b..308179076 100644 --- a/harpy/common/conda.py +++ b/harpy/common/conda.py @@ -63,11 +63,7 @@ def create_conda_recipes(outdir: str, envs: list= []) -> None: "r::r-biocircos" ], "simulations" : [ - "bioconda::mimick>=2.3", - "bioconda::simug>1.0.0", - ], - "spades" : [ - "conda-forge::python=3" + "bioconda::simug>1.0.0" ], "stitch" : [ "bioconda::r-stitch>=1.8.4" diff --git a/harpy/common/create_pixi.py b/harpy/common/create_pixi.py index 755717f5e..fd98c93a0 100755 --- a/harpy/common/create_pixi.py +++ b/harpy/common/create_pixi.py @@ -1,79 +1,149 @@ #! /usr/bin/env python -import os +import glob import shutil import subprocess +import os +import sys + +environ = { + "align" : [ + "bwa-mem2", + "bwa", + "samtools==1.22", + "seqtk", + "strobealign", + "tabix" + ], + "assembly" : [ + "arcs", + "bwa", + "cloudspades", + "links", + "quast", + "busco", + "samtools", + "tigmint" + ], + "deconvolution" : [ + "quickdeconvolution" + ], + + "demultiplex": [ + "dmox>=0.2" + ], + "metassembly": [ + "athena_meta==1.2" + ], + "phase" : [ + "hapcut2", + "whatshap" + ], + "qc" : [ + "click==8.2.1", + "falco==1.2.5", + "fastp", + "multiqc==1.30", + "pysam==0.23" + ], + "report" : [ + "quarto", + "r-dt", + "r-dplyr", + "r-highcharter", + "r-magrittr", + "r-plotly", + "r-scales", + "r-tidyr", + "r-viridislite", + "r-xml2", + "r-biocircos" + ], + "simulations" : [ + "simug>=1.0.1" + ], + "stitch" : [ + "r-stitch>=1.8.4" + ], + "variants" : [ + "bcftools==1.22", + "freebayes==1.3.9", + "leviathan", + "naibr-plus", + "setuptools" + ] +} + +def create_pixi_dockerfiles(): + ''' + Using the defined environments, create a series of folders where each has a dockerfile to create one of the 
environments. + ''' + rm_cache = "&& rm -rf home/.cache/rattler".split() + for env,deps in environ.items(): + os.makedirs(f"container/{env}", exist_ok=True) + with open(f"container/{env}/Dockerfile", "w") as dockerfile: + dockerfile.write("FROM ghcr.io/prefix-dev/pixi:bookworm-slim\n\nRUN ") + if env == "report": + channels = ["conda-forge", "r"] + else: + channels = ["conda-forge", "bioconda"] + _chan = " ".join([f"--channel {i}" for i in channels]).split() + if env == "deconvolution": + dockerfile.write( + " ".join(["pixi", "global", "install", *_chan, "--environment", env, "--expose", "QuickDeconvolution", *deps, *rm_cache]) + ) + else: + dockerfile.write( + " ".join(["pixi", "global", "install", *_chan, "--environment", env, *deps, *rm_cache]) + ) + + +def reset_pixi_global(): + # remove any existing global packages + pixidir = os.environ['HOME'] + "/.pixi" + for f in glob.glob(pixidir + "/bin/*"): + if os.path.isdir(f): + shutil.rmtree(f, ignore_errors=True) + else: + os.remove(f) + for f in glob.glob(pixidir + "/envs/*"): + shutil.rmtree(f, ignore_errors=True) + +def create_pixi_dockerfile(): + with open("Dockerfile", "w") as dockerfile: + dockerfile.write("FROM ghcr.io/prefix-dev/pixi:bookworm-slim\n\nRUN ") + cmd = [] + for env,deps in environ.items(): + if env == "report": + channels = ["conda-forge", "r"] + else: + channels = ["conda-forge", "bioconda"] + _chan = " ".join([f"--channel {i}" for i in channels]).split() + if env == "deconvolution": + cmd.append( + " ".join(["pixi", "global", "install", *_chan, "--environment", env, "--expose", "QuickDeconvolution", *deps]) + ) + else: + cmd.append( + " ".join(["pixi", "global", "install", *_chan, "--environment", env, *deps]) + ) + cmd.append("rm -rf ~/.cache/rattler") + dockerfile.write(' &&\\ \n\t'.join(cmd)) def create_pixi_toml(): - environ = { - "align" : [ - "bwa-mem2", - "bwa", - "samtools==1.22", - "seqtk", - "strobealign", - "tabix" - ], - "assembly" : [ - "arcs", - "bwa", - "spades", - 
"cloudspades", - "links", - "quast", - "busco", - "samtools", - "tigmint" - ], - "deconvolution" : [ - "quickdeconvolution" - ], + with open("Dockerfile", "w") as dockerfile: + dockerfile.write("FROM ghcr.io/prefix-dev/pixi:bookworm-slim\n\n") + dockerfile.write("COPY ./pixi.toml /root/.pixi/manifests/pixi-global.toml\n\n") + dockerfile.write("RUN pixi global update && rm -rf ~/.cache/rattler\n\n") - "demultiplex": [ - "dmox>=0.2" - ], - "metassembly": [ - "athena_meta==1.2" - ], - "phase" : [ - "hapcut2", - "whatshap" - ], - "qc" : [ - "click==8.2.1", - "falco==1.2.5", - "fastp", - "multiqc==1.30", - "pysam==0.23" - ], - "report" : [ - "quarto", - "r-dt", - "r-dplyr", - "r-highcharter", - "r-magrittr", - "r-plotly", - "r-scales", - "r-tidyr", - "r-viridislite", - "r-xml2", - "r-biocircos" - ], - "simulations" : [ - "mimick>=2.3", - "simug>=1.0.1", - ], - "stitch" : [ - "r-stitch>=1.8.4" - ], - "variants" : [ - "bcftools==1.22", - "freebayes==1.3.9", - "leviathan", - "naibr-plus", - "setuptools" - ] - } + # get the name of the manifest file + _pix = subprocess.run("pixi global list".split(), capture_output = True, text = True) + global_manifest = _pix.stdout.splitlines()[0].split()[-1].strip("\'") + print(global_manifest) + # clear out the manifest + with open(global_manifest, "w") as toml: + toml.write("version = 1\n\n") + reset_pixi_global() for env,deps in environ.items(): if env == "report": @@ -81,9 +151,24 @@ def create_pixi_toml(): else: channels = ["conda-forge", "bioconda"] _chan = " ".join([f"--channel {i}" for i in channels]).split() - - if env == "deconvolve": - subprocess.run(["pixi", "global", "install", *_chan, "--environment", env, "--expose", "quickdeconvolve", *deps]) + if env == "deconvolution": + _pix = subprocess.run( + ["pixi", "global", "install", *_chan, "--environment", env, "--expose", "QuickDeconvolution", *deps] + ) + if _pix.returncode > 0: + print(_pix.stderr) + sys.exit(1) else: - subprocess.run(["pixi", "global", "install", *_chan, 
"--environment", env, *deps]) - shutil.copy2(os.path.expanduser("~/.pixi/manifests/pixi-global.toml"), "resources/pixi.toml") + _pix = subprocess.run( + ["pixi", "global", "install", *_chan, "--environment", env, *deps] + ) + if _pix.returncode > 0: + print(_pix.stderr) + sys.exit(1) + + # get the manifest file and copy to this directory + shutil.copy(global_manifest, "resources/pixi.toml") + # clean up global packages again + reset_pixi_global() + + diff --git a/harpy/common/workflow.py b/harpy/common/workflow.py index bb1793f66..78e78aecb 100644 --- a/harpy/common/workflow.py +++ b/harpy/common/workflow.py @@ -81,7 +81,7 @@ def setup_snakemake(self, container: bool, threads: int, hpc: str|None = None, s "rerun-triggers": ["mtime", "params"], "scheduler": "greedy", "nolock": True, - "software-deployment-method": "conda" if not container else ["conda", "apptainer"], + "software-deployment-method": "conda" if not container else "apptainer", "conda-prefix": filepath("./.environments"), "conda-cleanup-pkgs": "cache", "apptainer-prefix": filepath("./.environments"), diff --git a/harpy/snakefiles/align_bwa.smk b/harpy/snakefiles/align_bwa.smk index 77f9a3ed7..9bb40972f 100644 --- a/harpy/snakefiles/align_bwa.smk +++ b/harpy/snakefiles/align_bwa.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -47,6 +45,8 @@ rule preprocess_reference: genome_zip conda: "envs/align.yaml" + container: + "docker://pdimens/harpy:align_latest" shell: """ {{ @@ -103,6 +103,8 @@ rule align: min(6, workflow.cores - 1) conda: "envs/align.yaml" + container: + "docker://pdimens/harpy:align_latest" shell: """ {{ @@ -117,8 +119,6 @@ rule standardize_barcodes: temp("samples/{sample}/{sample}.standard.sam") log: "logs/{sample}.standardize.log" - container: - None shell: "standardize_barcodes_sam > {output} 2> {log} < {input}" @@ -138,8 +138,6 @@ rule mark_duplicates: quality = config['alignment_quality'] resources: mem_mb = 2000 - container: - 
None threads: 4 shell: @@ -170,8 +168,6 @@ rule assign_molecules: "logs/assign_mi/{sample}.assign_mi.log" params: molecule_distance - container: - None shell: """ assign_mi -c {params} {input} > {output.bam} 2> {log} @@ -188,8 +184,6 @@ rule barcode_stats: "logs/bxstats/{sample}.bxstats.log" params: sample = lambda wc: d[wc.sample] - container: - None shell: "bx_stats {input.bam} > {output} 2> {log}" @@ -203,8 +197,6 @@ rule molecule_coverage: "logs/{sample}.molcov.log" params: windowsize - container: - None shell: "molecule_coverage -f {input.fai} -w {params} {input.stats} 2> {log} | gzip > {output}" @@ -215,8 +207,6 @@ rule alignment_coverage: bed = "reports/data/coverage/coverage.bed" output: "reports/data/coverage/{sample}.cov.gz" - container: - None shell: "samtools bedcov -c {input.bed} {input.bam} | awk '{{ $6 = ($4 / ($3 + 1 - $2)); print }}' | gzip > {output}" @@ -252,6 +242,8 @@ rule sample_reports: "logs/reports/{sample}.alignstats.log" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: @@ -270,8 +262,6 @@ if ignore_bx: output: "{sample}.bam.bai", bam = "{sample}.bam" - container: - None shell: """ mv {input} {output.bam} @@ -287,8 +277,6 @@ rule general_stats: flagstat = temp("reports/data/samtools_flagstat/{sample}.flagstat") log: "logs/stats/{sample}.samstats.log" - container: - None shell: """ {{ @@ -311,6 +299,8 @@ rule samtools_report: outdir = "reports/data/samtools_stats reports/data/samtools_flagstat" conda: "envs/qc.yaml" + container: + "docker://pdimens/harpy:qc_latest" shell: "multiqc {params} > {output} 2> {log}" @@ -329,6 +319,8 @@ rule barcode_report: f"logs/reports/bxstats.report.log" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/align_strobe.smk b/harpy/snakefiles/align_strobe.smk index 61b2774da..0d284415f 100644 --- a/harpy/snakefiles/align_strobe.smk +++ b/harpy/snakefiles/align_strobe.smk @@ -1,5 +1,3 @@ 
-containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -41,8 +39,6 @@ rule preprocess_reference: fai = f"{workflow_geno}.fai" log: f"{workflow_geno}.preprocess.log" - container: - None shell: """ {{ @@ -86,6 +82,8 @@ rule align: min(4, workflow.cores - 1) conda: "envs/align.yaml" + container: + "docker://pdimens/harpy:align_latest" shell: """ {{ @@ -100,8 +98,6 @@ rule standardize_barcodes: temp("samples/{sample}/{sample}.standard.sam") log: "logs/{sample}.standardize.log" - container: - None shell: "standardize_barcodes_sam > {output} 2> {log} < {input}" @@ -123,8 +119,6 @@ rule mark_duplicates: mem_mb = 2000 threads: 2 - container: - None shell: """ if grep -q "^[ABCD]" <<< $(samtools head -h 0 -n 1 {input.sam}); then @@ -153,8 +147,6 @@ rule assign_molecules: "logs/assign_mi/{sample}.assign_me.log" params: molecule_distance - container: - None shell: """ assign_mi -c {params} {input} > {output.bam} 2> {log} @@ -171,8 +163,6 @@ rule barcode_stats: "logs/bxstats/{sample}.bxstats.log" params: sample = lambda wc: d[wc.sample] - container: - None shell: "bx_stats {input.bam} > {output} 2> {log}" @@ -186,8 +176,6 @@ rule molecule_coverage: "logs/molcov/{sample}.molcov.log" params: windowsize - container: - None shell: "molecule_coverage -f {input.fai} -w {params} {input.stats} 2> {log} | gzip > {output}" @@ -198,8 +186,6 @@ rule alignment_coverage: bed = "reports/data/coverage/coverage.bed" output: "reports/data/coverage/{sample}.cov.gz" - container: - None shell: "samtools bedcov -c {input.bed} {input.bam} | awk '{{ $6 = ($4 / ($3 + 1 - $2)); print }}' | gzip > {output}" @@ -235,6 +221,8 @@ rule sample_reports: "logs/reports/{sample}.alignstats.log" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: @@ -253,8 +241,6 @@ if ignore_bx: output: "{sample}.bam.bai", bam = "{sample}.bam" - container: - None shell: """ mv {input} {output.bam} @@ -270,8 +256,6 @@ rule general_stats: flagstat = 
temp("reports/data/samtools_flagstat/{sample}.flagstat") log: "logs/stats/{sample}.samstats.log" - container: - None shell: """ {{ @@ -294,6 +278,8 @@ rule samtools_report: outdir = "reports/data/samtools_stats reports/data/samtools_flagstat" conda: "envs/qc.yaml" + container: + "docker://pdimens/harpy:qc_latest" shell: "multiqc {params} > {output} 2> {log}" @@ -312,6 +298,8 @@ rule barcode_report: "logs/reports/bxstats.report.log" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/assembly.smk b/harpy/snakefiles/assembly.smk index 8e7a9e942..20e496637 100644 --- a/harpy/snakefiles/assembly.smk +++ b/harpy/snakefiles/assembly.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import logging @@ -50,6 +48,8 @@ rule cloudspades: "logs/assembly.log" conda: "envs/assembly.yaml" + container: + "docker://pdimens/harpy:assembly_latest" threads: workflow.cores resources: @@ -63,8 +63,6 @@ rule interleave_fastq: FQ2 output: temp("scaffold/interleaved.fq.gz") - container: - None shell: "seqtk mergepe {input} | bgzip > {output}" @@ -73,8 +71,6 @@ rule link_assembly: "spades/scaffolds.fasta", output: "scaffold/spades.fa" - container: - None shell: "ln -sr {input} {output}" @@ -107,6 +103,8 @@ rule scaffolding: extra = arcs_extra conda: "envs/assembly.yaml" + container: + "docker://pdimens/harpy:assembly_latest" shell: """ arcs-make arcs-tigmint -C {params} 2> {log} @@ -131,6 +129,8 @@ rule QUAST_assessment: workflow.cores conda: "envs/assembly.yaml" + container: + "docker://pdimens/harpy:assembly_latest" shell: "quast.py --threads {threads} --pe12 {input.fastq} {params} {input.contigs} {input.scaffolds} 2> {log}" @@ -151,6 +151,8 @@ rule BUSCO_analysis: workflow.cores conda: "envs/assembly.yaml" + container: + "docker://pdimens/harpy:assembly_latest" shell: "( busco -f -i {input} -c {threads} -m genome {params} > {log} 2>&1 ) || touch {output}" @@ -167,6 +169,8 @@ rule 
build_report: title = "--title \"Assembly Metrics\"" conda: "envs/qc.yaml" + container: + "docker://pdimens/harpy:qc_latest" shell: "multiqc {params} {input} > {output} 2> {log}" diff --git a/harpy/snakefiles/deconvolve.smk b/harpy/snakefiles/deconvolve.smk index 7fc970e03..027c2e7d2 100644 --- a/harpy/snakefiles/deconvolve.smk +++ b/harpy/snakefiles/deconvolve.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -55,6 +53,8 @@ rule deconvolve: 2 conda: "envs/deconvolution.yaml" + container: + "docker://pdimens/harpy:deconvolution_latest" shell: "QuickDeconvolution -t {threads} -i {input} -o {output} {params} > {log} 2>&1" @@ -65,8 +65,6 @@ rule extract_forward: "{sample}.R1.fq.gz" params: "-1" - container: - None shell: "seqtk seq {params} {input} | gzip > {output}" diff --git a/harpy/snakefiles/demultiplex_meier2021.smk b/harpy/snakefiles/demultiplex_meier2021.smk index a3be2b4a5..8ed35055a 100644 --- a/harpy/snakefiles/demultiplex_meier2021.smk +++ b/harpy/snakefiles/demultiplex_meier2021.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import logging @@ -70,6 +68,8 @@ rule demultiplex: workflow.cores conda: "envs/demultiplex.yaml" + container: + "docker://pdimens/harpy:demultiplex_latest" shell: """ dmox --i1 {input.I1} --i2 {input.I2} --r1 {input.R1} --r2 {input.R2} \ @@ -90,6 +90,8 @@ rule assess_quality: 1 conda: "envs/qc.yaml" + container: + "docker://pdimens/harpy:qc_latest" shell: """ ( falco --quiet --threads {threads} -skip-report -skip-summary -data-filename {output} {input} ) > {log} 2>&1 || @@ -145,6 +147,8 @@ rule quality_report: logdir = "reports/data/" conda: "envs/qc.yaml" + container: + "docker://pdimens/harpy:qc_latest" shell: "multiqc --config {input.mqc_yaml} {params} > {output} 2> {log}" diff --git a/harpy/snakefiles/environments.smk b/harpy/snakefiles/environments.smk index 085cf9985..90068806c 100644 --- a/harpy/snakefiles/environments.smk +++ 
b/harpy/snakefiles/environments.smk @@ -13,6 +13,6 @@ rule all: rule conda_env: output: "{conda}.env" - container: "docker://pdimens/harpy:latest" + container: "docker://pdimens/harpy:{conda}_latest" conda: "envs/{conda}.yaml" shell: "touch {output}" diff --git a/harpy/snakefiles/impute.smk b/harpy/snakefiles/impute.smk index 3056d2fc1..3ccb7ad7d 100644 --- a/harpy/snakefiles/impute.smk +++ b/harpy/snakefiles/impute.smk @@ -1,4 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" import os import re import logging diff --git a/harpy/snakefiles/metassembly.smk b/harpy/snakefiles/metassembly.smk index b280d4cdf..273a42a90 100644 --- a/harpy/snakefiles/metassembly.smk +++ b/harpy/snakefiles/metassembly.smk @@ -39,8 +39,6 @@ rule sort_by_barcode: barcode_tag = BX_TAG threads: workflow.cores - container: - None shell: """ {{ @@ -57,8 +55,6 @@ rule format_barcode: temp("fastq_preproc/input.R{FR}.fq.gz") params: barcode_tag = BX_TAG - container: - None shell: "sed 's/{params}:Z:[^[:space:]]*/&-1/g' {input} | bgzip > {output}" @@ -83,11 +79,11 @@ rule error_correction: resources: mem_mb=max_mem conda: - "envs/spades.yaml" + "envs/assembly.yaml" container: - None + "docker://pdimens/harpy:assembly_latest" shell: - "metaspades.py -t {threads} {params} -1 {input.FQ_R1} -2 {input.FQ_R2} > {log}" + "metaspades -t {threads} {params} -1 {input.FQ_R1} -2 {input.FQ_R2} > {log}" rule spades_assembly: input: diff --git a/harpy/snakefiles/phase.smk b/harpy/snakefiles/phase.smk index 75f3d0dea..d200c0961 100644 --- a/harpy/snakefiles/phase.smk +++ b/harpy/snakefiles/phase.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import subprocess import logging @@ -75,8 +73,6 @@ rule isolate_sample: output: vcf = temp("workflow/input/original/{sample}.bcf"), csi = temp("workflow/input/original/{sample}.bcf.csi") - container: - None shell: "bcftools view -Ob -W -s {wildcards.sample} -o {output.vcf} {input}" @@ -85,8 +81,6 @@ rule isolate_het_snps: 
"workflow/input/original/{sample}.bcf" output: temp("workflow/input/heterozygotes/{sample}.het.vcf") - container: - None shell: "bcftools view -m 2 -M 2 -i 'GT=\"het\"' {input} > {output}" @@ -95,8 +89,6 @@ rule index_alignments: lambda wc: bamdict[wc.bam] output: "{bam}.bai" - container: - None shell: "samtools index {input}" @@ -109,8 +101,6 @@ if indels: fai = temp(genofai) log: f"workflow/reference/{bn}.preprocess.log" - container: - None shell: """ {{ @@ -138,6 +128,8 @@ rule extract_hairs: purge_invalid = invalid_regex.get(bc_type, "'$4 !~ /N/'") conda: "envs/phase.yaml" + container: + "docker://pdimens/harpy:phase_latest" shell: """ extractHAIRS {params.static} --bam {input.bam} --VCF {input.vcf} --out {output.all_bc} > {log} 2>&1 @@ -157,6 +149,8 @@ rule link_fragments: f"-d {molecule_distance} --use-tag" conda: "envs/phase.yaml" + container: + "docker://pdimens/harpy:phase_latest" shell: "LinkFragments.py --bam {input.bam} --VCF {input.vcf} --fragments {input.fragments} --out {output} {params} > {log} 2>&1" @@ -175,6 +169,8 @@ rule phase: extra = extra conda: "envs/phase.yaml" + container: + "docker://pdimens/harpy:phase_latest" shell: "HAPCUT2 --fragments {input.fragments} --vcf {input.vcf} --out {output.blocks} {params} > {log} 2>&1" @@ -183,8 +179,6 @@ rule compress_phaseblock: "phase_blocks/{sample}.blocks.phased.VCF" output: "phase_blocks/{sample}.phased.vcf.gz" - container: - None shell: "bcftools view -Oz6 -o {output} --write-index {input}" @@ -203,8 +197,6 @@ rule annotate_phase: "-Ob --write-index -c CHROM,POS,FMT/GT,FMT/PS,FMT/PQ,FMT/PD -m +HAPCUT" threads: 2 - container: - None shell: "bcftools annotate -a {input.phase} -o {output.bcf} {params} {input.orig} 2> {log}" @@ -228,8 +220,6 @@ rule merge_samples: bcf = "variants.phased.bcf" threads: workflow.cores - container: - None shell: "bcftools merge --threads {threads} --force-single -l {input.filelist} -Ob -o {output.bcf} --write-index" @@ -238,8 +228,6 @@ rule summarize_blocks: 
collect("phase_blocks/{sample}.blocks", sample = samplenames) output: "reports/blocks.summary.gz" - container: - None shell: """ {{ @@ -277,6 +265,8 @@ rule phase_report: f"-P contigs:{plot_contigs}" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/qc.smk b/harpy/snakefiles/qc.smk index b6a4cb730..07d547abf 100644 --- a/harpy/snakefiles/qc.smk +++ b/harpy/snakefiles/qc.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -59,6 +57,8 @@ rule fastp: workflow.cores conda: "envs/qc.yaml" + container: + "docker://pdimens/harpy:qc_latest" shell: "fastp {params} --thread {threads} -i {input.fw} -I {input.rv} -o {output.fw} -O {output.rv} -h {output.html} -j {output.json} 2> {log.serr}" @@ -69,8 +69,6 @@ rule barcode_stats: temp("logs/bxcount/{sample}.count.log") params: lr_type - container: - None shell: "count_bx {params} {input} > {output}" @@ -101,6 +99,8 @@ rule barcode_report: "logs/barcode.report.log" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: @@ -125,6 +125,8 @@ rule qc_report: logdir = "reports/data/fastp/" conda: "envs/qc.yaml" + container: + "docker://pdimens/harpy:qc_latest" shell: "multiqc {params} > {output} 2> {log}" diff --git a/harpy/snakefiles/simulate_snpindel.smk b/harpy/snakefiles/simulate_snpindel.smk index 51fe7b773..64ed31aaf 100644 --- a/harpy/snakefiles/simulate_snpindel.smk +++ b/harpy/snakefiles/simulate_snpindel.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import random import logging @@ -68,8 +66,6 @@ if snp_vcf: snp_vcf output: snp_vcf_correct - container: - None shell: "bcftools view -Oz {input} > {output}" @@ -79,8 +75,6 @@ if indel_vcf: indel_vcf output: indel_vcf_correct - container: - None shell: "bcftools view -Oz {input} > {output}" @@ -100,6 +94,8 @@ rule simulate_haploid: parameters = variant_params conda: 
"envs/simulations.yaml" + container: + "docker://pdimens/harpy:simulations_latest" shell: "simuG -refseq {input.geno} -prefix {params.prefix} {params.parameters} > {log}" @@ -170,6 +166,8 @@ rule simulate_diploid: indel = f"-indel_vcf haplotype_{{haplotype}}/{outprefix}.hap{{haplotype}}.indel.vcf" if indel else "" conda: "envs/simulations.yaml" + container: + "docker://pdimens/harpy:simulations_latest" shell: "simuG -refseq {input.geno} -prefix {params.prefix} {params.snp} {params.indel} > {log}" @@ -180,8 +178,6 @@ rule rename_diploid: output: fasta = f"haplotype_{{haplotype}}/{outprefix}.hap{{haplotype}}.fasta.gz", mapfile = f"haplotype_{{haplotype}}/{outprefix}.hap{{haplotype}}.map" - container: - None shell: """ bgzip -c {input.fasta} > {output.fasta} diff --git a/harpy/snakefiles/simulate_variants.smk b/harpy/snakefiles/simulate_variants.smk index 911a789bf..950bc4e9c 100644 --- a/harpy/snakefiles/simulate_variants.smk +++ b/harpy/snakefiles/simulate_variants.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import random import logging @@ -43,8 +41,6 @@ if vcf: vcf output: vcf_correct - container: - None shell: "bcftools view -Oz {input} > {output}" @@ -63,6 +59,8 @@ rule simulate_haploid: parameters = variant_params conda: "envs/simulations.yaml" + container: + "docker://pdimens/harpy:simulations_latest" shell: "simuG -refseq {input.geno} -prefix {params.prefix} {params.parameters} > {log}" @@ -118,6 +116,8 @@ rule simulate_diploid: vcf_arg = f"-{variant}_vcf" conda: "envs/simulations.yaml" + container: + "docker://pdimens/harpy:simulations_latest" shell: "simuG -refseq {input.geno} -prefix {params.prefix} {params.vcf_arg} {input.hap} > {log}" @@ -128,8 +128,6 @@ rule rename_diploid: output: fasta = f"haplotype_{{haplotype}}/{outprefix}.hap{{haplotype}}.fasta.gz", mapfile = f"haplotype_{{haplotype}}/{outprefix}.hap{{haplotype}}.{variant}.map" - container: - None shell: """ bgzip -c {input.fasta} > {output.fasta} diff --git 
a/harpy/snakefiles/snp_freebayes.smk b/harpy/snakefiles/snp_freebayes.smk index d4e20a8f6..4eba1117f 100644 --- a/harpy/snakefiles/snp_freebayes.smk +++ b/harpy/snakefiles/snp_freebayes.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import logging from pathlib import Path @@ -56,8 +54,6 @@ rule preprocess_reference: fai = f"{workflow_geno}.fai" log: f"{workflow_geno}.preprocess.log" - container: - None shell: """ {{ @@ -71,8 +67,6 @@ rule index_alignments: lambda wc: bamdict[wc.bam] output: "{bam}.bai" - container: - None shell: "samtools index {input}" @@ -108,6 +102,8 @@ rule call_variants: extra = extra conda: "envs/variants.yaml" + container: + "docker://pdimens/harpy:variants_latest" shell: """ freebayes -f {input.reference} -L {input.bamlist} {params} 2> {log} | @@ -134,8 +130,6 @@ rule concat_variants: "logs/concat.log" threads: workflow.cores - container: - None shell: "bcftools concat -f {input.filelist} --threads {threads} --naive -Ob -o {output} 2> {log}" @@ -145,8 +139,6 @@ rule sort_variants: output: bcf = "variants.raw.bcf", csi = "variants.raw.bcf.csi" - container: - None shell: "bcftools sort --write-index -Ob -o {output.bcf} {input} 2> /dev/null" @@ -164,10 +156,6 @@ rule realign_indels: workflow.cores params: "-m -both -d both --write-index -Ob -c w" - threads: - workflow.cores - container: - None shell: "bcftools norm --threads {threads} {params} -o {output.bcf} -f {input.genome} {input.bcf} 2> {log}" @@ -179,8 +167,6 @@ rule general_stats: idx = "variants.{type}.bcf.csi" output: "reports/data/variants.{type}.stats", - container: - None shell: """ bcftools stats -s "-" --fasta-ref {input.genome} {input.bcf} > {output} 2> /dev/null @@ -213,6 +199,8 @@ rule variant_report: "logs/variants.{type}.report.log" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/snp_mpileup.smk b/harpy/snakefiles/snp_mpileup.smk index 0df47f54f..cadd22f62 100644 
--- a/harpy/snakefiles/snp_mpileup.smk +++ b/harpy/snakefiles/snp_mpileup.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import logging from pathlib import Path @@ -46,8 +44,6 @@ rule preprocess_reference: f"{workflow_geno}.preprocess.log" params: f"--gzi-idx {workflow_geno}.gzi" if genome_zip else "" - container: - None shell: """ {{ @@ -75,8 +71,6 @@ rule index_alignments: lambda wc: bamdict[wc.bam] output: "{bam}.bai" - container: - None shell: "samtools index {input}" @@ -113,8 +107,6 @@ rule call_genotypes: groups = "--group-samples workflow/sample.groups" if groupings else "--group-samples -" threads: 1 - container: - None shell: """ bcftools mpileup --threads {threads} --fasta-ref {input.genome} --bam-list {input.bamlist} -Ou {params.region} {params.annot_mp} {params.extra} 2> {output.logfile} | @@ -129,8 +121,6 @@ rule sort_genotypes: idx = temp("sort/{part}.bcf.csi") log: "logs/sort/{part}.sort.log" - container: - None shell: "bcftools sort --output {output.bcf} --write-index {input.bcf} 2> {log}" @@ -167,8 +157,6 @@ rule concat_variants: "logs/concat.log" threads: workflow.cores - container: - None shell: "bcftools concat -f {input.filelist} --threads {threads} --naive -Ob -o {output} 2> {log}" @@ -178,8 +166,6 @@ rule sort_variants: output: bcf = "variants.raw.bcf", csi = "variants.raw.bcf.csi" - container: - None shell: "bcftools sort --write-index -Ob -o {output.bcf} {input} 2> /dev/null" @@ -197,8 +183,6 @@ rule realign_indels: "-m -both -d both --write-index -Ob -c w" threads: workflow.cores - container: - None shell: "bcftools norm --threads {threads} {params} -o {output.bcf} -f {input.genome} {input.bcf} 2> {log}" @@ -209,8 +193,6 @@ rule general_stats: idx = "variants.{type}.bcf.csi" output: "reports/data/variants.{type}.stats" - container: - None shell: """ bcftools stats -s "-" --fasta-ref {input.genome} {input.bcf} > {output} 2> /dev/null @@ -243,6 +225,8 @@ rule variant_report: "logs/variants.{type}.report.log" 
conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/sv_leviathan.smk b/harpy/snakefiles/sv_leviathan.smk index 9ddd1c76f..58d4cdd77 100644 --- a/harpy/snakefiles/sv_leviathan.smk +++ b/harpy/snakefiles/sv_leviathan.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -49,6 +47,8 @@ rule index_barcodes: min(10, workflow.cores) conda: "envs/variants.yaml" + container: + "docker://pdimens/harpy:variants_latest" shell: """ {{ @@ -68,6 +68,8 @@ rule preprocess_reference: f"{workflow_geno}.preprocess.log" conda: "envs/align.yaml" + container: + "docker://pdimens/harpy:align_latest" shell: """ {{ @@ -101,6 +103,8 @@ rule call_variants: workflow.cores - 1 conda: "envs/variants.yaml" + container: + "docker://pdimens/harpy:variants_latest" shell: "LEVIATHAN -b {input.bam} -i {input.bc_idx} {params} -g {input.genome} -o {output.vcf} -t {threads} --candidates {output.candidates} 2> {log.runlog}" @@ -110,8 +114,6 @@ rule sort_variants: "vcf/{sample}.vcf" output: "vcf/{sample}.bcf" - container: - None shell: "bcftools sort -Ob --output {output} {input} 2> /dev/null" @@ -120,8 +122,6 @@ rule variant_stats: "vcf/{sample}.bcf" output: temp("reports/data/{sample}.sv.stats") - container: - None shell: """ {{ @@ -197,6 +197,8 @@ rule sample_reports: contigs= f"-P contigs:{plot_contigs}" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/sv_leviathan_pop.smk b/harpy/snakefiles/sv_leviathan_pop.smk index 4d2613ff8..25a7d94a4 100644 --- a/harpy/snakefiles/sv_leviathan_pop.smk +++ b/harpy/snakefiles/sv_leviathan_pop.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -82,8 +80,6 @@ rule concat_groups: mem_mb = 2000 threads: workflow.cores - container: - None shell: """ {{ @@ -104,6 +100,8 @@ rule index_barcode: min(5, 
workflow.cores) conda: "envs/variants.yaml" + container: + "docker://pdimens/harpy:variants_latest" shell: "LRez index bam -p -b {input.bam} -o {output} --threads {threads}" @@ -118,6 +116,8 @@ rule preprocess_reference: f"{workflow_geno}.preprocess.log" conda: "envs/align.yaml" + container: + "docker://pdimens/harpy:align_latest" shell: """ {{ @@ -152,6 +152,8 @@ rule call_variants: workflow.cores - 1 conda: "envs/variants.yaml" + container: + "docker://pdimens/harpy:variants_latest" shell: "LEVIATHAN -b {input.bam} -i {input.bc_idx} {params} -g {input.genome} -o {output.vcf} -t {threads} --candidates {output.candidates} 2> {log.runlog}" @@ -163,8 +165,6 @@ rule sort_variants: "vcf/{population}.bcf" params: lambda wc: wc.population - container: - None shell: "bcftools sort -Ob --output {output} {input} 2> /dev/null" @@ -173,8 +173,6 @@ rule variant_stats: "vcf/{population}.bcf" output: temp("reports/data/{population}.sv.stats") - container: - None shell: """ {{ @@ -245,6 +243,8 @@ rule group_reports: contigs= f"-P contigs:{plot_contigs}" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: @@ -272,6 +272,8 @@ rule aggregate_report: contigs = f"-P contigs:{plot_contigs}" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/sv_naibr.smk b/harpy/snakefiles/sv_naibr.smk index 92a0ef8a8..d26d090cf 100644 --- a/harpy/snakefiles/sv_naibr.smk +++ b/harpy/snakefiles/sv_naibr.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -60,8 +58,6 @@ rule index_alignments: lambda wc: bamdict[wc.bam] output: "{bam}.bai" - container: - None shell: "samtools index {input}" @@ -98,6 +94,8 @@ rule call_variants: min(10, workflow.cores -1) conda: "envs/variants.yaml" + container: + "docker://pdimens/harpy:variants_latest" shell: "naibr {input.conf} > {log} 2>&1 && rm -rf naibrlog" @@ -111,8 +109,6 @@ rule 
infer_variants: refmt = "IGV/{sample}.reformat.bedpe", fail = "bedpe/qc_fail/{sample}.fail.bedpe", vcf = "vcf/{sample}.vcf" - container: - None shell: """ infer_sv {input.bedpe} -f {output.fail} > {output.bedpe} @@ -163,8 +159,6 @@ rule preprocess_reference: f"{workflow_geno}.preprocess.log" params: f"--gzi-idx {workflow_geno}.gzi" if genome_zip else "" - container: - None shell: """ {{ @@ -208,6 +202,8 @@ rule sample_reports: contigs= f"-P contigs:{plot_contigs}" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/sv_naibr_phase.smk b/harpy/snakefiles/sv_naibr_phase.smk index e62806cbb..a54d445ab 100644 --- a/harpy/snakefiles/sv_naibr_phase.smk +++ b/harpy/snakefiles/sv_naibr_phase.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -69,8 +67,6 @@ rule preprocess_reference: fai = f"{workflow_geno}.fai" log: f"{workflow_geno}.preprocess.log" - container: - None shell: """ {{ @@ -84,8 +80,6 @@ rule index_alignments: lambda wc: bamdict[wc.bam] output: "{bam}.bai" - container: - None shell: "samtools index {input}" @@ -94,8 +88,6 @@ rule index_snps: vcffile output: vcffile + ".csi" - container: - None shell: "bcftools index {input}" @@ -104,8 +96,6 @@ rule index_snps_gz: vcffile output: vcffile + ".tbi" - container: - None shell: "tabix {input}" @@ -124,6 +114,8 @@ rule phase_alignments: mol_dist conda: "envs/phase.yaml" + container: + "docker://pdimens/harpy:phase_latest" threads: 4 shell: @@ -134,8 +126,6 @@ rule log_phasing: collect("logs/whatshap-haplotag/{sample}.phase.log", sample = samplenames) output: "logs/whatshap-haplotag.log" - container: - None shell: """ echo -e "sample\\ttotal_alignments\\tphased_alignments" > {output} @@ -168,8 +158,6 @@ rule index_phased: "phasedbam/{sample}.bam" output: "phasedbam/{sample}.bam.bai" - container: - None shell: "samtools index {input} {output} 2> /dev/null" @@ -189,6 +177,8 @@ rule 
call_variants: 10 conda: "envs/variants.yaml" + container: + "docker://pdimens/harpy:variants_latest" shell: "naibr {input.conf} > {log} 2>&1 && rm -rf naibrlog" @@ -203,8 +193,6 @@ rule infer_variants: refmt = "IGV/{sample}.reformat.bedpe", fail = "bedpe/qc_fail/{sample}.fail.bedpe", vcf = "vcf/{sample}.vcf" - container: - None shell: """ infer_sv {input.bedpe} -f {output.fail} > {output.bedpe} @@ -273,6 +261,8 @@ rule sample_reports: contigs= f"-P contigs:{plot_contigs}" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/sv_naibr_pop.smk b/harpy/snakefiles/sv_naibr_pop.smk index be7450b6c..0cd2e39ae 100644 --- a/harpy/snakefiles/sv_naibr_pop.smk +++ b/harpy/snakefiles/sv_naibr_pop.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -96,8 +94,6 @@ rule concat_groups: mem_mb = 2000 threads: 10 - container: - None shell: """ {{ @@ -139,6 +135,8 @@ rule call_variants: min(10, workflow.cores - 1) conda: "envs/variants.yaml" + container: + "docker://pdimens/harpy:variants_latest" shell: "naibr {input.conf} > {log} 2>&1 && rm -rf naibrlog" @@ -153,8 +151,6 @@ rule infer_variants: refmt = "IGV/{population}.reformat.bedpe", fail = "bedpe/qc_fail/{population}.fail.bedpe", vcf = "vcf/{population}.vcf" - container: - None shell: """ infer_sv {input.bedpe} -f {output.fail} > {output.bedpe} @@ -202,8 +198,6 @@ rule preprocess_reference: fai = f"{workflow_geno}.fai" log: f"{workflow_geno}.preprocess.log" - container: - None shell: """ {{ @@ -241,6 +235,8 @@ rule group_reports: contigs= f"-P contigs:{plot_contigs}" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: @@ -268,6 +264,8 @@ rule aggregate_report: contigs = f"-P contigs:{plot_contigs}" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/sv_naibr_pop_phase.smk 
b/harpy/snakefiles/sv_naibr_pop_phase.smk index 47d1d446e..5cd743db8 100644 --- a/harpy/snakefiles/sv_naibr_pop_phase.smk +++ b/harpy/snakefiles/sv_naibr_pop_phase.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -90,8 +88,6 @@ rule preprocess_reference: fai = f"{workflow_geno}.fai" log: f"{workflow_geno}.preprocess.log" - container: - None shell: """ {{ @@ -105,8 +101,6 @@ rule index_snps: vcffile output: vcffile + ".csi" - container: - None shell: "bcftools index {input}" @@ -115,8 +109,6 @@ rule index_snps_gz: vcffile output: vcffile + ".tbi" - container: - None shell: "tabix {input}" @@ -125,8 +117,6 @@ rule index_alignments: lambda wc: bamdict[wc.bam] output: "{bam}.bai" - container: - None shell: "samtools index {input}" @@ -147,6 +137,8 @@ rule phase_alignments: 4 conda: "envs/phase.yaml" + container: + "docker://pdimens/harpy:phase_latest" shell: "whatshap haplotag --sample {wildcards.sample} --linked-read-distance-cutoff {params} --ignore-read-groups --tag-supplementary --output-threads={threads} -o {output.bam} --reference {input.ref} {input.vcf} {input.aln} 2> {output.log}" @@ -155,8 +147,6 @@ rule log_phasing: collect("logs/whatshap-haplotag/{sample}.phase.log", sample = samplenames) output: "logs/whatshap-haplotag.log" - container: - None shell: """ echo -e "sample\\ttotal_alignments\\tphased_alignments" > {output} @@ -199,8 +189,6 @@ rule concat_groups: mem_mb = 2000 threads: 10 - container: - None shell: """ {{ @@ -242,6 +230,8 @@ rule call_variants: min(10, workflow.cores - 1) conda: "envs/variants.yaml" + container: + "docker://pdimens/harpy:variants_latest" shell: "naibr {input.conf} > {log} 2>&1 && rm -rf naibrlog" @@ -255,9 +245,7 @@ rule infer_variants: bedpe = "bedpe/{population}.bedpe", refmt = "IGV/{population}.reformat.bedpe", fail = "bedpe/qc_fail/{population}.fail.bedpe", - vcf = "vcf/{population}.vcf" - container: - None + vcf = "vcf/{population}.vcf" shell: """ infer_sv 
{input.bedpe} -f {output.fail} > {output.bedpe} @@ -326,6 +314,8 @@ rule sample_reports: contigs= f"-P contigs:{plot_contigs}" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: @@ -353,6 +343,8 @@ rule aggregate_report: contigs = f"-P contigs:{plot_contigs}" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/validate_bam.smk b/harpy/snakefiles/validate_bam.smk index 1214737ef..19c3f405e 100644 --- a/harpy/snakefiles/validate_bam.smk +++ b/harpy/snakefiles/validate_bam.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -29,8 +27,6 @@ rule check_bam: temp("{sample}.log") params: lr_platform - container: - None shell: "check_bam {params} {input} > {output}" @@ -39,8 +35,6 @@ rule concat_results: collect("{sample}.log", sample = samplenames) output: "validate.bam.tsv" - container: - None shell: """ {{ @@ -76,6 +70,8 @@ rule create_report: "logs/report.log" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/validate_fastq.smk b/harpy/snakefiles/validate_fastq.smk index a854df6f4..e7b6248e9 100644 --- a/harpy/snakefiles/validate_fastq.smk +++ b/harpy/snakefiles/validate_fastq.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -34,8 +32,6 @@ rule check_forward: temp("{sample}.F.log") params: lr_platform - container: - None shell: "check_fastq {params} {input} > {output}" @@ -46,8 +42,6 @@ rule check_reverse: temp("{sample}.R.log") params: lr_platform - container: - None shell: "check_fastq {params} {input} > {output}" @@ -56,8 +50,6 @@ rule concat_results: collect("{sample}.{FR}.log", sample = samplenames, FR = ["F","R"]) output: "validate.fastq.tsv" - container: - None shell: """ {{ @@ -93,6 +85,8 @@ rule create_report: lr_platform conda: "envs/report.yaml" + 
container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: From 957f3b793add3970295e779cfa891258e7e8957e Mon Sep 17 00:00:00 2001 From: pdimens Date: Mon, 20 Oct 2025 13:27:39 -0400 Subject: [PATCH 03/33] fix name --- .github/workflows/createrelease.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/createrelease.yml b/.github/workflows/createrelease.yml index 06115eed8..9cbb980b5 100644 --- a/.github/workflows/createrelease.yml +++ b/.github/workflows/createrelease.yml @@ -68,7 +68,7 @@ jobs: - name: Clear space run: rm -rf /opt/hostedtoolcache - name: Recreate container - - uses: prefix-dev/setup-pixi@v0.9.2 + uses: prefix-dev/setup-pixi@v0.9.2 with: pixi-version: v0.57.0 cache: true From 796eb536ebe636825f053ce85cd41acacacdac45 Mon Sep 17 00:00:00 2001 From: pdimens Date: Mon, 20 Oct 2025 13:36:20 -0400 Subject: [PATCH 04/33] fix the tag --- .github/workflows/createrelease.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/createrelease.yml b/.github/workflows/createrelease.yml index 9cbb980b5..77ab6d4b8 100644 --- a/.github/workflows/createrelease.yml +++ b/.github/workflows/createrelease.yml @@ -88,5 +88,5 @@ jobs: with: context: ./${{ matrix.env }} push: true - tags: pdimens/harpy:$${{ matrix.version }}_{{ github.ref_name }} + tags: pdimens/harpy:$${{ matrix.env }}_{{ github.ref_name }} From 44c175f939574fbcc992bf754b27556f6e267c9f Mon Sep 17 00:00:00 2001 From: pdimens Date: Mon, 20 Oct 2025 14:59:59 -0400 Subject: [PATCH 05/33] images are much smaller --- .github/workflows/createrelease.yml | 4 +- harpy/commands/environments.py | 3 +- harpy/common/create_pixi.py | 123 ++++++++-------------------- resources/Dockerfile | 7 -- 4 files changed, 36 insertions(+), 101 deletions(-) delete mode 100644 resources/Dockerfile diff --git a/.github/workflows/createrelease.yml b/.github/workflows/createrelease.yml index 77ab6d4b8..1b43d1b48 100644 ---
a/.github/workflows/createrelease.yml +++ b/.github/workflows/createrelease.yml @@ -67,7 +67,7 @@ jobs: pip install importlib-resources - name: Clear space run: rm -rf /opt/hostedtoolcache - - name: Recreate container + - name: Recreate containers uses: prefix-dev/setup-pixi@v0.9.2 with: pixi-version: v0.57.0 @@ -86,7 +86,7 @@ jobs: - name: Build and push uses: docker/build-push-action@v6 with: - context: ./${{ matrix.env }} + context: .container/${{ matrix.env }} push: true tags: pdimens/harpy:$${{ matrix.env }}_{{ github.ref_name }} diff --git a/harpy/commands/environments.py b/harpy/commands/environments.py index 6896595ae..7ebc4cd7f 100644 --- a/harpy/commands/environments.py +++ b/harpy/commands/environments.py @@ -4,7 +4,7 @@ import shutil import rich_click as click from harpy.common.conda import create_conda_recipes -from harpy.common.create_pixi import create_pixi_dockerfiles, create_pixi_toml +from harpy.common.create_pixi import create_pixi_dockerfiles from harpy.common.workflow import Workflow @click.command(hidden = True) @@ -16,7 +16,6 @@ def containerize(): by the workflows and build a dockerfile from that. """ create_pixi_dockerfiles() - #create_pixi_toml() @click.group(options_metavar='') def deps(): diff --git a/harpy/common/create_pixi.py b/harpy/common/create_pixi.py index fd98c93a0..d02d98aa4 100755 --- a/harpy/common/create_pixi.py +++ b/harpy/common/create_pixi.py @@ -1,10 +1,8 @@ #! /usr/bin/env python -import glob import shutil import subprocess import os -import sys environ = { "align" : [ @@ -74,101 +72,46 @@ ] } -def create_pixi_dockerfiles(): - ''' - Using the defined environments, create a series of folders where each has a dockerfile to create one of the environments. 
- ''' - rm_cache = "&& rm -rf home/.cache/rattler".split() - for env,deps in environ.items(): - os.makedirs(f"container/{env}", exist_ok=True) - with open(f"container/{env}/Dockerfile", "w") as dockerfile: - dockerfile.write("FROM ghcr.io/prefix-dev/pixi:bookworm-slim\n\nRUN ") - if env == "report": - channels = ["conda-forge", "r"] - else: - channels = ["conda-forge", "bioconda"] - _chan = " ".join([f"--channel {i}" for i in channels]).split() - if env == "deconvolution": - dockerfile.write( - " ".join(["pixi", "global", "install", *_chan, "--environment", env, "--expose", "QuickDeconvolution", *deps, *rm_cache]) - ) - else: - dockerfile.write( - " ".join(["pixi", "global", "install", *_chan, "--environment", env, *deps, *rm_cache]) - ) +dockerfile_text = """ +FROM ghcr.io/prefix-dev/pixi:0.56.0 AS build +# copy source code, pixi.toml and pixi.lock to the container +WORKDIR /app +COPY . . -def reset_pixi_global(): - # remove any existing global packages - pixidir = os.environ['HOME'] + "/.pixi" - for f in glob.glob(pixidir + "/bin/*"): - if os.path.isdir(f): - shutil.rmtree(f, ignore_errors=True) - else: - os.remove(f) - for f in glob.glob(pixidir + "/envs/*"): - shutil.rmtree(f, ignore_errors=True) +# use `--locked` to ensure the lockfile is up to date with pixi.toml +RUN pixi install --locked && rm -rf ~/.cache/rattler -def create_pixi_dockerfile(): - with open("Dockerfile", "w") as dockerfile: - dockerfile.write("FROM ghcr.io/prefix-dev/pixi:bookworm-slim\n\nRUN ") - cmd = [] - for env,deps in environ.items(): - if env == "report": - channels = ["conda-forge", "r"] - else: - channels = ["conda-forge", "bioconda"] - _chan = " ".join([f"--channel {i}" for i in channels]).split() - if env == "deconvolution": - cmd.append( - " ".join(["pixi", "global", "install", *_chan, "--environment", env, "--expose", "QuickDeconvolution", *deps]) - ) - else: - cmd.append( - " ".join(["pixi", "global", "install", *_chan, "--environment", env, *deps]) - ) - cmd.append("rm -rf 
~/.cache/rattler") - dockerfile.write(' &&\\ \n\t'.join(cmd)) +# create the shell-hook bash script to activate the environment +RUN echo "#!/bin/bash" > /app/entrypoint.sh && \\ + pixi shell-hook -s bash >> /app/entrypoint.sh && \\ + echo 'exec "$@"' >> /app/entrypoint.sh && \\ + chmod +x /app/entrypoint.sh -def create_pixi_toml(): - with open("Dockerfile", "w") as dockerfile: - dockerfile.write("FROM ghcr.io/prefix-dev/pixi:bookworm-slim\n\n") - dockerfile.write("COPY ./pixi.toml /root/.pixi/manifests/pixi-global.toml\n\n") - dockerfile.write("RUN pixi global update && rm -rf ~/.cache/rattler\n\n") +FROM ubuntu:24.04 AS production +WORKDIR /app +COPY --from=build --chmod=0755 /app/entrypoint.sh /app/entrypoint.sh +COPY --from=build /app/.pixi/envs/default /app/.pixi/envs/default - # get the name of the manifest file - _pix = subprocess.run("pixi global list".split(), capture_output = True, text = True) - global_manifest = _pix.stdout.splitlines()[0].split()[-1].strip("\'") - print(global_manifest) - # clear out the manifest - with open(global_manifest, "w") as toml: - toml.write("version = 1\n\n") - reset_pixi_global() +ENTRYPOINT ["/app/entrypoint.sh"] +""" +def create_pixi_dockerfiles(): + ''' + Using the defined environments, create a series of folders where each has a dockerfile + and pixi.toml file to create one of the environments. 
+ ''' + shutil.rmtree("container", ignore_errors=True) for env,deps in environ.items(): + os.makedirs(f"container/{env}", exist_ok=True) + with open(f"container/{env}/Dockerfile", "w") as dockerfile: + dockerfile.write(dockerfile_text) if env == "report": - channels = ["conda-forge", "r"] + subprocess.run(f"pixi init container/{env} -c conda-forge -c r".split()) else: - channels = ["conda-forge", "bioconda"] - _chan = " ".join([f"--channel {i}" for i in channels]).split() - if env == "deconvolution": - _pix = subprocess.run( - ["pixi", "global", "install", *_chan, "--environment", env, "--expose", "QuickDeconvolution", *deps] - ) - if _pix.returncode > 0: - print(_pix.stderr) - sys.exit(1) - else: - _pix = subprocess.run( - ["pixi", "global", "install", *_chan, "--environment", env, *deps] - ) - if _pix.returncode > 0: - print(_pix.stderr) - sys.exit(1) - - # get the manifest file and copy to this directory - shutil.copy(global_manifest, "resources/pixi.toml") - # clean up global packages again - reset_pixi_global() - + subprocess.run(f"pixi init container/{env} -c conda-forge -c bioconda".split()) + subprocess.run( + ["pixi", "add", "--no-progress", "--manifest-path", f"container/{env}/pixi.toml", *deps] + ) + shutil.rmtree("container/.pixi", ignore_errors=True) diff --git a/resources/Dockerfile b/resources/Dockerfile deleted file mode 100644 index 2f34c5953..000000000 --- a/resources/Dockerfile +++ /dev/null @@ -1,7 +0,0 @@ -FROM ghcr.io/prefix-dev/pixi:bookworm-slim - -WORKDIR /app - -COPY ./pixi.toml . 
- -RUN pixi global install && rm -rf ~/.cache/rattler \ No newline at end of file From de99bf9750a0976d32f62f33edec19b60a1c4a63 Mon Sep 17 00:00:00 2001 From: pdimens Date: Mon, 20 Oct 2025 15:14:17 -0400 Subject: [PATCH 06/33] swap mamba for pixi --- .github/workflows/createrelease.yml | 2 +- .github/workflows/tests.yml | 379 +++++++++------------------- 2 files changed, 116 insertions(+), 265 deletions(-) diff --git a/.github/workflows/createrelease.yml b/.github/workflows/createrelease.yml index 1b43d1b48..bd513ecbf 100644 --- a/.github/workflows/createrelease.yml +++ b/.github/workflows/createrelease.yml @@ -67,13 +67,13 @@ jobs: pip install importlib-resources - name: Clear space run: rm -rf /opt/hostedtoolcache - - name: Recreate containers uses: prefix-dev/setup-pixi@v0.9.2 with: pixi-version: v0.57.0 cache: true auth-host: prefix.dev auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} + - name: Recreate containers shell: micromamba-shell {0} run: harpy containerize - name: Set up Docker Buildx diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index bafb3616d..aa44f0c48 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -62,21 +62,13 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - name: Install Harpy - id: harpybuild - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build && \ - pip install --no-deps dist/*.whl - + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: true + auth-host: prefix.dev + auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} dmux_meier2021: needs: [changes, build] if: ${{ needs.changes.outputs.demux == 'true' }} @@ -87,21 +79,15 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: 
mamba-org/setup-micromamba@v2 - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: true + auth-host: prefix.dev + auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} + activate-environment: true - name: harpy demultiplex - shell: micromamba-shell {0} run: | harpy demultiplex meier2021 --quiet 2 test/demux/samples.schema test/demux/multiplex.R* test/demux/multiplex.I* && \ ls -lh Demultiplex @@ -116,30 +102,20 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: true + auth-host: prefix.dev + auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} + activate-environment: true - name: test validate fastq - shell: micromamba-shell {0} run: | harpy validate fastq test/fastq && \ ls -lh Validate/fastq - name: test validate bam if: always() - shell: micromamba-shell {0} run: | harpy validate bam --quiet 2 test/bam && \ ls -lh Validate/bam @@ -154,24 +130,15 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - 
shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: true + auth-host: prefix.dev + auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} + activate-environment: true - name: harpy qc - shell: micromamba-shell {0} run: | harpy qc -x "--low_complexity_filter" --quiet 2 test/fastq && \ ls -lh QC @@ -186,24 +153,19 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: true + auth-host: prefix.dev + auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} + activate-environment: true + - name: Install Harpy run: | python3 -m pip install --upgrade build && python3 -m build pip install --no-deps dist/*.whl - name: harpy deconvolve - shell: micromamba-shell {0} run: | harpy deconvolve --quiet 2 test/fastq && \ ls -lh Deconvolve @@ -218,24 +180,18 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: true + auth-host: prefix.dev + auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} + activate-environment: true run: | python3 -m pip install --upgrade build && python3 -m build pip install --no-deps dist/*.whl - name: test bwa - shell: micromamba-shell {0} run: | harpy align bwa --quiet 2 
-x "-A 2" test/genome/genome.fasta.gz test/fastq && \ ls -lh Align/bwa @@ -250,24 +206,19 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: true + auth-host: prefix.dev + auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} + activate-environment: true + - name: Install Harpy run: | python3 -m pip install --upgrade build && python3 -m build pip install --no-deps dist/*.whl - name: test strobealign - shell: micromamba-shell {0} run: | harpy align strobe --quiet 2 test/genome/genome.fasta.gz test/fastq && \ ls -lh Align/strobealign @@ -282,29 +233,23 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: true + auth-host: prefix.dev + auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} + activate-environment: true + - name: Install Harpy run: | python3 -m pip install --upgrade build && python3 -m build pip install --no-deps dist/*.whl - name: snp mpileup - shell: micromamba-shell {0} run: | harpy snp mpileup --quiet 2 -r test/positions.bed -x "--ignore-RG" test/genome/genome.fasta.gz test/bam && \ ls -lh SNP/mpileup - name: snp mpileup-pop - shell: micromamba-shell {0} run: | harpy snp mpileup --quiet 2 -r test/positions.bed -o SNP/poptest -p test/samples.groups test/genome/genome.fasta.gz test/bam && \ ls -lh 
SNP/poptest @@ -319,29 +264,19 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: true + auth-host: prefix.dev + auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} + activate-environment: true - name: snp freebayes - shell: micromamba-shell {0} run: | harpy snp freebayes --quiet 2 -r test/positions.bed -x "-g 200" test/genome/genome.fasta.gz test/bam && \ ls -lh SNP/freebayes - name: snp freebayes-pop - shell: micromamba-shell {0} run: | harpy snp freebayes --quiet 2 -r test/positions.bed -o SNP/poptest -p test/samples.groups test/genome/genome.fasta.gz test/bam && \ ls -lh SNP/poptest @@ -356,35 +291,24 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: true + auth-host: prefix.dev + auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} + activate-environment: true - name: impute - shell: micromamba-shell {0} run: | harpy impute --quiet 2 --grid-size 1500 test/stitch.params test/vcf/test.bcf test/bam && \ ls -lh Impute/*/* - name: impute from vcf - shell: micromamba-shell {0} if: always() run: | harpy impute 
--quiet 2 --grid-size 1500 --vcf-samples -o vcfImpute test/stitch.params test/vcf/test.bcf test/bam && \ ls -lh vcfImpute/*/* - name: impute one region - shell: micromamba-shell {0} if: always() run: | harpy impute --quiet 2 --grid-size 1500 --vcf-samples -o regionImpute -r 3L:3000-28110227-1000 test/stitch.params test/vcf/test.bcf test/bam && \ @@ -400,40 +324,28 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl - + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: true + auth-host: prefix.dev + auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} + activate-environment: true - name: phase - shell: micromamba-shell {0} run: | harpy phase --quiet 2 -x "--max_iter 10001" test/vcf/test.bcf test/bam && \ ls -lh Phase - name: phase with indels - shell: micromamba-shell {0} if: always() run: | harpy phase --quiet 2 -o phaseindel -r test/genome/genome.fasta.gz test/vcf/test.bcf test/bam && \ ls -lh phaseindel - name: phase from vcf - shell: micromamba-shell {0} if: always() run: | cp test/bam/sample1.bam test/bam/pineapple.bam && rename_bam -d pineapple1 test/bam/pineapple.bam - harpy phase --quiet 2 --vcf-samples -o phasevcf test/vcf/test.bcf test/bam + harpy phase --quiet 2 --vcf-samples -o phasevcf test/vcf/test.bcf test/bam && \ ls -lh phasevcf leviathan: @@ -446,31 +358,20 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true 
- post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl - + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: true + auth-host: prefix.dev + auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} + activate-environment: true - name: leviathan - shell: micromamba-shell {0} run: | harpy sv leviathan --quiet 2 -m 100 -s 80,80,80 -b 1 -x "-M 2002" test/genome/genome.fasta.gz test/bam && \ ls -lh SV/leviathan - name: leviathan-pop if: always() - shell: micromamba-shell {0} run: | harpy sv leviathan --quiet 2 -m 100 -s 80,80,80 -b 1 -o SV/leviathanpop -p test/samples.groups test/genome/genome.fasta.gz test/bam && \ ls -lh SV/leviathanpop @@ -485,43 +386,30 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl - + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: true + auth-host: prefix.dev + auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} + activate-environment: true - name: naibr - shell: micromamba-shell {0} run: | harpy sv naibr --quiet 2 -o SV/naibr test/genome/genome.fasta.gz test/bam_phased && \ ls -lh SV/naibr - name: naibr pop if: always() - shell: micromamba-shell {0} run: | harpy sv naibr --quiet 2 -o SV/pop -p test/samples.groups test/genome/genome.fasta.gz test/bam_phased && \ ls -lh SV/pop - name: naibr with phasing if: always() - shell: micromamba-shell {0} run: | harpy sv naibr --quiet 2 -o SV/phase -v test/vcf/test.phased.bcf test/genome/genome.fasta.gz test/bam && \ ls 
-lh SV/phase - name: naibr pop with phasing if: always() - shell: micromamba-shell {0} run: | harpy sv naibr --quiet 2 -o SV/phasepop -v test/vcf/test.phased.bcf -p test/samples.groups test/genome/genome.fasta.gz test/bam && \ ls -lh SV/phasepop @@ -536,32 +424,21 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl - + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: true + auth-host: prefix.dev + auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} + activate-environment: true - name: simulate random snps/indels - shell: micromamba-shell {0} run: | harpy simulate snpindel --quiet 2 --snp-count 10 --indel-count 10 -z 0.5 test/genome/genome.fasta.gz ls -lh Simulate/snpindel harpy simulate snpindel --quiet 2 --prefix Simulate/snpvcf --snp-vcf Simulate/snpindel/haplotype_1/sim.hap1.snp.vcf --indel-vcf Simulate/snpindel/haplotype_1/sim.hap1.indel.vcf test/genome/genome.fasta.gz && \ ls -lh Simulate - name: simulate inversions - shell: micromamba-shell {0} if: always() run: | harpy simulate inversion --quiet 2 --count 10 -z 0.5 test/genome/genome.fasta.gz @@ -569,7 +446,6 @@ jobs: harpy simulate inversion --quiet 2 --prefix Simulate/invvcf --vcf Simulate/inversion/haplotype_1/sim.hap1.inversion.vcf test/genome/genome.fasta.gz && \ ls -lh Simulate - name: simulate cnv - shell: micromamba-shell {0} if: always() run: | harpy simulate cnv --quiet 2 --count 10 -z 0.5 test/genome/genome.fasta.gz @@ -577,7 +453,6 @@ jobs: harpy simulate cnv --quiet 2 --prefix Simulate/cnvvcf --vcf Simulate/cnv/haplotype_1/sim.hap1.cnv.vcf test/genome/genome.fasta.gz 
&& \ ls -lh Simulate - name: simulate translocations - shell: micromamba-shell {0} if: always() run: | harpy simulate translocation --quiet 2 --count 10 -z 0.5 test/genome/genome.fasta.gz @@ -595,36 +470,25 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: true + auth-host: prefix.dev + auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} + activate-environment: true - name: Clear Space uses: jlumbroso/free-disk-space@main - name: test assembly - shell: micromamba-shell {0} run: | harpy assembly --quiet 2 -r 4000 test/fastq/sample1.* && \ ls -lh Assembly - name: test metassembly - shell: micromamba-shell {0} run: | harpy metassembly --quiet 2 --force -r 4000 test/fastq/sample1.* && \ ls -lh Metassembly - name: test metassembly without barcodes - shell: micromamba-shell {0} run: | harpy metassembly --unlinked --force --quiet 2 -r 4000 test/fastq/sample1.* && \ ls -lh Metassembly @@ -639,32 +503,19 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl -# - name: Clear Space -# uses: jlumbroso/free-disk-space@main + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 
+ cache: true + auth-host: prefix.dev + auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} + activate-environment: true - name: template impute - shell: micromamba-shell {0} run: harpy template impute - name: template groupings - shell: micromamba-shell {0} run: harpy template groupings test/fastq - name: template hpc - shell: micromamba-shell {0} run: | harpy template hpc-slurm harpy template hpc-googlebatch From 0878e1e9c356c28206557486d69be14aaccf519c Mon Sep 17 00:00:00 2001 From: pdimens Date: Mon, 20 Oct 2025 15:14:53 -0400 Subject: [PATCH 07/33] rm mamba --- .github/workflows/createrelease.yml | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/.github/workflows/createrelease.yml b/.github/workflows/createrelease.yml index bd513ecbf..052e252f6 100644 --- a/.github/workflows/createrelease.yml +++ b/.github/workflows/createrelease.yml @@ -51,30 +51,17 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup mamba - uses: mamba-org/setup-micromamba@v2 - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: false - post-cleanup: 'all' - - name: Install harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install dist/*.whl - pip install importlib-resources - - name: Clear space - run: rm -rf /opt/hostedtoolcache + - name: Install Harpy uses: prefix-dev/setup-pixi@v0.9.2 with: - pixi-version: v0.57.0 + pixi-version: v0.56.0 cache: true auth-host: prefix.dev auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} + activate-environment: true + - name: Clear space + run: rm -rf /opt/hostedtoolcache - name: Recreate containers - shell: micromamba-shell {0} run: harpy containerize - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 From 4e7006f0f59c77fd89c1e095fe0ccf318d9d038f Mon Sep 17 00:00:00 2001 From: pdimens Date: Mon, 20 Oct 2025 15:16:20 -0400 Subject: [PATCH 08/33] rm dep --- 
.github/workflows/tests.yml | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index aa44f0c48..9b216a6bb 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -161,10 +161,6 @@ jobs: auth-host: prefix.dev auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} activate-environment: true - - name: Install Harpy - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl - name: harpy deconvolve run: | harpy deconvolve --quiet 2 test/fastq && \ @@ -188,9 +184,6 @@ jobs: auth-host: prefix.dev auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} activate-environment: true - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl - name: test bwa run: | harpy align bwa --quiet 2 -x "-A 2" test/genome/genome.fasta.gz test/fastq && \ @@ -214,10 +207,6 @@ jobs: auth-host: prefix.dev auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} activate-environment: true - - name: Install Harpy - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl - name: test strobealign run: | harpy align strobe --quiet 2 test/genome/genome.fasta.gz test/fastq && \ @@ -241,10 +230,6 @@ jobs: auth-host: prefix.dev auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} activate-environment: true - - name: Install Harpy - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl - name: snp mpileup run: | harpy snp mpileup --quiet 2 -r test/positions.bed -x "--ignore-RG" test/genome/genome.fasta.gz test/bam && \ From a2efec82ebe701572318c6c259252a6bfada3b92 Mon Sep 17 00:00:00 2001 From: pdimens Date: Mon, 20 Oct 2025 15:18:03 -0400 Subject: [PATCH 09/33] rm auth --- .github/workflows/createrelease.yml | 2 -- .github/workflows/tests.yml | 32 ----------------------------- 2 files changed, 34 deletions(-) diff --git a/.github/workflows/createrelease.yml 
b/.github/workflows/createrelease.yml index 052e252f6..ac03a3263 100644 --- a/.github/workflows/createrelease.yml +++ b/.github/workflows/createrelease.yml @@ -56,8 +56,6 @@ jobs: with: pixi-version: v0.56.0 cache: true - auth-host: prefix.dev - auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} activate-environment: true - name: Clear space run: rm -rf /opt/hostedtoolcache diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 9b216a6bb..c7191a1de 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -67,8 +67,6 @@ jobs: with: pixi-version: v0.56.0 cache: true - auth-host: prefix.dev - auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} dmux_meier2021: needs: [changes, build] if: ${{ needs.changes.outputs.demux == 'true' }} @@ -84,8 +82,6 @@ jobs: with: pixi-version: v0.56.0 cache: true - auth-host: prefix.dev - auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} activate-environment: true - name: harpy demultiplex run: | @@ -107,8 +103,6 @@ jobs: with: pixi-version: v0.56.0 cache: true - auth-host: prefix.dev - auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} activate-environment: true - name: test validate fastq run: | @@ -135,8 +129,6 @@ jobs: with: pixi-version: v0.56.0 cache: true - auth-host: prefix.dev - auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} activate-environment: true - name: harpy qc run: | @@ -158,8 +150,6 @@ jobs: with: pixi-version: v0.56.0 cache: true - auth-host: prefix.dev - auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} activate-environment: true - name: harpy deconvolve run: | @@ -181,8 +171,6 @@ jobs: with: pixi-version: v0.56.0 cache: true - auth-host: prefix.dev - auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} activate-environment: true - name: test bwa run: | @@ -204,8 +192,6 @@ jobs: with: pixi-version: v0.56.0 cache: true - auth-host: prefix.dev - auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} activate-environment: true - name: test strobealign run: | @@ -227,8 +213,6 @@ jobs: with: pixi-version: v0.56.0 cache: true - 
auth-host: prefix.dev - auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} activate-environment: true - name: snp mpileup run: | @@ -254,8 +238,6 @@ jobs: with: pixi-version: v0.56.0 cache: true - auth-host: prefix.dev - auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} activate-environment: true - name: snp freebayes run: | @@ -281,8 +263,6 @@ jobs: with: pixi-version: v0.56.0 cache: true - auth-host: prefix.dev - auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} activate-environment: true - name: impute run: | @@ -314,8 +294,6 @@ jobs: with: pixi-version: v0.56.0 cache: true - auth-host: prefix.dev - auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} activate-environment: true - name: phase run: | @@ -348,8 +326,6 @@ jobs: with: pixi-version: v0.56.0 cache: true - auth-host: prefix.dev - auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} activate-environment: true - name: leviathan run: | @@ -376,8 +352,6 @@ jobs: with: pixi-version: v0.56.0 cache: true - auth-host: prefix.dev - auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} activate-environment: true - name: naibr run: | @@ -414,8 +388,6 @@ jobs: with: pixi-version: v0.56.0 cache: true - auth-host: prefix.dev - auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} activate-environment: true - name: simulate random snps/indels run: | @@ -460,8 +432,6 @@ jobs: with: pixi-version: v0.56.0 cache: true - auth-host: prefix.dev - auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} activate-environment: true - name: Clear Space uses: jlumbroso/free-disk-space@main @@ -493,8 +463,6 @@ jobs: with: pixi-version: v0.56.0 cache: true - auth-host: prefix.dev - auth-token: ${{ secrets.PREFIX_DEV_TOKEN }} activate-environment: true - name: template impute run: harpy template impute From f3d3771c6e0f3e86d161bdfa8096f568cd4a0679 Mon Sep 17 00:00:00 2001 From: pdimens Date: Mon, 20 Oct 2025 15:19:50 -0400 Subject: [PATCH 10/33] add pixi toml witout lock --- .gitignore | 2 +- pixi.toml | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 
pixi.toml diff --git a/.gitignore b/.gitignore index c45d82e55..5c18fb758 100644 --- a/.gitignore +++ b/.gitignore @@ -35,7 +35,7 @@ haplotag.bc _Inline # pixi environments .pixi -pixi* +pixi.lock .gitattributes *.egg-info .deprecated/ diff --git a/pixi.toml b/pixi.toml new file mode 100644 index 000000000..34be301b1 --- /dev/null +++ b/pixi.toml @@ -0,0 +1,24 @@ +[workspace] +name = "harpy" +authors = ["pdimens "] +channels = ["conda-forge", "bioconda"] +platforms = ["linux-64"] +version = "3.0.0" + +[tasks] + +[dependencies] +bcftools = "1.22.*" +click = ">=8.2" +conda = ">=24.8" +htslib = "1.22.*" +pysam = "0.23.*" +python = ">=3.11" +rich-click = "1.9.*" +snakemake-minimal = "9.*" +samtools = "1.22.*" +seqtk = "*" +apptainer = ">=1.4.2,<2" + +[pypi-dependencies] +harpy = { path = ".", editable = true} From 426c7ac460dd98f4ef1c9a1b25a1a5b6cbc1adf5 Mon Sep 17 00:00:00 2001 From: pdimens Date: Mon, 20 Oct 2025 15:21:01 -0400 Subject: [PATCH 11/33] rm cache --- .github/workflows/createrelease.yml | 2 +- .github/workflows/tests.yml | 32 ++++++++++++++--------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/workflows/createrelease.yml b/.github/workflows/createrelease.yml index ac03a3263..8284f5b6d 100644 --- a/.github/workflows/createrelease.yml +++ b/.github/workflows/createrelease.yml @@ -55,7 +55,7 @@ jobs: uses: prefix-dev/setup-pixi@v0.9.2 with: pixi-version: v0.56.0 - cache: true + cache: false activate-environment: true - name: Clear space run: rm -rf /opt/hostedtoolcache diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index c7191a1de..81bcebeb3 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -66,7 +66,7 @@ jobs: uses: prefix-dev/setup-pixi@v0.9.2 with: pixi-version: v0.56.0 - cache: true + cache: false dmux_meier2021: needs: [changes, build] if: ${{ needs.changes.outputs.demux == 'true' }} @@ -81,7 +81,7 @@ jobs: uses: prefix-dev/setup-pixi@v0.9.2 with: pixi-version: v0.56.0 
- cache: true + cache: false activate-environment: true - name: harpy demultiplex run: | @@ -102,7 +102,7 @@ jobs: uses: prefix-dev/setup-pixi@v0.9.2 with: pixi-version: v0.56.0 - cache: true + cache: false activate-environment: true - name: test validate fastq run: | @@ -128,7 +128,7 @@ jobs: uses: prefix-dev/setup-pixi@v0.9.2 with: pixi-version: v0.56.0 - cache: true + cache: false activate-environment: true - name: harpy qc run: | @@ -149,7 +149,7 @@ jobs: uses: prefix-dev/setup-pixi@v0.9.2 with: pixi-version: v0.56.0 - cache: true + cache: false activate-environment: true - name: harpy deconvolve run: | @@ -170,7 +170,7 @@ jobs: uses: prefix-dev/setup-pixi@v0.9.2 with: pixi-version: v0.56.0 - cache: true + cache: false activate-environment: true - name: test bwa run: | @@ -191,7 +191,7 @@ jobs: uses: prefix-dev/setup-pixi@v0.9.2 with: pixi-version: v0.56.0 - cache: true + cache: false activate-environment: true - name: test strobealign run: | @@ -212,7 +212,7 @@ jobs: uses: prefix-dev/setup-pixi@v0.9.2 with: pixi-version: v0.56.0 - cache: true + cache: false activate-environment: true - name: snp mpileup run: | @@ -237,7 +237,7 @@ jobs: uses: prefix-dev/setup-pixi@v0.9.2 with: pixi-version: v0.56.0 - cache: true + cache: false activate-environment: true - name: snp freebayes run: | @@ -262,7 +262,7 @@ jobs: uses: prefix-dev/setup-pixi@v0.9.2 with: pixi-version: v0.56.0 - cache: true + cache: false activate-environment: true - name: impute run: | @@ -293,7 +293,7 @@ jobs: uses: prefix-dev/setup-pixi@v0.9.2 with: pixi-version: v0.56.0 - cache: true + cache: false activate-environment: true - name: phase run: | @@ -325,7 +325,7 @@ jobs: uses: prefix-dev/setup-pixi@v0.9.2 with: pixi-version: v0.56.0 - cache: true + cache: false activate-environment: true - name: leviathan run: | @@ -351,7 +351,7 @@ jobs: uses: prefix-dev/setup-pixi@v0.9.2 with: pixi-version: v0.56.0 - cache: true + cache: false activate-environment: true - name: naibr run: | @@ -387,7 +387,7 @@ 
jobs: uses: prefix-dev/setup-pixi@v0.9.2 with: pixi-version: v0.56.0 - cache: true + cache: false activate-environment: true - name: simulate random snps/indels run: | @@ -431,7 +431,7 @@ jobs: uses: prefix-dev/setup-pixi@v0.9.2 with: pixi-version: v0.56.0 - cache: true + cache: false activate-environment: true - name: Clear Space uses: jlumbroso/free-disk-space@main @@ -462,7 +462,7 @@ jobs: uses: prefix-dev/setup-pixi@v0.9.2 with: pixi-version: v0.56.0 - cache: true + cache: false activate-environment: true - name: template impute run: harpy template impute From 26a6ef62fb3bf931353d41cd5e0950fdeb09ab1f Mon Sep 17 00:00:00 2001 From: pdimens Date: Mon, 20 Oct 2025 15:26:07 -0400 Subject: [PATCH 12/33] parsimony --- .github/workflows/createrelease.yml | 2 +- .github/workflows/tests.yml | 43 ++++++++++------------------- 2 files changed, 16 insertions(+), 29 deletions(-) diff --git a/.github/workflows/createrelease.yml b/.github/workflows/createrelease.yml index 8284f5b6d..64363ad6c 100644 --- a/.github/workflows/createrelease.yml +++ b/.github/workflows/createrelease.yml @@ -71,7 +71,7 @@ jobs: - name: Build and push uses: docker/build-push-action@v6 with: - context: .container/${{ matrix.env }} + context: container/${{ matrix.env }} push: true tags: pdimens/harpy:$${{ matrix.env }}_{{ github.ref_name }} diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 81bcebeb3..dca693e71 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -54,21 +54,8 @@ jobs: with: filters: .github/filters.yml - build: - name: Build and Install - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 1 - - name: Install Harpy - uses: prefix-dev/setup-pixi@v0.9.2 - with: - pixi-version: v0.56.0 - cache: false dmux_meier2021: - needs: [changes, build] + needs: changes if: ${{ needs.changes.outputs.demux == 'true' }} name: demux meier2021 runs-on: ubuntu-latest @@ -89,7 +76,7 @@ jobs: ls -lh 
Demultiplex validate: - needs: [changes, build] + needs: changes if: ${{ needs.changes.outputs.validate == 'true' }} name: validate runs-on: ubuntu-latest @@ -115,7 +102,7 @@ jobs: ls -lh Validate/bam qc: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.qc == 'true' }} name: qc runs-on: ubuntu-latest @@ -136,7 +123,7 @@ jobs: ls -lh QC deconvolve: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.deconvolve == 'true' }} name: deconvolve runs-on: ubuntu-latest @@ -157,7 +144,7 @@ jobs: ls -lh Deconvolve bwa: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.bwa == 'true' }} name: align BWA runs-on: ubuntu-latest @@ -178,7 +165,7 @@ jobs: ls -lh Align/bwa strobe: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.strobealign == 'true' }} name: align strobe runs-on: ubuntu-latest @@ -199,7 +186,7 @@ jobs: ls -lh Align/strobealign mpileup: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.mpileup == 'true' }} name: mpileup runs-on: ubuntu-latest @@ -224,7 +211,7 @@ jobs: ls -lh SNP/poptest freebayes: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.freebayes == 'true' }} name: freebayes runs-on: ubuntu-latest @@ -249,7 +236,7 @@ jobs: ls -lh SNP/poptest impute: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.impute == 'true' }} name: impute runs-on: ubuntu-latest @@ -280,7 +267,7 @@ jobs: ls -lh regionImpute/*/* phase: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.phase == 'true' }} name: phase runs-on: ubuntu-latest @@ -312,7 +299,7 @@ jobs: ls -lh phasevcf leviathan: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.leviathan == 'true' }} name: sv leviathan runs-on: ubuntu-latest @@ -338,7 +325,7 @@ jobs: ls -lh SV/leviathanpop naibr: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.naibr == 'true' }} name: sv naibr runs-on: ubuntu-latest @@ 
-374,7 +361,7 @@ jobs: ls -lh SV/phasepop simulate_variants: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.simvars == 'true' }} name: simulate variants runs-on: ubuntu-latest @@ -418,7 +405,7 @@ jobs: ls -lh Simulate assembly: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.assembly == 'true' }} name: assembly runs-on: ubuntu-latest @@ -449,7 +436,7 @@ jobs: ls -lh Metassembly other: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.other == 'true' }} name: miscellaneous runs-on: ubuntu-latest From 1223f4b785d3bcabca28f44d23953e7cc572331e Mon Sep 17 00:00:00 2001 From: pdimens Date: Mon, 20 Oct 2025 15:43:56 -0400 Subject: [PATCH 13/33] fix env --- harpy/snakefiles/metassembly.smk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/harpy/snakefiles/metassembly.smk b/harpy/snakefiles/metassembly.smk index 273a42a90..236750dfc 100644 --- a/harpy/snakefiles/metassembly.smk +++ b/harpy/snakefiles/metassembly.smk @@ -105,11 +105,11 @@ rule spades_assembly: resources: mem_mb=max_mem conda: - "envs/spades.yaml" + "envs/assembly.yaml" container: None shell: - "metaspades.py -t {threads} {params} -1 {input.fastq_R1C} -2 {input.fastq_R2C} -s {input.fastq_UNC} > {log}" + "metaspades -t {threads} {params} -1 {input.fastq_R1C} -2 {input.fastq_R2C} -s {input.fastq_UNC} > {log}" rule cloudspades_metassembly: input: From d90efca7c598f90aa7ee1bc72fcd23f883dd4d76 Mon Sep 17 00:00:00 2001 From: pdimens Date: Mon, 20 Oct 2025 16:34:22 -0400 Subject: [PATCH 14/33] fix envs --- harpy/commands/metassembly.py | 2 +- harpy/snakefiles/metassembly.smk | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/harpy/commands/metassembly.py b/harpy/commands/metassembly.py index 820b91a87..27f56211a 100644 --- a/harpy/commands/metassembly.py +++ b/harpy/commands/metassembly.py @@ -41,7 +41,7 @@ def metassembly(fastq_r1, fastq_r2, bx_tag, kmer_length, max_memory, unlinked, o """ workflow = 
Workflow("metassembly","metassembly.smk", output_dir, quiet) workflow.setup_snakemake(container, threads, hpc, snakemake) - workflow.conda = ["align", "assembly", "metassembly", "qc", "spades"] + workflow.conda = ["align", "assembly", "metassembly", "qc"] ## checks and validations ## fastq = FASTQ([fastq_r1,fastq_r2]) diff --git a/harpy/snakefiles/metassembly.smk b/harpy/snakefiles/metassembly.smk index 236750dfc..f4f8c67a3 100644 --- a/harpy/snakefiles/metassembly.smk +++ b/harpy/snakefiles/metassembly.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import logging @@ -83,7 +81,7 @@ rule error_correction: container: "docker://pdimens/harpy:assembly_latest" shell: - "metaspades -t {threads} {params} -1 {input.FQ_R1} -2 {input.FQ_R2} > {log}" + "metaspades.py -t {threads} {params} -1 {input.FQ_R1} -2 {input.FQ_R2} > {log}" rule spades_assembly: input: @@ -109,7 +107,7 @@ rule spades_assembly: container: None shell: - "metaspades -t {threads} {params} -1 {input.fastq_R1C} -2 {input.fastq_R2C} -s {input.fastq_UNC} > {log}" + "metaspades.py -t {threads} {params} -1 {input.fastq_R1C} -2 {input.fastq_R2C} -s {input.fastq_UNC} > {log}" rule cloudspades_metassembly: input: From 3191f9137d196def9e3f8cc6f10450c03b5cdfb8 Mon Sep 17 00:00:00 2001 From: pdimens Date: Tue, 21 Oct 2025 09:50:46 -0400 Subject: [PATCH 15/33] fixes --- .github/workflows/createrelease.yml | 62 ++++++++++++++--------------- harpy/common/create_pixi.py | 13 ++++-- harpy/snakefiles/metassembly.smk | 5 +-- pixi.toml | 6 +-- 4 files changed, 46 insertions(+), 40 deletions(-) diff --git a/.github/workflows/createrelease.yml b/.github/workflows/createrelease.yml index 64363ad6c..354b9d5fc 100644 --- a/.github/workflows/createrelease.yml +++ b/.github/workflows/createrelease.yml @@ -6,37 +6,6 @@ on: - '*' # Push events of any tag created jobs: - build_tarball: - name: Upload Release Tarball - runs-on: ubuntu-latest - permissions: - contents: write - pull-requests: write - 
repository-projects: write - steps: - - name: Checkout code - uses: actions/checkout@v4 - - name: Version the Container - # this removes the :*_latest tag and replaces with versioned container - run: | - for i in harpy/snakefiles/*.smk; do - sed -i "s/_latest/_${{ github.ref_name }}/g" $i - done - - name: Bump Harpy Version - # this removes the :latest tag and replaces with versioned container - run: | - sed -i "s/0\.0\.0/${{ github.ref_name }}/g" harpy/__main__.py - sed -i "s/0\.0\.0/${{ github.ref_name }}/g" pyproject.toml - - name: Build project - # This builds the release tarball, stripped of unneccessary things - run: | - mkdir artifacts - tar --exclude="test" --exclude=".deprecated" --exclude="resources" --exclude="artifacts" --exclude=".git" --exclude=".github" -zcvf artifacts/harpy.${{ github.ref_name }}.tar.gz . - - name: Create Release with Assets - uses: softprops/action-gh-release@v2 - with: - files: ./artifacts/harpy.${{ github.ref_name }}.tar.gz - build_versioned_containers: name: Build and Push versioned container runs-on: ubuntu-latest @@ -75,3 +44,34 @@ jobs: push: true tags: pdimens/harpy:$${{ matrix.env }}_{{ github.ref_name }} + build_tarball: + needs: build_versioned_containers + name: Upload Release Tarball + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + repository-projects: write + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Version the Container + # this removes the :*_latest tag and replaces with versioned container + run: | + for i in harpy/snakefiles/*.smk; do + sed -i "s/_latest/_${{ github.ref_name }}/g" $i + done + - name: Bump Harpy Version + # this removes the :latest tag and replaces with versioned container + run: | + sed -i "s/0\.0\.0/${{ github.ref_name }}/g" harpy/__main__.py + sed -i "s/0\.0\.0/${{ github.ref_name }}/g" pyproject.toml + - name: Build project + # This builds the release tarball, stripped of unneccessary things + run: | + mkdir artifacts + tar 
--exclude="test" --exclude=".deprecated" --exclude="resources" --exclude="artifacts" --exclude=".git" --exclude=".github" -zcvf artifacts/harpy.${{ github.ref_name }}.tar.gz . + - name: Create Release with Assets + uses: softprops/action-gh-release@v2 + with: + files: ./artifacts/harpy.${{ github.ref_name }}.tar.gz diff --git a/harpy/common/create_pixi.py b/harpy/common/create_pixi.py index d02d98aa4..566cd1a1b 100755 --- a/harpy/common/create_pixi.py +++ b/harpy/common/create_pixi.py @@ -107,11 +107,18 @@ def create_pixi_dockerfiles(): with open(f"container/{env}/Dockerfile", "w") as dockerfile: dockerfile.write(dockerfile_text) if env == "report": - subprocess.run(f"pixi init container/{env} -c conda-forge -c r".split()) + subprocess.run( + f"pixi init container/{env} -c conda-forge -c r".split(), + check = True + ) else: - subprocess.run(f"pixi init container/{env} -c conda-forge -c bioconda".split()) + subprocess.run( + f"pixi init container/{env} -c conda-forge -c bioconda".split(), + check = True + ) subprocess.run( - ["pixi", "add", "--no-progress", "--manifest-path", f"container/{env}/pixi.toml", *deps] + ["pixi", "add", "--no-progress", "--manifest-path", f"container/{env}/pixi.toml", *deps], + check = True ) shutil.rmtree("container/.pixi", ignore_errors=True) diff --git a/harpy/snakefiles/metassembly.smk b/harpy/snakefiles/metassembly.smk index f4f8c67a3..0e87ff0b7 100644 --- a/harpy/snakefiles/metassembly.smk +++ b/harpy/snakefiles/metassembly.smk @@ -221,14 +221,13 @@ rule athena_metassembly: params: force = "--force_reads" if force_athena else "", local_asm = "athena/results/olc/flye-input-contigs.fa", - final_asm = "athena/results/olc/athena.asm.fa", - result_dir = "athena" + final_asm = "athena/results/olc/athena.asm.fa" conda: "envs/metassembly.yaml" shell: """ athena-meta {params.force} --config {input.config} &> {log} &&\\ - mv {params.local_asm} {params.final_asm} {params.result_dir} + mv {params.local_asm} {params.final_asm} athena """ rule 
QUAST_assessment: diff --git a/pixi.toml b/pixi.toml index 34be301b1..172a5f61f 100644 --- a/pixi.toml +++ b/pixi.toml @@ -3,7 +3,7 @@ name = "harpy" authors = ["pdimens "] channels = ["conda-forge", "bioconda"] platforms = ["linux-64"] -version = "3.0.0" +version = "3.2.0" [tasks] @@ -14,8 +14,8 @@ conda = ">=24.8" htslib = "1.22.*" pysam = "0.23.*" python = ">=3.11" -rich-click = "1.9.*" -snakemake-minimal = "9.*" +rich-click >= "1.9.3" +snakemake-minimal >= "9.13" samtools = "1.22.*" seqtk = "*" apptainer = ">=1.4.2,<2" From 0a9d63ea02833dcd357a79e55ba4736d3705719e Mon Sep 17 00:00:00 2001 From: pdimens Date: Tue, 21 Oct 2025 09:53:14 -0400 Subject: [PATCH 16/33] add it for posterity, possibly remove later --- .github/workflows/createrelease.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/createrelease.yml b/.github/workflows/createrelease.yml index 354b9d5fc..5c91bd823 100644 --- a/.github/workflows/createrelease.yml +++ b/.github/workflows/createrelease.yml @@ -28,6 +28,8 @@ jobs: activate-environment: true - name: Clear space run: rm -rf /opt/hostedtoolcache +# - name: Clear Space +# uses: jlumbroso/free-disk-space@main - name: Recreate containers run: harpy containerize - name: Set up Docker Buildx From 0bb4d1d8d1ff35bd1564d2eebc8700567ce80d14 Mon Sep 17 00:00:00 2001 From: pdimens Date: Tue, 21 Oct 2025 09:54:21 -0400 Subject: [PATCH 17/33] fix the pixi toml --- pixi.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pixi.toml b/pixi.toml index 172a5f61f..7fd96fb71 100644 --- a/pixi.toml +++ b/pixi.toml @@ -14,8 +14,8 @@ conda = ">=24.8" htslib = "1.22.*" pysam = "0.23.*" python = ">=3.11" -rich-click >= "1.9.3" -snakemake-minimal >= "9.13" +rich-click = ">=1.9.3" +snakemake-minimal = ">=9.13" samtools = "1.22.*" seqtk = "*" apptainer = ">=1.4.2,<2" From 99bed86f08351119b675dffbf6795f4dda2f744c Mon Sep 17 00:00:00 2001 From: pdimens Date: Tue, 21 Oct 2025 09:59:17 -0400 Subject: [PATCH 18/33] updates --- 
harpy/scripts/separate_singletons.py | 81 ---------------------------- resources/harpy.yaml | 2 +- resources/meta.yaml | 6 +-- 3 files changed, 2 insertions(+), 87 deletions(-) delete mode 100755 harpy/scripts/separate_singletons.py diff --git a/harpy/scripts/separate_singletons.py b/harpy/scripts/separate_singletons.py deleted file mode 100755 index 92402876b..000000000 --- a/harpy/scripts/separate_singletons.py +++ /dev/null @@ -1,81 +0,0 @@ -#! /usr/bin/env python - -import os -import re -import sys -import argparse -import subprocess -import pysam - -def main(): - parser = argparse.ArgumentParser( - prog='separate_singletons', - description='Isolate singleton and non-singleton linked-read BAM records into separate files.', - usage = "separate_singletons -t threads -b barcode_tag -s singletons.bam input.bam > output.bam", - ) - parser.add_argument("-b", dest = "bx_tag", metavar = "barcode_tag", type=str, default = "BX", help="The header tag with the barcode (default: %(default)s)") - parser.add_argument("-s", dest = "singletons", metavar = "singletons_file", type=str, default = "singletons.bam", help="Name of output singleton file (default: %(default)s)") - parser.add_argument("-t", dest = "threads", metavar="threads", type=int, default = 4, help="Number of threads to use (default: %(default)s)") - parser.add_argument('input', type = str, help = "Input bam file") - if len(sys.argv) == 1: - parser.print_help(sys.stderr) - sys.exit(1) - - args = parser.parse_args() - if args.threads <1: - parser.error(f"Threads supplied to -t ({args.threads}) must be positive (e.g. 
>=1)") - if not os.path.exists(args.input): - parser.error(f"{args.input} was not found") - if len(args.bx_tag) != 2 or not args.bx_tag.isalnum(): - parser.error(f"The header tag supplied to -b ({args.bx_tag}) must be alphanumeric and exactly two characters long") - - invalid_pattern = re.compile(r'[AaBbCcDd]00') - sorted_bam = f"{args.input[:-4]}.bxsort.bam" - result = subprocess.run(f"samtools sort -@ {args.threads} -o {sorted_bam} -t {args.bx_tag} {args.input}".split(), stderr=sys.stderr) - if result.returncode != 0: - sys.stderr.write(f"Error: samtools sort failed with exit code {result.returncode}\n") - sys.exit(1) - with ( - pysam.AlignmentFile(sorted_bam, "rb", check_sq=False) as infile, - pysam.AlignmentFile(sys.stdout, "wb", template=infile) as nonsingleton, - pysam.AlignmentFile(args.singletons, "wb", template=infile) as singleton, - ): - record_store = [] - read_count = 0 - last_barcode = None - for record in infile: - try: - barcode = record.get_tag(args.bx_tag) - if isinstance(barcode, int): - pass # an int from an MI-type tag - elif invalid_pattern.search(barcode): - continue - except KeyError: - continue - # write the stored records when the barcode changes - if last_barcode and barcode != last_barcode: - target_file = nonsingleton if read_count > 1 else singleton - for record in record_store: - target_file.write(record) - - # reset the record store and read count - record_store = [] - read_count = 0 - - record_store.append(record) - if record.is_forward: - # +1 for a forward read, whether it is paired or not - read_count += 1 - elif record.is_reverse and not record.is_paired: - # +1 for reverse only if it's unpaired, so the paired read doesn't count twice - read_count += 1 - # update the last barcode with the current one - last_barcode = barcode - # After the for loop ends - if record_store: - target_file = nonsingleton if read_count > 1 else singleton - for i in record_store: - target_file.write(i) - - # final housekeeping to remove intermediate - 
os.remove(sorted_bam) \ No newline at end of file diff --git a/resources/harpy.yaml b/resources/harpy.yaml index dc2b067cd..77edd8d82 100644 --- a/resources/harpy.yaml +++ b/resources/harpy.yaml @@ -9,7 +9,7 @@ dependencies: - htslib =1.22 - pysam =0.23 - python >=3.11 - - rich-click =1.9 + - rich-click >=1.9.3 - snakemake-minimal =9 - samtools =1.22 - seqtk \ No newline at end of file diff --git a/resources/meta.yaml b/resources/meta.yaml index 77c6c5f02..fad72eff1 100644 --- a/resources/meta.yaml +++ b/resources/meta.yaml @@ -26,9 +26,7 @@ build: - concatenate_bam = harpy.scripts:concatenate_bam.main - count_bx = harpy.scripts:count_bx.main - create_simulation_data = harpy.scripts:create_simulation_data.main - - deconvolve_alignments = harpy.scripts:deconvolve_alignments.main - depth_windows = harpy.scripts:depth_windows.main - - extract_bxtags = harpy.scripts:extract_bxtags.main - haplotag_acbd = harpy.scripts:haplotag_acbd.main - haplotag_barcodes = harpy.scripts:haplotag_barcodes.main - infer_sv = harpy.scripts:infer_sv.main @@ -36,8 +34,6 @@ build: - molecule_coverage = harpy.scripts:molecule_coverage.main - parse_phaseblocks = harpy.scripts:parse_phaseblocks.main - rename_bam = harpy.scripts:rename_bam.main - - separate_singletons = harpy.scripts:separate_singletons.main - - separate_validbx = harpy.scripts:separate_validbx.main - standardize_barcodes_sam = harpy.scripts:standardize_barcodes_sam.main run_exports: - {{ pin_subpackage('harpy', max_pin="x") }} @@ -53,7 +49,7 @@ requirements: - conda >24.7 - htslib >=1.22 - pysam >=0.23 - - rich-click >=1.8 + - rich-click >=1.9.3 - snakemake-minimal >=9.0 - samtools >=1.22 - seqtk From 27af736b279aa7a743a70812478e09e6cdae78c8 Mon Sep 17 00:00:00 2001 From: pdimens Date: Tue, 21 Oct 2025 11:08:18 -0400 Subject: [PATCH 19/33] avoid conda stuff if container is specified --- harpy/commands/align.py | 8 +++---- harpy/commands/assembly.py | 4 ++-- harpy/commands/deconvolve.py | 4 ++-- harpy/commands/demultiplex.py | 4 
++-- harpy/commands/impute.py | 4 ++-- harpy/commands/metassembly.py | 4 ++-- harpy/commands/phase.py | 4 ++-- harpy/commands/qc.py | 4 ++-- harpy/commands/resume.py | 22 +++++++++--------- harpy/commands/snp.py | 8 +++---- harpy/commands/sv.py | 8 +++---- harpy/commands/validate.py | 8 +++---- harpy/common/file_ops.py | 19 ---------------- harpy/common/workflow.py | 42 +++++++++++++++++++++++------------ 14 files changed, 68 insertions(+), 75 deletions(-) diff --git a/harpy/commands/align.py b/harpy/commands/align.py index b62cd6a79..d13845349 100644 --- a/harpy/commands/align.py +++ b/harpy/commands/align.py @@ -51,8 +51,8 @@ def bwa(reference, inputs, output_dir, depth_window, unlinked, threads, keep_unm Presence and type of linked-read data is auto-detected, but can be deliberately ignored using `-U`. Setting `--molecule-distance` to `>0` activates alignment-distance based barcode deconvolution. """ - workflow = Workflow("align_bwa", "align_bwa.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("align_bwa", "align_bwa.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.reports = ["align_stats.qmd", "align_bxstats.qmd"] workflow.conda = ["align", "report", "qc"] @@ -122,8 +122,8 @@ def strobe(reference, inputs, output_dir, unlinked, keep_unmapped, depth_window, but can be deliberately ignored using `-U`. Setting `--molecule-distance` to `>0` activates alignment-distance based barcode deconvolution. 
""" - workflow = Workflow("align_strobe", "align_strobe.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("align_strobe", "align_strobe.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.reports = ["align_stats.qmd", "align_bxstats.qmd"] workflow.conda = ["align", "report", "qc"] diff --git a/harpy/commands/assembly.py b/harpy/commands/assembly.py index c701bac21..65f9150c2 100644 --- a/harpy/commands/assembly.py +++ b/harpy/commands/assembly.py @@ -46,8 +46,8 @@ def assembly(fastq_r1, fastq_r2, kmer_length, max_memory, output_dir, extra_para separated by commas and without spaces (e.g. `-k 15,23,51`). It is strongly recommended to first deconvolve the input FASTQ files with `harpy deconvolve`. """ - workflow = Workflow("assembly", "assembly.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("assembly", "assembly.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.conda = ["assembly","qc"] ## checks and validations ## diff --git a/harpy/commands/deconvolve.py b/harpy/commands/deconvolve.py index fb5e8a9a0..3086e0677 100644 --- a/harpy/commands/deconvolve.py +++ b/harpy/commands/deconvolve.py @@ -33,8 +33,8 @@ def deconvolve(inputs, output_dir, kmer_length, window_size, density, dropout, t `dropout` is set to `0`, meaning it will consider all barcodes, even clouds with singleton. 
""" is_arm(allowed = False) - workflow = Workflow("deconvolve", "deconvolve.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("deconvolve", "deconvolve.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.conda = ["deconvolution"] ## checks and validations ## diff --git a/harpy/commands/demultiplex.py b/harpy/commands/demultiplex.py index 35aa34714..a15fe2d8d 100644 --- a/harpy/commands/demultiplex.py +++ b/harpy/commands/demultiplex.py @@ -47,8 +47,8 @@ def meier2021(r12_fq, i12_fq, output_dir, schema, qx_rx, keep_unknown_samples, k `QX:Z` (barcode PHRED scores) and `RX:Z` (nucleotide barcode) tags in the sequence headers. These tags aren't used by any subsequent analyses, but may be useful for your own diagnostics. """ - workflow = Workflow("demultiplex_meier2021", "demultiplex_meier2021.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("demultiplex_meier2021", "demultiplex_meier2021.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.conda = ["demultiplex", "qc"] workflow.inputs = { diff --git a/harpy/commands/impute.py b/harpy/commands/impute.py index 6b6796247..4dab688c1 100644 --- a/harpy/commands/impute.py +++ b/harpy/commands/impute.py @@ -42,8 +42,8 @@ def impute(parameters, vcf, inputs, output_dir, region, grid_size, threads, vcf_ `contig:start-end-buffer`, otherwise all contigs will be imputed. If providing additional STITCH arguments, they must be in quotes and in the `--option=value` format, without spaces (e.g. `"--switchModelIteration=39"`). 
""" - workflow = Workflow("impute", "impute.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("impute", "impute.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.reports = ["impute.qmd", "stitch_collate.qmd"] workflow.conda = ["report", "stitch"] diff --git a/harpy/commands/metassembly.py b/harpy/commands/metassembly.py index 27f56211a..fd68186f6 100644 --- a/harpy/commands/metassembly.py +++ b/harpy/commands/metassembly.py @@ -39,8 +39,8 @@ def metassembly(fastq_r1, fastq_r2, bx_tag, kmer_length, max_memory, unlinked, o separated by commas and without spaces (e.g. `-k 15,23,51`). It is strongly recommended to first deconvolve the input FASTQ files with `harpy deconvolve`. """ - workflow = Workflow("metassembly","metassembly.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("metassembly","metassembly.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.conda = ["align", "assembly", "metassembly", "qc"] ## checks and validations ## diff --git a/harpy/commands/phase.py b/harpy/commands/phase.py index 3cf848536..b1d513570 100644 --- a/harpy/commands/phase.py +++ b/harpy/commands/phase.py @@ -43,8 +43,8 @@ def phase(vcf, inputs, output_dir, threads, unlinked, min_map_quality, min_base_ information with `-U`. Use `--vcf-samples` to phase only the samples present in your input `VCF` file rather than all the samples present in the `INPUT` alignments. 
""" - workflow = Workflow("phase", "phase.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("phase", "phase.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.reports = ["hapcut.qmd"] workflow.conda = ["phase", "report"] diff --git a/harpy/commands/qc.py b/harpy/commands/qc.py index 21c9d2104..813ee91fe 100644 --- a/harpy/commands/qc.py +++ b/harpy/commands/qc.py @@ -46,8 +46,8 @@ def qc(inputs, output_dir, unlinked, min_length, max_length, trim_adapters, dedu - `-d` removes optical PCR duplicates - recommended to skip at this step in favor of barcode-assisted deduplication after alignment """ - workflow = Workflow("qc", "qc.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("qc", "qc.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.reports = ["qc_bx_stats.qmd"] workflow.conda = ["qc", "report"] diff --git a/harpy/commands/resume.py b/harpy/commands/resume.py index 862608d8e..9555c258b 100644 --- a/harpy/commands/resume.py +++ b/harpy/commands/resume.py @@ -5,29 +5,28 @@ import re import yaml import rich_click as click -from harpy.common.conda import check_environments, create_conda_recipes +from harpy.common.conda import check_environments from harpy.common.printing import print_error, workflow_info from harpy.common.workflow import Workflow @click.command(no_args_is_help = True, context_settings={"allow_interspersed_args" : False}, epilog = "Documentation: https://pdimens.github.io/harpy/workflows/other") -@click.option('-c', '--conda', is_flag = True, default = False, help = 'Recreate the conda environments') @click.option('-a', '--absolute', is_flag = True, default = False, help = 'Call Snakemake with absolute paths') @click.option('-t', '--threads', type = click.IntRange(2, 999, clamp = True), help = 'Change the number of threads (>1)') 
@click.option('--quiet', default = 0, type = click.IntRange(0,2,clamp=True), help = '`0` all output, `1` progress bar, `2` no output') @click.argument('directory', required=True, type=click.Path(exists=True, file_okay=False, readable=True, resolve_path=True), nargs=1) -def resume(directory, conda, absolute, threads, quiet): +def resume(directory, absolute, threads, quiet): """ Continue an incomplete Harpy workflow In the event you need to run the Snakemake workflow present in a Harpy output directory (e.g. `Align/bwa`) without Harpy redoing validations and rewriting any of the configuration files, this command bypasses all the preprocessing steps of Harpy workflows and executes the Snakemake command - present in `directory/workflow/workflow.yaml`. It will reuse an existing `workflow/envs/` folder - to validate software dependencies, otherwise use `--conda` to create a populated one. + present in `directory/workflow/workflow.yaml`. The only requirements are: - the target directory has `workflow/config.yaml` present in it - - the targest directory has `workflow/envs/*.yaml` present in it + - the target directory has `workflow/workflow.yaml` present in it + - the targest directory has `workflow/envs/*.yaml` present in it (if using conda) """ CONFIG_FILE = os.path.join(directory, "workflow", "workflow.yaml") PROFILE_FILE = os.path.join(directory, "workflow", "config.yaml") @@ -41,12 +40,11 @@ def resume(directory, conda, absolute, threads, quiet): with open(PROFILE_FILE, 'r', encoding="utf-8") as f: snakemake_config = yaml.full_load(f) - workflow = Workflow(harpy_config["workflow"], "NA", snakemake_config["directory"], quiet) + is_conda = snakemake_config["software-deployment-method"] == "conda" + workflow = Workflow(harpy_config["workflow"], "NA", snakemake_config["directory"], is_conda, quiet) workflow.conda = harpy_config["snakemake"]["conda_envs"] - if conda: - create_conda_recipes(directory, workflow.conda) - else: + if is_conda: check_environments(directory, 
workflow.conda) sm_log = os.path.join(directory, harpy_config["snakemake"]["log"]) @@ -64,8 +62,8 @@ def resume(directory, conda, absolute, threads, quiet): workflow.snakemake_cmd_absolute = harpy_config["snakemake"]["absolute"] workflow.snakemake_cmd_relative = harpy_config["snakemake"]["relative"] - # pull in the inputs and store them, removing the original so it doesn't g - workflow.inputs = harpy_config["inputs"] + # pull in the inputs and store them, removing the original + workflow.inputs = harpy_config.pop("inputs") workflow.config = harpy_config workflow.start_text = workflow_info( ("Workflow:", workflow.name.replace("_", " ")), diff --git a/harpy/commands/snp.py b/harpy/commands/snp.py index 292b0f541..90425f2b7 100644 --- a/harpy/commands/snp.py +++ b/harpy/commands/snp.py @@ -56,8 +56,8 @@ def freebayes(reference, inputs, output_dir, threads, populations, ploidy, regio Optionally specify `--populations` for population-aware variant calling (**harpy template** can create that file). """ - workflow = Workflow("snp_freebayes", "snp_freebayes.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("snp_freebayes", "snp_freebayes.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.reports = ["bcftools_stats.qmd"] workflow.conda = ["report", "variants"] @@ -129,8 +129,8 @@ def mpileup(reference, inputs, output_dir, regions, threads, populations, ploidy Optionally specify `--populations` for population-aware variant calling (**harpy template** can create that file). 
""" - workflow = Workflow("snp_mpileup", "snp_mpileup.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("snp_mpileup", "snp_mpileup.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.reports = ["bcftools_stats.qmd"] workflow.conda = ["report"] diff --git a/harpy/commands/sv.py b/harpy/commands/sv.py index 683d7646a..def966a5f 100644 --- a/harpy/commands/sv.py +++ b/harpy/commands/sv.py @@ -59,8 +59,8 @@ def leviathan(inputs, output_dir, reference, min_size, min_barcodes, iterations, have to be the same across the different size classes. """ vcaller = "sv_leviathan" if not populations else "sv_leviathan_pop" - workflow = Workflow("sv_leviathan", f"{vcaller}.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("sv_leviathan", f"{vcaller}.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.reports = ["leviathan.qmd"] if populations: workflow.reports.append("leviathan_pop.qmd") @@ -142,8 +142,8 @@ def naibr(inputs, output_dir, reference, vcf, min_size, min_barcodes, min_qualit """ vcaller = "sv_naibr" if not populations else "sv_naibr_pop" vcaller += "_phase" if vcf else "" - workflow = Workflow("sv_naibr", f"{vcaller}.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("sv_naibr", f"{vcaller}.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.reports = ["naibr.qmd"] if populations: workflow.reports.append("naibr_pop.qmd") diff --git a/harpy/commands/validate.py b/harpy/commands/validate.py index 6a173e373..337aa640b 100755 --- a/harpy/commands/validate.py +++ b/harpy/commands/validate.py @@ -41,8 +41,8 @@ def bam(inputs, output_dir, threads, snakemake, quiet, hpc, container, setup_onl fix your data, but it will report the number of records that feature errors to 
help you diagnose if file formatting will cause downstream issues. """ - workflow = Workflow("validate_bam", "validate_bam.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("validate_bam", "validate_bam.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.reports = ["validate_bam.qmd"] workflow.conda = ["report"] @@ -86,8 +86,8 @@ def fastq(inputs, output_dir, threads, snakemake, quiet, hpc, container, setup_o of `TAG:TYPE:VALUE`. This **will not** fix your data, but it will report the number of reads that feature errors to help you diagnose if file formatting will cause downstream issues. """ - workflow = Workflow("validate_fastq", "validate_fastq.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("validate_fastq", "validate_fastq.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.reports = ["validate_fastq.qmd"] workflow.conda = ["report"] diff --git a/harpy/common/file_ops.py b/harpy/common/file_ops.py index e58349d94..62a4ed43d 100644 --- a/harpy/common/file_ops.py +++ b/harpy/common/file_ops.py @@ -5,25 +5,6 @@ import gzip import shutil from pathlib import Path -import importlib.resources as resources -from harpy.common.printing import print_error - -def fetch_snakefile(workdir: str, target: str) -> None: - """ - Retrieve the target harpy rule and write it into the workdir as workflow.smk - """ - os.makedirs(workdir, exist_ok= True) - dest_file = os.path.join(workdir,"workflow.smk") - source_file = resources.files("harpy.snakefiles") / target - try: - with resources.as_file(source_file) as _source: - shutil.copy2(_source, dest_file) - except (FileNotFoundError, KeyError): - print_error( - "snakefile missing", - f"The required snakefile [blue bold]{target}[/] was not found in the Harpy installation.", - "There may be an issue with your Harpy installation, 
which would require reinstalling Harpy. Alternatively, there may be an issue with your conda/mamba environment or configuration." - ) def filepath(infile: str) -> str: """returns a posix-formatted absolute path of infile""" diff --git a/harpy/common/workflow.py b/harpy/common/workflow.py index 78e78aecb..58f00ca3c 100644 --- a/harpy/common/workflow.py +++ b/harpy/common/workflow.py @@ -10,11 +10,10 @@ import urllib.request import urllib.error import yaml -from rich import print as rprint from rich import box from rich.table import Table from harpy.common.conda import create_conda_recipes -from harpy.common.file_ops import filepath, gzip_file, fetch_snakefile, purge_empty_logs +from harpy.common.file_ops import filepath, gzip_file, purge_empty_logs from harpy.common.printing import CONSOLE, print_error from harpy.common.launch import launch_snakemake from harpy.common.summaries import Summary @@ -23,9 +22,11 @@ class Workflow(): ''' The container for workflow parameters. Set inputdir = True to create a workflow/input directory ''' - def __init__(self, name, snakefile, outdir, quiet, inputdir = False): - creatdir = os.path.join(outdir, 'workflow') if not inputdir else os.path.join(outdir, 'workflow', 'input') - os.makedirs(creatdir, exist_ok = True) + def __init__(self, name, snakefile, outdir, container, quiet, inputdir = False): + os.makedirs( + os.path.join(outdir, 'workflow') if not inputdir else os.path.join(outdir, 'workflow', 'input'), + exist_ok = True + ) self.name: str = name self.output_directory: str = outdir self.workflow_directory = os.path.join(outdir, 'workflow') @@ -39,6 +40,7 @@ def __init__(self, name, snakefile, outdir, quiet, inputdir = False): self.config: Dict = {} self.profile: Dict = {} self.hpc: str = "" + self.container: bool = container self.conda: list[str] = [] self.start_text: None|Table = None self.quiet: bool = quiet @@ -56,7 +58,7 @@ def snakemake_log(self, outdir: str, workflow: str) -> str: increment = sorted([int(i.split(".")[1]) 
for i in attempts])[-1] + 1 return os.path.join("logs", "snakemake", f"{workflow}.{increment}.{timestamp}") - def setup_snakemake(self, container: bool, threads: int, hpc: str|None = None, sm_extra: str|None = None): + def setup_snakemake(self, threads: int, hpc: str|None = None, sm_extra: str|None = None): """ Sets up the snakemake command based on hpc, threads, and extra snakemake params. """ @@ -81,7 +83,7 @@ def setup_snakemake(self, container: bool, threads: int, hpc: str|None = None, s "rerun-triggers": ["mtime", "params"], "scheduler": "greedy", "nolock": True, - "software-deployment-method": "conda" if not container else "apptainer", + "software-deployment-method": "conda" if not self.container else "apptainer", "conda-prefix": filepath("./.environments"), "conda-cleanup-pkgs": "cache", "apptainer-prefix": filepath("./.environments"), @@ -164,10 +166,21 @@ def fetch_report_configs(self): ) def fetch_snakefile(self): - """ - Retrieve the target harpy rule and write it into the workdir as workflow.smk - """ - fetch_snakefile(self.workflow_directory, self.snakefile) + """ + Retrieve the target harpy rule and write it into the workdir as workflow.smk + """ + os.makedirs(self.workflow_directory, exist_ok= True) + dest_file = os.path.join(self.workflow_directory,"workflow.smk") + source_file = resources.files("harpy.snakefiles") / self.snakefile + try: + with resources.as_file(source_file) as _source: + shutil.copy2(_source, dest_file) + except (FileNotFoundError, KeyError): + print_error( + "snakefile missing", + f"The required snakefile [blue bold]{self.snakefile}[/] was not found in the Harpy installation.", + "There may be an issue with your Harpy installation, which would require reinstalling Harpy. Alternatively, there may be an issue with your conda/mamba environment or configuration." 
+ ) def fetch_script(self, target: str) -> None: """ @@ -180,7 +193,7 @@ def fetch_script(self, target: str) -> None: shutil.copy2(_source, dest_file) except (FileNotFoundError, KeyError): print_error( - "snakefile missing", + "script missing", f"The required script [blue bold]{target}[/] was not found in the Harpy installation.", "There may be an issue with your Harpy installation, which would require reinstalling Harpy. Alternatively, there may be an issue with your conda/mamba environment or configuration." ) @@ -226,7 +239,7 @@ def print_onstart(self): """Print a panel of info on workflow run""" if self.quiet == 2: return - rprint("") + CONSOLE.print("") CONSOLE.rule("[bold]harpy " + self.name.replace("_", " "), style = "light_steel_blue") CONSOLE.print(self.start_text) @@ -250,7 +263,8 @@ def initialize(self, setup_only: bool = False): """Using the configurations, create all necessary folders and files""" self.write_workflow_config() self.write_snakemake_profile() - create_conda_recipes(self.output_directory, self.conda) + if not self.container: + create_conda_recipes(self.output_directory, self.conda) self.fetch_snakefile() for i in self.reports: self.fetch_report(i) From 3f77b83579e20a63aa7861dcf3f7f11a509dd297 Mon Sep 17 00:00:00 2001 From: pdimens Date: Tue, 21 Oct 2025 11:11:27 -0400 Subject: [PATCH 20/33] specify the type, if only for linting --- harpy/commands/resume.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/harpy/commands/resume.py b/harpy/commands/resume.py index 9555c258b..759da0265 100644 --- a/harpy/commands/resume.py +++ b/harpy/commands/resume.py @@ -36,9 +36,9 @@ def resume(directory, absolute, threads, quiet): print_error("missing workflow config", f"Target directory [yellow]{directory}[/] does not contain the file [blue]workflow/workflow.yaml[/]") with open(CONFIG_FILE, 'r', encoding="utf-8") as f: - harpy_config = yaml.full_load(f) + harpy_config: dict = yaml.full_load(f) with open(PROFILE_FILE, 'r', 
encoding="utf-8") as f: - snakemake_config = yaml.full_load(f) + snakemake_config: dict = yaml.full_load(f) is_conda = snakemake_config["software-deployment-method"] == "conda" workflow = Workflow(harpy_config["workflow"], "NA", snakemake_config["directory"], is_conda, quiet) From 0c7b33bcf26a9b3e12031ec6728ada1ec64bd2ab Mon Sep 17 00:00:00 2001 From: pdimens Date: Tue, 21 Oct 2025 11:21:03 -0400 Subject: [PATCH 21/33] fix the setup call --- harpy/commands/simulate_variants.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/harpy/commands/simulate_variants.py b/harpy/commands/simulate_variants.py index 51347d09b..a1769f336 100644 --- a/harpy/commands/simulate_variants.py +++ b/harpy/commands/simulate_variants.py @@ -62,7 +62,7 @@ def snpindel(genome, snp_vcf, indel_vcf, only_vcf, output_dir, prefix, snp_count | `--indel-ratio` | insertions / deletions | insert. only | delet. only | """ workflow = Workflow("simulate_snpindel", "simulate_snpindel.smk", output_dir, quiet, True) - workflow.setup_snakemake(container, 2, hpc, snakemake) + workflow.setup_snakemake(2, hpc, snakemake) workflow.conda = ["simulations"] ## checks and validations ## @@ -157,7 +157,7 @@ def inversion(genome, vcf, only_vcf, prefix, output_dir, count, min_size, max_si Use `--only-vcf` alongside `--heterozygosity` to only generate the second VCF file and not simulate a second FASTA file. 
""" workflow = Workflow("simulate_inversion", "simulate_variants.smk", output_dir, quiet, True) - workflow.setup_snakemake(container, 2, hpc, snakemake) + workflow.setup_snakemake(2, hpc, snakemake) workflow.conda = ["simulations"] ## checks and validations ## @@ -248,7 +248,7 @@ def cnv(genome, output_dir, vcf, only_vcf, prefix, count, min_size, max_size, du | `--gain-ratio` | copy gain / loss | gain only | loss only | """ workflow = Workflow("simulate_cnv", "simulate_variants.smk", output_dir, quiet, True) - workflow.setup_snakemake(container, 2, hpc, snakemake) + workflow.setup_snakemake(2, hpc, snakemake) workflow.conda = ["simulations"] ## checks and validations ## @@ -330,7 +330,7 @@ def translocation(genome, output_dir, prefix, vcf, only_vcf, count, centromeres, Use `--only-vcf` alongside `--heterozygosity` to only generate the second VCF file and not simulate a second FASTA file. """ workflow = Workflow("simulate_translocation", "simulate_variants.smk", output_dir, quiet, True) - workflow.setup_snakemake(container, 2, hpc, snakemake) + workflow.setup_snakemake(2, hpc, snakemake) workflow.conda = ["simulations"] ## checks and validations ## From 99df6b8908b27dfff081d5217b594850d5d4d238 Mon Sep 17 00:00:00 2001 From: pdimens Date: Tue, 21 Oct 2025 11:31:58 -0400 Subject: [PATCH 22/33] fix calls --- harpy/commands/simulate_variants.py | 8 ++++---- harpy/common/system_ops.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/harpy/commands/simulate_variants.py b/harpy/commands/simulate_variants.py index a1769f336..22b90211a 100644 --- a/harpy/commands/simulate_variants.py +++ b/harpy/commands/simulate_variants.py @@ -61,7 +61,7 @@ def snpindel(genome, snp_vcf, indel_vcf, only_vcf, output_dir, prefix, snp_count | `--snp-ratio` | transitions / transversions | transit. only | transv. only | | `--indel-ratio` | insertions / deletions | insert. only | delet. 
only | """ - workflow = Workflow("simulate_snpindel", "simulate_snpindel.smk", output_dir, quiet, True) + workflow = Workflow("simulate_snpindel", "simulate_snpindel.smk", output_dir, container, quiet, True) workflow.setup_snakemake(2, hpc, snakemake) workflow.conda = ["simulations"] @@ -156,7 +156,7 @@ def inversion(genome, vcf, only_vcf, prefix, output_dir, count, min_size, max_si To simulate a diploid genome with heterozygous and homozygous variants, set `--heterozygosity` to a value greater than `0`. Use `--only-vcf` alongside `--heterozygosity` to only generate the second VCF file and not simulate a second FASTA file. """ - workflow = Workflow("simulate_inversion", "simulate_variants.smk", output_dir, quiet, True) + workflow = Workflow("simulate_inversion", "simulate_variants.smk", output_dir, container, quiet, True) workflow.setup_snakemake(2, hpc, snakemake) workflow.conda = ["simulations"] @@ -247,7 +247,7 @@ def cnv(genome, output_dir, vcf, only_vcf, prefix, count, min_size, max_size, du | `--dup-ratio` | tandem / dispersed | tand. only | disp. only | | `--gain-ratio` | copy gain / loss | gain only | loss only | """ - workflow = Workflow("simulate_cnv", "simulate_variants.smk", output_dir, quiet, True) + workflow = Workflow("simulate_cnv", "simulate_variants.smk", output_dir, container, quiet, True) workflow.setup_snakemake(2, hpc, snakemake) workflow.conda = ["simulations"] @@ -329,7 +329,7 @@ def translocation(genome, output_dir, prefix, vcf, only_vcf, count, centromeres, To simulate a diploid genome with heterozygous and homozygous variants, set `--heterozygosity` to a value greater than `0`. Use `--only-vcf` alongside `--heterozygosity` to only generate the second VCF file and not simulate a second FASTA file. 
""" - workflow = Workflow("simulate_translocation", "simulate_variants.smk", output_dir, quiet, True) + workflow = Workflow("simulate_translocation", "simulate_variants.smk", output_dir, container, quiet, True) workflow.setup_snakemake(2, hpc, snakemake) workflow.conda = ["simulations"] diff --git a/harpy/common/system_ops.py b/harpy/common/system_ops.py index d1cf57f14..146387595 100644 --- a/harpy/common/system_ops.py +++ b/harpy/common/system_ops.py @@ -104,7 +104,7 @@ def container_ok(ctx, param, value) -> bool: Check if the system is linux or has apptainer installed """ if value: - if os.sys.platform != 'linux': + if platform.system().lower() != 'linux': raise click.BadParameter( "Snakemake uses Apptainer (formerly Singularity) to manage containers, which is only available for Linux systems.", ctx, param ) From eefecad8cff66a5b209b11139492b9eaa1b42c6d Mon Sep 17 00:00:00 2001 From: pdimens Date: Tue, 21 Oct 2025 11:34:33 -0400 Subject: [PATCH 23/33] exit early if everything is fine --- harpy/common/system_ops.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/harpy/common/system_ops.py b/harpy/common/system_ops.py index 146387595..81f404af3 100644 --- a/harpy/common/system_ops.py +++ b/harpy/common/system_ops.py @@ -104,12 +104,13 @@ def container_ok(ctx, param, value) -> bool: Check if the system is linux or has apptainer installed """ if value: + if shutil.which("apptainer"): + return value + if platform.system().lower() != 'linux': raise click.BadParameter( "Snakemake uses Apptainer (formerly Singularity) to manage containers, which is only available for Linux systems.", ctx, param ) - if shutil.which("apptainer"): - return value else: raise click.BadParameter( "Container software management requires apptainer, which wasn't detected in this environment.", ctx, param From 971f7bf298aa213d55dc8644fb3a4f974c7eb900 Mon Sep 17 00:00:00 2001 From: pdimens Date: Tue, 21 Oct 2025 12:25:21 -0400 Subject: [PATCH 24/33] update the API --- 
harpy/commands/environments.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/harpy/commands/environments.py b/harpy/commands/environments.py index 7ebc4cd7f..4d60402ba 100644 --- a/harpy/commands/environments.py +++ b/harpy/commands/environments.py @@ -48,7 +48,7 @@ def conda(workflows): - stitch - variants """ - workflow = Workflow("localenv", "environments.smk", "localenv/", 1) + workflow = Workflow("localenv", "environments.smk", "localenv/", False, 1) # if "all" was mixed with other workflows, default to just all and avoid doubling up create_conda_recipes(workflow.output_directory) if "all" in workflows: @@ -66,12 +66,12 @@ def conda(workflows): @click.command(context_settings={"help_option_names" : ["-h", "--help"]}) def container(): """ - Install workflow dependency container + Install workflow dependency containers Manually pull the harpy dependency container from dockerhub and convert it into an Apptainer .sif. To use, run this command again without arguments. 
""" - workflow = Workflow("localcontainer", "environments.smk", "localenv/", 1) + workflow = Workflow("localcontainer", "environments.smk", "localenv/", True, 1) workflow.fetch_snakefile() workflow.snakemake_cmd_relative = " ".join(["snakemake", "-s", os.path.join(workflow.workflow_directory, "workflow.smk"), "--sdm", "conda apptainer", "--cores 2", "--apptainer-prefix ../.environments", "--directory localenv"]) workflow.launch() From 3381f7ad73027552d38d170dad8ed41db444b8d3 Mon Sep 17 00:00:00 2001 From: pdimens Date: Tue, 21 Oct 2025 12:32:29 -0400 Subject: [PATCH 25/33] more obvious logic --- harpy/commands/resume.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/harpy/commands/resume.py b/harpy/commands/resume.py index 759da0265..45539d5a7 100644 --- a/harpy/commands/resume.py +++ b/harpy/commands/resume.py @@ -40,11 +40,11 @@ def resume(directory, absolute, threads, quiet): with open(PROFILE_FILE, 'r', encoding="utf-8") as f: snakemake_config: dict = yaml.full_load(f) - is_conda = snakemake_config["software-deployment-method"] == "conda" - workflow = Workflow(harpy_config["workflow"], "NA", snakemake_config["directory"], is_conda, quiet) + container = snakemake_config["software-deployment-method"] == "apptainer" + workflow = Workflow(harpy_config["workflow"], "NA", snakemake_config["directory"], container, quiet) workflow.conda = harpy_config["snakemake"]["conda_envs"] - if is_conda: + if not container: check_environments(directory, workflow.conda) sm_log = os.path.join(directory, harpy_config["snakemake"]["log"]) From 8373ebb0824e39b00af019eee4de5127a67e1cc0 Mon Sep 17 00:00:00 2001 From: pdimens Date: Tue, 21 Oct 2025 16:54:01 -0400 Subject: [PATCH 26/33] adjust phasing logic --- harpy/snakefiles/sv_naibr_phase.smk | 6 +++--- harpy/snakefiles/sv_naibr_pop_phase.smk | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/harpy/snakefiles/sv_naibr_phase.smk b/harpy/snakefiles/sv_naibr_phase.smk index a54d445ab..9e162cfbe 
100644 --- a/harpy/snakefiles/sv_naibr_phase.smk +++ b/harpy/snakefiles/sv_naibr_phase.smk @@ -11,7 +11,7 @@ wildcard_constraints: genomefile = config["inputs"]["reference"] bamlist = config["inputs"]["alignments"] -bamdict = dict(zip(bamlist, bamlist)) +#bamdict = dict(zip(bamlist, bamlist)) vcffile = config["inputs"]["vcf"] samplenames = {Path(i).stem for i in bamlist} extra = config.get("extra", None) @@ -77,9 +77,9 @@ rule preprocess_reference: rule index_alignments: input: - lambda wc: bamdict[wc.bam] + get_alignments output: - "{bam}.bai" + "{sample}.bai" shell: "samtools index {input}" diff --git a/harpy/snakefiles/sv_naibr_pop_phase.smk b/harpy/snakefiles/sv_naibr_pop_phase.smk index 5cd743db8..25dc932f5 100644 --- a/harpy/snakefiles/sv_naibr_pop_phase.smk +++ b/harpy/snakefiles/sv_naibr_pop_phase.smk @@ -114,9 +114,9 @@ rule index_snps_gz: rule index_alignments: input: - lambda wc: bamdict[wc.bam] + get_alignments output: - "{bam}.bai" + "{sample}.bai" shell: "samtools index {input}" From 008fce5d08e5b928dd1dabbf60b5f1cdae4a0a88 Mon Sep 17 00:00:00 2001 From: pdimens Date: Tue, 21 Oct 2025 17:13:37 -0400 Subject: [PATCH 27/33] simply package imports again --- harpy/reports/align_bxstats.qmd | 16 +++++----------- harpy/reports/align_stats.qmd | 14 ++++---------- harpy/reports/bcftools_stats.qmd | 17 ++++++----------- harpy/reports/hapcut.qmd | 17 ++++++----------- harpy/reports/impute.qmd | 17 ++++++----------- harpy/reports/leviathan.qmd | 16 +++++----------- harpy/reports/leviathan_pop.qmd | 18 +++++++----------- harpy/reports/naibr.qmd | 16 +++++----------- harpy/reports/naibr_pop.qmd | 18 +++++++----------- harpy/reports/qc_bx_stats.qmd | 18 +++++++----------- harpy/reports/stitch_collate.qmd | 17 ++++++----------- harpy/reports/validate_bam.qmd | 16 +++++----------- harpy/reports/validate_fastq.qmd | 16 +++++----------- 13 files changed, 74 insertions(+), 142 deletions(-) diff --git a/harpy/reports/align_bxstats.qmd b/harpy/reports/align_bxstats.qmd 
index 45284e3bf..c306c7e22 100644 --- a/harpy/reports/align_bxstats.qmd +++ b/harpy/reports/align_bxstats.qmd @@ -5,17 +5,11 @@ params: --- `r format(Sys.time(), '🗓️ %d %B, %Y 🕔 %H:%M')` -```{r imports, results = F} -using<-function(...) { - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("dplyr","tidyr", "highcharter", "DT") +```{r package_imports, results = F} + library(dplyr) +library(tidyr) +library(highcharter) +library(DT) ``` ```{r nxx_and_process_funs, results = F} diff --git a/harpy/reports/align_stats.qmd b/harpy/reports/align_stats.qmd index 53deafc9b..999d75dbe 100644 --- a/harpy/reports/align_stats.qmd +++ b/harpy/reports/align_stats.qmd @@ -22,16 +22,10 @@ params: # Barcode Stats ```{r package_imports, results = F} -using<-function(...) { - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("dplyr","highcharter","DT","BioCircos") +library(dplyr) +library(highcharter) +library(DT) +library(BioCircos) ``` ```{r import_file, results = F} diff --git a/harpy/reports/bcftools_stats.qmd b/harpy/reports/bcftools_stats.qmd index 23385e578..5b22de728 100644 --- a/harpy/reports/bcftools_stats.qmd +++ b/harpy/reports/bcftools_stats.qmd @@ -6,17 +6,12 @@ params: --- `r format(Sys.time(), '🗓️ %d %B, %Y 🕔 %H:%M')` -```{r load environment} -using<-function(...) 
{ - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("magrittr","tidyr","DT","highcharter","scales") +```{r package_imports, results = F} +library(magrittr) +library(tidyr) +library(DT) +library(highcharter) +library(scales) ``` ```{r} diff --git a/harpy/reports/hapcut.qmd b/harpy/reports/hapcut.qmd index d09992593..2c2dc580a 100644 --- a/harpy/reports/hapcut.qmd +++ b/harpy/reports/hapcut.qmd @@ -6,17 +6,12 @@ params: --- `r format(Sys.time(), '🗓️ %d %B, %Y 🕔 %H:%M')` -```{r package_imports} -using<-function(...) { - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("dplyr","DT","scales","plotly","highcharter") +```{r package_imports, results = F} +library(dplyr) +library(DT) +library(highcharter) +library(plotly) +library(scales) ``` ```{r read_input} diff --git a/harpy/reports/impute.qmd b/harpy/reports/impute.qmd index fc6e4a39c..c87530078 100644 --- a/harpy/reports/impute.qmd +++ b/harpy/reports/impute.qmd @@ -17,17 +17,12 @@ params: # General Stats -```{r package_imports} -using<-function(...) 
{ - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("dplyr","tidyr","DT","highcharter","scales") +```{r package_imports, results = F} +library(dplyr) +library(tidyr) +library(DT) +library(highcharter) +library(scales) ``` ```{r read_input} diff --git a/harpy/reports/leviathan.qmd b/harpy/reports/leviathan.qmd index 9184c9c65..7e0bfd683 100644 --- a/harpy/reports/leviathan.qmd +++ b/harpy/reports/leviathan.qmd @@ -16,17 +16,11 @@ params: --- `r format(Sys.time(), '🗓️ %d %B, %Y 🕔 %H:%M')` -```{r package_imports} -using<-function(...) { - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("dplyr","DT","BioCircos", "htmltools") +```{r package_imports, results = F} +library(BioCircos) +library(dplyr) +library(DT) +library(htmltools) ``` # General Stats diff --git a/harpy/reports/leviathan_pop.qmd b/harpy/reports/leviathan_pop.qmd index e44f0117b..ce3326210 100644 --- a/harpy/reports/leviathan_pop.qmd +++ b/harpy/reports/leviathan_pop.qmd @@ -15,17 +15,13 @@ params: --- `r format(Sys.time(), '🗓️ %d %B, %Y 🕔 %H:%M')` -```{r package_imports} -using<-function(...) 
{ - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("dplyr", "tidyr","DT", "viridisLite", "BioCircos", "tools") +```{r package_imports, results = F} +library(BioCircos) +library(dplyr) +library(DT) +library(tidyr) +library(tools) +library(viridisLite) ``` ```{r read_inputs} diff --git a/harpy/reports/naibr.qmd b/harpy/reports/naibr.qmd index fbd49a4fe..ffab45b24 100644 --- a/harpy/reports/naibr.qmd +++ b/harpy/reports/naibr.qmd @@ -39,17 +39,11 @@ tryCatch( ) ``` -```{r package_imports} -using<-function(...) { - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("dplyr","tidyr","DT", "BioCircos") +```{r package_imports, results = F} +library(BioCircos) +library(dplyr) +library(DT) +library(tidyr) ``` ```{r process_sv} diff --git a/harpy/reports/naibr_pop.qmd b/harpy/reports/naibr_pop.qmd index 515d7ed4c..eb43e741d 100644 --- a/harpy/reports/naibr_pop.qmd +++ b/harpy/reports/naibr_pop.qmd @@ -16,17 +16,13 @@ params: `r format(Sys.time(), '🗓️ %d %B, %Y 🕔 %H:%M')` # General Stats -```{r package_imports} -using<-function(...) 
{ - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("dplyr", "tidyr","DT","BioCircos", "viridisLite", "tools") +```{r package_imports, results = F} +library(BioCircos) +library(dplyr) +library(DT) +library(tidyr) +library(tools) +library(viridisLite) ``` ```{r readvariants_func} diff --git a/harpy/reports/qc_bx_stats.qmd b/harpy/reports/qc_bx_stats.qmd index 2b90a3a30..ab0f1a327 100644 --- a/harpy/reports/qc_bx_stats.qmd +++ b/harpy/reports/qc_bx_stats.qmd @@ -5,18 +5,14 @@ params: --- `r format(Sys.time(), '🗓️ %d %B, %Y 🕔 %H:%M')` -```{r package_imports} -using<-function(...) { - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("dplyr","tidyr","DT","highcharter","scales") +```{r package_imports, results = F} +library(dplyr) +library(DT) +library(highcharter) +library(scales) +library(tidyr) ``` + # Overview ##

General Per-Sample Barcode Statistics

diff --git a/harpy/reports/stitch_collate.qmd b/harpy/reports/stitch_collate.qmd index a8f7210ec..edee6d8b3 100644 --- a/harpy/reports/stitch_collate.qmd +++ b/harpy/reports/stitch_collate.qmd @@ -13,17 +13,12 @@ params: extra: "None" --- `r format(Sys.time(), '🗓️ %d %B, %Y 🕔 %H:%M')` -```{r setup environment} -using<-function(...) { - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("tidyr","magrittr","DT", "scales") + +```{r package_imports, results = F} +library(DT) +library(magrittr) +library(tidyr) +library(scales) ``` ```{r load data} diff --git a/harpy/reports/validate_bam.qmd b/harpy/reports/validate_bam.qmd index 4bc95e609..cdaa26222 100644 --- a/harpy/reports/validate_bam.qmd +++ b/harpy/reports/validate_bam.qmd @@ -6,17 +6,11 @@ params: --- _`r format(Sys.time(), '🗓️ %d %B, %Y 🕔 %H:%M')`_ -```{r package_imports} -using<-function(...) { - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("dplyr","tidyr","DT","scales") +```{r package_imports, results = F} +library(dplyr) +library(DT) +library(tidyr) +library(scales) ``` ```{r read_inputs} diff --git a/harpy/reports/validate_fastq.qmd b/harpy/reports/validate_fastq.qmd index 71c5dfe86..7acba5335 100644 --- a/harpy/reports/validate_fastq.qmd +++ b/harpy/reports/validate_fastq.qmd @@ -6,17 +6,11 @@ params: --- `r format(Sys.time(), '🗓️ %d %B, %Y 🕔 %H:%M')` -```{r} -using<-function(...) 
{ - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("dplyr","tidyr","DT","scales") +```{r package_imports, results = F} +library(dplyr) +library(DT) +library(scales) +library(tidyr) ``` ```{r} From 62da34c2fc1fddbf62946768a92225c27989daf3 Mon Sep 17 00:00:00 2001 From: pdimens Date: Wed, 22 Oct 2025 12:18:18 -0400 Subject: [PATCH 28/33] add new diagnose features --- harpy/commands/diagnose.py | 117 +++++++++++++++++++++++++++++++++++-- harpy/common/launch.py | 5 +- harpy/common/printing.py | 19 ++++++ harpy/common/workflow.py | 2 +- 4 files changed, 134 insertions(+), 9 deletions(-) diff --git a/harpy/commands/diagnose.py b/harpy/commands/diagnose.py index 44d2ba5ed..2d90e0c15 100644 --- a/harpy/commands/diagnose.py +++ b/harpy/commands/diagnose.py @@ -1,15 +1,26 @@ +import glob import os import sys import yaml import subprocess import rich_click as click -from harpy.common.printing import print_error, CONSOLE +from harpy.common.printing import print_error, CONSOLE, print_shellcmd_simple +from harpy.common.file_ops import safe_read + +@click.group(options_metavar='', context_settings={"help_option_names" : []}) +def diagnose(): + """ + Attempt to resolve workflow errors + """ @click.command(no_args_is_help = True, context_settings={"allow_interspersed_args" : False}) @click.argument('directory', required=True, type=click.Path(exists=True, file_okay=False)) -def diagnose(directory): +def stall(directory): """ - Run the Snakemake debugger to identify hang-ups + Run the Snakemake debugger to identify why a workflow stalled + + This will run Snakemake with the `--dry-run` and `--debug-dag` options, + printing the diagnostics to the terminal. 
""" directory = directory.rstrip("/") PROFILE_FILE = os.path.join(directory, "workflow", "config.yaml") @@ -62,8 +73,104 @@ def diagnose(directory): CONSOLE.print(output, end="", style = "yellow") except Exception as e: CONSOLE.print("") - #CONSOLE.print(f"{e}") CONSOLE.rule("[bold]End of diagnosis", style = "yellow") process.terminate() process.wait() - sys.exit(1) \ No newline at end of file + sys.exit(1) + +@click.command(no_args_is_help = True, context_settings={"allow_interspersed_args" : False}) +@click.argument('directory', required=True, type=click.Path(exists=True, file_okay=False)) +def snakemake(directory): + """ + Run the Snakemake workflow directly without Harpy intervention + + This will run Snakemake without any of the convenience features, + which can sometimes reveal errors that weren't captured by Harpy. Requries + the `workflow/config.yaml` and `workflow/workflow.yaml` files to be in + `DIRECTORY`. + """ + directory = directory.rstrip("/") + PROFILE_FILE = os.path.join(directory, "workflow", "config.yaml") + CONFIG_FILE = os.path.join(directory, "workflow", "workflow.yaml") + + if not os.path.exists(CONFIG_FILE): + print_error("missing workflow config", f"Target directory [blue]{directory}[/] does not contain the file [bold]workflow/workflow.yaml[/]") + if not os.path.exists(PROFILE_FILE): + print_error("missing snakemake config", f"Target directory [blue]{directory}[/] does not contain the file [bold]workflow/config.yaml[/]") + + with open(CONFIG_FILE, 'r', encoding="utf-8") as f: + harpy_config = yaml.full_load(f) + + command = harpy_config["snakemake"]["absolute"] + + os.system(command) + +@click.command(no_args_is_help = True, context_settings={"allow_interspersed_args" : False}) +@click.argument('directory', required=True, type=click.Path(exists=True, file_okay=False)) +def rule(directory): + """ + Directly run the first rule that caused the workflow failure + + The rule is identified in the most recent Snakemake log file in + `DIRECTORY` as 
the first one with the text `Error in rule ____`. + """ + directory = directory.rstrip("/") + if not os.path.exists(f'{directory}/logs/snakemake/'): + print_error("missing log folder", f"Target directory [blue]{directory}[/] does not contain the folder [bold]logs/snakemake[/]") + # get the lastest snakemake log file + list_of_files = glob.glob(f'{directory}/logs/snakemake/*') + latest_log = max(list_of_files, key=os.path.getctime) + + CONSOLE.rule(f"Latest log: {os.path.basename(latest_log)}", style = "yellow") + _found = False + _shellblock = False + conda = "" + container = "" + cmd = [] + with safe_read(latest_log) as logfile: + for line in logfile: + if "Error in rule" in line: + CONSOLE.print(f"Failing rule: {line.strip().removeprefix('Error in rule').rstrip(':')}", style = "yellow") + _found = True + continue + if _found: + if "conda-env:" in line: + conda = "source activate " + line.strip().split()[-1] + continue + if "container:" in line: + container = line.strip().split()[-1] + if "shell:" in line: + _shellblock = True + _ = line.strip().replace("shell:","").split() + if _: + cmd.append(_) + continue + if _shellblock: + if line.strip() == "(command exited with non-zero exit code)": + break + cmd.append(line.strip()) + + if conda: + print_shellcmd_simple("\n".join(cmd)) + os.system("\n".join([conda, f"cd {directory}", *cmd])) + elif container: + print_shellcmd_simple(f""" +apptainer exec {container} bash -c ' +{"\n".join([*cmd])} +' +""" + ) + os.system(f""" +cd {directory} +apptainer exec {container} bash -c ' +{"\n".join([*cmd])} +' +""" + ) + else: + print_shellcmd_simple("\n".join(cmd)) + os.system("\n".join([f"cd {directory}", *cmd])) + +diagnose.add_command(stall) +diagnose.add_command(snakemake) +diagnose.add_command(rule) \ No newline at end of file diff --git a/harpy/common/launch.py b/harpy/common/launch.py index de773cb88..5b20dbce6 100644 --- a/harpy/common/launch.py +++ b/harpy/common/launch.py @@ -5,9 +5,9 @@ import os import sys import 
subprocess -from rich.table import Table from rich import box from rich.syntax import Syntax +from rich.table import Table from harpy.common.file_ops import gzip_file, purge_empty_logs from harpy.common.printing import CONSOLE, print_onerror, print_setup_error from harpy.common.progress import harpy_progressbar, harpy_pulsebar, harpy_progresspanel @@ -66,7 +66,6 @@ def print_shellcmd(text: str, _process): CONSOLE.print("[red]" + re.sub(r'\n{3,}', '\n\n', merged_text), overflow = "ignore", crop = False) return _process.stderr.readline() - def highlight_params(text: str): """make important snakemake attributes like 'input:' highlighted in the error output""" text = text.removeprefix(" ").rstrip() @@ -93,7 +92,7 @@ def highlight_params(text: str): return f"\n[blue]{text}[/]" return text -def launch_snakemake(sm_args, workflow, outdir, sm_logfile, quiet, CONSOLE = CONSOLE): +def launch_snakemake(sm_args, outdir, sm_logfile, quiet, CONSOLE = CONSOLE): """launch snakemake with the given commands""" exitcode = None sm_start = datetime.now() diff --git a/harpy/common/printing.py b/harpy/common/printing.py index 955451719..ace99b593 100644 --- a/harpy/common/printing.py +++ b/harpy/common/printing.py @@ -2,9 +2,11 @@ import time as _time import os +import re import sys from rich.console import Console, RenderableType from rich import box +from rich.syntax import Syntax from rich.table import Table from rich.panel import Panel @@ -114,6 +116,23 @@ def print_onerror(logfile: str, time) -> None: CONSOLE.print(datatable) CONSOLE.rule("[bold]Where Error Occurred", style = "red") +def print_shellcmd_simple(text): + _table = Table( + show_header=False, + pad_edge=False, + show_edge=False, + padding=(0,0), + box=box.SIMPLE, + ) + _table.add_column("Lpadding", justify="left") + _table.add_column("shell", justify="left") + _table.add_column("Rpadding", justify="left") + + text = re.sub(r' {2,}|\t+', ' ', text) + cmd = Syntax(text, lexer = "bash", tab_size=2, word_wrap=True, 
padding=1, dedent=True, theme = "paraiso-dark") + _table.add_row(" ", cmd, " ") + CONSOLE.print("[bold default]shell:", _table) + def workflow_info(*arg: tuple[str, str | int | float]|None) -> Table: """ Accepts an unlimited number of length-2 lists or tuples and returns a rich.Table with the value of the first indices as the row names and the second indices as the values diff --git a/harpy/common/workflow.py b/harpy/common/workflow.py index 58f00ca3c..3d003d2e6 100644 --- a/harpy/common/workflow.py +++ b/harpy/common/workflow.py @@ -283,7 +283,7 @@ def launch(self, absolute:bool = False): cmd = self.snakemake_cmd_absolute if absolute else self.snakemake_cmd_relative try: - launch_snakemake(cmd, self.workflow_directory, self.output_directory, self.snakemake_logfile, self.quiet) + launch_snakemake(cmd, self.output_directory, self.snakemake_logfile, self.quiet) finally: with open(os.path.join(self.output_directory, "workflow", f"{self.name.replace('_','.')}.summary"), "w") as f_out: f_out.write(Summary(self.config).get_text()) From e5a2a228aafc697470786fad604497c0a45ffe93 Mon Sep 17 00:00:00 2001 From: pdimens Date: Wed, 22 Oct 2025 12:28:45 -0400 Subject: [PATCH 29/33] be clearer about limitations --- harpy/commands/diagnose.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/harpy/commands/diagnose.py b/harpy/commands/diagnose.py index 2d90e0c15..a8b7a545d 100644 --- a/harpy/commands/diagnose.py +++ b/harpy/commands/diagnose.py @@ -112,7 +112,9 @@ def rule(directory): Directly run the first rule that caused the workflow failure The rule is identified in the most recent Snakemake log file in - `DIRECTORY` as the first one with the text `Error in rule ____`. + `DIRECTORY/logs/snakemake` as the first one with the text `Error in rule ____`. This + convenience feature is somewhat limited and will fail if any of the inputs for the + failing rule were marked as temporary. 
""" directory = directory.rstrip("/") if not os.path.exists(f'{directory}/logs/snakemake/'): @@ -149,6 +151,8 @@ def rule(directory): if line.strip() == "(command exited with non-zero exit code)": break cmd.append(line.strip()) + if not _found: + CONSOLE.print(f"No errors found in {os.path.basename(latest_log)}", style = "green") if conda: print_shellcmd_simple("\n".join(cmd)) From 6bb633510909385fc77b772c5056995c67e20642 Mon Sep 17 00:00:00 2001 From: pdimens Date: Wed, 22 Oct 2025 12:44:30 -0400 Subject: [PATCH 30/33] fix indexing logic again --- harpy/snakefiles/sv_naibr_phase.smk | 19 ++++++++----------- harpy/snakefiles/sv_naibr_pop_phase.smk | 19 ++++++++----------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/harpy/snakefiles/sv_naibr_phase.smk b/harpy/snakefiles/sv_naibr_phase.smk index 9e162cfbe..4d32ec09d 100644 --- a/harpy/snakefiles/sv_naibr_phase.smk +++ b/harpy/snakefiles/sv_naibr_phase.smk @@ -11,7 +11,6 @@ wildcard_constraints: genomefile = config["inputs"]["reference"] bamlist = config["inputs"]["alignments"] -#bamdict = dict(zip(bamlist, bamlist)) vcffile = config["inputs"]["vcf"] samplenames = {Path(i).stem for i in bamlist} extra = config.get("extra", None) @@ -53,12 +52,6 @@ def get_alignments(wildcards): aln = list(filter(r.match, bamlist)) return aln[0] -def get_align_index(wildcards): - """returns a list with the bai index file for the sample based on wildcards.sample""" - r = re.compile(fr"(.*/{wildcards.sample})\.(bam|sam)$", flags = re.IGNORECASE) - aln = list(filter(r.match, bamlist)) - return aln[0] + ".bai" - rule preprocess_reference: input: genomefile @@ -79,9 +72,13 @@ rule index_alignments: input: get_alignments output: - "{sample}.bai" + temp("workflow/input/bam/{sample}.bam.bai"), + bam = temp("workflow/input/bam/{sample}.bam") shell: - "samtools index {input}" + """ + ln -sr {input} {output.bam} + samtools index {output.bam} + """ rule index_snps: input: @@ -101,11 +98,11 @@ rule index_snps_gz: rule 
phase_alignments: input: - get_align_index, + "workflow/input/bam/{sample}.bam.bai", vcfindex, f"{workflow_geno}.fai", vcf = vcffile, - aln = get_alignments, + aln = "workflow/input/bam/{sample}.bam", ref = workflow_geno output: bam = "phasedbam/{sample}.bam", diff --git a/harpy/snakefiles/sv_naibr_pop_phase.smk b/harpy/snakefiles/sv_naibr_pop_phase.smk index 25dc932f5..e01eea57f 100644 --- a/harpy/snakefiles/sv_naibr_pop_phase.smk +++ b/harpy/snakefiles/sv_naibr_pop_phase.smk @@ -13,7 +13,6 @@ wildcard_constraints: genomefile = config["inputs"]["reference"] bn = os.path.basename(genomefile) bamlist = config["inputs"]["alignments"] -bamdict = dict(zip(bamlist, bamlist)) samplenames = {Path(i).stem for i in bamlist} groupfile = config["inputs"]["groupings"] vcffile = config["inputs"]["vcf"] @@ -74,12 +73,6 @@ def get_alignments(wildcards): aln = list(filter(r.match, bamlist)) return aln[0] -def get_align_index(wildcards): - """returns a list with the bai index file for the sample based on wildcards.sample""" - r = re.compile(fr"(.*/{wildcards.sample})\.(bam|sam)$", flags = re.IGNORECASE) - aln = list(filter(r.match, bamlist)) - return aln[0] + ".bai" - rule preprocess_reference: input: genomefile @@ -116,17 +109,21 @@ rule index_alignments: input: get_alignments output: - "{sample}.bai" + bam = temp("workflow/input/bam/{sample}.bam"), + bai = temp("workflow/input/bam/{sample}.bam.bai") shell: - "samtools index {input}" + """ + ln -sr {input} {output.bam} + samtools index {output.bam} + """ rule phase_alignments: input: vcfindex, - get_align_index, + "workflow/input/bam/{sample}.bam.bai", f"{workflow_geno}.fai", vcf = vcffile, - aln = get_alignments, + aln = "workflow/input/bam/{sample}.bam", ref = workflow_geno output: bam = temp("phasedbam/{sample}.bam"), From a651dbe67cd7953df9f477c0ec86395fbbc093a7 Mon Sep 17 00:00:00 2001 From: pdimens Date: Wed, 22 Oct 2025 12:58:49 -0400 Subject: [PATCH 31/33] small formatting fixes [skip CI] --- resources/changelog.md | 6 
++++++ resources/harpy.yaml | 15 --------------- resources/meta.yaml | 2 +- 3 files changed, 7 insertions(+), 16 deletions(-) delete mode 100644 resources/harpy.yaml diff --git a/resources/changelog.md b/resources/changelog.md index 8403ffa7e..4d00ca438 100644 --- a/resources/changelog.md +++ b/resources/changelog.md @@ -1,3 +1,9 @@ +# new +- `diagnose` now has 3 subcommands: + - `stall`: same as previous `diagnose` behavior, where it runs snakemake with `--dry-run --debug-dag` + - `snakemake`: runs snakemake directly (without Harpy intervention), outputting everything to terminal + - `rule`: attempt to directly run the failing rule of a workflow as identified in the snakemake log + # deprecations - harpy convert - harpy downsample diff --git a/resources/harpy.yaml b/resources/harpy.yaml deleted file mode 100644 index 77edd8d82..000000000 --- a/resources/harpy.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: harpy -channels: - - conda-forge - - bioconda -dependencies: - - bcftools =1.22 - - click >=8.2 - - conda >24.7 - - htslib =1.22 - - pysam =0.23 - - python >=3.11 - - rich-click >=1.9.3 - - snakemake-minimal =9 - - samtools =1.22 - - seqtk \ No newline at end of file diff --git a/resources/meta.yaml b/resources/meta.yaml index fad72eff1..de3c57d64 100644 --- a/resources/meta.yaml +++ b/resources/meta.yaml @@ -50,7 +50,7 @@ requirements: - htslib >=1.22 - pysam >=0.23 - rich-click >=1.9.3 - - snakemake-minimal >=9.0 + - snakemake-minimal >=9.11 - samtools >=1.22 - seqtk From 34afed8f277f617bfa6f3c343213cde5f1d3eb3e Mon Sep 17 00:00:00 2001 From: pdimens Date: Wed, 22 Oct 2025 13:02:27 -0400 Subject: [PATCH 32/33] simplify [skip CI] --- harpy/snakefiles/environments.smk | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/harpy/snakefiles/environments.smk b/harpy/snakefiles/environments.smk index 90068806c..079ed1977 100644 --- a/harpy/snakefiles/environments.smk +++ b/harpy/snakefiles/environments.smk @@ -1,11 +1,5 @@ -if config.get("envs", None): 
- out_envs = config["envs"] -else: - out_envs = ["align", "assembly", "metassembly", "phase", "qc", "report", "simulations", "stitch", "variants"] - -if config.get("spades", None): - out_envs.append("spades") +out_envs = config.get("envs", ["align", "assembly", "metassembly", "phase", "qc", "report", "simulations", "stitch", "variants"]) rule all: input: @@ -13,6 +7,5 @@ rule all: rule conda_env: output: "{conda}.env" - container: "docker://pdimens/harpy:{conda}_latest" conda: "envs/{conda}.yaml" shell: "touch {output}" From d8e0dbf9a6e4ba030158fbd16ed0e20e555f5636 Mon Sep 17 00:00:00 2001 From: pdimens Date: Wed, 22 Oct 2025 13:16:24 -0400 Subject: [PATCH 33/33] expose more naibr params --- harpy/common/cli_types_params.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harpy/common/cli_types_params.py b/harpy/common/cli_types_params.py index 237726c31..ba46ecdb4 100644 --- a/harpy/common/cli_types_params.py +++ b/harpy/common/cli_types_params.py @@ -202,7 +202,7 @@ class NaibrParams(click.ParamType): name = "naibr_params" def convert(self, value, param, ctx): harpy_options = "bam_file prefix outdir threads min_mapq d min_sv k".split() - valid_options = "blacklist candidates".split() + valid_options = "blacklist candidates min_discs min_reads sd_mult".split() opts = 0 docs = "https://github.com/pontushojer/NAIBR?tab=readme-ov-file#running-naibr" clean_args = []