diff --git a/.github/workflows/createrelease.yml b/.github/workflows/createrelease.yml index 9e50b4663..5c91bd823 100644 --- a/.github/workflows/createrelease.yml +++ b/.github/workflows/createrelease.yml @@ -6,35 +6,31 @@ on: - '*' # Push events of any tag created jobs: - build_versioned_container: + build_versioned_containers: name: Build and Push versioned container runs-on: ubuntu-latest permissions: contents: write pull-requests: write + strategy: + matrix: + env: [align, assembly, deconvolution, demultiplex, metassembly, phase, qc, report, simulations, stitch, variants] steps: - name: Checkout uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup mamba - uses: mamba-org/setup-micromamba@v2 + - name: Install Harpy + uses: prefix-dev/setup-pixi@v0.9.2 with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: false - post-cleanup: 'all' - - name: Install harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install dist/*.whl - pip install importlib-resources + pixi-version: v0.56.0 + cache: false + activate-environment: true - name: Clear space run: rm -rf /opt/hostedtoolcache - - name: Recreate container - shell: micromamba-shell {0} +# - name: Clear Space +# uses: jlumbroso/free-disk-space@main + - name: Recreate containers run: harpy containerize - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -46,10 +42,12 @@ jobs: - name: Build and push uses: docker/build-push-action@v6 with: - context: ./container + context: container/${{ matrix.env }} push: true - tags: pdimens/harpy:${{ github.ref_name }} + tags: pdimens/harpy:${{ matrix.env }}_${{ github.ref_name }} + build_tarball: + needs: build_versioned_containers name: Upload Release Tarball runs-on: ubuntu-latest permissions: @@ -60,10 +58,10 @@ jobs: - name: Checkout code uses: actions/checkout@v4 - name: Version the Container - # this removes the :latest tag and replaces 
with versioned container + # this removes the :*_latest tag and replaces with versioned container run: | for i in harpy/snakefiles/*.smk; do - sed -i "s/harpy\:latest/harpy\:${{ github.ref_name }}/g" $i + sed -i "s/_latest/_${{ github.ref_name }}/g" $i done - name: Bump Harpy Version # this removes the :latest tag and replaces with versioned container diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 5d1a00e18..dca693e71 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -54,93 +54,8 @@ jobs: with: filters: .github/filters.yml - build: - name: Build and Install - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - - name: Install Harpy - id: harpybuild - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build && \ - pip install --no-deps dist/*.whl - container: - needs: [changes, build] - if: ${{ needs.changes.outputs.environments == 'true' }} - name: Rebuild Container - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - - name: Install Harpy - id: harpybuild - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl - - name: Clear Space - uses: jlumbroso/free-disk-space@main - - name: Rebuild Dockerfile - id: rebuild - shell: micromamba-shell {0} - run: harpy containerize - - name: Set up Docker Buildx - id: buildx - if: ${{ steps.rebuild.outcome == 'success' }} - uses: 
docker/setup-buildx-action@v3 - - name: Login to Docker Hub - id: dockerhub - if: ${{ steps.buildx.outcome == 'success' }} - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Build and Push to Dockerhub - if: ${{ steps.dockerhub.outcome == 'success' }} - uses: docker/build-push-action@v6 - with: - context: ./container - push: true - tags: pdimens/harpy:latest -# - name: Pull Image Locally -# id: singularity -# shell: micromamba-shell {0} -# if: ${{ needs.changes.outputs.modules == 'true' }} -# run: | -# export APPTAINER_TMPDIR=$PWD/test/ -# harpy qc --skip-reports --quiet 2 test/fastq/sample1.*.fq.gz -# - name: Create Singularity Artifact -# if: ${{ steps.singularity.outcome == 'success' }} -# uses: actions/upload-artifact@v4 -# with: -# name: deps-image -# path: .snakemake/singularity/*.simg -# retention-days: 1 - dmux_meier2021: - needs: [changes, build] + needs: changes if: ${{ needs.changes.outputs.demux == 'true' }} name: demux meier2021 runs-on: ubuntu-latest @@ -149,27 +64,19 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: false + activate-environment: true - name: harpy demultiplex - shell: micromamba-shell {0} run: | harpy demultiplex meier2021 --quiet 2 test/demux/samples.schema test/demux/multiplex.R* test/demux/multiplex.I* && \ ls -lh Demultiplex validate: - needs: [changes, build] + needs: changes if: ${{ needs.changes.outputs.validate == 'true' }} name: validate runs-on: ubuntu-latest @@ -178,36 +85,24 @@ jobs: uses: 
actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: false + activate-environment: true - name: test validate fastq - shell: micromamba-shell {0} run: | harpy validate fastq test/fastq && \ ls -lh Validate/fastq - name: test validate bam if: always() - shell: micromamba-shell {0} run: | harpy validate bam --quiet 2 test/bam && \ ls -lh Validate/bam qc: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.qc == 'true' }} name: qc runs-on: ubuntu-latest @@ -216,30 +111,19 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: false + activate-environment: true - name: harpy qc - shell: micromamba-shell {0} run: | harpy qc -x "--low_complexity_filter" --quiet 2 test/fastq && \ ls -lh QC deconvolve: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.deconvolve == 'true' }} name: deconvolve runs-on: ubuntu-latest @@ -248,30 +132,19 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true 
- with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: false + activate-environment: true - name: harpy deconvolve - shell: micromamba-shell {0} run: | harpy deconvolve --quiet 2 test/fastq && \ ls -lh Deconvolve bwa: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.bwa == 'true' }} name: align BWA runs-on: ubuntu-latest @@ -280,30 +153,19 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: false + activate-environment: true - name: test bwa - shell: micromamba-shell {0} run: | harpy align bwa --quiet 2 -x "-A 2" test/genome/genome.fasta.gz test/fastq && \ ls -lh Align/bwa strobe: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.strobealign == 'true' }} name: align strobe runs-on: ubuntu-latest @@ -312,30 +174,19 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip 
install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: false + activate-environment: true - name: test strobealign - shell: micromamba-shell {0} run: | harpy align strobe --quiet 2 test/genome/genome.fasta.gz test/fastq && \ ls -lh Align/strobealign mpileup: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.mpileup == 'true' }} name: mpileup runs-on: ubuntu-latest @@ -344,35 +195,23 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: false + activate-environment: true - name: snp mpileup - shell: micromamba-shell {0} run: | harpy snp mpileup --quiet 2 -r test/positions.bed -x "--ignore-RG" test/genome/genome.fasta.gz test/bam && \ ls -lh SNP/mpileup - name: snp mpileup-pop - shell: micromamba-shell {0} run: | harpy snp mpileup --quiet 2 -r test/positions.bed -o SNP/poptest -p test/samples.groups test/genome/genome.fasta.gz test/bam && \ ls -lh SNP/poptest freebayes: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.freebayes == 'true' }} name: freebayes runs-on: ubuntu-latest @@ -381,35 +220,23 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: 
micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: false + activate-environment: true - name: snp freebayes - shell: micromamba-shell {0} run: | harpy snp freebayes --quiet 2 -r test/positions.bed -x "-g 200" test/genome/genome.fasta.gz test/bam && \ ls -lh SNP/freebayes - name: snp freebayes-pop - shell: micromamba-shell {0} run: | harpy snp freebayes --quiet 2 -r test/positions.bed -o SNP/poptest -p test/samples.groups test/genome/genome.fasta.gz test/bam && \ ls -lh SNP/poptest impute: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.impute == 'true' }} name: impute runs-on: ubuntu-latest @@ -418,42 +245,29 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: false + activate-environment: true - name: impute - shell: micromamba-shell {0} run: | harpy impute --quiet 2 --grid-size 1500 test/stitch.params test/vcf/test.bcf test/bam && \ ls -lh Impute/*/* - name: impute from vcf - shell: micromamba-shell {0} if: always() run: | harpy impute --quiet 2 --grid-size 1500 --vcf-samples -o vcfImpute test/stitch.params test/vcf/test.bcf test/bam && \ ls -lh vcfImpute/*/* - name: impute one region - shell: micromamba-shell {0} if: always() run: | harpy impute --quiet 2 --grid-size 1500 --vcf-samples -o regionImpute -r 3L:3000-28110227-1000 test/stitch.params test/vcf/test.bcf test/bam && \ ls -lh regionImpute/*/* 
phase: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.phase == 'true' }} name: phase runs-on: ubuntu-latest @@ -462,44 +276,30 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl - + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: false + activate-environment: true - name: phase - shell: micromamba-shell {0} run: | harpy phase --quiet 2 -x "--max_iter 10001" test/vcf/test.bcf test/bam && \ ls -lh Phase - name: phase with indels - shell: micromamba-shell {0} if: always() run: | harpy phase --quiet 2 -o phaseindel -r test/genome/genome.fasta.gz test/vcf/test.bcf test/bam && \ ls -lh phaseindel - name: phase from vcf - shell: micromamba-shell {0} if: always() run: | cp test/bam/sample1.bam test/bam/pineapple.bam && rename_bam -d pineapple1 test/bam/pineapple.bam - harpy phase --quiet 2 --vcf-samples -o phasevcf test/vcf/test.bcf test/bam + harpy phase --quiet 2 --vcf-samples -o phasevcf test/vcf/test.bcf test/bam && \ ls -lh phasevcf leviathan: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.leviathan == 'true' }} name: sv leviathan runs-on: ubuntu-latest @@ -508,37 +308,24 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && 
python3 -m build - pip install --no-deps dist/*.whl - + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: false + activate-environment: true - name: leviathan - shell: micromamba-shell {0} run: | harpy sv leviathan --quiet 2 -m 100 -s 80,80,80 -b 1 -x "-M 2002" test/genome/genome.fasta.gz test/bam && \ ls -lh SV/leviathan - name: leviathan-pop if: always() - shell: micromamba-shell {0} run: | harpy sv leviathan --quiet 2 -m 100 -s 80,80,80 -b 1 -o SV/leviathanpop -p test/samples.groups test/genome/genome.fasta.gz test/bam && \ ls -lh SV/leviathanpop naibr: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.naibr == 'true' }} name: sv naibr runs-on: ubuntu-latest @@ -547,49 +334,34 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl - + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: false + activate-environment: true - name: naibr - shell: micromamba-shell {0} run: | harpy sv naibr --quiet 2 -o SV/naibr test/genome/genome.fasta.gz test/bam_phased && \ ls -lh SV/naibr - name: naibr pop if: always() - shell: micromamba-shell {0} run: | harpy sv naibr --quiet 2 -o SV/pop -p test/samples.groups test/genome/genome.fasta.gz test/bam_phased && \ ls -lh SV/pop - name: naibr with phasing if: always() - shell: micromamba-shell {0} run: | harpy sv naibr --quiet 2 -o SV/phase -v test/vcf/test.phased.bcf test/genome/genome.fasta.gz test/bam && \ ls -lh SV/phase - name: naibr pop with phasing if: always() - shell: micromamba-shell {0} run: | harpy sv naibr --quiet 2 -o SV/phasepop -v 
test/vcf/test.phased.bcf -p test/samples.groups test/genome/genome.fasta.gz test/bam && \ ls -lh SV/phasepop simulate_variants: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.simvars == 'true' }} name: simulate variants runs-on: ubuntu-latest @@ -598,32 +370,19 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl - + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: false + activate-environment: true - name: simulate random snps/indels - shell: micromamba-shell {0} run: | harpy simulate snpindel --quiet 2 --snp-count 10 --indel-count 10 -z 0.5 test/genome/genome.fasta.gz ls -lh Simulate/snpindel harpy simulate snpindel --quiet 2 --prefix Simulate/snpvcf --snp-vcf Simulate/snpindel/haplotype_1/sim.hap1.snp.vcf --indel-vcf Simulate/snpindel/haplotype_1/sim.hap1.indel.vcf test/genome/genome.fasta.gz && \ ls -lh Simulate - name: simulate inversions - shell: micromamba-shell {0} if: always() run: | harpy simulate inversion --quiet 2 --count 10 -z 0.5 test/genome/genome.fasta.gz @@ -631,7 +390,6 @@ jobs: harpy simulate inversion --quiet 2 --prefix Simulate/invvcf --vcf Simulate/inversion/haplotype_1/sim.hap1.inversion.vcf test/genome/genome.fasta.gz && \ ls -lh Simulate - name: simulate cnv - shell: micromamba-shell {0} if: always() run: | harpy simulate cnv --quiet 2 --count 10 -z 0.5 test/genome/genome.fasta.gz @@ -639,7 +397,6 @@ jobs: harpy simulate cnv --quiet 2 --prefix Simulate/cnvvcf --vcf Simulate/cnv/haplotype_1/sim.hap1.cnv.vcf test/genome/genome.fasta.gz && \ ls -lh Simulate - name: simulate 
translocations - shell: micromamba-shell {0} if: always() run: | harpy simulate translocation --quiet 2 --count 10 -z 0.5 test/genome/genome.fasta.gz @@ -648,7 +405,7 @@ jobs: ls -lh Simulate assembly: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.assembly == 'true' }} name: assembly runs-on: ubuntu-latest @@ -657,41 +414,29 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} - run: | - python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl - + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: false + activate-environment: true + - name: Clear Space + uses: jlumbroso/free-disk-space@main - name: test assembly - shell: micromamba-shell {0} run: | harpy assembly --quiet 2 -r 4000 test/fastq/sample1.* && \ ls -lh Assembly - name: test metassembly - shell: micromamba-shell {0} run: | harpy metassembly --quiet 2 --force -r 4000 test/fastq/sample1.* && \ ls -lh Metassembly - name: test metassembly without barcodes - shell: micromamba-shell {0} run: | harpy metassembly --unlinked --force --quiet 2 -r 4000 test/fastq/sample1.* && \ ls -lh Metassembly other: - needs: [changes,build] + needs: changes if: ${{ needs.changes.outputs.other == 'true' }} name: miscellaneous runs-on: ubuntu-latest @@ -700,32 +445,17 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 1 - - name: Setup Mamba - uses: mamba-org/setup-micromamba@v2 - env: - ACTIONS_STEP_DEBUG: true - with: - init-shell: bash - generate-run-shell: true - environment-file: resources/harpy.yaml - cache-environment: true - post-cleanup: 'all' - log-level: error - name: Install Harpy - shell: micromamba-shell {0} - run: | - 
python3 -m pip install --upgrade build && python3 -m build - pip install --no-deps dist/*.whl -# - name: Clear Space -# uses: jlumbroso/free-disk-space@main + uses: prefix-dev/setup-pixi@v0.9.2 + with: + pixi-version: v0.56.0 + cache: false + activate-environment: true - name: template impute - shell: micromamba-shell {0} run: harpy template impute - name: template groupings - shell: micromamba-shell {0} run: harpy template groupings test/fastq - name: template hpc - shell: micromamba-shell {0} run: | harpy template hpc-slurm harpy template hpc-googlebatch diff --git a/.gitignore b/.gitignore index c45d82e55..5c18fb758 100644 --- a/.gitignore +++ b/.gitignore @@ -35,7 +35,7 @@ haplotag.bc _Inline # pixi environments .pixi -pixi* +pixi.lock .gitattributes *.egg-info .deprecated/ diff --git a/harpy/commands/align.py b/harpy/commands/align.py index b62cd6a79..d13845349 100644 --- a/harpy/commands/align.py +++ b/harpy/commands/align.py @@ -51,8 +51,8 @@ def bwa(reference, inputs, output_dir, depth_window, unlinked, threads, keep_unm Presence and type of linked-read data is auto-detected, but can be deliberately ignored using `-U`. Setting `--molecule-distance` to `>0` activates alignment-distance based barcode deconvolution. """ - workflow = Workflow("align_bwa", "align_bwa.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("align_bwa", "align_bwa.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.reports = ["align_stats.qmd", "align_bxstats.qmd"] workflow.conda = ["align", "report", "qc"] @@ -122,8 +122,8 @@ def strobe(reference, inputs, output_dir, unlinked, keep_unmapped, depth_window, but can be deliberately ignored using `-U`. Setting `--molecule-distance` to `>0` activates alignment-distance based barcode deconvolution. 
""" - workflow = Workflow("align_strobe", "align_strobe.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("align_strobe", "align_strobe.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.reports = ["align_stats.qmd", "align_bxstats.qmd"] workflow.conda = ["align", "report", "qc"] diff --git a/harpy/commands/assembly.py b/harpy/commands/assembly.py index c701bac21..65f9150c2 100644 --- a/harpy/commands/assembly.py +++ b/harpy/commands/assembly.py @@ -46,8 +46,8 @@ def assembly(fastq_r1, fastq_r2, kmer_length, max_memory, output_dir, extra_para separated by commas and without spaces (e.g. `-k 15,23,51`). It is strongly recommended to first deconvolve the input FASTQ files with `harpy deconvolve`. """ - workflow = Workflow("assembly", "assembly.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("assembly", "assembly.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.conda = ["assembly","qc"] ## checks and validations ## diff --git a/harpy/commands/deconvolve.py b/harpy/commands/deconvolve.py index fb5e8a9a0..3086e0677 100644 --- a/harpy/commands/deconvolve.py +++ b/harpy/commands/deconvolve.py @@ -33,8 +33,8 @@ def deconvolve(inputs, output_dir, kmer_length, window_size, density, dropout, t `dropout` is set to `0`, meaning it will consider all barcodes, even clouds with singleton. 
""" is_arm(allowed = False) - workflow = Workflow("deconvolve", "deconvolve.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("deconvolve", "deconvolve.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.conda = ["deconvolution"] ## checks and validations ## diff --git a/harpy/commands/demultiplex.py b/harpy/commands/demultiplex.py index 35aa34714..a15fe2d8d 100644 --- a/harpy/commands/demultiplex.py +++ b/harpy/commands/demultiplex.py @@ -47,8 +47,8 @@ def meier2021(r12_fq, i12_fq, output_dir, schema, qx_rx, keep_unknown_samples, k `QX:Z` (barcode PHRED scores) and `RX:Z` (nucleotide barcode) tags in the sequence headers. These tags aren't used by any subsequent analyses, but may be useful for your own diagnostics. """ - workflow = Workflow("demultiplex_meier2021", "demultiplex_meier2021.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("demultiplex_meier2021", "demultiplex_meier2021.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.conda = ["demultiplex", "qc"] workflow.inputs = { diff --git a/harpy/commands/diagnose.py b/harpy/commands/diagnose.py index 44d2ba5ed..a8b7a545d 100644 --- a/harpy/commands/diagnose.py +++ b/harpy/commands/diagnose.py @@ -1,15 +1,26 @@ +import glob import os import sys import yaml import subprocess import rich_click as click -from harpy.common.printing import print_error, CONSOLE +from harpy.common.printing import print_error, CONSOLE, print_shellcmd_simple +from harpy.common.file_ops import safe_read + +@click.group(options_metavar='', context_settings={"help_option_names" : []}) +def diagnose(): + """ + Attempt to resolve workflow errors + """ @click.command(no_args_is_help = True, context_settings={"allow_interspersed_args" : False}) @click.argument('directory', required=True, type=click.Path(exists=True, file_okay=False)) 
-def diagnose(directory): +def stall(directory): """ - Run the Snakemake debugger to identify hang-ups + Run the Snakemake debugger to identify why a workflow stalled + + This will run Snakemake with the `--dry-run` and `--debug-dag` options, + printing the diagnostics to the terminal. """ directory = directory.rstrip("/") PROFILE_FILE = os.path.join(directory, "workflow", "config.yaml") @@ -62,8 +73,108 @@ def diagnose(directory): CONSOLE.print(output, end="", style = "yellow") except Exception as e: CONSOLE.print("") - #CONSOLE.print(f"{e}") CONSOLE.rule("[bold]End of diagnosis", style = "yellow") process.terminate() process.wait() - sys.exit(1) \ No newline at end of file + sys.exit(1) + +@click.command(no_args_is_help = True, context_settings={"allow_interspersed_args" : False}) +@click.argument('directory', required=True, type=click.Path(exists=True, file_okay=False)) +def snakemake(directory): + """ + Run the Snakemake workflow directly without Harpy intervention + + This will run Snakemake without any of the convenience features, + which can sometimes reveal errors that weren't captured by Harpy. Requries + the `workflow/config.yaml` and `workflow/workflow.yaml` files to be in + `DIRECTORY`. 
+ """ + directory = directory.rstrip("/") + PROFILE_FILE = os.path.join(directory, "workflow", "config.yaml") + CONFIG_FILE = os.path.join(directory, "workflow", "workflow.yaml") + + if not os.path.exists(CONFIG_FILE): + print_error("missing workflow config", f"Target directory [blue]{directory}[/] does not contain the file [bold]workflow/workflow.yaml[/]") + if not os.path.exists(PROFILE_FILE): + print_error("missing snakemake config", f"Target directory [blue]{directory}[/] does not contain the file [bold]workflow/config.yaml[/]") + + with open(CONFIG_FILE, 'r', encoding="utf-8") as f: + harpy_config = yaml.full_load(f) + + command = harpy_config["snakemake"]["absolute"] + + os.system(command) + +@click.command(no_args_is_help = True, context_settings={"allow_interspersed_args" : False}) +@click.argument('directory', required=True, type=click.Path(exists=True, file_okay=False)) +def rule(directory): + """ + Directly run the first rule that caused the workflow failure + + The rule is identified in the most recent Snakemake log file in + `DIRECTORY/logs/snakemake` as the first one with the text `Error in rule ____`. This + convenience feature is somewhat limited and will fail if any of the inputs for the + failing rule were marked as temporary. 
+ """ + directory = directory.rstrip("/") + if not os.path.exists(f'{directory}/logs/snakemake/'): + print_error("missing log folder", f"Target directory [blue]{directory}[/] does not contain the folder [bold]logs/snakemake[/]") + # get the lastest snakemake log file + list_of_files = glob.glob(f'{directory}/logs/snakemake/*') + latest_log = max(list_of_files, key=os.path.getctime) + + CONSOLE.rule(f"Latest log: {os.path.basename(latest_log)}", style = "yellow") + _found = False + _shellblock = False + conda = "" + container = "" + cmd = [] + with safe_read(latest_log) as logfile: + for line in logfile: + if "Error in rule" in line: + CONSOLE.print(f"Failing rule: {line.strip().removeprefix('Error in rule').rstrip(':')}", style = "yellow") + _found = True + continue + if _found: + if "conda-env:" in line: + conda = "source activate " + line.strip().split()[-1] + continue + if "container:" in line: + container = line.strip().split()[-1] + if "shell:" in line: + _shellblock = True + _ = line.strip().replace("shell:","").split() + if _: + cmd.append(_) + continue + if _shellblock: + if line.strip() == "(command exited with non-zero exit code)": + break + cmd.append(line.strip()) + if not _found: + CONSOLE.print(f"No errors found in {os.path.basename(latest_log)}", style = "green") + + if conda: + print_shellcmd_simple("\n".join(cmd)) + os.system("\n".join([conda, f"cd {directory}", *cmd])) + elif container: + print_shellcmd_simple(f""" +apptainer exec {container} bash -c ' +{"\n".join([*cmd])} +' +""" + ) + os.system(f""" +cd {directory} +apptainer exec {container} bash -c ' +{"\n".join([*cmd])} +' +""" + ) + else: + print_shellcmd_simple("\n".join(cmd)) + os.system("\n".join([f"cd {directory}", *cmd])) + +diagnose.add_command(stall) +diagnose.add_command(snakemake) +diagnose.add_command(rule) \ No newline at end of file diff --git a/harpy/commands/environments.py b/harpy/commands/environments.py index e9f770610..4d60402ba 100644 --- a/harpy/commands/environments.py 
+++ b/harpy/commands/environments.py @@ -2,53 +2,20 @@ import os import shutil -import subprocess import rich_click as click from harpy.common.conda import create_conda_recipes +from harpy.common.create_pixi import create_pixi_dockerfiles from harpy.common.workflow import Workflow @click.command(hidden = True) def containerize(): """ - Configure conda and docker environments + Configure the harpy container **INTERNAL USE ONLY**. Used to recreate all the conda environments required by the workflows and build a dockerfile from that. """ - workflow = Workflow("container", "environments.smk", "container", 1) - workflow.fetch_snakefile() - create_conda_recipes("container") - - with open("container/Dockerfile", "w", encoding = "utf-8") as dockerraw: - _module = subprocess.run( - 'snakemake -s container/workflow/workflow.smk --containerize --directory container'.split(), - stdout = dockerraw - ) - - #with open("Dockerfile.raw", "r") as dockerraw, open("Dockerfile", "w") as dockerfile: - # # copy over the first three lines - # dockerfile.write(dockerraw.readline()) - # dockerfile.write(dockerraw.readline()) - # dockerfile.write(dockerraw.readline()) - # dockerfile.write("\nCOPY container/workflow/envs/*.yaml /\n") - # env_hash = {} - # for line in dockerraw: - # if line.startswith("#"): - # continue - # if line.startswith("COPY"): - # dockercmd, env, hashname = line.split() - # env = Path(env).stem - # hashname = hashname.split("/")[-2] - # env_hash[env] = hashname - # runcmds = [] - # for env, _hash in env_hash.items(): - # runcmds.append(f"conda env create --prefix /conda-envs/{_hash} --file /{env}.yaml && \\") - # runcmds.append("conda clean --all -y") - # dockerfile.write("\nRUN ") - # dockerfile.write( - # "\n\t".join(runcmds) - # ) - #os.remove("Dockerfile.raw") + create_pixi_dockerfiles() @click.group(options_metavar='') def deps(): @@ -81,7 +48,7 @@ def conda(workflows): - stitch - variants """ - workflow = Workflow("localenv", "environments.smk", "localenv/", 1) + 
workflow = Workflow("localenv", "environments.smk", "localenv/", False, 1) # if "all" was mixed with other workflows, default to just all and avoid doubling up create_conda_recipes(workflow.output_directory) if "all" in workflows: @@ -99,12 +66,12 @@ def conda(workflows): @click.command(context_settings={"help_option_names" : ["-h", "--help"]}) def container(): """ - Install workflow dependency container + Install workflow dependency containers Manually pull the harpy dependency container from dockerhub and convert it into an Apptainer .sif. To use, run this command again without arguments. """ - workflow = Workflow("localcontainer", "environments.smk", "localenv/", 1) + workflow = Workflow("localcontainer", "environments.smk", "localenv/", True, 1) workflow.fetch_snakefile() workflow.snakemake_cmd_relative = " ".join(["snakemake", "-s", os.path.join(workflow.workflow_directory, "workflow.smk"), "--sdm", "conda apptainer", "--cores 2", "--apptainer-prefix ../.environments", "--directory localenv"]) workflow.launch() diff --git a/harpy/commands/impute.py b/harpy/commands/impute.py index 6b6796247..4dab688c1 100644 --- a/harpy/commands/impute.py +++ b/harpy/commands/impute.py @@ -42,8 +42,8 @@ def impute(parameters, vcf, inputs, output_dir, region, grid_size, threads, vcf_ `contig:start-end-buffer`, otherwise all contigs will be imputed. If providing additional STITCH arguments, they must be in quotes and in the `--option=value` format, without spaces (e.g. `"--switchModelIteration=39"`). 
""" - workflow = Workflow("impute", "impute.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("impute", "impute.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.reports = ["impute.qmd", "stitch_collate.qmd"] workflow.conda = ["report", "stitch"] diff --git a/harpy/commands/metassembly.py b/harpy/commands/metassembly.py index 820b91a87..fd68186f6 100644 --- a/harpy/commands/metassembly.py +++ b/harpy/commands/metassembly.py @@ -39,9 +39,9 @@ def metassembly(fastq_r1, fastq_r2, bx_tag, kmer_length, max_memory, unlinked, o separated by commas and without spaces (e.g. `-k 15,23,51`). It is strongly recommended to first deconvolve the input FASTQ files with `harpy deconvolve`. """ - workflow = Workflow("metassembly","metassembly.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) - workflow.conda = ["align", "assembly", "metassembly", "qc", "spades"] + workflow = Workflow("metassembly","metassembly.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) + workflow.conda = ["align", "assembly", "metassembly", "qc"] ## checks and validations ## fastq = FASTQ([fastq_r1,fastq_r2]) diff --git a/harpy/commands/phase.py b/harpy/commands/phase.py index 3cf848536..b1d513570 100644 --- a/harpy/commands/phase.py +++ b/harpy/commands/phase.py @@ -43,8 +43,8 @@ def phase(vcf, inputs, output_dir, threads, unlinked, min_map_quality, min_base_ information with `-U`. Use `--vcf-samples` to phase only the samples present in your input `VCF` file rather than all the samples present in the `INPUT` alignments. 
""" - workflow = Workflow("phase", "phase.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("phase", "phase.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.reports = ["hapcut.qmd"] workflow.conda = ["phase", "report"] diff --git a/harpy/commands/qc.py b/harpy/commands/qc.py index 21c9d2104..813ee91fe 100644 --- a/harpy/commands/qc.py +++ b/harpy/commands/qc.py @@ -46,8 +46,8 @@ def qc(inputs, output_dir, unlinked, min_length, max_length, trim_adapters, dedu - `-d` removes optical PCR duplicates - recommended to skip at this step in favor of barcode-assisted deduplication after alignment """ - workflow = Workflow("qc", "qc.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("qc", "qc.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.reports = ["qc_bx_stats.qmd"] workflow.conda = ["qc", "report"] diff --git a/harpy/commands/resume.py b/harpy/commands/resume.py index 862608d8e..45539d5a7 100644 --- a/harpy/commands/resume.py +++ b/harpy/commands/resume.py @@ -5,29 +5,28 @@ import re import yaml import rich_click as click -from harpy.common.conda import check_environments, create_conda_recipes +from harpy.common.conda import check_environments from harpy.common.printing import print_error, workflow_info from harpy.common.workflow import Workflow @click.command(no_args_is_help = True, context_settings={"allow_interspersed_args" : False}, epilog = "Documentation: https://pdimens.github.io/harpy/workflows/other") -@click.option('-c', '--conda', is_flag = True, default = False, help = 'Recreate the conda environments') @click.option('-a', '--absolute', is_flag = True, default = False, help = 'Call Snakemake with absolute paths') @click.option('-t', '--threads', type = click.IntRange(2, 999, clamp = True), help = 'Change the number of threads (>1)') 
@click.option('--quiet', default = 0, type = click.IntRange(0,2,clamp=True), help = '`0` all output, `1` progress bar, `2` no output')
 @click.argument('directory', required=True, type=click.Path(exists=True, file_okay=False, readable=True, resolve_path=True), nargs=1)
-def resume(directory, conda, absolute, threads, quiet):
+def resume(directory, absolute, threads, quiet):
     """
     Continue an incomplete Harpy workflow
 
     In the event you need to run the Snakemake workflow present in a Harpy output directory
     (e.g. `Align/bwa`) without Harpy redoing validations and rewriting any of the configuration
     files, this command bypasses all the preprocessing steps of Harpy workflows and executes the Snakemake command
-    present in `directory/workflow/workflow.yaml`. It will reuse an existing `workflow/envs/` folder
-    to validate software dependencies, otherwise use `--conda` to create a populated one.
+    present in `directory/workflow/workflow.yaml`.
     The only requirements are:
     - the target directory has `workflow/config.yaml` present in it
-    - the targest directory has `workflow/envs/*.yaml` present in it
+    - the target directory has `workflow/workflow.yaml` present in it
+    - the target directory has `workflow/envs/*.yaml` present in it (if using conda)
     """
     CONFIG_FILE = os.path.join(directory, "workflow", "workflow.yaml")
     PROFILE_FILE = os.path.join(directory, "workflow", "config.yaml")
@@ -37,16 +36,15 @@ def resume(directory, conda, absolute, threads, quiet):
         print_error("missing workflow config", f"Target directory [yellow]{directory}[/] does not contain the file [blue]workflow/workflow.yaml[/]")
     with open(CONFIG_FILE, 'r', encoding="utf-8") as f:
-        harpy_config = yaml.full_load(f)
+        harpy_config: dict = yaml.full_load(f)
     with open(PROFILE_FILE, 'r', encoding="utf-8") as f:
-        snakemake_config = yaml.full_load(f)
+        snakemake_config: dict = yaml.full_load(f)
 
-    workflow = Workflow(harpy_config["workflow"], "NA", snakemake_config["directory"], quiet)
+    container = 
snakemake_config["software-deployment-method"] == "apptainer" + workflow = Workflow(harpy_config["workflow"], "NA", snakemake_config["directory"], container, quiet) workflow.conda = harpy_config["snakemake"]["conda_envs"] - if conda: - create_conda_recipes(directory, workflow.conda) - else: + if not container: check_environments(directory, workflow.conda) sm_log = os.path.join(directory, harpy_config["snakemake"]["log"]) @@ -64,8 +62,8 @@ def resume(directory, conda, absolute, threads, quiet): workflow.snakemake_cmd_absolute = harpy_config["snakemake"]["absolute"] workflow.snakemake_cmd_relative = harpy_config["snakemake"]["relative"] - # pull in the inputs and store them, removing the original so it doesn't g - workflow.inputs = harpy_config["inputs"] + # pull in the inputs and store them, removing the original + workflow.inputs = harpy_config.pop("inputs") workflow.config = harpy_config workflow.start_text = workflow_info( ("Workflow:", workflow.name.replace("_", " ")), diff --git a/harpy/commands/simulate_variants.py b/harpy/commands/simulate_variants.py index 51347d09b..22b90211a 100644 --- a/harpy/commands/simulate_variants.py +++ b/harpy/commands/simulate_variants.py @@ -61,8 +61,8 @@ def snpindel(genome, snp_vcf, indel_vcf, only_vcf, output_dir, prefix, snp_count | `--snp-ratio` | transitions / transversions | transit. only | transv. only | | `--indel-ratio` | insertions / deletions | insert. only | delet. 
only | """ - workflow = Workflow("simulate_snpindel", "simulate_snpindel.smk", output_dir, quiet, True) - workflow.setup_snakemake(container, 2, hpc, snakemake) + workflow = Workflow("simulate_snpindel", "simulate_snpindel.smk", output_dir, container, quiet, True) + workflow.setup_snakemake(2, hpc, snakemake) workflow.conda = ["simulations"] ## checks and validations ## @@ -156,8 +156,8 @@ def inversion(genome, vcf, only_vcf, prefix, output_dir, count, min_size, max_si To simulate a diploid genome with heterozygous and homozygous variants, set `--heterozygosity` to a value greater than `0`. Use `--only-vcf` alongside `--heterozygosity` to only generate the second VCF file and not simulate a second FASTA file. """ - workflow = Workflow("simulate_inversion", "simulate_variants.smk", output_dir, quiet, True) - workflow.setup_snakemake(container, 2, hpc, snakemake) + workflow = Workflow("simulate_inversion", "simulate_variants.smk", output_dir, container, quiet, True) + workflow.setup_snakemake(2, hpc, snakemake) workflow.conda = ["simulations"] ## checks and validations ## @@ -247,8 +247,8 @@ def cnv(genome, output_dir, vcf, only_vcf, prefix, count, min_size, max_size, du | `--dup-ratio` | tandem / dispersed | tand. only | disp. only | | `--gain-ratio` | copy gain / loss | gain only | loss only | """ - workflow = Workflow("simulate_cnv", "simulate_variants.smk", output_dir, quiet, True) - workflow.setup_snakemake(container, 2, hpc, snakemake) + workflow = Workflow("simulate_cnv", "simulate_variants.smk", output_dir, container, quiet, True) + workflow.setup_snakemake(2, hpc, snakemake) workflow.conda = ["simulations"] ## checks and validations ## @@ -329,8 +329,8 @@ def translocation(genome, output_dir, prefix, vcf, only_vcf, count, centromeres, To simulate a diploid genome with heterozygous and homozygous variants, set `--heterozygosity` to a value greater than `0`. 
Use `--only-vcf` alongside `--heterozygosity` to only generate the second VCF file and not simulate a second FASTA file. """ - workflow = Workflow("simulate_translocation", "simulate_variants.smk", output_dir, quiet, True) - workflow.setup_snakemake(container, 2, hpc, snakemake) + workflow = Workflow("simulate_translocation", "simulate_variants.smk", output_dir, container, quiet, True) + workflow.setup_snakemake(2, hpc, snakemake) workflow.conda = ["simulations"] ## checks and validations ## diff --git a/harpy/commands/snp.py b/harpy/commands/snp.py index 292b0f541..90425f2b7 100644 --- a/harpy/commands/snp.py +++ b/harpy/commands/snp.py @@ -56,8 +56,8 @@ def freebayes(reference, inputs, output_dir, threads, populations, ploidy, regio Optionally specify `--populations` for population-aware variant calling (**harpy template** can create that file). """ - workflow = Workflow("snp_freebayes", "snp_freebayes.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("snp_freebayes", "snp_freebayes.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.reports = ["bcftools_stats.qmd"] workflow.conda = ["report", "variants"] @@ -129,8 +129,8 @@ def mpileup(reference, inputs, output_dir, regions, threads, populations, ploidy Optionally specify `--populations` for population-aware variant calling (**harpy template** can create that file). 
""" - workflow = Workflow("snp_mpileup", "snp_mpileup.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("snp_mpileup", "snp_mpileup.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.reports = ["bcftools_stats.qmd"] workflow.conda = ["report"] diff --git a/harpy/commands/sv.py b/harpy/commands/sv.py index 683d7646a..def966a5f 100644 --- a/harpy/commands/sv.py +++ b/harpy/commands/sv.py @@ -59,8 +59,8 @@ def leviathan(inputs, output_dir, reference, min_size, min_barcodes, iterations, have to be the same across the different size classes. """ vcaller = "sv_leviathan" if not populations else "sv_leviathan_pop" - workflow = Workflow("sv_leviathan", f"{vcaller}.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("sv_leviathan", f"{vcaller}.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.reports = ["leviathan.qmd"] if populations: workflow.reports.append("leviathan_pop.qmd") @@ -142,8 +142,8 @@ def naibr(inputs, output_dir, reference, vcf, min_size, min_barcodes, min_qualit """ vcaller = "sv_naibr" if not populations else "sv_naibr_pop" vcaller += "_phase" if vcf else "" - workflow = Workflow("sv_naibr", f"{vcaller}.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("sv_naibr", f"{vcaller}.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.reports = ["naibr.qmd"] if populations: workflow.reports.append("naibr_pop.qmd") diff --git a/harpy/commands/validate.py b/harpy/commands/validate.py index 6a173e373..337aa640b 100755 --- a/harpy/commands/validate.py +++ b/harpy/commands/validate.py @@ -41,8 +41,8 @@ def bam(inputs, output_dir, threads, snakemake, quiet, hpc, container, setup_onl fix your data, but it will report the number of records that feature errors to 
help you diagnose if file formatting will cause downstream issues. """ - workflow = Workflow("validate_bam", "validate_bam.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("validate_bam", "validate_bam.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.reports = ["validate_bam.qmd"] workflow.conda = ["report"] @@ -86,8 +86,8 @@ def fastq(inputs, output_dir, threads, snakemake, quiet, hpc, container, setup_o of `TAG:TYPE:VALUE`. This **will not** fix your data, but it will report the number of reads that feature errors to help you diagnose if file formatting will cause downstream issues. """ - workflow = Workflow("validate_fastq", "validate_fastq.smk", output_dir, quiet) - workflow.setup_snakemake(container, threads, hpc, snakemake) + workflow = Workflow("validate_fastq", "validate_fastq.smk", output_dir, container, quiet) + workflow.setup_snakemake(threads, hpc, snakemake) workflow.reports = ["validate_fastq.qmd"] workflow.conda = ["report"] diff --git a/harpy/common/cli_types_params.py b/harpy/common/cli_types_params.py index 237726c31..ba46ecdb4 100644 --- a/harpy/common/cli_types_params.py +++ b/harpy/common/cli_types_params.py @@ -202,7 +202,7 @@ class NaibrParams(click.ParamType): name = "naibr_params" def convert(self, value, param, ctx): harpy_options = "bam_file prefix outdir threads min_mapq d min_sv k".split() - valid_options = "blacklist candidates".split() + valid_options = "blacklist candidates min_discs min_reads sd_mult".split() opts = 0 docs = "https://github.com/pontushojer/NAIBR?tab=readme-ov-file#running-naibr" clean_args = [] diff --git a/harpy/common/conda.py b/harpy/common/conda.py index ec0482a0b..308179076 100644 --- a/harpy/common/conda.py +++ b/harpy/common/conda.py @@ -63,11 +63,7 @@ def create_conda_recipes(outdir: str, envs: list= []) -> None: "r::r-biocircos" ], "simulations" : [ - "bioconda::mimick>=2.3", - 
"bioconda::simug>1.0.0", - ], - "spades" : [ - "conda-forge::python=3" + "bioconda::simug>1.0.0" ], "stitch" : [ "bioconda::r-stitch>=1.8.4" diff --git a/harpy/common/create_pixi.py b/harpy/common/create_pixi.py new file mode 100755 index 000000000..566cd1a1b --- /dev/null +++ b/harpy/common/create_pixi.py @@ -0,0 +1,124 @@ +#! /usr/bin/env python + +import shutil +import subprocess +import os + +environ = { + "align" : [ + "bwa-mem2", + "bwa", + "samtools==1.22", + "seqtk", + "strobealign", + "tabix" + ], + "assembly" : [ + "arcs", + "bwa", + "cloudspades", + "links", + "quast", + "busco", + "samtools", + "tigmint" + ], + "deconvolution" : [ + "quickdeconvolution" + ], + + "demultiplex": [ + "dmox>=0.2" + ], + "metassembly": [ + "athena_meta==1.2" + ], + "phase" : [ + "hapcut2", + "whatshap" + ], + "qc" : [ + "click==8.2.1", + "falco==1.2.5", + "fastp", + "multiqc==1.30", + "pysam==0.23" + ], + "report" : [ + "quarto", + "r-dt", + "r-dplyr", + "r-highcharter", + "r-magrittr", + "r-plotly", + "r-scales", + "r-tidyr", + "r-viridislite", + "r-xml2", + "r-biocircos" + ], + "simulations" : [ + "simug>=1.0.1" + ], + "stitch" : [ + "r-stitch>=1.8.4" + ], + "variants" : [ + "bcftools==1.22", + "freebayes==1.3.9", + "leviathan", + "naibr-plus", + "setuptools" + ] +} + +dockerfile_text = """ +FROM ghcr.io/prefix-dev/pixi:0.56.0 AS build + +# copy source code, pixi.toml and pixi.lock to the container +WORKDIR /app +COPY . . 
+ +# use `--locked` to ensure the lockfile is up to date with pixi.toml +RUN pixi install --locked && rm -rf ~/.cache/rattler + +# create the shell-hook bash script to activate the environment +RUN echo "#!/bin/bash" > /app/entrypoint.sh && \\ + pixi shell-hook -s bash >> /app/entrypoint.sh && \\ + echo 'exec "$@"' >> /app/entrypoint.sh && \\ + chmod +x /app/entrypoint.sh + +FROM ubuntu:24.04 AS production +WORKDIR /app +COPY --from=build --chmod=0755 /app/entrypoint.sh /app/entrypoint.sh +COPY --from=build /app/.pixi/envs/default /app/.pixi/envs/default + +ENTRYPOINT ["/app/entrypoint.sh"] +""" + +def create_pixi_dockerfiles(): + ''' + Using the defined environments, create a series of folders where each has a dockerfile + and pixi.toml file to create one of the environments. + ''' + shutil.rmtree("container", ignore_errors=True) + for env,deps in environ.items(): + os.makedirs(f"container/{env}", exist_ok=True) + with open(f"container/{env}/Dockerfile", "w") as dockerfile: + dockerfile.write(dockerfile_text) + if env == "report": + subprocess.run( + f"pixi init container/{env} -c conda-forge -c r".split(), + check = True + ) + else: + subprocess.run( + f"pixi init container/{env} -c conda-forge -c bioconda".split(), + check = True + ) + + subprocess.run( + ["pixi", "add", "--no-progress", "--manifest-path", f"container/{env}/pixi.toml", *deps], + check = True + ) + shutil.rmtree("container/.pixi", ignore_errors=True) diff --git a/harpy/common/file_ops.py b/harpy/common/file_ops.py index e58349d94..62a4ed43d 100644 --- a/harpy/common/file_ops.py +++ b/harpy/common/file_ops.py @@ -5,25 +5,6 @@ import gzip import shutil from pathlib import Path -import importlib.resources as resources -from harpy.common.printing import print_error - -def fetch_snakefile(workdir: str, target: str) -> None: - """ - Retrieve the target harpy rule and write it into the workdir as workflow.smk - """ - os.makedirs(workdir, exist_ok= True) - dest_file = os.path.join(workdir,"workflow.smk") 
- source_file = resources.files("harpy.snakefiles") / target - try: - with resources.as_file(source_file) as _source: - shutil.copy2(_source, dest_file) - except (FileNotFoundError, KeyError): - print_error( - "snakefile missing", - f"The required snakefile [blue bold]{target}[/] was not found in the Harpy installation.", - "There may be an issue with your Harpy installation, which would require reinstalling Harpy. Alternatively, there may be an issue with your conda/mamba environment or configuration." - ) def filepath(infile: str) -> str: """returns a posix-formatted absolute path of infile""" diff --git a/harpy/common/launch.py b/harpy/common/launch.py index de773cb88..5b20dbce6 100644 --- a/harpy/common/launch.py +++ b/harpy/common/launch.py @@ -5,9 +5,9 @@ import os import sys import subprocess -from rich.table import Table from rich import box from rich.syntax import Syntax +from rich.table import Table from harpy.common.file_ops import gzip_file, purge_empty_logs from harpy.common.printing import CONSOLE, print_onerror, print_setup_error from harpy.common.progress import harpy_progressbar, harpy_pulsebar, harpy_progresspanel @@ -66,7 +66,6 @@ def print_shellcmd(text: str, _process): CONSOLE.print("[red]" + re.sub(r'\n{3,}', '\n\n', merged_text), overflow = "ignore", crop = False) return _process.stderr.readline() - def highlight_params(text: str): """make important snakemake attributes like 'input:' highlighted in the error output""" text = text.removeprefix(" ").rstrip() @@ -93,7 +92,7 @@ def highlight_params(text: str): return f"\n[blue]{text}[/]" return text -def launch_snakemake(sm_args, workflow, outdir, sm_logfile, quiet, CONSOLE = CONSOLE): +def launch_snakemake(sm_args, outdir, sm_logfile, quiet, CONSOLE = CONSOLE): """launch snakemake with the given commands""" exitcode = None sm_start = datetime.now() diff --git a/harpy/common/printing.py b/harpy/common/printing.py index 955451719..ace99b593 100644 --- a/harpy/common/printing.py +++ 
b/harpy/common/printing.py @@ -2,9 +2,11 @@ import time as _time import os +import re import sys from rich.console import Console, RenderableType from rich import box +from rich.syntax import Syntax from rich.table import Table from rich.panel import Panel @@ -114,6 +116,23 @@ def print_onerror(logfile: str, time) -> None: CONSOLE.print(datatable) CONSOLE.rule("[bold]Where Error Occurred", style = "red") +def print_shellcmd_simple(text): + _table = Table( + show_header=False, + pad_edge=False, + show_edge=False, + padding=(0,0), + box=box.SIMPLE, + ) + _table.add_column("Lpadding", justify="left") + _table.add_column("shell", justify="left") + _table.add_column("Rpadding", justify="left") + + text = re.sub(r' {2,}|\t+', ' ', text) + cmd = Syntax(text, lexer = "bash", tab_size=2, word_wrap=True, padding=1, dedent=True, theme = "paraiso-dark") + _table.add_row(" ", cmd, " ") + CONSOLE.print("[bold default]shell:", _table) + def workflow_info(*arg: tuple[str, str | int | float]|None) -> Table: """ Accepts an unlimited number of length-2 lists or tuples and returns a rich.Table with the value of the first indices as the row names and the second indices as the values diff --git a/harpy/common/system_ops.py b/harpy/common/system_ops.py index d1cf57f14..81f404af3 100644 --- a/harpy/common/system_ops.py +++ b/harpy/common/system_ops.py @@ -104,12 +104,13 @@ def container_ok(ctx, param, value) -> bool: Check if the system is linux or has apptainer installed """ if value: - if os.sys.platform != 'linux': + if shutil.which("apptainer"): + return value + + if platform.system().lower() != 'linux': raise click.BadParameter( "Snakemake uses Apptainer (formerly Singularity) to manage containers, which is only available for Linux systems.", ctx, param ) - if shutil.which("apptainer"): - return value else: raise click.BadParameter( "Container software management requires apptainer, which wasn't detected in this environment.", ctx, param diff --git a/harpy/common/workflow.py 
b/harpy/common/workflow.py index bb1793f66..3d003d2e6 100644 --- a/harpy/common/workflow.py +++ b/harpy/common/workflow.py @@ -10,11 +10,10 @@ import urllib.request import urllib.error import yaml -from rich import print as rprint from rich import box from rich.table import Table from harpy.common.conda import create_conda_recipes -from harpy.common.file_ops import filepath, gzip_file, fetch_snakefile, purge_empty_logs +from harpy.common.file_ops import filepath, gzip_file, purge_empty_logs from harpy.common.printing import CONSOLE, print_error from harpy.common.launch import launch_snakemake from harpy.common.summaries import Summary @@ -23,9 +22,11 @@ class Workflow(): ''' The container for workflow parameters. Set inputdir = True to create a workflow/input directory ''' - def __init__(self, name, snakefile, outdir, quiet, inputdir = False): - creatdir = os.path.join(outdir, 'workflow') if not inputdir else os.path.join(outdir, 'workflow', 'input') - os.makedirs(creatdir, exist_ok = True) + def __init__(self, name, snakefile, outdir, container, quiet, inputdir = False): + os.makedirs( + os.path.join(outdir, 'workflow') if not inputdir else os.path.join(outdir, 'workflow', 'input'), + exist_ok = True + ) self.name: str = name self.output_directory: str = outdir self.workflow_directory = os.path.join(outdir, 'workflow') @@ -39,6 +40,7 @@ def __init__(self, name, snakefile, outdir, quiet, inputdir = False): self.config: Dict = {} self.profile: Dict = {} self.hpc: str = "" + self.container: bool = container self.conda: list[str] = [] self.start_text: None|Table = None self.quiet: bool = quiet @@ -56,7 +58,7 @@ def snakemake_log(self, outdir: str, workflow: str) -> str: increment = sorted([int(i.split(".")[1]) for i in attempts])[-1] + 1 return os.path.join("logs", "snakemake", f"{workflow}.{increment}.{timestamp}") - def setup_snakemake(self, container: bool, threads: int, hpc: str|None = None, sm_extra: str|None = None): + def setup_snakemake(self, threads: int, 
hpc: str|None = None, sm_extra: str|None = None): """ Sets up the snakemake command based on hpc, threads, and extra snakemake params. """ @@ -81,7 +83,7 @@ def setup_snakemake(self, container: bool, threads: int, hpc: str|None = None, s "rerun-triggers": ["mtime", "params"], "scheduler": "greedy", "nolock": True, - "software-deployment-method": "conda" if not container else ["conda", "apptainer"], + "software-deployment-method": "conda" if not self.container else "apptainer", "conda-prefix": filepath("./.environments"), "conda-cleanup-pkgs": "cache", "apptainer-prefix": filepath("./.environments"), @@ -164,10 +166,21 @@ def fetch_report_configs(self): ) def fetch_snakefile(self): - """ - Retrieve the target harpy rule and write it into the workdir as workflow.smk - """ - fetch_snakefile(self.workflow_directory, self.snakefile) + """ + Retrieve the target harpy rule and write it into the workdir as workflow.smk + """ + os.makedirs(self.workflow_directory, exist_ok= True) + dest_file = os.path.join(self.workflow_directory,"workflow.smk") + source_file = resources.files("harpy.snakefiles") / self.snakefile + try: + with resources.as_file(source_file) as _source: + shutil.copy2(_source, dest_file) + except (FileNotFoundError, KeyError): + print_error( + "snakefile missing", + f"The required snakefile [blue bold]{self.snakefile}[/] was not found in the Harpy installation.", + "There may be an issue with your Harpy installation, which would require reinstalling Harpy. Alternatively, there may be an issue with your conda/mamba environment or configuration." 
+ ) def fetch_script(self, target: str) -> None: """ @@ -180,7 +193,7 @@ def fetch_script(self, target: str) -> None: shutil.copy2(_source, dest_file) except (FileNotFoundError, KeyError): print_error( - "snakefile missing", + "script missing", f"The required script [blue bold]{target}[/] was not found in the Harpy installation.", "There may be an issue with your Harpy installation, which would require reinstalling Harpy. Alternatively, there may be an issue with your conda/mamba environment or configuration." ) @@ -226,7 +239,7 @@ def print_onstart(self): """Print a panel of info on workflow run""" if self.quiet == 2: return - rprint("") + CONSOLE.print("") CONSOLE.rule("[bold]harpy " + self.name.replace("_", " "), style = "light_steel_blue") CONSOLE.print(self.start_text) @@ -250,7 +263,8 @@ def initialize(self, setup_only: bool = False): """Using the configurations, create all necessary folders and files""" self.write_workflow_config() self.write_snakemake_profile() - create_conda_recipes(self.output_directory, self.conda) + if not self.container: + create_conda_recipes(self.output_directory, self.conda) self.fetch_snakefile() for i in self.reports: self.fetch_report(i) @@ -269,7 +283,7 @@ def launch(self, absolute:bool = False): cmd = self.snakemake_cmd_absolute if absolute else self.snakemake_cmd_relative try: - launch_snakemake(cmd, self.workflow_directory, self.output_directory, self.snakemake_logfile, self.quiet) + launch_snakemake(cmd, self.output_directory, self.snakemake_logfile, self.quiet) finally: with open(os.path.join(self.output_directory, "workflow", f"{self.name.replace('_','.')}.summary"), "w") as f_out: f_out.write(Summary(self.config).get_text()) diff --git a/harpy/reports/align_bxstats.qmd b/harpy/reports/align_bxstats.qmd index 45284e3bf..c306c7e22 100644 --- a/harpy/reports/align_bxstats.qmd +++ b/harpy/reports/align_bxstats.qmd @@ -5,17 +5,11 @@ params: --- `r format(Sys.time(), '🗓️ %d %B, %Y 🕔 %H:%M')` -```{r imports, results = F} 
-using<-function(...) { - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("dplyr","tidyr", "highcharter", "DT") +```{r package_imports, results = F} + library(dplyr) +library(tidyr) +library(highcharter) +library(DT) ``` ```{r nxx_and_process_funs, results = F} diff --git a/harpy/reports/align_stats.qmd b/harpy/reports/align_stats.qmd index 53deafc9b..999d75dbe 100644 --- a/harpy/reports/align_stats.qmd +++ b/harpy/reports/align_stats.qmd @@ -22,16 +22,10 @@ params: # Barcode Stats ```{r package_imports, results = F} -using<-function(...) { - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("dplyr","highcharter","DT","BioCircos") +library(dplyr) +library(highcharter) +library(DT) +library(BioCircos) ``` ```{r import_file, results = F} diff --git a/harpy/reports/bcftools_stats.qmd b/harpy/reports/bcftools_stats.qmd index 23385e578..5b22de728 100644 --- a/harpy/reports/bcftools_stats.qmd +++ b/harpy/reports/bcftools_stats.qmd @@ -6,17 +6,12 @@ params: --- `r format(Sys.time(), '🗓️ %d %B, %Y 🕔 %H:%M')` -```{r load environment} -using<-function(...) 
{ - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("magrittr","tidyr","DT","highcharter","scales") +```{r package_imports, results = F} +library(magrittr) +library(tidyr) +library(DT) +library(highcharter) +library(scales) ``` ```{r} diff --git a/harpy/reports/hapcut.qmd b/harpy/reports/hapcut.qmd index d09992593..2c2dc580a 100644 --- a/harpy/reports/hapcut.qmd +++ b/harpy/reports/hapcut.qmd @@ -6,17 +6,12 @@ params: --- `r format(Sys.time(), '🗓️ %d %B, %Y 🕔 %H:%M')` -```{r package_imports} -using<-function(...) { - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("dplyr","DT","scales","plotly","highcharter") +```{r package_imports, results = F} +library(dplyr) +library(DT) +library(highcharter) +library(plotly) +library(scales) ``` ```{r read_input} diff --git a/harpy/reports/impute.qmd b/harpy/reports/impute.qmd index fc6e4a39c..c87530078 100644 --- a/harpy/reports/impute.qmd +++ b/harpy/reports/impute.qmd @@ -17,17 +17,12 @@ params: # General Stats -```{r package_imports} -using<-function(...) 
{ - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("dplyr","tidyr","DT","highcharter","scales") +```{r package_imports, results = F} +library(dplyr) +library(tidyr) +library(DT) +library(highcharter) +library(scales) ``` ```{r read_input} diff --git a/harpy/reports/leviathan.qmd b/harpy/reports/leviathan.qmd index 9184c9c65..7e0bfd683 100644 --- a/harpy/reports/leviathan.qmd +++ b/harpy/reports/leviathan.qmd @@ -16,17 +16,11 @@ params: --- `r format(Sys.time(), '🗓️ %d %B, %Y 🕔 %H:%M')` -```{r package_imports} -using<-function(...) { - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("dplyr","DT","BioCircos", "htmltools") +```{r package_imports, results = F} +library(BioCircos) +library(dplyr) +library(DT) +library(htmltools) ``` # General Stats diff --git a/harpy/reports/leviathan_pop.qmd b/harpy/reports/leviathan_pop.qmd index e44f0117b..ce3326210 100644 --- a/harpy/reports/leviathan_pop.qmd +++ b/harpy/reports/leviathan_pop.qmd @@ -15,17 +15,13 @@ params: --- `r format(Sys.time(), '🗓️ %d %B, %Y 🕔 %H:%M')` -```{r package_imports} -using<-function(...) 
{ - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("dplyr", "tidyr","DT", "viridisLite", "BioCircos", "tools") +```{r package_imports, results = F} +library(BioCircos) +library(dplyr) +library(DT) +library(tidyr) +library(tools) +library(viridisLite) ``` ```{r read_inputs} diff --git a/harpy/reports/naibr.qmd b/harpy/reports/naibr.qmd index fbd49a4fe..ffab45b24 100644 --- a/harpy/reports/naibr.qmd +++ b/harpy/reports/naibr.qmd @@ -39,17 +39,11 @@ tryCatch( ) ``` -```{r package_imports} -using<-function(...) { - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("dplyr","tidyr","DT", "BioCircos") +```{r package_imports, results = F} +library(BioCircos) +library(dplyr) +library(DT) +library(tidyr) ``` ```{r process_sv} diff --git a/harpy/reports/naibr_pop.qmd b/harpy/reports/naibr_pop.qmd index 515d7ed4c..eb43e741d 100644 --- a/harpy/reports/naibr_pop.qmd +++ b/harpy/reports/naibr_pop.qmd @@ -16,17 +16,13 @@ params: `r format(Sys.time(), '🗓️ %d %B, %Y 🕔 %H:%M')` # General Stats -```{r package_imports} -using<-function(...) 
{ - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("dplyr", "tidyr","DT","BioCircos", "viridisLite", "tools") +```{r package_imports, results = F} +library(BioCircos) +library(dplyr) +library(DT) +library(tidyr) +library(tools) +library(viridisLite) ``` ```{r readvariants_func} diff --git a/harpy/reports/qc_bx_stats.qmd b/harpy/reports/qc_bx_stats.qmd index 2b90a3a30..ab0f1a327 100644 --- a/harpy/reports/qc_bx_stats.qmd +++ b/harpy/reports/qc_bx_stats.qmd @@ -5,18 +5,14 @@ params: --- `r format(Sys.time(), '🗓️ %d %B, %Y 🕔 %H:%M')` -```{r package_imports} -using<-function(...) { - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("dplyr","tidyr","DT","highcharter","scales") +```{r package_imports, results = F} +library(dplyr) +library(DT) +library(highcharter) +library(scales) +library(tidyr) ``` + # Overview ##

General Per-Sample Barcode Statistics

diff --git a/harpy/reports/stitch_collate.qmd b/harpy/reports/stitch_collate.qmd index a8f7210ec..edee6d8b3 100644 --- a/harpy/reports/stitch_collate.qmd +++ b/harpy/reports/stitch_collate.qmd @@ -13,17 +13,12 @@ params: extra: "None" --- `r format(Sys.time(), '🗓️ %d %B, %Y 🕔 %H:%M')` -```{r setup environment} -using<-function(...) { - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("tidyr","magrittr","DT", "scales") + +```{r package_imports, results = F} +library(DT) +library(magrittr) +library(tidyr) +library(scales) ``` ```{r load data} diff --git a/harpy/reports/validate_bam.qmd b/harpy/reports/validate_bam.qmd index 4bc95e609..cdaa26222 100644 --- a/harpy/reports/validate_bam.qmd +++ b/harpy/reports/validate_bam.qmd @@ -6,17 +6,11 @@ params: --- _`r format(Sys.time(), '🗓️ %d %B, %Y 🕔 %H:%M')`_ -```{r package_imports} -using<-function(...) { - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("dplyr","tidyr","DT","scales") +```{r package_imports, results = F} +library(dplyr) +library(DT) +library(tidyr) +library(scales) ``` ```{r read_inputs} diff --git a/harpy/reports/validate_fastq.qmd b/harpy/reports/validate_fastq.qmd index 71c5dfe86..7acba5335 100644 --- a/harpy/reports/validate_fastq.qmd +++ b/harpy/reports/validate_fastq.qmd @@ -6,17 +6,11 @@ params: --- `r format(Sys.time(), '🗓️ %d %B, %Y 🕔 %H:%M')` -```{r} -using<-function(...) 
{ - libs<-unlist(list(...)) - req<-unlist(lapply(libs,require,character.only=TRUE)) - need<-libs[req==FALSE] - if(length(need)>0){ - install.packages(need, repos = "https://cloud.r-project.org/") - lapply(need,require,character.only=TRUE) - } -} -using("dplyr","tidyr","DT","scales") +```{r package_imports, results = F} +library(dplyr) +library(DT) +library(scales) +library(tidyr) ``` ```{r} diff --git a/harpy/scripts/separate_singletons.py b/harpy/scripts/separate_singletons.py deleted file mode 100755 index 92402876b..000000000 --- a/harpy/scripts/separate_singletons.py +++ /dev/null @@ -1,81 +0,0 @@ -#! /usr/bin/env python - -import os -import re -import sys -import argparse -import subprocess -import pysam - -def main(): - parser = argparse.ArgumentParser( - prog='separate_singletons', - description='Isolate singleton and non-singleton linked-read BAM records into separate files.', - usage = "separate_singletons -t threads -b barcode_tag -s singletons.bam input.bam > output.bam", - ) - parser.add_argument("-b", dest = "bx_tag", metavar = "barcode_tag", type=str, default = "BX", help="The header tag with the barcode (default: %(default)s)") - parser.add_argument("-s", dest = "singletons", metavar = "singletons_file", type=str, default = "singletons.bam", help="Name of output singleton file (default: %(default)s)") - parser.add_argument("-t", dest = "threads", metavar="threads", type=int, default = 4, help="Number of threads to use (default: %(default)s)") - parser.add_argument('input', type = str, help = "Input bam file") - if len(sys.argv) == 1: - parser.print_help(sys.stderr) - sys.exit(1) - - args = parser.parse_args() - if args.threads <1: - parser.error(f"Threads supplied to -t ({args.threads}) must be positive (e.g. 
>=1)") - if not os.path.exists(args.input): - parser.error(f"{args.input} was not found") - if len(args.bx_tag) != 2 or not args.bx_tag.isalnum(): - parser.error(f"The header tag supplied to -b ({args.bx_tag}) must be alphanumeric and exactly two characters long") - - invalid_pattern = re.compile(r'[AaBbCcDd]00') - sorted_bam = f"{args.input[:-4]}.bxsort.bam" - result = subprocess.run(f"samtools sort -@ {args.threads} -o {sorted_bam} -t {args.bx_tag} {args.input}".split(), stderr=sys.stderr) - if result.returncode != 0: - sys.stderr.write(f"Error: samtools sort failed with exit code {result.returncode}\n") - sys.exit(1) - with ( - pysam.AlignmentFile(sorted_bam, "rb", check_sq=False) as infile, - pysam.AlignmentFile(sys.stdout, "wb", template=infile) as nonsingleton, - pysam.AlignmentFile(args.singletons, "wb", template=infile) as singleton, - ): - record_store = [] - read_count = 0 - last_barcode = None - for record in infile: - try: - barcode = record.get_tag(args.bx_tag) - if isinstance(barcode, int): - pass # an int from an MI-type tag - elif invalid_pattern.search(barcode): - continue - except KeyError: - continue - # write the stored records when the barcode changes - if last_barcode and barcode != last_barcode: - target_file = nonsingleton if read_count > 1 else singleton - for record in record_store: - target_file.write(record) - - # reset the record store and read count - record_store = [] - read_count = 0 - - record_store.append(record) - if record.is_forward: - # +1 for a forward read, whether it is paired or not - read_count += 1 - elif record.is_reverse and not record.is_paired: - # +1 for reverse only if it's unpaired, so the paired read doesn't count twice - read_count += 1 - # update the last barcode with the current one - last_barcode = barcode - # After the for loop ends - if record_store: - target_file = nonsingleton if read_count > 1 else singleton - for i in record_store: - target_file.write(i) - - # final housekeeping to remove intermediate - 
os.remove(sorted_bam) \ No newline at end of file diff --git a/harpy/snakefiles/align_bwa.smk b/harpy/snakefiles/align_bwa.smk index 77f9a3ed7..9bb40972f 100644 --- a/harpy/snakefiles/align_bwa.smk +++ b/harpy/snakefiles/align_bwa.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -47,6 +45,8 @@ rule preprocess_reference: genome_zip conda: "envs/align.yaml" + container: + "docker://pdimens/harpy:align_latest" shell: """ {{ @@ -103,6 +103,8 @@ rule align: min(6, workflow.cores - 1) conda: "envs/align.yaml" + container: + "docker://pdimens/harpy:align_latest" shell: """ {{ @@ -117,8 +119,6 @@ rule standardize_barcodes: temp("samples/{sample}/{sample}.standard.sam") log: "logs/{sample}.standardize.log" - container: - None shell: "standardize_barcodes_sam > {output} 2> {log} < {input}" @@ -138,8 +138,6 @@ rule mark_duplicates: quality = config['alignment_quality'] resources: mem_mb = 2000 - container: - None threads: 4 shell: @@ -170,8 +168,6 @@ rule assign_molecules: "logs/assign_mi/{sample}.assign_mi.log" params: molecule_distance - container: - None shell: """ assign_mi -c {params} {input} > {output.bam} 2> {log} @@ -188,8 +184,6 @@ rule barcode_stats: "logs/bxstats/{sample}.bxstats.log" params: sample = lambda wc: d[wc.sample] - container: - None shell: "bx_stats {input.bam} > {output} 2> {log}" @@ -203,8 +197,6 @@ rule molecule_coverage: "logs/{sample}.molcov.log" params: windowsize - container: - None shell: "molecule_coverage -f {input.fai} -w {params} {input.stats} 2> {log} | gzip > {output}" @@ -215,8 +207,6 @@ rule alignment_coverage: bed = "reports/data/coverage/coverage.bed" output: "reports/data/coverage/{sample}.cov.gz" - container: - None shell: "samtools bedcov -c {input.bed} {input.bam} | awk '{{ $6 = ($4 / ($3 + 1 - $2)); print }}' | gzip > {output}" @@ -252,6 +242,8 @@ rule sample_reports: "logs/reports/{sample}.alignstats.log" conda: "envs/report.yaml" + container: + 
"docker://pdimens/harpy:report_latest" retries: 3 shell: @@ -270,8 +262,6 @@ if ignore_bx: output: "{sample}.bam.bai", bam = "{sample}.bam" - container: - None shell: """ mv {input} {output.bam} @@ -287,8 +277,6 @@ rule general_stats: flagstat = temp("reports/data/samtools_flagstat/{sample}.flagstat") log: "logs/stats/{sample}.samstats.log" - container: - None shell: """ {{ @@ -311,6 +299,8 @@ rule samtools_report: outdir = "reports/data/samtools_stats reports/data/samtools_flagstat" conda: "envs/qc.yaml" + container: + "docker://pdimens/harpy:qc_latest" shell: "multiqc {params} > {output} 2> {log}" @@ -329,6 +319,8 @@ rule barcode_report: f"logs/reports/bxstats.report.log" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/align_strobe.smk b/harpy/snakefiles/align_strobe.smk index 61b2774da..0d284415f 100644 --- a/harpy/snakefiles/align_strobe.smk +++ b/harpy/snakefiles/align_strobe.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -41,8 +39,6 @@ rule preprocess_reference: fai = f"{workflow_geno}.fai" log: f"{workflow_geno}.preprocess.log" - container: - None shell: """ {{ @@ -86,6 +82,8 @@ rule align: min(4, workflow.cores - 1) conda: "envs/align.yaml" + container: + "docker://pdimens/harpy:align_latest" shell: """ {{ @@ -100,8 +98,6 @@ rule standardize_barcodes: temp("samples/{sample}/{sample}.standard.sam") log: "logs/{sample}.standardize.log" - container: - None shell: "standardize_barcodes_sam > {output} 2> {log} < {input}" @@ -123,8 +119,6 @@ rule mark_duplicates: mem_mb = 2000 threads: 2 - container: - None shell: """ if grep -q "^[ABCD]" <<< $(samtools head -h 0 -n 1 {input.sam}); then @@ -153,8 +147,6 @@ rule assign_molecules: "logs/assign_mi/{sample}.assign_me.log" params: molecule_distance - container: - None shell: """ assign_mi -c {params} {input} > {output.bam} 2> {log} @@ -171,8 +163,6 @@ rule barcode_stats: 
"logs/bxstats/{sample}.bxstats.log" params: sample = lambda wc: d[wc.sample] - container: - None shell: "bx_stats {input.bam} > {output} 2> {log}" @@ -186,8 +176,6 @@ rule molecule_coverage: "logs/molcov/{sample}.molcov.log" params: windowsize - container: - None shell: "molecule_coverage -f {input.fai} -w {params} {input.stats} 2> {log} | gzip > {output}" @@ -198,8 +186,6 @@ rule alignment_coverage: bed = "reports/data/coverage/coverage.bed" output: "reports/data/coverage/{sample}.cov.gz" - container: - None shell: "samtools bedcov -c {input.bed} {input.bam} | awk '{{ $6 = ($4 / ($3 + 1 - $2)); print }}' | gzip > {output}" @@ -235,6 +221,8 @@ rule sample_reports: "logs/reports/{sample}.alignstats.log" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: @@ -253,8 +241,6 @@ if ignore_bx: output: "{sample}.bam.bai", bam = "{sample}.bam" - container: - None shell: """ mv {input} {output.bam} @@ -270,8 +256,6 @@ rule general_stats: flagstat = temp("reports/data/samtools_flagstat/{sample}.flagstat") log: "logs/stats/{sample}.samstats.log" - container: - None shell: """ {{ @@ -294,6 +278,8 @@ rule samtools_report: outdir = "reports/data/samtools_stats reports/data/samtools_flagstat" conda: "envs/qc.yaml" + container: + "docker://pdimens/harpy:qc_latest" shell: "multiqc {params} > {output} 2> {log}" @@ -312,6 +298,8 @@ rule barcode_report: "logs/reports/bxstats.report.log" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/assembly.smk b/harpy/snakefiles/assembly.smk index 8e7a9e942..20e496637 100644 --- a/harpy/snakefiles/assembly.smk +++ b/harpy/snakefiles/assembly.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import logging @@ -50,6 +48,8 @@ rule cloudspades: "logs/assembly.log" conda: "envs/assembly.yaml" + container: + "docker://pdimens/harpy:assembly_latest" threads: workflow.cores resources: @@ -63,8 +63,6 @@ 
rule interleave_fastq: FQ2 output: temp("scaffold/interleaved.fq.gz") - container: - None shell: "seqtk mergepe {input} | bgzip > {output}" @@ -73,8 +71,6 @@ rule link_assembly: "spades/scaffolds.fasta", output: "scaffold/spades.fa" - container: - None shell: "ln -sr {input} {output}" @@ -107,6 +103,8 @@ rule scaffolding: extra = arcs_extra conda: "envs/assembly.yaml" + container: + "docker://pdimens/harpy:assembly_latest" shell: """ arcs-make arcs-tigmint -C {params} 2> {log} @@ -131,6 +129,8 @@ rule QUAST_assessment: workflow.cores conda: "envs/assembly.yaml" + container: + "docker://pdimens/harpy:assembly_latest" shell: "quast.py --threads {threads} --pe12 {input.fastq} {params} {input.contigs} {input.scaffolds} 2> {log}" @@ -151,6 +151,8 @@ rule BUSCO_analysis: workflow.cores conda: "envs/assembly.yaml" + container: + "docker://pdimens/harpy:assembly_latest" shell: "( busco -f -i {input} -c {threads} -m genome {params} > {log} 2>&1 ) || touch {output}" @@ -167,6 +169,8 @@ rule build_report: title = "--title \"Assembly Metrics\"" conda: "envs/qc.yaml" + container: + "docker://pdimens/harpy:qc_latest" shell: "multiqc {params} {input} > {output} 2> {log}" diff --git a/harpy/snakefiles/deconvolve.smk b/harpy/snakefiles/deconvolve.smk index 7fc970e03..027c2e7d2 100644 --- a/harpy/snakefiles/deconvolve.smk +++ b/harpy/snakefiles/deconvolve.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -55,6 +53,8 @@ rule deconvolve: 2 conda: "envs/deconvolution.yaml" + container: + "docker://pdimens/harpy:deconvolution_latest" shell: "QuickDeconvolution -t {threads} -i {input} -o {output} {params} > {log} 2>&1" @@ -65,8 +65,6 @@ rule extract_forward: "{sample}.R1.fq.gz" params: "-1" - container: - None shell: "seqtk seq {params} {input} | gzip > {output}" diff --git a/harpy/snakefiles/demultiplex_meier2021.smk b/harpy/snakefiles/demultiplex_meier2021.smk index a3be2b4a5..8ed35055a 100644 --- 
a/harpy/snakefiles/demultiplex_meier2021.smk +++ b/harpy/snakefiles/demultiplex_meier2021.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import logging @@ -70,6 +68,8 @@ rule demultiplex: workflow.cores conda: "envs/demultiplex.yaml" + container: + "docker://pdimens/harpy:demultiplex_latest" shell: """ dmox --i1 {input.I1} --i2 {input.I2} --r1 {input.R1} --r2 {input.R2} \ @@ -90,6 +90,8 @@ rule assess_quality: 1 conda: "envs/qc.yaml" + container: + "docker://pdimens/harpy:qc_latest" shell: """ ( falco --quiet --threads {threads} -skip-report -skip-summary -data-filename {output} {input} ) > {log} 2>&1 || @@ -145,6 +147,8 @@ rule quality_report: logdir = "reports/data/" conda: "envs/qc.yaml" + container: + "docker://pdimens/harpy:qc_latest" shell: "multiqc --config {input.mqc_yaml} {params} > {output} 2> {log}" diff --git a/harpy/snakefiles/environments.smk b/harpy/snakefiles/environments.smk index 085cf9985..079ed1977 100644 --- a/harpy/snakefiles/environments.smk +++ b/harpy/snakefiles/environments.smk @@ -1,11 +1,5 @@ -if config.get("envs", None): - out_envs = config["envs"] -else: - out_envs = ["align", "assembly", "metassembly", "phase", "qc", "report", "simulations", "stitch", "variants"] - -if config.get("spades", None): - out_envs.append("spades") +out_envs = config.get("envs", ["align", "assembly", "metassembly", "phase", "qc", "report", "simulations", "stitch", "variants"]) rule all: input: @@ -13,6 +7,5 @@ rule all: rule conda_env: output: "{conda}.env" - container: "docker://pdimens/harpy:latest" conda: "envs/{conda}.yaml" shell: "touch {output}" diff --git a/harpy/snakefiles/impute.smk b/harpy/snakefiles/impute.smk index 3056d2fc1..3ccb7ad7d 100644 --- a/harpy/snakefiles/impute.smk +++ b/harpy/snakefiles/impute.smk @@ -1,4 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" import os import re import logging diff --git a/harpy/snakefiles/metassembly.smk b/harpy/snakefiles/metassembly.smk index b280d4cdf..0e87ff0b7 
100644 --- a/harpy/snakefiles/metassembly.smk +++ b/harpy/snakefiles/metassembly.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import logging @@ -39,8 +37,6 @@ rule sort_by_barcode: barcode_tag = BX_TAG threads: workflow.cores - container: - None shell: """ {{ @@ -57,8 +53,6 @@ rule format_barcode: temp("fastq_preproc/input.R{FR}.fq.gz") params: barcode_tag = BX_TAG - container: - None shell: "sed 's/{params}:Z:[^[:space:]]*/&-1/g' {input} | bgzip > {output}" @@ -83,9 +77,9 @@ rule error_correction: resources: mem_mb=max_mem conda: - "envs/spades.yaml" + "envs/assembly.yaml" container: - None + "docker://pdimens/harpy:assembly_latest" shell: "metaspades.py -t {threads} {params} -1 {input.FQ_R1} -2 {input.FQ_R2} > {log}" @@ -109,7 +103,7 @@ rule spades_assembly: resources: mem_mb=max_mem conda: - "envs/spades.yaml" + "envs/assembly.yaml" container: None shell: @@ -227,14 +221,13 @@ rule athena_metassembly: params: force = "--force_reads" if force_athena else "", local_asm = "athena/results/olc/flye-input-contigs.fa", - final_asm = "athena/results/olc/athena.asm.fa", - result_dir = "athena" + final_asm = "athena/results/olc/athena.asm.fa" conda: "envs/metassembly.yaml" shell: """ athena-meta {params.force} --config {input.config} &> {log} &&\\ - mv {params.local_asm} {params.final_asm} {params.result_dir} + mv {params.local_asm} {params.final_asm} athena """ rule QUAST_assessment: diff --git a/harpy/snakefiles/phase.smk b/harpy/snakefiles/phase.smk index 75f3d0dea..d200c0961 100644 --- a/harpy/snakefiles/phase.smk +++ b/harpy/snakefiles/phase.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import subprocess import logging @@ -75,8 +73,6 @@ rule isolate_sample: output: vcf = temp("workflow/input/original/{sample}.bcf"), csi = temp("workflow/input/original/{sample}.bcf.csi") - container: - None shell: "bcftools view -Ob -W -s {wildcards.sample} -o {output.vcf} {input}" @@ -85,8 +81,6 @@ rule 
isolate_het_snps: "workflow/input/original/{sample}.bcf" output: temp("workflow/input/heterozygotes/{sample}.het.vcf") - container: - None shell: "bcftools view -m 2 -M 2 -i 'GT=\"het\"' {input} > {output}" @@ -95,8 +89,6 @@ rule index_alignments: lambda wc: bamdict[wc.bam] output: "{bam}.bai" - container: - None shell: "samtools index {input}" @@ -109,8 +101,6 @@ if indels: fai = temp(genofai) log: f"workflow/reference/{bn}.preprocess.log" - container: - None shell: """ {{ @@ -138,6 +128,8 @@ rule extract_hairs: purge_invalid = invalid_regex.get(bc_type, "'$4 !~ /N/'") conda: "envs/phase.yaml" + container: + "docker://pdimens/harpy:phase_latest" shell: """ extractHAIRS {params.static} --bam {input.bam} --VCF {input.vcf} --out {output.all_bc} > {log} 2>&1 @@ -157,6 +149,8 @@ rule link_fragments: f"-d {molecule_distance} --use-tag" conda: "envs/phase.yaml" + container: + "docker://pdimens/harpy:phase_latest" shell: "LinkFragments.py --bam {input.bam} --VCF {input.vcf} --fragments {input.fragments} --out {output} {params} > {log} 2>&1" @@ -175,6 +169,8 @@ rule phase: extra = extra conda: "envs/phase.yaml" + container: + "docker://pdimens/harpy:phase_latest" shell: "HAPCUT2 --fragments {input.fragments} --vcf {input.vcf} --out {output.blocks} {params} > {log} 2>&1" @@ -183,8 +179,6 @@ rule compress_phaseblock: "phase_blocks/{sample}.blocks.phased.VCF" output: "phase_blocks/{sample}.phased.vcf.gz" - container: - None shell: "bcftools view -Oz6 -o {output} --write-index {input}" @@ -203,8 +197,6 @@ rule annotate_phase: "-Ob --write-index -c CHROM,POS,FMT/GT,FMT/PS,FMT/PQ,FMT/PD -m +HAPCUT" threads: 2 - container: - None shell: "bcftools annotate -a {input.phase} -o {output.bcf} {params} {input.orig} 2> {log}" @@ -228,8 +220,6 @@ rule merge_samples: bcf = "variants.phased.bcf" threads: workflow.cores - container: - None shell: "bcftools merge --threads {threads} --force-single -l {input.filelist} -Ob -o {output.bcf} --write-index" @@ -238,8 +228,6 @@ rule 
summarize_blocks: collect("phase_blocks/{sample}.blocks", sample = samplenames) output: "reports/blocks.summary.gz" - container: - None shell: """ {{ @@ -277,6 +265,8 @@ rule phase_report: f"-P contigs:{plot_contigs}" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/qc.smk b/harpy/snakefiles/qc.smk index b6a4cb730..07d547abf 100644 --- a/harpy/snakefiles/qc.smk +++ b/harpy/snakefiles/qc.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -59,6 +57,8 @@ rule fastp: workflow.cores conda: "envs/qc.yaml" + container: + "docker://pdimens/harpy:qc_latest" shell: "fastp {params} --thread {threads} -i {input.fw} -I {input.rv} -o {output.fw} -O {output.rv} -h {output.html} -j {output.json} 2> {log.serr}" @@ -69,8 +69,6 @@ rule barcode_stats: temp("logs/bxcount/{sample}.count.log") params: lr_type - container: - None shell: "count_bx {params} {input} > {output}" @@ -101,6 +99,8 @@ rule barcode_report: "logs/barcode.report.log" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: @@ -125,6 +125,8 @@ rule qc_report: logdir = "reports/data/fastp/" conda: "envs/qc.yaml" + container: + "docker://pdimens/harpy:qc_latest" shell: "multiqc {params} > {output} 2> {log}" diff --git a/harpy/snakefiles/simulate_snpindel.smk b/harpy/snakefiles/simulate_snpindel.smk index 51fe7b773..64ed31aaf 100644 --- a/harpy/snakefiles/simulate_snpindel.smk +++ b/harpy/snakefiles/simulate_snpindel.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import random import logging @@ -68,8 +66,6 @@ if snp_vcf: snp_vcf output: snp_vcf_correct - container: - None shell: "bcftools view -Oz {input} > {output}" @@ -79,8 +75,6 @@ if indel_vcf: indel_vcf output: indel_vcf_correct - container: - None shell: "bcftools view -Oz {input} > {output}" @@ -100,6 +94,8 @@ rule simulate_haploid: parameters = 
variant_params conda: "envs/simulations.yaml" + container: + "docker://pdimens/harpy:simulations_latest" shell: "simuG -refseq {input.geno} -prefix {params.prefix} {params.parameters} > {log}" @@ -170,6 +166,8 @@ rule simulate_diploid: indel = f"-indel_vcf haplotype_{{haplotype}}/{outprefix}.hap{{haplotype}}.indel.vcf" if indel else "" conda: "envs/simulations.yaml" + container: + "docker://pdimens/harpy:simulations_latest" shell: "simuG -refseq {input.geno} -prefix {params.prefix} {params.snp} {params.indel} > {log}" @@ -180,8 +178,6 @@ rule rename_diploid: output: fasta = f"haplotype_{{haplotype}}/{outprefix}.hap{{haplotype}}.fasta.gz", mapfile = f"haplotype_{{haplotype}}/{outprefix}.hap{{haplotype}}.map" - container: - None shell: """ bgzip -c {input.fasta} > {output.fasta} diff --git a/harpy/snakefiles/simulate_variants.smk b/harpy/snakefiles/simulate_variants.smk index 911a789bf..950bc4e9c 100644 --- a/harpy/snakefiles/simulate_variants.smk +++ b/harpy/snakefiles/simulate_variants.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import random import logging @@ -43,8 +41,6 @@ if vcf: vcf output: vcf_correct - container: - None shell: "bcftools view -Oz {input} > {output}" @@ -63,6 +59,8 @@ rule simulate_haploid: parameters = variant_params conda: "envs/simulations.yaml" + container: + "docker://pdimens/harpy:simulations_latest" shell: "simuG -refseq {input.geno} -prefix {params.prefix} {params.parameters} > {log}" @@ -118,6 +116,8 @@ rule simulate_diploid: vcf_arg = f"-{variant}_vcf" conda: "envs/simulations.yaml" + container: + "docker://pdimens/harpy:simulations_latest" shell: "simuG -refseq {input.geno} -prefix {params.prefix} {params.vcf_arg} {input.hap} > {log}" @@ -128,8 +128,6 @@ rule rename_diploid: output: fasta = f"haplotype_{{haplotype}}/{outprefix}.hap{{haplotype}}.fasta.gz", mapfile = f"haplotype_{{haplotype}}/{outprefix}.hap{{haplotype}}.{variant}.map" - container: - None shell: """ bgzip -c {input.fasta} > 
{output.fasta} diff --git a/harpy/snakefiles/snp_freebayes.smk b/harpy/snakefiles/snp_freebayes.smk index d4e20a8f6..4eba1117f 100644 --- a/harpy/snakefiles/snp_freebayes.smk +++ b/harpy/snakefiles/snp_freebayes.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import logging from pathlib import Path @@ -56,8 +54,6 @@ rule preprocess_reference: fai = f"{workflow_geno}.fai" log: f"{workflow_geno}.preprocess.log" - container: - None shell: """ {{ @@ -71,8 +67,6 @@ rule index_alignments: lambda wc: bamdict[wc.bam] output: "{bam}.bai" - container: - None shell: "samtools index {input}" @@ -108,6 +102,8 @@ rule call_variants: extra = extra conda: "envs/variants.yaml" + container: + "docker://pdimens/harpy:variants_latest" shell: """ freebayes -f {input.reference} -L {input.bamlist} {params} 2> {log} | @@ -134,8 +130,6 @@ rule concat_variants: "logs/concat.log" threads: workflow.cores - container: - None shell: "bcftools concat -f {input.filelist} --threads {threads} --naive -Ob -o {output} 2> {log}" @@ -145,8 +139,6 @@ rule sort_variants: output: bcf = "variants.raw.bcf", csi = "variants.raw.bcf.csi" - container: - None shell: "bcftools sort --write-index -Ob -o {output.bcf} {input} 2> /dev/null" @@ -164,10 +156,6 @@ rule realign_indels: workflow.cores params: "-m -both -d both --write-index -Ob -c w" - threads: - workflow.cores - container: - None shell: "bcftools norm --threads {threads} {params} -o {output.bcf} -f {input.genome} {input.bcf} 2> {log}" @@ -179,8 +167,6 @@ rule general_stats: idx = "variants.{type}.bcf.csi" output: "reports/data/variants.{type}.stats", - container: - None shell: """ bcftools stats -s "-" --fasta-ref {input.genome} {input.bcf} > {output} 2> /dev/null @@ -213,6 +199,8 @@ rule variant_report: "logs/variants.{type}.report.log" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/snp_mpileup.smk b/harpy/snakefiles/snp_mpileup.smk index 
0df47f54f..cadd22f62 100644 --- a/harpy/snakefiles/snp_mpileup.smk +++ b/harpy/snakefiles/snp_mpileup.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import logging from pathlib import Path @@ -46,8 +44,6 @@ rule preprocess_reference: f"{workflow_geno}.preprocess.log" params: f"--gzi-idx {workflow_geno}.gzi" if genome_zip else "" - container: - None shell: """ {{ @@ -75,8 +71,6 @@ rule index_alignments: lambda wc: bamdict[wc.bam] output: "{bam}.bai" - container: - None shell: "samtools index {input}" @@ -113,8 +107,6 @@ rule call_genotypes: groups = "--group-samples workflow/sample.groups" if groupings else "--group-samples -" threads: 1 - container: - None shell: """ bcftools mpileup --threads {threads} --fasta-ref {input.genome} --bam-list {input.bamlist} -Ou {params.region} {params.annot_mp} {params.extra} 2> {output.logfile} | @@ -129,8 +121,6 @@ rule sort_genotypes: idx = temp("sort/{part}.bcf.csi") log: "logs/sort/{part}.sort.log" - container: - None shell: "bcftools sort --output {output.bcf} --write-index {input.bcf} 2> {log}" @@ -167,8 +157,6 @@ rule concat_variants: "logs/concat.log" threads: workflow.cores - container: - None shell: "bcftools concat -f {input.filelist} --threads {threads} --naive -Ob -o {output} 2> {log}" @@ -178,8 +166,6 @@ rule sort_variants: output: bcf = "variants.raw.bcf", csi = "variants.raw.bcf.csi" - container: - None shell: "bcftools sort --write-index -Ob -o {output.bcf} {input} 2> /dev/null" @@ -197,8 +183,6 @@ rule realign_indels: "-m -both -d both --write-index -Ob -c w" threads: workflow.cores - container: - None shell: "bcftools norm --threads {threads} {params} -o {output.bcf} -f {input.genome} {input.bcf} 2> {log}" @@ -209,8 +193,6 @@ rule general_stats: idx = "variants.{type}.bcf.csi" output: "reports/data/variants.{type}.stats" - container: - None shell: """ bcftools stats -s "-" --fasta-ref {input.genome} {input.bcf} > {output} 2> /dev/null @@ -243,6 +225,8 @@ rule variant_report: 
"logs/variants.{type}.report.log" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/sv_leviathan.smk b/harpy/snakefiles/sv_leviathan.smk index 9ddd1c76f..58d4cdd77 100644 --- a/harpy/snakefiles/sv_leviathan.smk +++ b/harpy/snakefiles/sv_leviathan.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -49,6 +47,8 @@ rule index_barcodes: min(10, workflow.cores) conda: "envs/variants.yaml" + container: + "docker://pdimens/harpy:variants_latest" shell: """ {{ @@ -68,6 +68,8 @@ rule preprocess_reference: f"{workflow_geno}.preprocess.log" conda: "envs/align.yaml" + container: + "docker://pdimens/harpy:align_latest" shell: """ {{ @@ -101,6 +103,8 @@ rule call_variants: workflow.cores - 1 conda: "envs/variants.yaml" + container: + "docker://pdimens/harpy:variants_latest" shell: "LEVIATHAN -b {input.bam} -i {input.bc_idx} {params} -g {input.genome} -o {output.vcf} -t {threads} --candidates {output.candidates} 2> {log.runlog}" @@ -110,8 +114,6 @@ rule sort_variants: "vcf/{sample}.vcf" output: "vcf/{sample}.bcf" - container: - None shell: "bcftools sort -Ob --output {output} {input} 2> /dev/null" @@ -120,8 +122,6 @@ rule variant_stats: "vcf/{sample}.bcf" output: temp("reports/data/{sample}.sv.stats") - container: - None shell: """ {{ @@ -197,6 +197,8 @@ rule sample_reports: contigs= f"-P contigs:{plot_contigs}" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/sv_leviathan_pop.smk b/harpy/snakefiles/sv_leviathan_pop.smk index 4d2613ff8..25a7d94a4 100644 --- a/harpy/snakefiles/sv_leviathan_pop.smk +++ b/harpy/snakefiles/sv_leviathan_pop.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -82,8 +80,6 @@ rule concat_groups: mem_mb = 2000 threads: workflow.cores - container: - None shell: """ {{ @@ -104,6 +100,8 @@ rule 
index_barcode: min(5, workflow.cores) conda: "envs/variants.yaml" + container: + "docker://pdimens/harpy:variants_latest" shell: "LRez index bam -p -b {input.bam} -o {output} --threads {threads}" @@ -118,6 +116,8 @@ rule preprocess_reference: f"{workflow_geno}.preprocess.log" conda: "envs/align.yaml" + container: + "docker://pdimens/harpy:align_latest" shell: """ {{ @@ -152,6 +152,8 @@ rule call_variants: workflow.cores - 1 conda: "envs/variants.yaml" + container: + "docker://pdimens/harpy:variants_latest" shell: "LEVIATHAN -b {input.bam} -i {input.bc_idx} {params} -g {input.genome} -o {output.vcf} -t {threads} --candidates {output.candidates} 2> {log.runlog}" @@ -163,8 +165,6 @@ rule sort_variants: "vcf/{population}.bcf" params: lambda wc: wc.population - container: - None shell: "bcftools sort -Ob --output {output} {input} 2> /dev/null" @@ -173,8 +173,6 @@ rule variant_stats: "vcf/{population}.bcf" output: temp("reports/data/{population}.sv.stats") - container: - None shell: """ {{ @@ -245,6 +243,8 @@ rule group_reports: contigs= f"-P contigs:{plot_contigs}" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: @@ -272,6 +272,8 @@ rule aggregate_report: contigs = f"-P contigs:{plot_contigs}" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/sv_naibr.smk b/harpy/snakefiles/sv_naibr.smk index 92a0ef8a8..d26d090cf 100644 --- a/harpy/snakefiles/sv_naibr.smk +++ b/harpy/snakefiles/sv_naibr.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -60,8 +58,6 @@ rule index_alignments: lambda wc: bamdict[wc.bam] output: "{bam}.bai" - container: - None shell: "samtools index {input}" @@ -98,6 +94,8 @@ rule call_variants: min(10, workflow.cores -1) conda: "envs/variants.yaml" + container: + "docker://pdimens/harpy:variants_latest" shell: "naibr {input.conf} > {log} 2>&1 && rm -rf naibrlog" @@ -111,8 
+109,6 @@ rule infer_variants: refmt = "IGV/{sample}.reformat.bedpe", fail = "bedpe/qc_fail/{sample}.fail.bedpe", vcf = "vcf/{sample}.vcf" - container: - None shell: """ infer_sv {input.bedpe} -f {output.fail} > {output.bedpe} @@ -163,8 +159,6 @@ rule preprocess_reference: f"{workflow_geno}.preprocess.log" params: f"--gzi-idx {workflow_geno}.gzi" if genome_zip else "" - container: - None shell: """ {{ @@ -208,6 +202,8 @@ rule sample_reports: contigs= f"-P contigs:{plot_contigs}" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/sv_naibr_phase.smk b/harpy/snakefiles/sv_naibr_phase.smk index e62806cbb..4d32ec09d 100644 --- a/harpy/snakefiles/sv_naibr_phase.smk +++ b/harpy/snakefiles/sv_naibr_phase.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -13,7 +11,6 @@ wildcard_constraints: genomefile = config["inputs"]["reference"] bamlist = config["inputs"]["alignments"] -bamdict = dict(zip(bamlist, bamlist)) vcffile = config["inputs"]["vcf"] samplenames = {Path(i).stem for i in bamlist} extra = config.get("extra", None) @@ -55,12 +52,6 @@ def get_alignments(wildcards): aln = list(filter(r.match, bamlist)) return aln[0] -def get_align_index(wildcards): - """returns a list with the bai index file for the sample based on wildcards.sample""" - r = re.compile(fr"(.*/{wildcards.sample})\.(bam|sam)$", flags = re.IGNORECASE) - aln = list(filter(r.match, bamlist)) - return aln[0] + ".bai" - rule preprocess_reference: input: genomefile @@ -69,8 +60,6 @@ rule preprocess_reference: fai = f"{workflow_geno}.fai" log: f"{workflow_geno}.preprocess.log" - container: - None shell: """ {{ @@ -81,21 +70,21 @@ rule preprocess_reference: rule index_alignments: input: - lambda wc: bamdict[wc.bam] + get_alignments output: - "{bam}.bai" - container: - None + temp("workflow/input/bam/{sample}.bam.bai"), + bam = temp("workflow/input/bam/{sample}.bam") shell: - 
"samtools index {input}" + """ + ln -sr {input} {output.bam} + samtools index {output.bam} + """ rule index_snps: input: vcffile output: vcffile + ".csi" - container: - None shell: "bcftools index {input}" @@ -104,18 +93,16 @@ rule index_snps_gz: vcffile output: vcffile + ".tbi" - container: - None shell: "tabix {input}" rule phase_alignments: input: - get_align_index, + "workflow/input/bam/{sample}.bam.bai", vcfindex, f"{workflow_geno}.fai", vcf = vcffile, - aln = get_alignments, + aln = "workflow/input/bam/{sample}.bam", ref = workflow_geno output: bam = "phasedbam/{sample}.bam", @@ -124,6 +111,8 @@ rule phase_alignments: mol_dist conda: "envs/phase.yaml" + container: + "docker://pdimens/harpy:phase_latest" threads: 4 shell: @@ -134,8 +123,6 @@ rule log_phasing: collect("logs/whatshap-haplotag/{sample}.phase.log", sample = samplenames) output: "logs/whatshap-haplotag.log" - container: - None shell: """ echo -e "sample\\ttotal_alignments\\tphased_alignments" > {output} @@ -168,8 +155,6 @@ rule index_phased: "phasedbam/{sample}.bam" output: "phasedbam/{sample}.bam.bai" - container: - None shell: "samtools index {input} {output} 2> /dev/null" @@ -189,6 +174,8 @@ rule call_variants: 10 conda: "envs/variants.yaml" + container: + "docker://pdimens/harpy:variants_latest" shell: "naibr {input.conf} > {log} 2>&1 && rm -rf naibrlog" @@ -203,8 +190,6 @@ rule infer_variants: refmt = "IGV/{sample}.reformat.bedpe", fail = "bedpe/qc_fail/{sample}.fail.bedpe", vcf = "vcf/{sample}.vcf" - container: - None shell: """ infer_sv {input.bedpe} -f {output.fail} > {output.bedpe} @@ -273,6 +258,8 @@ rule sample_reports: contigs= f"-P contigs:{plot_contigs}" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/sv_naibr_pop.smk b/harpy/snakefiles/sv_naibr_pop.smk index be7450b6c..0cd2e39ae 100644 --- a/harpy/snakefiles/sv_naibr_pop.smk +++ b/harpy/snakefiles/sv_naibr_pop.smk @@ -1,5 +1,3 @@ -containerized: 
"docker://pdimens/harpy:latest" - import os import re import logging @@ -96,8 +94,6 @@ rule concat_groups: mem_mb = 2000 threads: 10 - container: - None shell: """ {{ @@ -139,6 +135,8 @@ rule call_variants: min(10, workflow.cores - 1) conda: "envs/variants.yaml" + container: + "docker://pdimens/harpy:variants_latest" shell: "naibr {input.conf} > {log} 2>&1 && rm -rf naibrlog" @@ -153,8 +151,6 @@ rule infer_variants: refmt = "IGV/{population}.reformat.bedpe", fail = "bedpe/qc_fail/{population}.fail.bedpe", vcf = "vcf/{population}.vcf" - container: - None shell: """ infer_sv {input.bedpe} -f {output.fail} > {output.bedpe} @@ -202,8 +198,6 @@ rule preprocess_reference: fai = f"{workflow_geno}.fai" log: f"{workflow_geno}.preprocess.log" - container: - None shell: """ {{ @@ -241,6 +235,8 @@ rule group_reports: contigs= f"-P contigs:{plot_contigs}" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: @@ -268,6 +264,8 @@ rule aggregate_report: contigs = f"-P contigs:{plot_contigs}" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/sv_naibr_pop_phase.smk b/harpy/snakefiles/sv_naibr_pop_phase.smk index 47d1d446e..e01eea57f 100644 --- a/harpy/snakefiles/sv_naibr_pop_phase.smk +++ b/harpy/snakefiles/sv_naibr_pop_phase.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -15,7 +13,6 @@ wildcard_constraints: genomefile = config["inputs"]["reference"] bn = os.path.basename(genomefile) bamlist = config["inputs"]["alignments"] -bamdict = dict(zip(bamlist, bamlist)) samplenames = {Path(i).stem for i in bamlist} groupfile = config["inputs"]["groupings"] vcffile = config["inputs"]["vcf"] @@ -76,12 +73,6 @@ def get_alignments(wildcards): aln = list(filter(r.match, bamlist)) return aln[0] -def get_align_index(wildcards): - """returns a list with the bai index file for the sample based on wildcards.sample""" - r 
= re.compile(fr"(.*/{wildcards.sample})\.(bam|sam)$", flags = re.IGNORECASE) - aln = list(filter(r.match, bamlist)) - return aln[0] + ".bai" - rule preprocess_reference: input: genomefile @@ -90,8 +81,6 @@ rule preprocess_reference: fai = f"{workflow_geno}.fai" log: f"{workflow_geno}.preprocess.log" - container: - None shell: """ {{ @@ -105,8 +94,6 @@ rule index_snps: vcffile output: vcffile + ".csi" - container: - None shell: "bcftools index {input}" @@ -115,28 +102,28 @@ rule index_snps_gz: vcffile output: vcffile + ".tbi" - container: - None shell: "tabix {input}" rule index_alignments: input: - lambda wc: bamdict[wc.bam] + get_alignments output: - "{bam}.bai" - container: - None + bam = temp("workflow/input/bam/{sample}.bam"), + bai = temp("workflow/input/bam/{sample}.bam.bai") shell: - "samtools index {input}" + """ + ln -sr {input} {output.bam} + samtools index {output.bam} + """ rule phase_alignments: input: vcfindex, - get_align_index, + "workflow/input/bam/{sample}.bam.bai", f"{workflow_geno}.fai", vcf = vcffile, - aln = get_alignments, + aln = "workflow/input/bam/{sample}.bam", ref = workflow_geno output: bam = temp("phasedbam/{sample}.bam"), @@ -147,6 +134,8 @@ rule phase_alignments: 4 conda: "envs/phase.yaml" + container: + "docker://pdimens/harpy:phase_latest" shell: "whatshap haplotag --sample {wildcards.sample} --linked-read-distance-cutoff {params} --ignore-read-groups --tag-supplementary --output-threads={threads} -o {output.bam} --reference {input.ref} {input.vcf} {input.aln} 2> {output.log}" @@ -155,8 +144,6 @@ rule log_phasing: collect("logs/whatshap-haplotag/{sample}.phase.log", sample = samplenames) output: "logs/whatshap-haplotag.log" - container: - None shell: """ echo -e "sample\\ttotal_alignments\\tphased_alignments" > {output} @@ -199,8 +186,6 @@ rule concat_groups: mem_mb = 2000 threads: 10 - container: - None shell: """ {{ @@ -242,6 +227,8 @@ rule call_variants: min(10, workflow.cores - 1) conda: "envs/variants.yaml" + container: + 
"docker://pdimens/harpy:variants_latest" shell: "naibr {input.conf} > {log} 2>&1 && rm -rf naibrlog" @@ -255,9 +242,7 @@ rule infer_variants: bedpe = "bedpe/{population}.bedpe", refmt = "IGV/{population}.reformat.bedpe", fail = "bedpe/qc_fail/{population}.fail.bedpe", - vcf = "vcf/{population}.vcf" - container: - None + vcf = "vcf/{population}.vcf" shell: """ infer_sv {input.bedpe} -f {output.fail} > {output.bedpe} @@ -326,6 +311,8 @@ rule sample_reports: contigs= f"-P contigs:{plot_contigs}" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: @@ -353,6 +340,8 @@ rule aggregate_report: contigs = f"-P contigs:{plot_contigs}" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/validate_bam.smk b/harpy/snakefiles/validate_bam.smk index 1214737ef..19c3f405e 100644 --- a/harpy/snakefiles/validate_bam.smk +++ b/harpy/snakefiles/validate_bam.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -29,8 +27,6 @@ rule check_bam: temp("{sample}.log") params: lr_platform - container: - None shell: "check_bam {params} {input} > {output}" @@ -39,8 +35,6 @@ rule concat_results: collect("{sample}.log", sample = samplenames) output: "validate.bam.tsv" - container: - None shell: """ {{ @@ -76,6 +70,8 @@ rule create_report: "logs/report.log" conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/harpy/snakefiles/validate_fastq.smk b/harpy/snakefiles/validate_fastq.smk index a854df6f4..e7b6248e9 100644 --- a/harpy/snakefiles/validate_fastq.smk +++ b/harpy/snakefiles/validate_fastq.smk @@ -1,5 +1,3 @@ -containerized: "docker://pdimens/harpy:latest" - import os import re import logging @@ -34,8 +32,6 @@ rule check_forward: temp("{sample}.F.log") params: lr_platform - container: - None shell: "check_fastq {params} {input} > {output}" @@ -46,8 +42,6 @@ rule 
check_reverse: temp("{sample}.R.log") params: lr_platform - container: - None shell: "check_fastq {params} {input} > {output}" @@ -56,8 +50,6 @@ rule concat_results: collect("{sample}.{FR}.log", sample = samplenames, FR = ["F","R"]) output: "validate.fastq.tsv" - container: - None shell: """ {{ @@ -93,6 +85,8 @@ rule create_report: lr_platform conda: "envs/report.yaml" + container: + "docker://pdimens/harpy:report_latest" retries: 3 shell: diff --git a/pixi.toml b/pixi.toml new file mode 100644 index 000000000..7fd96fb71 --- /dev/null +++ b/pixi.toml @@ -0,0 +1,24 @@ +[workspace] +name = "harpy" +authors = ["pdimens "] +channels = ["conda-forge", "bioconda"] +platforms = ["linux-64"] +version = "3.2.0" + +[tasks] + +[dependencies] +bcftools = "1.22.*" +click = ">=8.2" +conda = ">=24.8" +htslib = "1.22.*" +pysam = "0.23.*" +python = ">=3.11" +rich-click = ">=1.9.3" +snakemake-minimal = ">=9.13" +samtools = "1.22.*" +seqtk = "*" +apptainer = ">=1.4.2,<2" + +[pypi-dependencies] +harpy = { path = ".", editable = true} diff --git a/resources/changelog.md b/resources/changelog.md index 8403ffa7e..4d00ca438 100644 --- a/resources/changelog.md +++ b/resources/changelog.md @@ -1,3 +1,9 @@ +# new +- `diagnose` now has 3 subcommands: + - `stall`: same as previous `diagnose` behavior, where it runs snakemake with `--dry-run --debug-dag` + - `snakemake`: runs snakemake directly (without Harpy intervention), outputting everything to terminal + - `rule`: attempt to directly run the failing rule of a workflow as identified in the snakemake log + # deprecations - harpy convert - harpy downsample diff --git a/resources/harpy.yaml b/resources/harpy.yaml deleted file mode 100644 index dc2b067cd..000000000 --- a/resources/harpy.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: harpy -channels: - - conda-forge - - bioconda -dependencies: - - bcftools =1.22 - - click >=8.2 - - conda >24.7 - - htslib =1.22 - - pysam =0.23 - - python >=3.11 - - rich-click =1.9 - - snakemake-minimal =9 - - 
samtools =1.22 - - seqtk \ No newline at end of file diff --git a/resources/meta.yaml b/resources/meta.yaml index 77c6c5f02..de3c57d64 100644 --- a/resources/meta.yaml +++ b/resources/meta.yaml @@ -26,9 +26,7 @@ build: - concatenate_bam = harpy.scripts:concatenate_bam.main - count_bx = harpy.scripts:count_bx.main - create_simulation_data = harpy.scripts:create_simulation_data.main - - deconvolve_alignments = harpy.scripts:deconvolve_alignments.main - depth_windows = harpy.scripts:depth_windows.main - - extract_bxtags = harpy.scripts:extract_bxtags.main - haplotag_acbd = harpy.scripts:haplotag_acbd.main - haplotag_barcodes = harpy.scripts:haplotag_barcodes.main - infer_sv = harpy.scripts:infer_sv.main @@ -36,8 +34,6 @@ build: - molecule_coverage = harpy.scripts:molecule_coverage.main - parse_phaseblocks = harpy.scripts:parse_phaseblocks.main - rename_bam = harpy.scripts:rename_bam.main - - separate_singletons = harpy.scripts:separate_singletons.main - - separate_validbx = harpy.scripts:separate_validbx.main - standardize_barcodes_sam = harpy.scripts:standardize_barcodes_sam.main run_exports: - {{ pin_subpackage('harpy', max_pin="x") }} @@ -53,8 +49,8 @@ requirements: - conda >24.7 - htslib >=1.22 - pysam >=0.23 - - rich-click >=1.8 - - snakemake-minimal >=9.0 + - rich-click >=1.9.3 + - snakemake-minimal >=9.11 - samtools >=1.22 - seqtk