Skip to content

Commit c329a57

Browse files
authored
Merge pull request #44 from RasmussenLab/dev
Dev
2 parents d971b48 + 532b5ac commit c329a57

25 files changed

+18946
-2594
lines changed

README.md

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,19 @@ mamba install -c conda-forge -c bioconda snakemake
2222
mamba install -c conda-forge -c bioconda scikit-learn=1.0.2
2323
mamba install -c conda-forge -c bioconda cython
2424
mamba install -c conda-forge -c bioconda pygraphviz
25+
```
26+
27+
28+
```
29+
### Clone repository
30+
git clone the repository https://github.com/RasmussenLab/phamb.git
31+
32+
### Quick install
33+
pip install -e .
2534
26-
### Old dependencies
27-
conda create -c conda-forge -c bioconda -n phamb python=3.6 cython scikit-learn=0.21.3 snakemake pygraphviz
35+
### Test installation
36+
mkdir -p testout
37+
run_RF.py test/contigs.fna.gz test/clusters.tsv test testout
2838
```
2939

3040

@@ -51,7 +61,7 @@ mkdir -p projectdir
5161
cd projectdir
5262
git clone the repository https://github.com/RasmussenLab/phamb.git
5363
cp -r phamb/workflows/mag_annotation .
54-
python mag_annotation/scripts/split_contigs.py -c contigs.fna.gz
64+
python split_contigs.py -c contigs.fna.gz
5565
```
5666

5767
- Now the `contigs.fna.gz` is split into individual assemblies, i.e. `assembly/{sample}/{sample}.fna`
@@ -98,7 +108,7 @@ gzip contigs.fna
98108
### Run the RF model
99109
The provided script writes the virome bins to a fasta file and summarises the bin-annotations in `vambbins_aggregated_annotation.txt`.
100110
```bash
101-
python mag_annotation/scripts/run_RF.py contigs.fna.gz vamb/clusters.tsv annotations resultdir
111+
run_RF.py contigs.fna.gz vamb/clusters.tsv annotations resultdir
102112

103113
ls resultdir
104114
resultdir/vambbins_aggregated_annotation.txt

phamb/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
2+
3+
"""Phamb - Phages from metagenomic binning
4+
Documentation: https://github.com/RasmussenLab/phamb
5+
"""
6+
7+
__licence__ = 'MIT'
8+
__version__ = (1, 0, 1)
File renamed without changes.

workflows/mag_annotation/scripts/run_RF.py renamed to phamb/run_RF.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
#!/usr/bin/python
22
import sys
33
import argparse
4-
import vambtools as _vambtools
5-
import run_RF_modules
4+
#import vambtools as _vambtools
5+
from phamb import vambtools as _vambtools
6+
from phamb import run_RF_modules
7+
#import run_RF_modules
68
import collections as _collections
79
import os
810
import numpy as _np
11+
from pathlib import Path
912

1013

1114
parser = argparse.ArgumentParser(
@@ -185,7 +188,8 @@ def _run_RF_model(cls,RF_model,genome_order, sparse_df):
185188

186189
print('Loading Model and annotation table')
187190
trained_model = joblib.load(RF_model)
188-
191+
trained_model.n_estimators = 300
192+
trained_model.max_features = 'sqrt'
189193
predicted_genome_labels = trained_model.predict(sparse_df)
190194
prediction_probabilities = trained_model.predict_proba(sparse_df)
191195
predicted_genome_labels = [label.lower() for label in list(predicted_genome_labels) ]
@@ -226,7 +230,7 @@ def _run_RF_model(cls,RF_model,genome_order, sparse_df):
226230

227231
viral_annotation = run_RF_modules.Viral_annotation(annotation_files=viral_annotation_files,genomes=reference)
228232

229-
rf_model_file = 'mag_annotation/dbs/RF_model.python39.sav'
233+
rf_model_file = Path(__file__).parent / "dbs/RF_model.python39.sav"
230234
RF_results = RF_model(rf_model_file, genomes = viral_annotation.genomes)
231235

232236
bins = {binname:clusters[binname] for binname in RF_results.RF_non_bacteria}

workflows/mag_annotation/scripts/run_RF_modules.py renamed to phamb/run_RF_modules.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#!/bin/python
22
'''Helper modules'''
3-
import vambtools as _vambtools
3+
#import vambtools as _vambtools
4+
from phamb import vambtools as _vambtools
5+
46
import collections as _collections
57
import os
68
import numpy as _np

workflows/mag_annotation/scripts/split_contigs.py renamed to phamb/split_contigs.py

File renamed without changes.

workflows/mag_annotation/scripts/vambtools.py renamed to phamb/vambtools.py

File renamed without changes.

setup.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import sys
2+
from setuptools import setup, find_packages
3+
from setuptools import Extension
4+
import os
5+
6+
SETUP_METADATA = \
7+
{
8+
"name": "phamb",
9+
"description": "Phages from metagenomic binning",
10+
"url": "https://github.com/RasmussenLab/phamb",
11+
"version": "1.0.1",
12+
"license": "MIT",
13+
"packages": ['phamb'],
14+
"package_data": {'phamb': ['dbs/RF_model.python39.sav']},
15+
"python_requires": ">=3.9",
16+
"install_requires": ["scikit-learn==1.0.2"],
17+
"classifiers":[
18+
"Programming Language :: Python :: 3",
19+
"License :: OSI Approved :: MIT License",
20+
"Operating System :: OS Independent",
21+
],
22+
"scripts":['phamb/run_RF.py','phamb/split_contigs.py','phamb/vambtools.py','phamb/run_RF_modules.py']
23+
}
24+
25+
setup(**SETUP_METADATA)

test/all.DVF.predictions.txt

Lines changed: 3001 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)