Skip to content

Commit ab745a9

Browse files
committed
Add support for ProstT5 foldseek search
1 parent dc216e6 commit ab745a9

File tree

3 files changed

+64
-43
lines changed

3 files changed

+64
-43
lines changed

data/clustersearch.sh

Lines changed: 61 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -22,38 +22,48 @@ TARGET="$2"
2222
OUTPUT="$3"
2323
TMP_PATH="$4"
2424

25-
if [ -n "${USE_FOLDSEEK}" ]; then
25+
if [ -n "${USE_PROSTT5}" ]; then
26+
# Profile search in ProstT5 mode: abort early when ${TARGET}_clu_seq.dbtype is absent. The message names the same file the test checks.
[ -n "${USE_PROFILE}" ] && [ ! -f "${TARGET}_clu_seq.dbtype" ] && echo "${TARGET}_clu_seq.dbtype not found! Please make sure the ${TARGET} is clustered with clusterdb ${TARGET} tmp --search-mode 1" && exit 1;
27+
[ ! -f "${TARGET}_ss.dbtype" ] && echo "${TARGET}_ss.dbtype not found! Please make sure the ${TARGET} is created using ProstT5. " && exit 1;
28+
elif [ -n "${USE_FOLDSEEK}" ]; then
2629
[ -n "${USE_PROFILE}" ] && [ ! -f "${TARGET}_foldseek_clu_seq.dbtype" ] && echo "${TARGET}_foldseek_clu_seq.dbtype not found! Please make sure the ${TARGET}_foldseek is clustered with clusterdb ${TARGET}_foldseek tmp --search-mode 1" && exit 1;
27-
[ ! -f "$FOLDSEEK" ] && echo "Please make sure Foldseek is installed in the working directory." && exit 1;
28-
[ ! -f "${TARGET}_foldseek.dbtype" ] && echo "${TARGET}_foldseek.dbtype not found! Please make sure the ${TARGET}_foldseek is created with aa2foldseek" && exit 1;
30+
[ ! -f "${TARGET}_foldseek.dbtype" ] && echo "${TARGET}_foldseek.dbtype not found! Please make sure the ${TARGET}_foldseek is created with aa2foldseek. If ${TARGET} is created with ProstT5 please use --search-mode 2" && exit 1;
2931
fi
3032

3133
if [ -n "${USE_PROFILE}" ]; then
3234
if [ -n "${USE_FOLDSEEK}" ]; then
33-
if notExists "${TMP_PATH}/result_foldseek.index"; then
34-
# shellcheck disable=SC2086
35-
"${FOLDSEEK}" search "${QUERY}_foldseek" "${TARGET}_foldseek_clu" "${TMP_PATH}/result_foldseek" "${TMP_PATH}/search" --cluster-search 1 ${FOLDSEEKSEARCH_PAR}\
36-
|| fail "foldseek search failed"
37-
fi
38-
if notExists "${TMP_PATH}/result_clu.index"; then
39-
# shellcheck disable=SC2086
40-
"${MMSEQS}" search "${QUERY}_unmapped" "${TARGET}_clu" "${TMP_PATH}/result_clu" "${TMP_PATH}/search" ${SEARCH_PAR} \
41-
|| fail "mmseqs search failed"
42-
fi
43-
if notExists "${TMP_PATH}/result_exp.index"; then
44-
# shellcheck disable=SC2086
45-
"${MMSEQS}" expandaln "${QUERY}_unmapped" "${TARGET}_clu" "${TMP_PATH}/result_clu" "${TARGET}_clu_aln" "${TMP_PATH}/result_exp" ${THREADS_PAR} \
46-
|| fail "expandaln failed"
47-
fi
48-
if notExists "${TMP_PATH}/result_mmseqs.index"; then
49-
# shellcheck disable=SC2086
50-
"${MMSEQS}" align "${QUERY}_unmapped" "${TARGET}" "${TMP_PATH}/result_exp" "${TMP_PATH}/result_mmseqs" -a --alt-ali 10 ${THREADS_PAR} \
51-
|| fail "realign failed"
52-
fi
53-
if notExists "${TMP_PATH}/result.index"; then
54-
# shellcheck disable=SC2086
55-
"${MMSEQS}" concatdbs "${TMP_PATH}/result_foldseek" "${TMP_PATH}/result_mmseqs" "${TMP_PATH}/result" --preserve-keys ${THREADS_PAR} \
56-
|| fail "concatdbs failed"
35+
if [ -n "${USE_PROSTT5}" ] ; then
36+
if notExists "${TMP_PATH}/result.index"; then
37+
# shellcheck disable=SC2086
38+
"${FOLDSEEK}" search "${QUERY}" "${TARGET}_clu" "${TMP_PATH}/result" "${TMP_PATH}/search" --cluster-search 1 ${FOLDSEEKSEARCH_PAR}\
39+
|| fail "foldseek search failed"
40+
fi
41+
else
42+
if notExists "${TMP_PATH}/result_foldseek.index"; then
43+
# shellcheck disable=SC2086
44+
"${FOLDSEEK}" search "${QUERY}_foldseek" "${TARGET}_foldseek_clu" "${TMP_PATH}/result_foldseek" "${TMP_PATH}/search" --cluster-search 1 ${FOLDSEEKSEARCH_PAR}\
45+
|| fail "foldseek search failed"
46+
fi
47+
if notExists "${TMP_PATH}/result_clu.index"; then
48+
# shellcheck disable=SC2086
49+
"${MMSEQS}" search "${QUERY}_unmapped" "${TARGET}_clu" "${TMP_PATH}/result_clu" "${TMP_PATH}/search" ${SEARCH_PAR} \
50+
|| fail "mmseqs search failed"
51+
fi
52+
if notExists "${TMP_PATH}/result_exp.index"; then
53+
# shellcheck disable=SC2086
54+
"${MMSEQS}" expandaln "${QUERY}_unmapped" "${TARGET}_clu" "${TMP_PATH}/result_clu" "${TARGET}_clu_aln" "${TMP_PATH}/result_exp" ${THREADS_PAR} \
55+
|| fail "expandaln failed"
56+
fi
57+
if notExists "${TMP_PATH}/result_mmseqs.index"; then
58+
# shellcheck disable=SC2086
59+
"${MMSEQS}" align "${QUERY}_unmapped" "${TARGET}" "${TMP_PATH}/result_exp" "${TMP_PATH}/result_mmseqs" -a --alt-ali 10 ${THREADS_PAR} \
60+
|| fail "realign failed"
61+
fi
62+
if notExists "${TMP_PATH}/result.index"; then
63+
# shellcheck disable=SC2086
64+
"${MMSEQS}" concatdbs "${TMP_PATH}/result_foldseek" "${TMP_PATH}/result_mmseqs" "${TMP_PATH}/result" --preserve-keys ${THREADS_PAR} \
65+
|| fail "concatdbs failed"
66+
fi
5767
fi
5868
else
5969
if notExists "${TMP_PATH}/result_clu.index"; then
@@ -73,25 +83,35 @@ if [ -n "${USE_PROFILE}" ]; then
7383
else
7484
if notExists "${TMP_PATH}/result.index"; then
7585
if [ -n "${USE_FOLDSEEK}" ]; then
76-
if notExists "${TMP_PATH}/result_foldseek.index"; then
77-
# shellcheck disable=SC2086
78-
"${FOLDSEEK}" search "${QUERY}_foldseek" "${TARGET}_foldseek" "${TMP_PATH}/result_foldseek" "${TMP_PATH}/search" ${FOLDSEEKSEARCH_PAR}\
79-
|| fail "foldseek search failed"
80-
fi
81-
if notExists "${TMP_PATH}/result_mmseqs.index"; then
82-
# shellcheck disable=SC2086
83-
"${MMSEQS}" search "${QUERY}_unmapped" "${TARGET}" "${TMP_PATH}/result_mmseqs" "${TMP_PATH}/search" ${SEARCH_PAR} \
84-
|| fail "mmseqs search failed"
85-
fi
86-
if notExists "${TMP_PATH}/result.index"; then
87-
# shellcheck disable=SC2086
88-
"${MMSEQS}" concatdbs "${TMP_PATH}/result_foldseek" "${TMP_PATH}/result_mmseqs" "${TMP_PATH}/result" --preserve-keys ${THREADS_PAR} \
89-
|| fail "concatdbs failed"
86+
if [ -n "${USE_PROSTT5}" ] ; then
87+
if notExists "${TMP_PATH}/result.index"; then
88+
# shellcheck disable=SC2086
89+
"${FOLDSEEK}" search "${QUERY}" "${TARGET}" "${TMP_PATH}/result" "${TMP_PATH}/search" ${FOLDSEEKSEARCH_PAR}\
90+
|| fail "foldseek search failed"
91+
fi
92+
else
93+
if notExists "${TMP_PATH}/result_foldseek.index"; then
94+
# shellcheck disable=SC2086
95+
"${FOLDSEEK}" search "${QUERY}_foldseek" "${TARGET}_foldseek" "${TMP_PATH}/result_foldseek" "${TMP_PATH}/search" ${FOLDSEEKSEARCH_PAR}\
96+
|| fail "foldseek search failed"
97+
fi
98+
if notExists "${TMP_PATH}/result_mmseqs.index"; then
99+
# shellcheck disable=SC2086
100+
"${MMSEQS}" search "${QUERY}_unmapped" "${TARGET}" "${TMP_PATH}/result_mmseqs" "${TMP_PATH}/search" ${SEARCH_PAR} \
101+
|| fail "mmseqs search failed"
102+
fi
103+
if notExists "${TMP_PATH}/result.index"; then
104+
# shellcheck disable=SC2086
105+
"${MMSEQS}" concatdbs "${TMP_PATH}/result_foldseek" "${TMP_PATH}/result_mmseqs" "${TMP_PATH}/result" --preserve-keys ${THREADS_PAR} \
106+
|| fail "concatdbs failed"
107+
fi
90108
fi
91109
else
110+
if notExists "${TMP_PATH}/result.index"; then
92111
# shellcheck disable=SC2086
93112
"${MMSEQS}" search "${QUERY}" "${TARGET}" "${TMP_PATH}/result" "${TMP_PATH}/search" ${SEARCH_PAR} \
94113
|| fail "mmseqs search failed"
114+
fi
95115
fi
96116
fi
97117
fi

src/commons/LocalParameters.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ class LocalParameters : public Parameters {
5959
private:
6060
LocalParameters() :
6161
Parameters(),
62-
PARAM_CLUSTERSEARCH_MODE(PARAM_CLUSTERSEARCH_MODE_ID, "--search-mode", "Cluster Search Mode", "0: sequence search with MMseqs2, 1: structure comparison with Foldseek", typeid(int), (void *) &clusterSearchMode, "^[0-1]{1}"),
62+
PARAM_CLUSTERSEARCH_MODE(PARAM_CLUSTERSEARCH_MODE_ID, "--search-mode", "Cluster Search Mode", "0: sequence search with MMseqs2, 1: structure comparison with Foldseek, 2: Foldseek + ProstT5", typeid(int), (void *) &clusterSearchMode, "^[0-2]{1}"),
6363
PARAM_SUBOPTIMAL_HITS(PARAM_SUBOPTIMAL_HITS_ID, "--suboptimal-hits", "Include sub-optimal hits with factor", "Include sub-optimal hits of query sequence up to a factor of its E-value. 0: only include one best hit", typeid(int), (void *) &suboptHitsFactor, "^(0|[1-9]{1}[0-9]*)$"),
6464
PARAM_FILTER_SELF_MATCH(PARAM_FILTER_SELF_MATCH_ID, "--filter-self-match", "Filter self match", "Remove hits between the same set. 0: do not filter, 1: filter", typeid(bool), (void *) &filterSelfMatch, ""),
6565
PARAM_MULTIHIT_PVAL(PARAM_MULTIHIT_PVAL_ID, "--multihit-pval", "Multihit P-value cutoff", "Multihit P-value threshold for cluster match output", typeid(float), (void *) &pMHThr, "^0(\\.[0-9]+)?|^1(\\.0+)?$"),

src/workflow/clustersearch.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ int clustersearch(int argc, const char **argv, const Command &command) {
8989
}
9090

9191
bool useFoldseek = false;
92-
if (par.clusterSearchMode == 1) {
92+
if (par.clusterSearchMode >= 1) {
9393
useFoldseek = true;
9494
struct stat st;
9595
if (stat(par.foldseekPath.c_str(), &st) != 0) {
@@ -103,6 +103,7 @@ int clustersearch(int argc, const char **argv, const Command &command) {
103103
}
104104
}
105105
cmd.addVariable("USE_PROFILE", par.profileClusterSearch == 1 ? "TRUE" : NULL);
106+
cmd.addVariable("USE_PROSTT5", par.clusterSearchMode == 2 ? "TRUE" : NULL);
106107
cmd.addVariable("FOLDSEEK", par.foldseekPath.c_str());
107108
cmd.addVariable("USE_FOLDSEEK", useFoldseek ? "TRUE" : NULL);
108109
cmd.addVariable("CLUSTER_PAR", par.createParameterString(par.clusterworkflow).c_str());

0 commit comments

Comments
 (0)