From e778d643630adb6ba96f40711e02e886122680c9 Mon Sep 17 00:00:00 2001
From: Hannes Hauswedell
Date: Wed, 22 Jan 2020 14:20:04 +0100
Subject: [PATCH 1/7] [FIX] three fixes
---
src/search_algo.hpp | 29 ++++++++++++++---------------
1 file changed, 14 insertions(+), 15 deletions(-)
diff --git a/src/search_algo.hpp b/src/search_algo.hpp
index 9e33c2b38..b304b74fc 100644
--- a/src/search_algo.hpp
+++ b/src/search_algo.hpp
@@ -2378,6 +2378,10 @@ iterateMatchesFullSimd(TLocalHolder & lH)
? lH.gH.untransSubjSeqLengths[bm._n_sId]
: length(lH.gH.subjSeqs[it->subjId]);
+ bm.qLength = qIsTranslated(TGlobalHolder::blastProgram)
+ ? lH.gH.untransQrySeqLengths[bm._n_qId ]
+ : length(lH.gH.qrySeqs[it->qryId]);
+
_setupAlignInfix(bm, *it, lH);
_setFrames(bm, *it, lH);
@@ -2555,7 +2559,7 @@ iterateMatchesFullSerial(TLocalHolder & lH)
? lH.gH.untransQrySeqLengths[trueQryId]
: length(lH.gH.qrySeqs[lH.matches[0].qryId]));
- unsigned band = _bandSize(record.qLength, lH);
+ size_t band = _bandSize(length(lH.gH.qrySeqs[lH.matches[0].qryId]), lH);
#ifdef LAMBDA_MICRO_STATS
double start = sysTime();
@@ -2578,25 +2582,20 @@ iterateMatchesFullSerial(TLocalHolder & lH)
? lH.gH.untransSubjSeqLengths[bm._n_sId]
: length(lH.gH.subjSeqs[it->subjId]);
+ bm.qLength = qIsTranslated(TGlobalHolder::blastProgram)
+ ? lH.gH.untransQrySeqLengths[bm._n_qId ]
+ : length(lH.gH.qrySeqs[it->qryId]);
+
_setupAlignInfix(bm, *it, lH);
_setFrames(bm, m, lH);
// Run extension WITHOUT TRACEBACK
- typedef AlignConfig2,
- DPBandConfig,
- FreeEndGaps_,
- TracebackOff> TAlignConfig;
-
- DPScoutState_ scoutState;
-
- bm.alignStats.alignmentScore = _setUpAndRunAlignment(lH.alignContext.dpContext,
- lH.alignContext.traceSegment,
- scoutState,
- source(bm.alignRow0),
- source(bm.alignRow1),
- seqanScheme(context(lH.gH.outfile).scoringScheme),
- TAlignConfig(-band, +band));
+ bm.alignStats.alignmentScore = localAlignmentScore(bm.alignRow0,
+ bm.alignRow1,
+ seqanScheme(context(lH.gH.outfile).scoringScheme),
+ -band,
+ +band);
computeEValueThreadSafe(bm, record.qLength, context(lH.gH.outfile));
From d48cc93e1b3ac47fc4850044e0354dd29e5968c5 Mon Sep 17 00:00:00 2001
From: sarahet
Date: Thu, 30 Jan 2020 05:55:44 +0100
Subject: [PATCH 2/7] [Fix] Get correct number of header entries for subject
sequences
---
src/search_output.hpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/search_output.hpp b/src/search_output.hpp
index 395e8b64b..0852eeabb 100644
--- a/src/search_output.hpp
+++ b/src/search_output.hpp
@@ -330,7 +330,7 @@ myWriteHeader(TGH & globalHolder, TLambdaOptions const & options)
if (sIsTranslated(TGH::blastProgram))
{
//TODO can we get around a copy?
- subjSeqLengths = globalHolder.untransSubjSeqLengths;
+ subjSeqLengths = prefix(globalHolder.untransSubjSeqLengths, length(globalHolder.untransSubjSeqLengths) - 1);
} else
{
// compute lengths ultra-fast
From b3eba457288c4f671c2c8d3b859e85e8581155e8 Mon Sep 17 00:00:00 2001
From: sarahet
Date: Wed, 21 Dec 2022 09:00:58 +0100
Subject: [PATCH 3/7] Re-activate pre-scoring = 1 if no reduction is used
---
.gitmodules | 2 +-
src/search_options.hpp | 7 +++----
src/shared_misc.hpp | 5 ++++-
3 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/.gitmodules b/.gitmodules
index 283c7f9ba..23a548592 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,3 @@
[submodule "seqan"]
path = include/seqan
- url = git://github.com/seqan/seqan.git
+ url = ../../seqan/seqan.git
diff --git a/src/search_options.hpp b/src/search_options.hpp
index a9ef96554..1ae027714 100644
--- a/src/search_options.hpp
+++ b/src/search_options.hpp
@@ -835,10 +835,9 @@ parseCommandLine(LambdaOptions & options, int argc, char const ** argv)
// TODO always prescore 1
getOptionValue(options.preScoring, parser, "pre-scoring");
- //TODO reactivate
-// if ((!isSet(parser, "pre-scoring")) &&
-// (options.alphReduction == 0))
-// options.preScoring = 1;
+ if ((!isSet(parser, "pre-scoring")) &&
+ (options.reducedAlphabet == options.transAlphabet))
+ options.preScoring = 1;
getOptionValue(options.preScoringThresh, parser, "pre-scoring-threshold");
// if (options.preScoring == 0)
diff --git a/src/shared_misc.hpp b/src/shared_misc.hpp
index ae38f269f..3e57c3966 100644
--- a/src/shared_misc.hpp
+++ b/src/shared_misc.hpp
@@ -26,7 +26,10 @@
#include
#include
#include
-#include
+
+#if __has_include()
+ #include
+#endif
#include
#include
From ea5bb685b420cc6b2d778d7ec96b9b68790ff6bb Mon Sep 17 00:00:00 2001
From: Hannes Hauswedell
Date: Wed, 17 Aug 2022 13:53:06 +0000
Subject: [PATCH 4/7] [feature] minimum bit-score
---
src/search_algo.hpp | 36 ++++++++++++++++++++++++-----------
src/search_datastructures.hpp | 12 +++++++++---
src/search_options.hpp | 7 ++++---
3 files changed, 38 insertions(+), 17 deletions(-)
diff --git a/src/search_algo.hpp b/src/search_algo.hpp
index c4f4a7b3d..4edf9e88e 100644
--- a/src/search_algo.hpp
+++ b/src/search_algo.hpp
@@ -2431,17 +2431,28 @@ iterateMatchesFullSimd(TLocalHolder & lH)
{
TBlastMatch & bm = *it;
- computeEValueThreadSafe(bm,
- qIsTranslated(TGlobalHolder::blastProgram)
- ? lH.gH.untransQrySeqLengths[bm._n_qId]
- : length(lH.gH.qrySeqs[_untrueQryId(bm, lH)]),
- context(lH.gH.outfile));
+ if (lH.options.minBitScore >= 0)
+ {
+ seqan::computeBitScore(bm, seqan::context(lH.gH.outfileBlastTab));
- if (bm.eValue > lH.options.eCutOff)
+ if (bm.bitScore < lH.options.minBitScore)
+ {
+ ++lH.stats.hitsFailedExtendBitScoreTest;
+ it = blastMatches.erase(it);
+ continue;
+ }
+ }
+
+ if (lH.options.maxEValue >= 0)
{
- ++lH.stats.hitsFailedExtendEValueTest;
- it = blastMatches.erase(it);
- continue;
+ computeEValueThreadSafe(bm, bm.qLength, seqan::context(lH.gH.outfileBlastTab));
+
+ if (bm.eValue > lH.options.maxEValue)
+ {
+ ++lH.stats.hitsFailedExtendEValueTest;
+ it = blastMatches.erase(it);
+ continue;
+ }
}
++it;
@@ -2484,9 +2495,12 @@ iterateMatchesFullSimd(TLocalHolder & lH)
continue;
}
- computeBitScore(bm, context(lH.gH.outfile));
+ // not computed previously
+ if (lH.options.minBitScore < 0)
+ seqan::computeBitScore(bm, seqan::context(lH.gH.outfileBlastTab));
- // evalue computed previously
+ if (lH.options.maxEValue < 0)
+ computeEValueThreadSafe(bm, bm.qLength, seqan::context(lH.gH.outfileBlastTab));
++it;
}
diff --git a/src/search_datastructures.hpp b/src/search_datastructures.hpp
index 29930d7a7..5417afc6b 100644
--- a/src/search_datastructures.hpp
+++ b/src/search_datastructures.hpp
@@ -111,6 +111,7 @@ struct StatsHolder
// post-extension
uint64_t hitsFailedExtendPercentIdentTest;
+ uint64_t hitsFailedExtendBitScoreTest;
uint64_t hitsFailedExtendEValueTest;
uint64_t hitsAbundant;
uint64_t hitsDuplicate;
@@ -150,6 +151,7 @@ struct StatsHolder
hitsPutativeAbundant = 0;
hitsFailedExtendPercentIdentTest = 0;
+ hitsFailedExtendBitScoreTest = 0;
hitsFailedExtendEValueTest = 0;
hitsAbundant = 0;
hitsDuplicate = 0;
@@ -183,6 +185,7 @@ struct StatsHolder
hitsPutativeAbundant += rhs.hitsPutativeAbundant;
hitsFailedExtendPercentIdentTest += rhs.hitsFailedExtendPercentIdentTest;
+ hitsFailedExtendBitScoreTest += rhs.hitsFailedExtendBitScoreTest;
hitsFailedExtendEValueTest += rhs.hitsFailedExtendEValueTest;
hitsAbundant += rhs.hitsAbundant;
hitsDuplicate += rhs.hitsDuplicate;
@@ -253,12 +256,15 @@ void printStats(StatsHolder const & stats, LambdaOptions const & options)
std::cout << "\n - failed pre-extend test " << R
<< stats.hitsFailedPreExtendTest << RR
<< (rem -= stats.hitsFailedPreExtendTest);
- std::cout << "\n - failed %-identity test " << R
- << stats.hitsFailedExtendPercentIdentTest << RR
- << (rem -= stats.hitsFailedExtendPercentIdentTest);
std::cout << "\n - failed e-value test " << R
<< stats.hitsFailedExtendEValueTest << RR
<< (rem -= stats.hitsFailedExtendEValueTest);
+ std::cout << "\n - failed bitScore test " << R
+ << stats.hitsFailedExtendBitScoreTest << RR
+ << (rem -= stats.hitsFailedExtendBitScoreTest);
+ std::cout << "\n - failed %-identity test " << R
+ << stats.hitsFailedExtendPercentIdentTest << RR
+ << (rem -= stats.hitsFailedExtendPercentIdentTest);
std::cout << "\n - duplicates " << R
<< stats.hitsDuplicate << RR
<< (rem -= stats.hitsDuplicate);
diff --git a/src/search_options.hpp b/src/search_options.hpp
index 1ae027714..3ad4e25b2 100644
--- a/src/search_options.hpp
+++ b/src/search_options.hpp
@@ -95,8 +95,8 @@ struct LambdaOptions : public SharedOptions
int misMatch = 0; // only for manual
int xDropOff = 0;
- int band = -1;
- double eCutOff = 0;
+ int32_t minBitScore = -1;
+ double maxEValue = 1e-04;
int idCutOff = 0;
unsigned long maxMatches = 500;
@@ -903,8 +903,9 @@ printOptions(LambdaOptions const & options)
<< " db index type: " << _indexEnumToName(options.dbIndexType) << "\n"
<< " OUTPUT (file)\n"
<< " output file: " << options.output << "\n"
+ << " maximum e-value: " << options.maxEValue << "\n"
+ << " minimum bit-score: " << options.minBitScore << "\n"
<< " minimum % identity: " << options.idCutOff << "\n"
- << " maximum e-value: " << options.eCutOff << "\n"
<< " max #matches per query: " << options.maxMatches << "\n"
<< " include subj names in sam:" << options.samWithRefHeader << "\n"
<< " include seq in sam/bam: " << options.samBamSeq << "\n"
From 391e939c6a2b0a999d60ef7b9d743c3b38868343 Mon Sep 17 00:00:00 2001
From: Hannes Hauswedell
Date: Mon, 8 May 2023 17:06:04 +0000
Subject: [PATCH 5/7] bit-score cleanup
---
src/search_algo.hpp | 30 ++++++++++++++++++------------
src/search_options.hpp | 14 ++++++++++++--
2 files changed, 30 insertions(+), 14 deletions(-)
diff --git a/src/search_algo.hpp b/src/search_algo.hpp
index 4edf9e88e..70edb626f 100644
--- a/src/search_algo.hpp
+++ b/src/search_algo.hpp
@@ -1736,7 +1736,7 @@ computeBlastMatch(typename TBlastRecord::TBlastMatch & bm,
computeBitScore(bm, context(lH.gH.outfile));
computeEValueThreadSafe(bm, record.qLength, context(lH.gH.outfile));
- if (bm.eValue > lH.options.eCutOff)
+ if (bm.eValue > lH.options.maxEValue)
return EVALUE;
_setFrames(bm, m, lH);
@@ -2431,9 +2431,9 @@ iterateMatchesFullSimd(TLocalHolder & lH)
{
TBlastMatch & bm = *it;
- if (lH.options.minBitScore >= 0)
+ if (lH.options.minBitScore > 0)
{
- seqan::computeBitScore(bm, seqan::context(lH.gH.outfileBlastTab));
+ seqan::computeBitScore(bm, seqan::context(lH.gH.outfile));
if (bm.bitScore < lH.options.minBitScore)
{
@@ -2443,9 +2443,9 @@ iterateMatchesFullSimd(TLocalHolder & lH)
}
}
- if (lH.options.maxEValue >= 0)
+ if (lH.options.maxEValue < 100)
{
- computeEValueThreadSafe(bm, bm.qLength, seqan::context(lH.gH.outfileBlastTab));
+ computeEValueThreadSafe(bm, bm.qLength, seqan::context(lH.gH.outfile));
if (bm.eValue > lH.options.maxEValue)
{
@@ -2496,11 +2496,11 @@ iterateMatchesFullSimd(TLocalHolder & lH)
}
// not computed previously
- if (lH.options.minBitScore < 0)
- seqan::computeBitScore(bm, seqan::context(lH.gH.outfileBlastTab));
+ if (lH.options.minBitScore == 0)
+ seqan::computeBitScore(bm, seqan::context(lH.gH.outfile));
- if (lH.options.maxEValue < 0)
- computeEValueThreadSafe(bm, bm.qLength, seqan::context(lH.gH.outfileBlastTab));
+ if (lH.options.maxEValue == 100)
+ computeEValueThreadSafe(bm, bm.qLength, seqan::context(lH.gH.outfile));
++it;
}
@@ -2611,9 +2611,16 @@ iterateMatchesFullSerial(TLocalHolder & lH)
-band,
+band);
- computeEValueThreadSafe(bm, record.qLength, context(lH.gH.outfile));
+ computeBitScore(bm, context(lH.gH.outfile));
+ if (bm.bitScore < lH.options.minBitScore)
+ {
+ ++lH.stats.hitsFailedExtendBitScoreTest;
+ record.matches.pop_back();
+ continue;
+ }
- if (bm.eValue > lH.options.eCutOff)
+ computeEValueThreadSafe(bm, record.qLength, context(lH.gH.outfile));
+ if (bm.eValue > lH.options.maxEValue)
{
++lH.stats.hitsFailedExtendEValueTest;
record.matches.pop_back();
@@ -2638,7 +2645,6 @@ iterateMatchesFullSerial(TLocalHolder & lH)
continue;
}
- computeBitScore(bm, context(lH.gH.outfile));
if (lH.options.hasSTaxIds)
bm.sTaxIds = lH.gH.sTaxIds[bm._n_sId];
diff --git a/src/search_options.hpp b/src/search_options.hpp
index 3ad4e25b2..27a5e7aed 100644
--- a/src/search_options.hpp
+++ b/src/search_options.hpp
@@ -95,7 +95,8 @@ struct LambdaOptions : public SharedOptions
int misMatch = 0; // only for manual
int xDropOff = 0;
- int32_t minBitScore = -1;
+ int band = -1;
+ double minBitScore = 0;
double maxEValue = 1e-04;
int idCutOff = 0;
unsigned long maxMatches = 500;
@@ -247,6 +248,14 @@ parseCommandLine(LambdaOptions & options, int argc, char const ** argv)
setMinValue(parser, "e-value", "0");
setMaxValue(parser, "e-value", "100");
+ addOption(parser, ArgParseOption("", "bit-score",
+ "Output only matches that score above this threshold.",
+ ArgParseArgument::DOUBLE));
+ setDefaultValue(parser, "bit-score", "0");
+ setMinValue(parser, "bit-score", "0");
+ setMaxValue(parser, "bit-score", "1000");
+
+
addOption(parser, ArgParseOption("n", "num-matches",
"Print at most this number of matches per query.",
ArgParseArgument::INTEGER));
@@ -771,7 +780,8 @@ parseCommandLine(LambdaOptions & options, int argc, char const ** argv)
getOptionValue(options.seedDeltaIncreasesLength, parser, "seed-delta-increases-length");
- getOptionValue(options.eCutOff, parser, "e-value");
+ getOptionValue(options.maxEValue, parser, "e-value");
+ getOptionValue(options.minBitScore, parser, "bit-score");
getOptionValue(options.idCutOff, parser, "percent-identity");
getOptionValue(options.xDropOff, parser, "x-drop");
From c7091232858369a98925ce870ff6f118c0a26591 Mon Sep 17 00:00:00 2001
From: Hannes Hauswedell
Date: Mon, 17 Jul 2023 14:26:37 +0000
Subject: [PATCH 6/7] bump version prior to release
---
src/CMakeLists.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 703dbd7b9..c7787f5ff 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -13,7 +13,7 @@
# change this after every release
set (SEQAN_APP_VERSION_MAJOR "2")
set (SEQAN_APP_VERSION_MINOR "0")
-set (SEQAN_APP_VERSION_PATCH "0")
+set (SEQAN_APP_VERSION_PATCH "1")
# don't change the following
set (SEQAN_APP_VERSION "${SEQAN_APP_VERSION_MAJOR}.${SEQAN_APP_VERSION_MINOR}.${SEQAN_APP_VERSION_PATCH}")
From 40b9fb2f9a3c8dac498d5de6241af4f76146eed8 Mon Sep 17 00:00:00 2001
From: Hannes Hauswedell
Date: Tue, 18 Jul 2023 15:36:30 +0000
Subject: [PATCH 7/7] [fix] dispatcher script on old macOS
---
bin/lambda2.in | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bin/lambda2.in b/bin/lambda2.in
index ebb114559..dc85f101e 100644
--- a/bin/lambda2.in
+++ b/bin/lambda2.in
@@ -1,6 +1,6 @@
#!/bin/sh
-CURDIR="$(readlink -f $(dirname "$0"))/"
+CURDIR="$(cd "$(dirname "$0")" && pwd -P)/"
SYSTEM_BIN_DIR="@CMAKE_INSTALL_FULL_BINDIR@/"
if [ "${CURDIR}" = "${SYSTEM_BIN_DIR}" ]; then # we are installed