From e778d643630adb6ba96f40711e02e886122680c9 Mon Sep 17 00:00:00 2001 From: Hannes Hauswedell Date: Wed, 22 Jan 2020 14:20:04 +0100 Subject: [PATCH 1/7] [FIX] three fixes --- src/search_algo.hpp | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/search_algo.hpp b/src/search_algo.hpp index 9e33c2b38..b304b74fc 100644 --- a/src/search_algo.hpp +++ b/src/search_algo.hpp @@ -2378,6 +2378,10 @@ iterateMatchesFullSimd(TLocalHolder & lH) ? lH.gH.untransSubjSeqLengths[bm._n_sId] : length(lH.gH.subjSeqs[it->subjId]); + bm.qLength = qIsTranslated(TGlobalHolder::blastProgram) + ? lH.gH.untransQrySeqLengths[bm._n_qId ] + : length(lH.gH.qrySeqs[it->qryId]); + _setupAlignInfix(bm, *it, lH); _setFrames(bm, *it, lH); @@ -2555,7 +2559,7 @@ iterateMatchesFullSerial(TLocalHolder & lH) ? lH.gH.untransQrySeqLengths[trueQryId] : length(lH.gH.qrySeqs[lH.matches[0].qryId])); - unsigned band = _bandSize(record.qLength, lH); + size_t band = _bandSize(length(lH.gH.qrySeqs[lH.matches[0].qryId]), lH); #ifdef LAMBDA_MICRO_STATS double start = sysTime(); @@ -2578,25 +2582,20 @@ iterateMatchesFullSerial(TLocalHolder & lH) ? lH.gH.untransSubjSeqLengths[bm._n_sId] : length(lH.gH.subjSeqs[it->subjId]); + bm.qLength = qIsTranslated(TGlobalHolder::blastProgram) + ? lH.gH.untransQrySeqLengths[bm._n_qId ] + : length(lH.gH.qrySeqs[it->qryId]); + _setupAlignInfix(bm, *it, lH); _setFrames(bm, m, lH); // Run extension WITHOUT TRACEBACK - typedef AlignConfig2, - DPBandConfig, - FreeEndGaps_, - TracebackOff> TAlignConfig; - - DPScoutState_ scoutState; - - bm.alignStats.alignmentScore = _setUpAndRunAlignment(lH.alignContext.dpContext, - lH.alignContext.traceSegment, - scoutState, - source(bm.alignRow0), - source(bm.alignRow1), - seqanScheme(context(lH.gH.outfile).scoringScheme), - TAlignConfig(-band, +band)); + bm.alignStats.alignmentScore = localAlignmentScore(bm.alignRow0, + bm.alignRow1, + seqanScheme(context(lH.gH.outfile).scoringScheme), + -band, + +band); computeEValueThreadSafe(bm, record.qLength, context(lH.gH.outfile)); From d48cc93e1b3ac47fc4850044e0354dd29e5968c5 Mon Sep 17 00:00:00 2001 From: sarahet Date: Thu, 30 Jan 2020 05:55:44 +0100 Subject: [PATCH 2/7] [Fix] Get correct number of header entries for subject sequences --- src/search_output.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search_output.hpp b/src/search_output.hpp index 395e8b64b..0852eeabb 100644 --- a/src/search_output.hpp +++ b/src/search_output.hpp @@ -330,7 +330,7 @@ myWriteHeader(TGH & globalHolder, TLambdaOptions const & options) if (sIsTranslated(TGH::blastProgram)) { //TODO can we get around a copy? - subjSeqLengths = globalHolder.untransSubjSeqLengths; + subjSeqLengths = prefix(globalHolder.untransSubjSeqLengths, length(globalHolder.untransSubjSeqLengths) - 1); } else { // compute lengths ultra-fast From b3eba457288c4f671c2c8d3b859e85e8581155e8 Mon Sep 17 00:00:00 2001 From: sarahet Date: Wed, 21 Dec 2022 09:00:58 +0100 Subject: [PATCH 3/7] Re-activate pre-scoring = 1 if no reduction is used --- .gitmodules | 2 +- src/search_options.hpp | 7 +++---- src/shared_misc.hpp | 5 ++++- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.gitmodules b/.gitmodules index 283c7f9ba..23a548592 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "seqan"] path = include/seqan - url = git://github.com/seqan/seqan.git + url = ../../seqan/seqan.git diff --git a/src/search_options.hpp b/src/search_options.hpp index a9ef96554..1ae027714 100644 --- a/src/search_options.hpp +++ b/src/search_options.hpp @@ -835,10 +835,9 @@ parseCommandLine(LambdaOptions & options, int argc, char const ** argv) // TODO always prescore 1 getOptionValue(options.preScoring, parser, "pre-scoring"); - //TODO reactivate -// if ((!isSet(parser, "pre-scoring")) && -// (options.alphReduction == 0)) -// options.preScoring = 1; + if ((!isSet(parser, "pre-scoring")) && + (options.reducedAlphabet == options.transAlphabet)) + options.preScoring = 1; getOptionValue(options.preScoringThresh, parser, "pre-scoring-threshold"); // if (options.preScoring == 0) diff --git a/src/shared_misc.hpp b/src/shared_misc.hpp index ae38f269f..3e57c3966 100644 --- a/src/shared_misc.hpp +++ b/src/shared_misc.hpp @@ -26,7 +26,10 @@ #include #include #include -#include + +#if __has_include() + #include +#endif #include #include From ea5bb685b420cc6b2d778d7ec96b9b68790ff6bb Mon Sep 17 00:00:00 2001 From: Hannes Hauswedell Date: Wed, 17 Aug 2022 13:53:06 +0000 Subject: [PATCH 4/7] [feature] minimum bit-score --- src/search_algo.hpp | 36 ++++++++++++++++++++++++----------- src/search_datastructures.hpp | 12 +++++++++--- src/search_options.hpp | 7 ++++--- 3 files changed, 38 insertions(+), 17 deletions(-) diff --git a/src/search_algo.hpp b/src/search_algo.hpp index c4f4a7b3d..4edf9e88e 100644 --- a/src/search_algo.hpp +++ b/src/search_algo.hpp @@ -2431,17 +2431,28 @@ iterateMatchesFullSimd(TLocalHolder & lH) { TBlastMatch & bm = *it; - computeEValueThreadSafe(bm, - qIsTranslated(TGlobalHolder::blastProgram) - ? lH.gH.untransQrySeqLengths[bm._n_qId] - : length(lH.gH.qrySeqs[_untrueQryId(bm, lH)]), - context(lH.gH.outfile)); + if (lH.options.minBitScore >= 0) + { + seqan::computeBitScore(bm, seqan::context(lH.gH.outfileBlastTab)); - if (bm.eValue > lH.options.eCutOff) + if (bm.bitScore < lH.options.minBitScore) + { + ++lH.stats.hitsFailedExtendBitScoreTest; + it = blastMatches.erase(it); + continue; + } + } + + if (lH.options.maxEValue >= 0) { - ++lH.stats.hitsFailedExtendEValueTest; - it = blastMatches.erase(it); - continue; + computeEValueThreadSafe(bm, bm.qLength, seqan::context(lH.gH.outfileBlastTab)); + + if (bm.eValue > lH.options.maxEValue) + { + ++lH.stats.hitsFailedExtendEValueTest; + it = blastMatches.erase(it); + continue; + } } ++it; @@ -2484,9 +2495,12 @@ iterateMatchesFullSimd(TLocalHolder & lH) continue; } - computeBitScore(bm, context(lH.gH.outfile)); + // not computed previously + if (lH.options.minBitScore < 0) + seqan::computeBitScore(bm, seqan::context(lH.gH.outfileBlastTab)); - // evalue computed previously + if (lH.options.maxEValue < 0) + computeEValueThreadSafe(bm, bm.qLength, seqan::context(lH.gH.outfileBlastTab)); ++it; } diff --git a/src/search_datastructures.hpp b/src/search_datastructures.hpp index 29930d7a7..5417afc6b 100644 --- a/src/search_datastructures.hpp +++ b/src/search_datastructures.hpp @@ -111,6 +111,7 @@ struct StatsHolder // post-extension uint64_t hitsFailedExtendPercentIdentTest; + uint64_t hitsFailedExtendBitScoreTest; uint64_t hitsFailedExtendEValueTest; uint64_t hitsAbundant; uint64_t hitsDuplicate; @@ -150,6 +151,7 @@ struct StatsHolder hitsPutativeAbundant = 0; hitsFailedExtendPercentIdentTest = 0; + hitsFailedExtendBitScoreTest = 0; hitsFailedExtendEValueTest = 0; hitsAbundant = 0; hitsDuplicate = 0; @@ -183,6 +185,7 @@ struct StatsHolder hitsPutativeAbundant += rhs.hitsPutativeAbundant; hitsFailedExtendPercentIdentTest += rhs.hitsFailedExtendPercentIdentTest; + hitsFailedExtendBitScoreTest += rhs.hitsFailedExtendBitScoreTest; hitsFailedExtendEValueTest += rhs.hitsFailedExtendEValueTest; hitsAbundant += rhs.hitsAbundant; hitsDuplicate += rhs.hitsDuplicate; @@ -253,12 +256,15 @@ void printStats(StatsHolder const & stats, LambdaOptions const & options) std::cout << "\n - failed pre-extend test " << R << stats.hitsFailedPreExtendTest << RR << (rem -= stats.hitsFailedPreExtendTest); - std::cout << "\n - failed %-identity test " << R - << stats.hitsFailedExtendPercentIdentTest << RR - << (rem -= stats.hitsFailedExtendPercentIdentTest); std::cout << "\n - failed e-value test " << R << stats.hitsFailedExtendEValueTest << RR << (rem -= stats.hitsFailedExtendEValueTest); + std::cout << "\n - failed bitScore test " << R + << stats.hitsFailedExtendBitScoreTest << RR + << (rem -= stats.hitsFailedExtendBitScoreTest); + std::cout << "\n - failed %-identity test " << R + << stats.hitsFailedExtendPercentIdentTest << RR + << (rem -= stats.hitsFailedExtendPercentIdentTest); std::cout << "\n - duplicates " << R << stats.hitsDuplicate << RR << (rem -= stats.hitsDuplicate); diff --git a/src/search_options.hpp b/src/search_options.hpp index 1ae027714..3ad4e25b2 100644 --- a/src/search_options.hpp +++ b/src/search_options.hpp @@ -95,8 +95,8 @@ struct LambdaOptions : public SharedOptions int misMatch = 0; // only for manual int xDropOff = 0; - int band = -1; - double eCutOff = 0; + int32_t minBitScore = -1; + double maxEValue = 1e-04; int idCutOff = 0; unsigned long maxMatches = 500; @@ -903,8 +903,9 @@ printOptions(LambdaOptions const & options) << " db index type: " << _indexEnumToName(options.dbIndexType) << "\n" << " OUTPUT (file)\n" << " output file: " << options.output << "\n" + << " maximum e-value: " << options.maxEValue << "\n" + << " minimum bit-score: " << options.minBitScore << "\n" << " minimum % identity: " << options.idCutOff << "\n" - << " maximum e-value: " << options.eCutOff << "\n" << " max #matches per query: " << options.maxMatches << "\n" << " include subj names in sam:" << options.samWithRefHeader << "\n" << " include seq in sam/bam: " << options.samBamSeq << "\n" From 391e939c6a2b0a999d60ef7b9d743c3b38868343 Mon Sep 17 00:00:00 2001 From: Hannes Hauswedell Date: Mon, 8 May 2023 17:06:04 +0000 Subject: [PATCH 5/7] bit-score cleanup --- src/search_algo.hpp | 30 ++++++++++++++++++------------ src/search_options.hpp | 14 ++++++++++++-- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/src/search_algo.hpp b/src/search_algo.hpp index 4edf9e88e..70edb626f 100644 --- a/src/search_algo.hpp +++ b/src/search_algo.hpp @@ -1736,7 +1736,7 @@ computeBlastMatch(typename TBlastRecord::TBlastMatch & bm, computeBitScore(bm, context(lH.gH.outfile)); computeEValueThreadSafe(bm, record.qLength, context(lH.gH.outfile)); - if (bm.eValue > lH.options.eCutOff) + if (bm.eValue > lH.options.maxEValue) return EVALUE; _setFrames(bm, m, lH); @@ -2431,9 +2431,9 @@ iterateMatchesFullSimd(TLocalHolder & lH) { TBlastMatch & bm = *it; - if (lH.options.minBitScore >= 0) + if (lH.options.minBitScore > 0) { - seqan::computeBitScore(bm, seqan::context(lH.gH.outfileBlastTab)); + seqan::computeBitScore(bm, seqan::context(lH.gH.outfile)); if (bm.bitScore < lH.options.minBitScore) { @@ -2443,9 +2443,9 @@ iterateMatchesFullSimd(TLocalHolder & lH) } } - if (lH.options.maxEValue >= 0) + if (lH.options.maxEValue < 100) { - computeEValueThreadSafe(bm, bm.qLength, seqan::context(lH.gH.outfileBlastTab)); + computeEValueThreadSafe(bm, bm.qLength, seqan::context(lH.gH.outfile)); if (bm.eValue > lH.options.maxEValue) { @@ -2496,11 +2496,11 @@ iterateMatchesFullSimd(TLocalHolder & lH) } // not computed previously - if (lH.options.minBitScore < 0) - seqan::computeBitScore(bm, seqan::context(lH.gH.outfileBlastTab)); + if (lH.options.minBitScore == 0) + seqan::computeBitScore(bm, seqan::context(lH.gH.outfile)); - if (lH.options.maxEValue < 0) - computeEValueThreadSafe(bm, bm.qLength, seqan::context(lH.gH.outfileBlastTab)); + if (lH.options.maxEValue == 100) + computeEValueThreadSafe(bm, bm.qLength, seqan::context(lH.gH.outfile)); ++it; } @@ -2611,9 +2611,16 @@ iterateMatchesFullSerial(TLocalHolder & lH) -band, +band); - computeEValueThreadSafe(bm, record.qLength, context(lH.gH.outfile)); + computeBitScore(bm, context(lH.gH.outfile)); + if (bm.bitScore < lH.options.minBitScore) + { + ++lH.stats.hitsFailedExtendBitScoreTest; + record.matches.pop_back(); + continue; + } - if (bm.eValue > lH.options.eCutOff) + computeEValueThreadSafe(bm, record.qLength, context(lH.gH.outfile)); + if (bm.eValue > lH.options.maxEValue) { ++lH.stats.hitsFailedExtendEValueTest; record.matches.pop_back(); @@ -2638,7 +2645,6 @@ iterateMatchesFullSerial(TLocalHolder & lH) continue; } - computeBitScore(bm, context(lH.gH.outfile)); if (lH.options.hasSTaxIds) bm.sTaxIds = lH.gH.sTaxIds[bm._n_sId]; diff --git a/src/search_options.hpp b/src/search_options.hpp index 3ad4e25b2..27a5e7aed 100644 --- a/src/search_options.hpp +++ b/src/search_options.hpp @@ -95,7 +95,8 @@ struct LambdaOptions : public SharedOptions int misMatch = 0; // only for manual int xDropOff = 0; - int32_t minBitScore = -1; + int band = -1; + double minBitScore = 0; double maxEValue = 1e-04; int idCutOff = 0; unsigned long maxMatches = 500; @@ -247,6 +248,14 @@ parseCommandLine(LambdaOptions & options, int argc, char const ** argv) setMinValue(parser, "e-value", "0"); setMaxValue(parser, "e-value", "100"); + addOption(parser, ArgParseOption("", "bit-score", + "Output only matches that score above this threshold.", + ArgParseArgument::DOUBLE)); + setDefaultValue(parser, "bit-score", "0"); + setMinValue(parser, "bit-score", "0"); + setMaxValue(parser, "bit-score", "1000"); + + addOption(parser, ArgParseOption("n", "num-matches", "Print at most this number of matches per query.", ArgParseArgument::INTEGER)); @@ -771,7 +780,8 @@ parseCommandLine(LambdaOptions & options, int argc, char const ** argv) getOptionValue(options.seedDeltaIncreasesLength, parser, "seed-delta-increases-length"); - getOptionValue(options.eCutOff, parser, "e-value"); + getOptionValue(options.maxEValue, parser, "e-value"); + getOptionValue(options.minBitScore, parser, "bit-score"); getOptionValue(options.idCutOff, parser, "percent-identity"); getOptionValue(options.xDropOff, parser, "x-drop"); From c7091232858369a98925ce870ff6f118c0a26591 Mon Sep 17 00:00:00 2001 From: Hannes Hauswedell Date: Mon, 17 Jul 2023 14:26:37 +0000 Subject: [PATCH 6/7] bump version prior to release --- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 703dbd7b9..c7787f5ff 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -13,7 +13,7 @@ # change this after every release set (SEQAN_APP_VERSION_MAJOR "2") set (SEQAN_APP_VERSION_MINOR "0") -set (SEQAN_APP_VERSION_PATCH "0") +set (SEQAN_APP_VERSION_PATCH "1") # don't change the following set (SEQAN_APP_VERSION "${SEQAN_APP_VERSION_MAJOR}.${SEQAN_APP_VERSION_MINOR}.${SEQAN_APP_VERSION_PATCH}") From 40b9fb2f9a3c8dac498d5de6241af4f76146eed8 Mon Sep 17 00:00:00 2001 From: Hannes Hauswedell Date: Tue, 18 Jul 2023 15:36:30 +0000 Subject: [PATCH 7/7] [fix] dispatcher script on old macOS --- bin/lambda2.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/lambda2.in b/bin/lambda2.in index ebb114559..dc85f101e 100644 --- a/bin/lambda2.in +++ b/bin/lambda2.in @@ -1,6 +1,6 @@ #!/bin/sh -CURDIR="$(readlink -f $(dirname "$0"))/" +CURDIR="$(cd "$(dirname "$0")" && pwd -P)/" SYSTEM_BIN_DIR="@CMAKE_INSTALL_FULL_BINDIR@/" if [ "${CURDIR}" = "${SYSTEM_BIN_DIR}" ]; then # we are installed