diff --git a/.gitmodules b/.gitmodules index 283c7f9ba..23a548592 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "seqan"] path = include/seqan - url = git://github.com/seqan/seqan.git + url = ../../seqan/seqan.git diff --git a/bin/lambda2.in b/bin/lambda2.in index ebb114559..dc85f101e 100644 --- a/bin/lambda2.in +++ b/bin/lambda2.in @@ -1,6 +1,6 @@ #!/bin/sh -CURDIR="$(readlink -f $(dirname "$0"))/" +CURDIR="$(cd "$(dirname "$0")" && pwd -P)/" SYSTEM_BIN_DIR="@CMAKE_INSTALL_FULL_BINDIR@/" if [ "${CURDIR}" = "${SYSTEM_BIN_DIR}" ]; then # we are installed diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 703dbd7b9..c7787f5ff 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -13,7 +13,7 @@ # change this after every release set (SEQAN_APP_VERSION_MAJOR "2") set (SEQAN_APP_VERSION_MINOR "0") -set (SEQAN_APP_VERSION_PATCH "0") +set (SEQAN_APP_VERSION_PATCH "1") # don't change the following set (SEQAN_APP_VERSION "${SEQAN_APP_VERSION_MAJOR}.${SEQAN_APP_VERSION_MINOR}.${SEQAN_APP_VERSION_PATCH}") diff --git a/src/search_algo.hpp b/src/search_algo.hpp index 6d04c40f8..70edb626f 100644 --- a/src/search_algo.hpp +++ b/src/search_algo.hpp @@ -1736,7 +1736,7 @@ computeBlastMatch(typename TBlastRecord::TBlastMatch & bm, computeBitScore(bm, context(lH.gH.outfile)); computeEValueThreadSafe(bm, record.qLength, context(lH.gH.outfile)); - if (bm.eValue > lH.options.eCutOff) + if (bm.eValue > lH.options.maxEValue) return EVALUE; _setFrames(bm, m, lH); @@ -2378,6 +2378,10 @@ iterateMatchesFullSimd(TLocalHolder & lH) ? lH.gH.untransSubjSeqLengths[bm._n_sId] : length(lH.gH.subjSeqs[_untrueSubjId(bm, lH)]); + bm.qLength = qIsTranslated(TGlobalHolder::blastProgram) + ? lH.gH.untransQrySeqLengths[bm._n_qId ] + : length(lH.gH.qrySeqs[it->qryId]); + _setupAlignInfix(bm, *it, lH); _setFrames(bm, *it, lH); @@ -2427,17 +2431,28 @@ iterateMatchesFullSimd(TLocalHolder & lH) { TBlastMatch & bm = *it; - computeEValueThreadSafe(bm, - qIsTranslated(TGlobalHolder::blastProgram) - ? lH.gH.untransQrySeqLengths[bm._n_qId] - : length(lH.gH.qrySeqs[_untrueQryId(bm, lH)]), - context(lH.gH.outfile)); + if (lH.options.minBitScore > 0) + { + seqan::computeBitScore(bm, seqan::context(lH.gH.outfile)); + + if (bm.bitScore < lH.options.minBitScore) + { + ++lH.stats.hitsFailedExtendBitScoreTest; + it = blastMatches.erase(it); + continue; + } + } - if (bm.eValue > lH.options.eCutOff) + if (lH.options.maxEValue < 100) { - ++lH.stats.hitsFailedExtendEValueTest; - it = blastMatches.erase(it); - continue; + computeEValueThreadSafe(bm, bm.qLength, seqan::context(lH.gH.outfile)); + + if (bm.eValue > lH.options.maxEValue) + { + ++lH.stats.hitsFailedExtendEValueTest; + it = blastMatches.erase(it); + continue; + } } ++it; @@ -2480,9 +2495,12 @@ iterateMatchesFullSimd(TLocalHolder & lH) continue; } - computeBitScore(bm, context(lH.gH.outfile)); + // not computed previously + if (lH.options.minBitScore == 0) + seqan::computeBitScore(bm, seqan::context(lH.gH.outfile)); - // evalue computed previously + if (lH.options.maxEValue == 100) + computeEValueThreadSafe(bm, bm.qLength, seqan::context(lH.gH.outfile)); ++it; } @@ -2555,7 +2573,7 @@ iterateMatchesFullSerial(TLocalHolder & lH) ? lH.gH.untransQrySeqLengths[trueQryId] : length(lH.gH.qrySeqs[lH.matches[0].qryId])); - unsigned band = _bandSize(record.qLength, lH); + size_t band = _bandSize(length(lH.gH.qrySeqs[lH.matches[0].qryId]), lH); #ifdef LAMBDA_MICRO_STATS double start = sysTime(); @@ -2578,29 +2596,31 @@ iterateMatchesFullSerial(TLocalHolder & lH) ? lH.gH.untransSubjSeqLengths[bm._n_sId] : length(lH.gH.subjSeqs[it->subjId]); + bm.qLength = qIsTranslated(TGlobalHolder::blastProgram) + ? lH.gH.untransQrySeqLengths[bm._n_qId ] + : length(lH.gH.qrySeqs[it->qryId]); + _setupAlignInfix(bm, *it, lH); _setFrames(bm, m, lH); // Run extension WITHOUT TRACEBACK - typedef AlignConfig2, - DPBandConfig, - FreeEndGaps_, - TracebackOff> TAlignConfig; - - DPScoutState_ scoutState; + bm.alignStats.alignmentScore = localAlignmentScore(bm.alignRow0, + bm.alignRow1, + seqanScheme(context(lH.gH.outfile).scoringScheme), + -band, + +band); - bm.alignStats.alignmentScore = _setUpAndRunAlignment(lH.alignContext.dpContext, - lH.alignContext.traceSegment, - scoutState, - source(bm.alignRow0), - source(bm.alignRow1), - seqanScheme(context(lH.gH.outfile).scoringScheme), - TAlignConfig(-band, +band)); + computeBitScore(bm, context(lH.gH.outfile)); + if (bm.bitScore < lH.options.minBitScore) + { + ++lH.stats.hitsFailedExtendBitScoreTest; + record.matches.pop_back(); + continue; + } computeEValueThreadSafe(bm, record.qLength, context(lH.gH.outfile)); - - if (bm.eValue > lH.options.eCutOff) + if (bm.eValue > lH.options.maxEValue) { ++lH.stats.hitsFailedExtendEValueTest; record.matches.pop_back(); @@ -2625,7 +2645,6 @@ iterateMatchesFullSerial(TLocalHolder & lH) continue; } - computeBitScore(bm, context(lH.gH.outfile)); if (lH.options.hasSTaxIds) bm.sTaxIds = lH.gH.sTaxIds[bm._n_sId]; diff --git a/src/search_datastructures.hpp b/src/search_datastructures.hpp index 29930d7a7..5417afc6b 100644 --- a/src/search_datastructures.hpp +++ b/src/search_datastructures.hpp @@ -111,6 +111,7 @@ struct StatsHolder // post-extension uint64_t hitsFailedExtendPercentIdentTest; + uint64_t hitsFailedExtendBitScoreTest; uint64_t hitsFailedExtendEValueTest; uint64_t hitsAbundant; uint64_t hitsDuplicate; @@ -150,6 +151,7 @@ struct StatsHolder hitsPutativeAbundant = 0; hitsFailedExtendPercentIdentTest = 0; + hitsFailedExtendBitScoreTest = 0; hitsFailedExtendEValueTest = 0; hitsAbundant = 0; hitsDuplicate = 0; @@ -183,6 +185,7 @@ struct StatsHolder hitsPutativeAbundant += rhs.hitsPutativeAbundant; hitsFailedExtendPercentIdentTest += rhs.hitsFailedExtendPercentIdentTest; + hitsFailedExtendBitScoreTest += rhs.hitsFailedExtendBitScoreTest; hitsFailedExtendEValueTest += rhs.hitsFailedExtendEValueTest; hitsAbundant += rhs.hitsAbundant; hitsDuplicate += rhs.hitsDuplicate; @@ -253,12 +256,15 @@ void printStats(StatsHolder const & stats, LambdaOptions const & options) std::cout << "\n - failed pre-extend test " << R << stats.hitsFailedPreExtendTest << RR << (rem -= stats.hitsFailedPreExtendTest); - std::cout << "\n - failed %-identity test " << R - << stats.hitsFailedExtendPercentIdentTest << RR - << (rem -= stats.hitsFailedExtendPercentIdentTest); std::cout << "\n - failed e-value test " << R << stats.hitsFailedExtendEValueTest << RR << (rem -= stats.hitsFailedExtendEValueTest); + std::cout << "\n - failed bitScore test " << R + << stats.hitsFailedExtendBitScoreTest << RR + << (rem -= stats.hitsFailedExtendBitScoreTest); + std::cout << "\n - failed %-identity test " << R + << stats.hitsFailedExtendPercentIdentTest << RR + << (rem -= stats.hitsFailedExtendPercentIdentTest); std::cout << "\n - duplicates " << R << stats.hitsDuplicate << RR << (rem -= stats.hitsDuplicate); diff --git a/src/search_options.hpp b/src/search_options.hpp index a9ef96554..27a5e7aed 100644 --- a/src/search_options.hpp +++ b/src/search_options.hpp @@ -96,7 +96,8 @@ struct LambdaOptions : public SharedOptions int xDropOff = 0; int band = -1; - double eCutOff = 0; + double minBitScore = 0; + double maxEValue = 1e-04; int idCutOff = 0; unsigned long maxMatches = 500; @@ -247,6 +248,14 @@ parseCommandLine(LambdaOptions & options, int argc, char const ** argv) setMinValue(parser, "e-value", "0"); setMaxValue(parser, "e-value", "100"); + addOption(parser, ArgParseOption("", "bit-score", + "Output only matches that score above this threshold.", + ArgParseArgument::DOUBLE)); + setDefaultValue(parser, "bit-score", "0"); + setMinValue(parser, "bit-score", "0"); + setMaxValue(parser, "bit-score", "1000"); + + addOption(parser, ArgParseOption("n", "num-matches", "Print at most this number of matches per query.", ArgParseArgument::INTEGER)); @@ -771,7 +780,8 @@ parseCommandLine(LambdaOptions & options, int argc, char const ** argv) getOptionValue(options.seedDeltaIncreasesLength, parser, "seed-delta-increases-length"); - getOptionValue(options.eCutOff, parser, "e-value"); + getOptionValue(options.maxEValue, parser, "e-value"); + getOptionValue(options.minBitScore, parser, "bit-score"); getOptionValue(options.idCutOff, parser, "percent-identity"); getOptionValue(options.xDropOff, parser, "x-drop"); @@ -835,10 +845,9 @@ parseCommandLine(LambdaOptions & options, int argc, char const ** argv) // TODO always prescore 1 getOptionValue(options.preScoring, parser, "pre-scoring"); - //TODO reactivate -// if ((!isSet(parser, "pre-scoring")) && -// (options.alphReduction == 0)) -// options.preScoring = 1; + if ((!isSet(parser, "pre-scoring")) && + (options.reducedAlphabet == options.transAlphabet)) + options.preScoring = 1; getOptionValue(options.preScoringThresh, parser, "pre-scoring-threshold"); // if (options.preScoring == 0) @@ -904,8 +913,9 @@ printOptions(LambdaOptions const & options) << " db index type: " << _indexEnumToName(options.dbIndexType) << "\n" << " OUTPUT (file)\n" << " output file: " << options.output << "\n" + << " maximum e-value: " << options.maxEValue << "\n" + << " minimum bit-score: " << options.minBitScore << "\n" << " minimum % identity: " << options.idCutOff << "\n" - << " maximum e-value: " << options.eCutOff << "\n" << " max #matches per query: " << options.maxMatches << "\n" << " include subj names in sam:" << options.samWithRefHeader << "\n" << " include seq in sam/bam: " << options.samBamSeq << "\n" diff --git a/src/search_output.hpp b/src/search_output.hpp index 395e8b64b..0852eeabb 100644 --- a/src/search_output.hpp +++ b/src/search_output.hpp @@ -330,7 +330,7 @@ myWriteHeader(TGH & globalHolder, TLambdaOptions const & options) if (sIsTranslated(TGH::blastProgram)) { //TODO can we get around a copy? - subjSeqLengths = globalHolder.untransSubjSeqLengths; + subjSeqLengths = prefix(globalHolder.untransSubjSeqLengths, length(globalHolder.untransSubjSeqLengths) - 1); } else { // compute lengths ultra-fast diff --git a/src/shared_misc.hpp b/src/shared_misc.hpp index ae38f269f..3e57c3966 100644 --- a/src/shared_misc.hpp +++ b/src/shared_misc.hpp @@ -26,7 +26,10 @@ #include #include #include -#include + +#if __has_include() + #include +#endif #include #include