@@ -1985,7 +1985,7 @@ ModuleKmerContent::summarize_module(FastqStats &stats) {
19851985 for (size_t kmer = 0 ; kmer < num_kmers; ++kmer) {
19861986 for (size_t i = kmer_size - 1 ; i < num_kmer_bases; ++i) {
19871987 observed_count = stats.kmer_count [
1988- (i << FastqStats:: kBitShiftKmer ) | kmer
1988+ (i << Constants::bit_shift_kmer ) | kmer
19891989 ];
19901990 total_kmer_counts[kmer] += observed_count;
19911991 }
@@ -1995,40 +1995,51 @@ ModuleKmerContent::summarize_module(FastqStats &stats) {
19951995 double dividend = static_cast <double >(num_seen_kmers);
19961996 for (size_t kmer = 0 ; kmer < num_kmers; ++kmer) {
19971997 for (size_t i = kmer_size - 1 ; i < num_kmer_bases; ++i) {
1998- observed_count = stats. kmer_count [
1999- (i << FastqStats:: kBitShiftKmer ) | kmer
2000- ];
1998+ observed_count =
1999+ stats. kmer_count [ (i << Constants::bit_shift_kmer ) | kmer];
2000+
20012001 expected_count = pos_kmer_count[i] / dividend;
20022002 obs_exp_ratio = observed_count / expected_count;
20032003
20042004 if (i == 0 || obs_exp_ratio > obs_exp_max[kmer]) {
20052005 obs_exp_max[kmer] = obs_exp_ratio;
2006- where_obs_exp_is_max[kmer] = i;
2006+ where_obs_exp_is_max[kmer] = i + 1 - kmer_size ;
20072007 }
20082008 }
20092009
2010- if (obs_exp_max[kmer] > 5 ) {
2010+ if (obs_exp_max[kmer] > MIN_OBS_EXP_TO_REPORT ) {
20112011 kmers_to_report.push_back (make_pair (kmer, obs_exp_max[kmer]));
20122012 }
20132013 }
20142014
20152015 sort (begin (kmers_to_report), end (kmers_to_report),
2016- [](pair<size_t , double > &a, pair<size_t , double > &b) {
2016+ [](const pair<size_t , double > &a, const pair<size_t , double > &b) {
20172017 return a.second > b.second ;
20182018 });
2019-
20202019}
20212020
20222021void
20232022ModuleKmerContent::make_grade () {
2024- grade = " fail" ;
2023+ const size_t lim = min (kmers_to_report.size (), MAX_KMERS_TO_REPORT);
2024+ grade = " pass" ;
2025+
2026+ // the worst kmer is at the top
2027+ if (lim > 0 ) {
2028+ const size_t kmer = kmers_to_report[0 ].first ;
2029+ const double obs_exp = obs_exp_max[kmer];
2030+ if (obs_exp >= grade_error)
2031+ grade = " fail" ;
2032+ else if (obs_exp >= grade_warn)
2033+ grade = " warn" ;
2034+ }
20252035}
20262036
20272037void
20282038ModuleKmerContent::write_module (ostream &os) {
20292039 os << " #Sequence\t Count\t PValue\t Obs/Exp Max\t Max Obs/Exp Position\n " ;
2030- for (size_t i = 0 ; i < 20 && i < kmers_to_report.size (); ++i) {
2031- size_t kmer = kmers_to_report[i].first ;
2040+ const size_t lim = min (kmers_to_report.size (), MAX_KMERS_TO_REPORT);
2041+ for (size_t i = 0 ; i < lim; ++i) {
2042+ const size_t kmer = kmers_to_report[i].first ;
20322043 os << size_t_to_seq (kmer, kmer_size) << " \t "
20332044 << total_kmer_counts[kmer] << " \t "
20342045 << " 0.0" << " \t "
@@ -2039,5 +2050,44 @@ ModuleKmerContent::write_module(ostream &os) {
20392050
20402051string
20412052ModuleKmerContent::make_html_data () {
2042- return " <b>K-mer content module currently not implemented!" ;
2053+ bool seen_first = false ;
2054+ ostringstream data;
2055+ const size_t lim = min (kmers_to_report.size (), MAX_KMERS_TO_PLOT);
2056+
2057+ // get xlim to plot: whatever the largest position with some
2058+ // reported k-mer is
2059+ size_t xlim = 0 ;
2060+ for (size_t i = 0 ; i < lim; ++i)
2061+ xlim = max (xlim, where_obs_exp_is_max[kmers_to_report[i].first ]);
2062+
2063+ for (size_t i = 0 ; i < lim; ++i) {
2064+ const size_t kmer = kmers_to_report[i].first ;
2065+ const double log_obs_exp = log (kmers_to_report[i].second )/log (2 );
2066+ if (!seen_first)
2067+ seen_first = true ;
2068+ else
2069+ data << " ," ;
2070+ data << " {" ;
2071+
2072+ // X values : read position
2073+ data << " x : [" ;
2074+ for (size_t j = 0 ; j < xlim; ++j) {
2075+ data << j+1 ;
2076+ if (j < xlim - 1 ) data << " ," ;
2077+ }
2078+ data << " ]" ;
2079+
2080+ // Y values : A peak wherever the k-mer is seen the most
2081+ data << " , y : [" ;
2082+ for (size_t j = 0 ; j < xlim; ++j) {
2083+ data << ((j == (where_obs_exp_is_max[kmer])) ? (log_obs_exp) : 0 );
2084+ if (i < xlim - 1 )
2085+ data << " ," ;
2086+ }
2087+
2088+ data << " ]" ;
2089+ data << " , type : 'line', " ;
2090+ data << " name : \" " << size_t_to_seq (kmer, Constants::kmer_size) << " \" }" ;
2091+ }
2092+ return data.str ();
20432093}
0 commit comments