Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages

SignalToNoiseEstimatorMedian.h (Maintainer: Chris Bielow)

Go to the documentation of this file.
00001 // -*- Mode: C++; tab-width: 2; -*-
00002 // vi: set ts=2:
00003 //
00004 // --------------------------------------------------------------------------
00005 //                   OpenMS Mass Spectrometry Framework
00006 // --------------------------------------------------------------------------
00007 //  Copyright (C) 2003-2008 -- Oliver Kohlbacher, Knut Reinert
00008 //
00009 //  This library is free software; you can redistribute it and/or
00010 //  modify it under the terms of the GNU Lesser General Public
00011 //  License as published by the Free Software Foundation; either
00012 //  version 2.1 of the License, or (at your option) any later version.
00013 //
00014 //  This library is distributed in the hope that it will be useful,
00015 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 //  Lesser General Public License for more details.
00018 //
00019 //  You should have received a copy of the GNU Lesser General Public
00020 //  License along with this library; if not, write to the Free Software
00021 //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 //
00023 // --------------------------------------------------------------------------
00024 // $Maintainer: Chris Bielow $
00025 // --------------------------------------------------------------------------
00026 //
00027 
00028 #ifndef OPENMS_FILTERING_NOISEESTIMATION_SIGNALTONOISEESTIMATORMEDIAN_H
00029 #define OPENMS_FILTERING_NOISEESTIMATION_SIGNALTONOISEESTIMATORMEDIAN_H
00030 
00031 
00032 #include <OpenMS/FILTERING/NOISEESTIMATION/SignalToNoiseEstimator.h>
00033 #include <OpenMS/CONCEPT/Types.h>
00034 #include <OpenMS/CONCEPT/Exception.h>
00035 #include <vector>
00036 
00037 namespace OpenMS
00038 {
00062   template < typename Container = MSSpectrum< > >
00063   class SignalToNoiseEstimatorMedian : public SignalToNoiseEstimator< Container >
00064   {
00065 
00066   public:
00067 
00069     enum IntensityThresholdCalculation { MANUAL=-1, AUTOMAXBYSTDEV=0, AUTOMAXBYPERCENT=1 };
00070 
00071     using SignalToNoiseEstimator< Container >::stn_estimates_;
00072     using SignalToNoiseEstimator< Container >::first_;
00073     using SignalToNoiseEstimator< Container >::last_;
00074     using SignalToNoiseEstimator< Container >::is_result_valid_;
00075     using SignalToNoiseEstimator< Container >::defaults_;
00076     using SignalToNoiseEstimator< Container >::param_;
00077     
00078     typedef typename SignalToNoiseEstimator< Container >::PeakIterator PeakIterator;
00079     typedef typename SignalToNoiseEstimator< Container >::PeakType PeakType;
00080     
00081     typedef typename SignalToNoiseEstimator< Container >::GaussianEstimate GaussianEstimate;
00082 
00084     inline SignalToNoiseEstimatorMedian()
00085     {
00086       //set the name for DefaultParamHandler error messages
00087       this->setName("SignalToNoiseEstimatorMedian");  
00088 
00089       defaults_.setValue("max_intensity", -1, "maximal intensity considered for histogram construction. By default, it will be calculated automatically (see auto_mode)."\
00090 " Only provide this parameter if you know what you are doing (and change 'auto_mode' to '-1')!"\
00091 " All intensities EQUAL/ABOVE 'max_intensity' will be added to the LAST histogram bin."\
00092 " If you choose 'max_intensity' too small, the noise estimate might be too small as well. "\
00093 " If chosen too big, the bins become quite large (which you could counter by increasing 'bin_count', which increases runtime)."\
00094 " In general, the Median-S/N estimator is more robust to a manual max_intensity than the MeanIterative-S/N.", true); 
00095       defaults_.setMinInt ("max_intensity", -1);
00096 
00097       defaults_.setValue("auto_max_stdev_factor", 3.0, "parameter for 'max_intensity' estimation (if 'auto_mode' == 0): mean + 'auto_max_stdev_factor' * stdev", true);
00098       defaults_.setMinFloat ("auto_max_stdev_factor", 0.0);
00099       defaults_.setMaxFloat ("auto_max_stdev_factor", 999.0);
00100               
00101       defaults_.setValue("auto_max_percentile", 95, "parameter for 'max_intensity' estimation (if 'auto_mode' == 1): auto_max_percentile th percentile", true);
00102       defaults_.setMinInt ("auto_max_percentile", 0);
00103       defaults_.setMaxInt ("auto_max_percentile", 100);
00104               
00105       defaults_.setValue("auto_mode", 0, "method to use to determine maximal intensity: -1 --> use 'max_intensity'; 0 --> 'auto_max_stdev_factor' method (default); 1 --> 'auto_max_percentile' method", true);
00106       defaults_.setMinInt ("auto_mode", -1);
00107       defaults_.setMaxInt ("auto_mode", 1);  
00108               
00109       defaults_.setValue("win_len", 200.0, "window length in Thomson", false);
00110       defaults_.setMinFloat ("win_len", 1.0);
00111               
00112       defaults_.setValue("bin_count", 30, "number of bins for intensity values", false);
00113       defaults_.setMinInt ("bin_count", 3);
00114               
00115       defaults_.setValue("min_required_elements", 10, "minimum number of elements required in a window (otherwise it is considered sparse)", false);
00116       defaults_.setMinInt ("min_required_elements", 1);
00117       
00118       defaults_.setValue("noise_for_empty_window", std::pow(10.0,20), "noise value used for sparse windows", true);
00119       
00120 
00121       SignalToNoiseEstimator< Container >::defaultsToParam_();
00122     }
00123 
00124 
00126     inline SignalToNoiseEstimatorMedian(const SignalToNoiseEstimatorMedian&  source)
00127         : SignalToNoiseEstimator< Container >(source)
00128     {
00129       updateMembers_();
00130     }
00131 
00132 
00136 
00137     inline SignalToNoiseEstimatorMedian& operator=(const SignalToNoiseEstimatorMedian& source)
00138     {
00139       if(&source == this) return *this; 
00140       SignalToNoiseEstimator< Container >::operator=(source);
00141       updateMembers_();
00142       return *this;
00143     }
00145 
00146 
00148     virtual ~SignalToNoiseEstimatorMedian()
00149     {}
00150 
00151 
00152   protected:
00153 
00154 
00158     void computeSTN_(const PeakIterator& scan_first_, const PeakIterator& scan_last_)
00159     throw(Exception::InvalidValue)
00160     {
00161       // reset counter for sparse windows
00162       double sparse_window_percent = 0;
00163       // reset counter for histogram overflow
00164       double histogram_oob_percent = 0;
00165       
00166       // reset the results
00167       stn_estimates_.clear();
00168       
00169       // maximal range of histogram needs to be calculated first
00170       if (auto_mode_ == AUTOMAXBYSTDEV)
00171       {
00172         // use MEAN+auto_max_intensity_*STDEV as threshold
00173         GaussianEstimate gauss_global = SignalToNoiseEstimator< Container >::estimate_(scan_first_, scan_last_);
00174         max_intensity_ = gauss_global.mean + std::sqrt(gauss_global.variance)*auto_max_stdev_Factor_;
00175       }
00176       else if (auto_mode_ == AUTOMAXBYPERCENT)
00177       {
00178         // get value at "auto_max_percentile_"th percentile
00179         // we use a histogram approach here as well.
00180         if ((auto_max_percentile_ < 0) || (auto_max_percentile_ > 100))
00181         {
00182           String s = auto_max_percentile_;
00183           throw Exception::InvalidValue(__FILE__, 
00184                                          __LINE__, 
00185                                          __PRETTY_FUNCTION__, 
00186                                          "auto_mode is on AUTOMAXBYPERCENT! auto_max_percentile is not in [0,100]. Use setAutoMaxPercentile(<value>) to change it!", 
00187                                          s);
00188         }
00189 
00190         std::vector <int> histogram_auto(100, 0);
00191 
00192         // find maximum of current scan
00193         int size = 0;
00194         typename PeakType::IntensityType maxInt = 0;
00195         PeakIterator run = scan_first_;
00196         while (run != scan_last_)
00197         {
00198           maxInt = std::max(maxInt, (*run).getIntensity());
00199           ++size;
00200           ++run;
00201         }
00202 
00203         double bin_size = maxInt / 100;
00204 
00205         // fill histogram
00206         run = scan_first_;
00207         while (run != scan_last_)
00208         {
00209           ++histogram_auto[(int) (((*run).getIntensity()-1) / bin_size)];
00210           ++run;
00211         }
00212 
00213         // add up element counts in histogram until ?th percentile is reached
00214         int elements_below_percentile = (int) (auto_max_percentile_ * size / 100);
00215         int elements_seen = 0;
00216         int i = -1;
00217         run = scan_first_;
00218 
00219         while (run != scan_last_ && elements_seen < elements_below_percentile)
00220         {
00221           ++i;
00222           elements_seen += histogram_auto[i];
00223           ++run;
00224         }
00225 
00226         max_intensity_ = (((double)i) + 0.5) * bin_size;
00227       }
00228       else //if (auto_mode_ == MANUAL)
00229       {
00230         if (max_intensity_<=0) 
00231         {
00232           String s = max_intensity_;
00233           throw Exception::InvalidValue(__FILE__, 
00234                                          __LINE__, 
00235                                          __PRETTY_FUNCTION__, 
00236                                          "auto_mode is on MANUAL! max_intensity is <=0. Needs to be positive! Use setMaxIntensity(<value>) or enable auto_mode!", 
00237                                          s);
00238         }
00239       }
00240       
00241       if (max_intensity_ <= 0)
00242       {
00243         std::cerr << "TODO SignalToNoiseEstimatorMedian: the max_intensity_ value should be positive! " << max_intensity_ << std::endl;
00244         return;
00245       }
00246 
00247       PeakIterator window_pos_center  = scan_first_;
00248       PeakIterator window_pos_borderleft = scan_first_;
00249       PeakIterator window_pos_borderright = scan_first_;
00250       
00251       double window_half_size = win_len_ / 2;
00252       double bin_size = max_intensity_ / bin_count_;
00253       int bin_count_minus_1 = bin_count_ - 1;
00254       
00255       std::vector <int> histogram(bin_count_, 0);
00256       std::vector <double> bin_value(bin_count_, 0);
00257       // calculate average intensity that is represented by a bin
00258       for (int bin=0; bin<bin_count_; bin++)
00259       {
00260          histogram[bin] = 0;
00261          bin_value[bin] = (bin + 0.5) * bin_size;           
00262       }
00263       // bin in which a datapoint would fall
00264       int to_bin = 0;
00265 
00266       // index of bin where the median is located
00267       int median_bin = 0;
00268       // additive number of elements from left to x in histogram
00269       int element_inc_count = 0;
00270       
00271       // tracks elements in current window, which may vary because of uneven spaced data
00272       int elements_in_window = 0;
00273       // number of windows
00274       int window_count = 0;
00275       
00276       // number of elements where we find the median
00277       int element_in_window_half = 0;
00278       
00279       double noise;    // noise value of a datapoint      
00280 
00281       // determine how many elements we need to estimate (for progress estimation)
00282       int windows_overall = 0;
00283       PeakIterator run = scan_first_;
00284       while (run != scan_last_)
00285       {
00286         ++windows_overall;
00287         ++run;
00288       }
00289       SignalToNoiseEstimator< Container >::startProgress(0,windows_overall,"noise estimation of data");
00290 
00291       // MAIN LOOP
00292       while (window_pos_center != scan_last_)
00293       {
00294         
00295         // erase all elements from histogram that will leave the window on the LEFT side
00296         while ( (*window_pos_borderleft).getMZ() <  (*window_pos_center).getMZ() - window_half_size )
00297         {
00298           to_bin = std::min((int) (((*window_pos_borderleft).getIntensity()) / bin_size), bin_count_minus_1);
00299           --histogram[to_bin];
00300           --elements_in_window;
00301           ++window_pos_borderleft;
00302         }
00303         
00304         // add all elements to histogram that will enter the window on the RIGHT side
00305         while (    (window_pos_borderright != scan_last_)
00306                 &&((*window_pos_borderright).getMZ() <= (*window_pos_center).getMZ() + window_half_size ) )
00307         {
00308           //std::cerr << (*window_pos_borderright).getIntensity() << " " << bin_size << " " << bin_count_minus_1 << std::endl;
00309           to_bin = std::min((int) (((*window_pos_borderright).getIntensity()) / bin_size), bin_count_minus_1);
00310           ++histogram[to_bin];
00311           ++elements_in_window;
00312           ++window_pos_borderright;
00313         }
00314 
00315         if (elements_in_window < min_required_elements_)
00316         {
00317           noise = noise_for_empty_window_;
00318           ++sparse_window_percent;
00319         }
00320         else
00321         {
00322           // find bin i where ceil[elements_in_window/2] <= sum_c(0..i){ histogram[c] }
00323           median_bin = -1;
00324           element_inc_count = 0;
00325           element_in_window_half = (elements_in_window+1) / 2;
00326           while (median_bin < bin_count_minus_1 && element_inc_count < element_in_window_half) {
00327             ++median_bin;
00328             element_inc_count += histogram[median_bin];
00329           }
00330 
00331           // increase the error count
00332           if (median_bin == bin_count_minus_1) {++histogram_oob_percent;}
00333           
00334           // just avoid division by 0
00335           noise = std::max(1.0, bin_value[median_bin]);
00336         }
00337         
00338         // store result
00339         stn_estimates_[*window_pos_center] = (*window_pos_center).getIntensity() / noise;
00340         
00341         
00342         // advance the window center by one datapoint
00343         ++window_pos_center;
00344         ++window_count;  
00345         // update progress 
00346         SignalToNoiseEstimator< Container >::setProgress(window_count);
00347                   
00348       } // end while
00349 
00350       SignalToNoiseEstimator< Container >::endProgress();
00351         
00352       sparse_window_percent = sparse_window_percent *100 / window_count;
00353       histogram_oob_percent = histogram_oob_percent *100 / window_count;
00354       
00355       // warn if percentage of sparse windows is above 20%
00356       if (sparse_window_percent > 20) 
00357       {
00358         std::cerr << "WARNING in SignalToNoiseEstimatorMedian: " 
00359                  << sparse_window_percent 
00360                  << "% of all windows were sparse. You should consider increasing WindowLength or decreasing MinReqElementsInWindow" 
00361                  << std::endl;
00362       }
00363       
00364       // warn if percentage of possibly wrong median estimates is above 1%
00365       if (histogram_oob_percent > 1) 
00366       {
00367         std::cerr << "WARNING in SignalToNoiseEstimatorMedian: " 
00368                  << histogram_oob_percent 
00369                  << "% of all Signal-to-Noise estimates are too high, because the median was found in the rightmost histogram-bin. " 
00370                  << "You should consider increasing max_intensity (and maybe 'bin_count' with it, to keep bin width reasonable)" 
00371                  << std::endl;
00372       }      
00373       
00374     } // end of shiftWindow_
00375 
00377     void updateMembers_()
00378     {
00379       max_intensity_         = (double)param_.getValue("max_intensity"); 
00380       auto_max_stdev_Factor_ = (double)param_.getValue("auto_max_stdev_factor"); 
00381       auto_max_percentile_   = param_.getValue("auto_max_percentile"); 
00382       auto_mode_             = param_.getValue("auto_mode"); 
00383       win_len_               = (double)param_.getValue("win_len"); 
00384       bin_count_             = param_.getValue("bin_count"); 
00385       min_required_elements_ = param_.getValue("min_required_elements"); 
00386       noise_for_empty_window_= (double)param_.getValue("noise_for_empty_window"); 
00387       is_result_valid_ = false;
00388     }
00389 
00391     double max_intensity_;
00393     double auto_max_stdev_Factor_;
00395     double auto_max_percentile_;
00397     int    auto_mode_;
00399     double win_len_;
00401     int    bin_count_;
00403     int min_required_elements_;
00406     double noise_for_empty_window_;
00407 
00408 
00409 
00410   };
00411 
00412 }// namespace OpenMS
00413 
00414 #endif //OPENMS_FILTERING_NOISEESTIMATION_SIGNALTONOISEESTIMATORMEDIAN_H

Generated Tue Apr 1 15:36:37 2008 -- using doxygen 1.5.4 OpenMS / TOPP 1.1