Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages

SignalToNoiseEstimatorMeanIterative.h (Maintainer: Chris Bielow)

Go to the documentation of this file.
00001 // -*- Mode: C++; tab-width: 2; -*-
00002 // vi: set ts=2:
00003 //
00004 // --------------------------------------------------------------------------
00005 //                   OpenMS Mass Spectrometry Framework
00006 // --------------------------------------------------------------------------
00007 //  Copyright (C) 2003-2008 -- Oliver Kohlbacher, Knut Reinert
00008 //
00009 //  This library is free software; you can redistribute it and/or
00010 //  modify it under the terms of the GNU Lesser General Public
00011 //  License as published by the Free Software Foundation; either
00012 //  version 2.1 of the License, or (at your option) any later version.
00013 //
00014 //  This library is distributed in the hope that it will be useful,
00015 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 //  Lesser General Public License for more details.
00018 //
00019 //  You should have received a copy of the GNU Lesser General Public
00020 //  License along with this library; if not, write to the Free Software
00021 //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 //
00023 // --------------------------------------------------------------------------
00024 // $Maintainer: Chris Bielow $
00025 // --------------------------------------------------------------------------
00026 //
00027 
00028 #ifndef OPENMS_FILTERING_NOISEESTIMATION_SIGNALTONOISEESTIMATORMEANITERATIVE_H
00029 #define OPENMS_FILTERING_NOISEESTIMATION_SIGNALTONOISEESTIMATORMEANITERATIVE_H
00030 
00031 #include <OpenMS/FILTERING/NOISEESTIMATION/SignalToNoiseEstimator.h>
00032 #include <OpenMS/CONCEPT/Types.h>
00033 #include <OpenMS/CONCEPT/Exception.h>
00034 #include <vector>
00035 
00036 namespace OpenMS
00037 {
00061   template < typename Container = MSSpectrum< > >
00062   class SignalToNoiseEstimatorMeanIterative : public SignalToNoiseEstimator< Container >
00063   {
00064 
00065     public:
00066 
00068       enum IntensityThresholdCalculation { MANUAL=-1, AUTOMAXBYSTDEV=0, AUTOMAXBYPERCENT=1 }; // how max_intensity_ is obtained; the values match the 'auto_mode' parameter (-1, 0, 1)
00069 
00070       using SignalToNoiseEstimator< Container >::stn_estimates_; // members of the dependent base class are not found by unqualified lookup, hence the using-declarations
00071       using SignalToNoiseEstimator< Container >::first_;
00072       using SignalToNoiseEstimator< Container >::last_;
00073       using SignalToNoiseEstimator< Container >::is_result_valid_;
00074       using SignalToNoiseEstimator< Container >::defaults_;
00075       using SignalToNoiseEstimator< Container >::param_;
00076       
00077       typedef typename SignalToNoiseEstimator< Container >::PeakIterator PeakIterator; // iterator type used to walk over the peaks of a scan
00078       typedef typename SignalToNoiseEstimator< Container >::PeakType PeakType; // peak type; computeSTN_ uses its getMZ() and getIntensity()
00079       
00080       typedef typename SignalToNoiseEstimator< Container >::GaussianEstimate GaussianEstimate; // result of the base-class estimate_(); computeSTN_ reads its 'mean' and 'variance'
00081       
00082 
00084       inline SignalToNoiseEstimatorMeanIterative() // Default constructor: registers every parameter (default value, description, value limits) in defaults_.
00085       {
00086         //set the name for DefaultParamHandler error messages
00087         this->setName("SignalToNoiseEstimatorMeanIterative"); 
00088       
00089         defaults_.setValue("max_intensity", -1, "maximal intensity considered for histogram construction. By default, it will be calculated automatically (see auto_mode)."\
00090 " Only provide this parameter if you know what you are doing (and change 'auto_mode' to '-1')!"\
00091 " All intensities EQUAL/ABOVE 'max_intensity' will not be added to the histogram."\
00092 " If you choose 'max_intensity' too small, the noise estimate might be too small as well."\
00093 " If chosen too big, the bins become quite large (which you could counter by increasing 'bin_count', which increases runtime).", true); 
00094         defaults_.setMinInt ("max_intensity", -1);
00095         
00096         defaults_.setValue("auto_max_stdev_factor", 3.0, "parameter for 'max_intensity' estimation (if 'auto_mode' == 0): mean + 'auto_max_stdev_factor' * stdev", true); // NOTE(review): the trailing bool of setValue() presumably marks a parameter as advanced -- confirm against the Param API
00097         defaults_.setMinFloat ("auto_max_stdev_factor", 0.0);
00098         defaults_.setMaxFloat ("auto_max_stdev_factor", 999.0);
00099 
00100                 
00101         defaults_.setValue("auto_max_percentile", 95, "parameter for 'max_intensity' estimation (if 'auto_mode' == 1): auto_max_percentile th percentile", true); 
00102         defaults_.setMinInt ("auto_max_percentile", 0);
00103         defaults_.setMaxInt ("auto_max_percentile", 100);
00104         
00105         defaults_.setValue("auto_mode", 0, "method to use to determine maximal intensity: -1 --> use 'max_intensity'; 0 --> 'auto_max_stdev_factor' method (default); 1 --> 'auto_max_percentile' method", true); // the three values correspond to the IntensityThresholdCalculation enumerators
00106         defaults_.setMinInt ("auto_mode", -1);
00107         defaults_.setMaxInt ("auto_mode", 1);        
00108         
00109         defaults_.setValue("win_len", 200.0, "window length in Thomson", false); // full window width; computeSTN_ uses half of it on each side of a peak
00110         defaults_.setMinFloat ("win_len", 1.0);
00111                 
00112         defaults_.setValue("bin_count", 30, "number of bins for intensity values", false); 
00113         defaults_.setMinInt ("bin_count", 3);
00114                 
00115         defaults_.setValue("stdev_mp", 3.0, "multiplier for stdev", true); // read into stdev_ by updateMembers_()
00116         defaults_.setMinFloat ("stdev_mp", 0.01);
00117         defaults_.setMaxFloat ("stdev_mp", 999.0);
00118                 
00119         defaults_.setValue("min_required_elements", 10, "minimum number of elements required in a window (otherwise it is considered sparse)", false); 
00120         defaults_.setMinInt ("min_required_elements", 1);
00121                 
00122         defaults_.setValue("noise_for_empty_window", std::pow(10.0,20), "noise value used for sparse windows", true); // 10^20: computeSTN_ divides the intensity by this value, so sparse windows get a ratio close to 0
00123 
00124         SignalToNoiseEstimator< Container >::defaultsToParam_(); // defined in a base class (not visible here); presumably transfers defaults_ into param_ -- confirm
00125       }
00126 
00127 
00129       inline SignalToNoiseEstimatorMeanIterative(const SignalToNoiseEstimatorMeanIterative&  source) // Copy constructor.
00130           : SignalToNoiseEstimator< Container >(source) // delegates copying to the base-class copy constructor
00131       {
00132         updateMembers_(); // the cached members are not copied directly but re-read from param_
00133       }
00134 
00135 
00139 
00140       inline SignalToNoiseEstimatorMeanIterative& operator=(const SignalToNoiseEstimatorMeanIterative& source)
00141       {
00142         if (this != &source) // a self-assignment leaves the object untouched
00143         {
00144           SignalToNoiseEstimator< Container >::operator=(source); // let the base class assign its part
00145           updateMembers_(); // then refresh the members cached from param_
00146         }
00147         return *this;
00148       }
00149 
00151       virtual ~SignalToNoiseEstimatorMeanIterative() // Destructor; the members declared in this class are plain doubles and ints, so there is nothing to release.
00152       {}
00153       
00154     
00155     protected:
00156 
00157 
00161       virtual void computeSTN_(const PeakIterator& scan_first_, const PeakIterator& scan_last_) 
00162       throw(Exception::InvalidValue)
00163       {
00164         // reset counter for sparse windows
00165         double sparse_window_percent = 0;
00166 
00167         // reset the results
00168         stn_estimates_.clear();
00169 
00170         // maximal range of histogram needs to be calculated first
00171         if (auto_mode_ == AUTOMAXBYSTDEV)
00172         {
00173           // use MEAN+auto_max_intensity_*STDEV as threshold
00174           GaussianEstimate gauss_global = SignalToNoiseEstimator< Container >::estimate_(scan_first_, scan_last_);
00175           max_intensity_ = gauss_global.mean + std::sqrt(gauss_global.variance)*auto_max_stdev_Factor_;
00176         }
00177         else if (auto_mode_ == AUTOMAXBYPERCENT)
00178         {
00179           // get value at "auto_max_percentile_"th percentile
00180           // we use a histogram approach here as well.
00181           if ((auto_max_percentile_ < 0) || (auto_max_percentile_ > 100))
00182           {
00183             String s = auto_max_percentile_;
00184             throw Exception::InvalidValue(__FILE__, 
00185                                            __LINE__, 
00186                                            __PRETTY_FUNCTION__, 
00187                                            "auto_mode is on AUTOMAXBYPERCENT! auto_max_percentile is not in [0,100]. Use setAutoMaxPercentile(<value>) to change it!", 
00188                                            s);
00189           }
00190 
00191           std::vector <int> histogram_auto(100, 0);
00192 
00193           // find maximum of current scan
00194           int size = 0;
00195           typename PeakType::IntensityType maxInt = 0;
00196           PeakIterator run = scan_first_;
00197           while (run != scan_last_)
00198           {
00199             maxInt = std::max(maxInt, (*run).getIntensity());
00200             ++size;
00201             ++run;
00202           }
00203 
00204           double bin_size = maxInt / 100;
00205 
00206           // fill histogram
00207           run = scan_first_;
00208           while (run != scan_last_)
00209           {
00210             ++histogram_auto[(int) (((*run).getIntensity()-1) / bin_size)];
00211             ++run;
00212           }
00213 
00214           // add up element counts in histogram until ?th percentile is reached
00215           int elements_below_percentile = (int) (auto_max_percentile_ * size / 100);
00216           int elements_seen = 0;
00217           int i = -1;
00218           run = scan_first_;
00219 
00220           while (run != scan_last_ && elements_seen < elements_below_percentile)
00221           {
00222             ++i;
00223             elements_seen += histogram_auto[i];
00224             ++run;
00225           }
00226 
00227           max_intensity_ = (((double)i) + 0.5) * bin_size;
00228         }
00229         else //if (auto_mode_ == MANUAL)
00230         {
00231           if (max_intensity_<=0) 
00232           {
00233             String s = max_intensity_;
00234             throw Exception::InvalidValue(__FILE__, 
00235                                            __LINE__, 
00236                                            __PRETTY_FUNCTION__, 
00237                                            "auto_mode is on MANUAL! max_intensity is <=0. Needs to be positive! Use setMaxIntensity(<value>) or enable auto_mode!", 
00238                                            s);
00239           }
00240         }
00241 
00242         PeakIterator window_pos_center  = scan_first_;
00243         PeakIterator window_pos_borderleft = scan_first_;
00244         PeakIterator window_pos_borderright = scan_first_;
00245 
00246         double window_half_size = win_len_ / 2;
00247         double bin_size = max_intensity_ / bin_count_;
00248 
00249         std::vector <int> histogram(bin_count_, 0);
00250         std::vector <double> bin_value(bin_count_, 0);
00251         // calculate average intensity that is represented by a bin
00252         for (int bin=0; bin<bin_count_; bin++)
00253         {
00254           histogram[bin] = 0;
00255           bin_value[bin] = (bin + 0.5) * bin_size;
00256         }
00257         // index of last valid bin during iteration
00258         int hist_rightmost_bin;
00259         // bin in which a datapoint would fall
00260         int to_bin;
00261         // mean & stdev of the histogram
00262         double hist_mean;
00263         double hist_stdev;
00264 
00265         // tracks elements in current window, which may vary because of uneven spaced data
00266         int elements_in_window = 0;
00267         int window_count = 0;
00268 
00269         double noise;    // noise value of a datapoint
00270 
00271         // determine how many elements we need to estimate (for progress estimation)
00272         int windows_overall = 0;
00273         PeakIterator run = scan_first_;
00274         while (run != scan_last_)
00275         {
00276           ++windows_overall;
00277           ++run;
00278         }
00279         SignalToNoiseEstimator< Container >::startProgress(0,windows_overall,"noise estimation of data");
00280 
00281         // MAIN LOOP
00282         while (window_pos_center != scan_last_)
00283         {
00284           // erase all elements from histogram that will leave the window on the LEFT side
00285           while ( (*window_pos_borderleft).getMZ() <  (*window_pos_center).getMZ() - window_half_size )
00286           {
00287             //std::cout << "S: " << (*window_pos_borderleft).getMZ()  <<  " " << ( (*window_pos_center).getMZ() - window_half_size ) << "\n";
00288             to_bin = (int) (((*window_pos_borderleft).getIntensity()) / bin_size);
00289             if (to_bin < bin_count_)
00290             {
00291               --histogram[to_bin];
00292               --elements_in_window;
00293             }
00294             ++window_pos_borderleft;
00295           }
00296           
00297           //std::printf("S1: %E %E\n", (*window_pos_borderright).getMZ(), (*window_pos_center).getMZ() + window_half_size);
00298             
00299  
00300           // add all elements to histogram that will enter the window on the RIGHT side
00301           while (     (window_pos_borderright != scan_last_)
00302                       && ((*window_pos_borderright).getMZ() < (*window_pos_center).getMZ() + window_half_size )                     )
00303           {
00304             //std::printf("Sb: %E %E %E\n", (*window_pos_borderright).getMZ(), (*window_pos_center).getMZ() + window_half_size, (*window_pos_borderright).getMZ() - ((*window_pos_center).getMZ() + window_half_size));
00305             
00306             to_bin = (int) (((*window_pos_borderright).getIntensity()) / bin_size);
00307             if (to_bin < bin_count_)
00308             {
00309               ++histogram[to_bin];
00310               ++elements_in_window;
00311             }
00312             ++window_pos_borderright;
00313           }
00314 
00315           if (elements_in_window < min_required_elements_)
00316           {
00317             noise = noise_for_empty_window_;
00318             ++sparse_window_percent;
00319           }
00320           else
00321           {
00322 
00323             hist_rightmost_bin = bin_count_;
00324 
00325             // do iteration on histogram and find threshold
00326             for (int i=0;i<3;++i)
00327             {
00328               // mean
00329               hist_mean = 0;
00330               for (int bin = 0; bin < hist_rightmost_bin; ++bin)
00331               {
00332                 //std::cout << "V: " << bin << " " << hist_mean << " " << histogram[bin] << " " << elements_in_window << " " << bin_value[bin] << "\n";
00333                 // immediate division is numerically more stable
00334                 hist_mean += histogram[bin] / (double) elements_in_window * bin_value[bin] ;
00335               }
00336               //hist_mean = hist_mean / elements_in_window;
00337 
00338               // stdev
00339               hist_stdev = 0;
00340               for (int bin = 0; bin < hist_rightmost_bin; ++bin)
00341               {
00342                 hist_stdev += histogram[bin]/ (double) elements_in_window * std::pow(bin_value[bin]-hist_mean, 2);
00343               }
00344               hist_stdev = std::sqrt(hist_stdev);
00345 
00346               //determine new threshold (i.e. the rightmost bin we consider)
00347               int estimate = (int) ((hist_mean + hist_stdev * stdev_ - 1) / bin_size + 1);
00348               //std::cout << "E: " << hist_mean << " " << hist_stdev << " " << stdev_ << " " << bin_size<< " " << estimate << "\n";
00349               hist_rightmost_bin = std::min(estimate, bin_count_);
00350             }
00351 
00352             // just avoid division by 0
00353             noise = std::max(1.0, hist_mean);
00354           }
00355 
00356           // store result
00357           stn_estimates_[*window_pos_center] = (*window_pos_center).getIntensity() / noise;
00358 
00359 
00360 
00361           // advance the window center by one datapoint
00362           ++window_pos_center;
00363           ++window_count;
00364           // update progress 
00365           SignalToNoiseEstimator< Container >::setProgress(window_count);
00366                     
00367         } // end while
00368 
00369         SignalToNoiseEstimator< Container >::endProgress();
00370         
00371         sparse_window_percent = sparse_window_percent *100 / window_count;
00372         // warn if percentage of sparse windows is above 20%
00373         if (sparse_window_percent > 20)
00374         {
00375           std::cerr << "WARNING in SignalToNoiseEstimatorMeanIterative: "
00376           << sparse_window_percent
00377           << "% of all windows were sparse. You should consider increasing win_len or increasing MinReqElementsInWindow"
00378           << " You should also check the MaximalIntensity value (or the parameters for its heuristic estimation)"
00379           << " If it is too low, then too many high intensity peaks will be discarded, which leads to a sparse window!"
00380           << std::endl;
00381         }
00382 
00383         return;
00384         
00385       } // end of shiftWindow_
00386 
00387 
00389       void updateMembers_()
00390       { // copies each parameter from param_ into the member that computeSTN_ works with
00391         max_intensity_ = static_cast<double>(param_.getValue("max_intensity"));
00392         auto_max_stdev_Factor_ = static_cast<double>(param_.getValue("auto_max_stdev_factor"));
00393         auto_max_percentile_ = param_.getValue("auto_max_percentile");
00394         auto_mode_ = param_.getValue("auto_mode");
00395         win_len_ = static_cast<double>(param_.getValue("win_len"));
00396         bin_count_ = param_.getValue("bin_count");
00397         stdev_ = static_cast<double>(param_.getValue("stdev_mp"));
00398         min_required_elements_ = param_.getValue("min_required_elements");
00399         noise_for_empty_window_ = static_cast<double>(param_.getValue("noise_for_empty_window"));
00400         is_result_valid_ = false; // flag any previously computed result as not valid
00401       }
00402   
00404       double max_intensity_; // upper end of the intensity histogram ('max_intensity'); overwritten by computeSTN_ in the automatic modes
00406       double auto_max_stdev_Factor_; // factor applied to the stdev in AUTOMAXBYSTDEV mode ('auto_max_stdev_factor')
00408       double auto_max_percentile_; // percentile used in AUTOMAXBYPERCENT mode ('auto_max_percentile')
00410       int    auto_mode_; // compared against the IntensityThresholdCalculation enumerators ('auto_mode')
00412       double win_len_; // full width of the sliding window in m/z ('win_len')
00414       int    bin_count_; // number of bins of the per-window intensity histogram ('bin_count')
00416       double stdev_; // multiplier of the stdev when the histogram is truncated ('stdev_mp')
00418       int min_required_elements_; // windows with fewer binned peaks are treated as sparse ('min_required_elements')
00421       double noise_for_empty_window_; // noise value assigned to sparse windows ('noise_for_empty_window')
00422 
00423 
00424 
00425 
00426   };
00427 
00428 }// namespace OpenMS
00429 
00430 #endif //OPENMS_FILTERING_NOISEESTIMATION_SIGNALTONOISEESTIMATORMEANITERATIVE_H

Generated Tue Apr 1 15:36:37 2008 -- using doxygen 1.5.4 OpenMS / TOPP 1.1