00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #ifndef OPENMS_FILTERING_NOISEESTIMATION_SIGNALTONOISEESTIMATORMEDIAN_H
00029 #define OPENMS_FILTERING_NOISEESTIMATION_SIGNALTONOISEESTIMATORMEDIAN_H
00030
00031
00032 #include <OpenMS/FILTERING/NOISEESTIMATION/SignalToNoiseEstimator.h>
00033 #include <OpenMS/CONCEPT/Types.h>
00034 #include <OpenMS/CONCEPT/Exception.h>
00035 #include <vector>
00036
00037 namespace OpenMS
00038 {
00062 template < typename Container = MSSpectrum< > >
00063 class SignalToNoiseEstimatorMedian : public SignalToNoiseEstimator< Container >
00064 {
00065
00066 public:
00067
00069 enum IntensityThresholdCalculation { MANUAL=-1, AUTOMAXBYSTDEV=0, AUTOMAXBYPERCENT=1 };
00070
00071 using SignalToNoiseEstimator< Container >::stn_estimates_;
00072 using SignalToNoiseEstimator< Container >::first_;
00073 using SignalToNoiseEstimator< Container >::last_;
00074 using SignalToNoiseEstimator< Container >::is_result_valid_;
00075 using SignalToNoiseEstimator< Container >::defaults_;
00076 using SignalToNoiseEstimator< Container >::param_;
00077
00078 typedef typename SignalToNoiseEstimator< Container >::PeakIterator PeakIterator;
00079 typedef typename SignalToNoiseEstimator< Container >::PeakType PeakType;
00080
00081 typedef typename SignalToNoiseEstimator< Container >::GaussianEstimate GaussianEstimate;
00082
00084 inline SignalToNoiseEstimatorMedian()
00085 {
00086
00087 this->setName("SignalToNoiseEstimatorMedian");
00088
00089 defaults_.setValue("max_intensity", -1, "maximal intensity considered for histogram construction. By default, it will be calculated automatically (see auto_mode)."\
00090 " Only provide this parameter if you know what you are doing (and change 'auto_mode' to '-1')!"\
00091 " All intensities EQUAL/ABOVE 'max_intensity' will be added to the LAST histogram bin."\
00092 " If you choose 'max_intensity' too small, the noise estimate might be too small as well. "\
00093 " If chosen too big, the bins become quite large (which you could counter by increasing 'bin_count', which increases runtime)."\
00094 " In general, the Median-S/N estimator is more robust to a manual max_intensity than the MeanIterative-S/N.", true);
00095 defaults_.setMinInt ("max_intensity", -1);
00096
00097 defaults_.setValue("auto_max_stdev_factor", 3.0, "parameter for 'max_intensity' estimation (if 'auto_mode' == 0): mean + 'auto_max_stdev_factor' * stdev", true);
00098 defaults_.setMinFloat ("auto_max_stdev_factor", 0.0);
00099 defaults_.setMaxFloat ("auto_max_stdev_factor", 999.0);
00100
00101 defaults_.setValue("auto_max_percentile", 95, "parameter for 'max_intensity' estimation (if 'auto_mode' == 1): auto_max_percentile th percentile", true);
00102 defaults_.setMinInt ("auto_max_percentile", 0);
00103 defaults_.setMaxInt ("auto_max_percentile", 100);
00104
00105 defaults_.setValue("auto_mode", 0, "method to use to determine maximal intensity: -1 --> use 'max_intensity'; 0 --> 'auto_max_stdev_factor' method (default); 1 --> 'auto_max_percentile' method", true);
00106 defaults_.setMinInt ("auto_mode", -1);
00107 defaults_.setMaxInt ("auto_mode", 1);
00108
00109 defaults_.setValue("win_len", 200.0, "window length in Thomson", false);
00110 defaults_.setMinFloat ("win_len", 1.0);
00111
00112 defaults_.setValue("bin_count", 30, "number of bins for intensity values", false);
00113 defaults_.setMinInt ("bin_count", 3);
00114
00115 defaults_.setValue("min_required_elements", 10, "minimum number of elements required in a window (otherwise it is considered sparse)", false);
00116 defaults_.setMinInt ("min_required_elements", 1);
00117
00118 defaults_.setValue("noise_for_empty_window", std::pow(10.0,20), "noise value used for sparse windows", true);
00119
00120
00121 SignalToNoiseEstimator< Container >::defaultsToParam_();
00122 }
00123
00124
00126 inline SignalToNoiseEstimatorMedian(const SignalToNoiseEstimatorMedian& source)
00127 : SignalToNoiseEstimator< Container >(source)
00128 {
00129 updateMembers_();
00130 }
00131
00132
00136
00137 inline SignalToNoiseEstimatorMedian& operator=(const SignalToNoiseEstimatorMedian& source)
00138 {
00139 if(&source == this) return *this;
00140 SignalToNoiseEstimator< Container >::operator=(source);
00141 updateMembers_();
00142 return *this;
00143 }
00145
00146
00148 virtual ~SignalToNoiseEstimatorMedian()
00149 {}
00150
00151
00152 protected:
00153
00154
00158 void computeSTN_(const PeakIterator& scan_first_, const PeakIterator& scan_last_)
00159 throw(Exception::InvalidValue)
00160 {
00161
00162 double sparse_window_percent = 0;
00163
00164 double histogram_oob_percent = 0;
00165
00166
00167 stn_estimates_.clear();
00168
00169
00170 if (auto_mode_ == AUTOMAXBYSTDEV)
00171 {
00172
00173 GaussianEstimate gauss_global = SignalToNoiseEstimator< Container >::estimate_(scan_first_, scan_last_);
00174 max_intensity_ = gauss_global.mean + std::sqrt(gauss_global.variance)*auto_max_stdev_Factor_;
00175 }
00176 else if (auto_mode_ == AUTOMAXBYPERCENT)
00177 {
00178
00179
00180 if ((auto_max_percentile_ < 0) || (auto_max_percentile_ > 100))
00181 {
00182 String s = auto_max_percentile_;
00183 throw Exception::InvalidValue(__FILE__,
00184 __LINE__,
00185 __PRETTY_FUNCTION__,
00186 "auto_mode is on AUTOMAXBYPERCENT! auto_max_percentile is not in [0,100]. Use setAutoMaxPercentile(<value>) to change it!",
00187 s);
00188 }
00189
00190 std::vector <int> histogram_auto(100, 0);
00191
00192
00193 int size = 0;
00194 typename PeakType::IntensityType maxInt = 0;
00195 PeakIterator run = scan_first_;
00196 while (run != scan_last_)
00197 {
00198 maxInt = std::max(maxInt, (*run).getIntensity());
00199 ++size;
00200 ++run;
00201 }
00202
00203 double bin_size = maxInt / 100;
00204
00205
00206 run = scan_first_;
00207 while (run != scan_last_)
00208 {
00209 ++histogram_auto[(int) (((*run).getIntensity()-1) / bin_size)];
00210 ++run;
00211 }
00212
00213
00214 int elements_below_percentile = (int) (auto_max_percentile_ * size / 100);
00215 int elements_seen = 0;
00216 int i = -1;
00217 run = scan_first_;
00218
00219 while (run != scan_last_ && elements_seen < elements_below_percentile)
00220 {
00221 ++i;
00222 elements_seen += histogram_auto[i];
00223 ++run;
00224 }
00225
00226 max_intensity_ = (((double)i) + 0.5) * bin_size;
00227 }
00228 else
00229 {
00230 if (max_intensity_<=0)
00231 {
00232 String s = max_intensity_;
00233 throw Exception::InvalidValue(__FILE__,
00234 __LINE__,
00235 __PRETTY_FUNCTION__,
00236 "auto_mode is on MANUAL! max_intensity is <=0. Needs to be positive! Use setMaxIntensity(<value>) or enable auto_mode!",
00237 s);
00238 }
00239 }
00240
00241 if (max_intensity_ <= 0)
00242 {
00243 std::cerr << "TODO SignalToNoiseEstimatorMedian: the max_intensity_ value should be positive! " << max_intensity_ << std::endl;
00244 return;
00245 }
00246
00247 PeakIterator window_pos_center = scan_first_;
00248 PeakIterator window_pos_borderleft = scan_first_;
00249 PeakIterator window_pos_borderright = scan_first_;
00250
00251 double window_half_size = win_len_ / 2;
00252 double bin_size = max_intensity_ / bin_count_;
00253 int bin_count_minus_1 = bin_count_ - 1;
00254
00255 std::vector <int> histogram(bin_count_, 0);
00256 std::vector <double> bin_value(bin_count_, 0);
00257
00258 for (int bin=0; bin<bin_count_; bin++)
00259 {
00260 histogram[bin] = 0;
00261 bin_value[bin] = (bin + 0.5) * bin_size;
00262 }
00263
00264 int to_bin = 0;
00265
00266
00267 int median_bin = 0;
00268
00269 int element_inc_count = 0;
00270
00271
00272 int elements_in_window = 0;
00273
00274 int window_count = 0;
00275
00276
00277 int element_in_window_half = 0;
00278
00279 double noise;
00280
00281
00282 int windows_overall = 0;
00283 PeakIterator run = scan_first_;
00284 while (run != scan_last_)
00285 {
00286 ++windows_overall;
00287 ++run;
00288 }
00289 SignalToNoiseEstimator< Container >::startProgress(0,windows_overall,"noise estimation of data");
00290
00291
00292 while (window_pos_center != scan_last_)
00293 {
00294
00295
00296 while ( (*window_pos_borderleft).getMZ() < (*window_pos_center).getMZ() - window_half_size )
00297 {
00298 to_bin = std::min((int) (((*window_pos_borderleft).getIntensity()) / bin_size), bin_count_minus_1);
00299 --histogram[to_bin];
00300 --elements_in_window;
00301 ++window_pos_borderleft;
00302 }
00303
00304
00305 while ( (window_pos_borderright != scan_last_)
00306 &&((*window_pos_borderright).getMZ() <= (*window_pos_center).getMZ() + window_half_size ) )
00307 {
00308
00309 to_bin = std::min((int) (((*window_pos_borderright).getIntensity()) / bin_size), bin_count_minus_1);
00310 ++histogram[to_bin];
00311 ++elements_in_window;
00312 ++window_pos_borderright;
00313 }
00314
00315 if (elements_in_window < min_required_elements_)
00316 {
00317 noise = noise_for_empty_window_;
00318 ++sparse_window_percent;
00319 }
00320 else
00321 {
00322
00323 median_bin = -1;
00324 element_inc_count = 0;
00325 element_in_window_half = (elements_in_window+1) / 2;
00326 while (median_bin < bin_count_minus_1 && element_inc_count < element_in_window_half) {
00327 ++median_bin;
00328 element_inc_count += histogram[median_bin];
00329 }
00330
00331
00332 if (median_bin == bin_count_minus_1) {++histogram_oob_percent;}
00333
00334
00335 noise = std::max(1.0, bin_value[median_bin]);
00336 }
00337
00338
00339 stn_estimates_[*window_pos_center] = (*window_pos_center).getIntensity() / noise;
00340
00341
00342
00343 ++window_pos_center;
00344 ++window_count;
00345
00346 SignalToNoiseEstimator< Container >::setProgress(window_count);
00347
00348 }
00349
00350 SignalToNoiseEstimator< Container >::endProgress();
00351
00352 sparse_window_percent = sparse_window_percent *100 / window_count;
00353 histogram_oob_percent = histogram_oob_percent *100 / window_count;
00354
00355
00356 if (sparse_window_percent > 20)
00357 {
00358 std::cerr << "WARNING in SignalToNoiseEstimatorMedian: "
00359 << sparse_window_percent
00360 << "% of all windows were sparse. You should consider increasing WindowLength or decreasing MinReqElementsInWindow"
00361 << std::endl;
00362 }
00363
00364
00365 if (histogram_oob_percent > 1)
00366 {
00367 std::cerr << "WARNING in SignalToNoiseEstimatorMedian: "
00368 << histogram_oob_percent
00369 << "% of all Signal-to-Noise estimates are too high, because the median was found in the rightmost histogram-bin. "
00370 << "You should consider increasing max_intensity (and maybe 'bin_count' with it, to keep bin width reasonable)"
00371 << std::endl;
00372 }
00373
00374 }
00375
00377 void updateMembers_()
00378 {
00379 max_intensity_ = (double)param_.getValue("max_intensity");
00380 auto_max_stdev_Factor_ = (double)param_.getValue("auto_max_stdev_factor");
00381 auto_max_percentile_ = param_.getValue("auto_max_percentile");
00382 auto_mode_ = param_.getValue("auto_mode");
00383 win_len_ = (double)param_.getValue("win_len");
00384 bin_count_ = param_.getValue("bin_count");
00385 min_required_elements_ = param_.getValue("min_required_elements");
00386 noise_for_empty_window_= (double)param_.getValue("noise_for_empty_window");
00387 is_result_valid_ = false;
00388 }
00389
00391 double max_intensity_;
00393 double auto_max_stdev_Factor_;
00395 double auto_max_percentile_;
00397 int auto_mode_;
00399 double win_len_;
00401 int bin_count_;
00403 int min_required_elements_;
00406 double noise_for_empty_window_;
00407
00408
00409
00410 };
00411
00412 }
00413
00414 #endif //OPENMS_FILTERING_NOISEESTIMATION_SIGNALTONOISEESTIMATORMEDIAN_H