Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages

PeakPickerCWT.h (Maintainer: Eva Lange)

Go to the documentation of this file.
00001 // -*- Mode: C++; tab-width: 2; -*-
00002 // vi: set ts=2:
00003 //
00004 // --------------------------------------------------------------------------
00005 //                   OpenMS Mass Spectrometry Framework
00006 // --------------------------------------------------------------------------
00007 //  Copyright (C) 2003-2008 -- Oliver Kohlbacher, Knut Reinert
00008 //
00009 //  This library is free software; you can redistribute it and/or
00010 //  modify it under the terms of the GNU Lesser General Public
00011 //  License as published by the Free Software Foundation; either
00012 //  version 2.1 of the License, or (at your option) any later version.
00013 //
00014 //  This library is distributed in the hope that it will be useful,
00015 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 //  Lesser General Public License for more details.
00018 //
00019 //  You should have received a copy of the GNU Lesser General Public
00020 //  License along with this library; if not, write to the Free Software
00021 //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 //
00023 // --------------------------------------------------------------------------
00024 // $Maintainer: Eva Lange $
00025 // --------------------------------------------------------------------------
00026 //
00027 #ifndef OPENMS_TRANSFORMATIONS_RAW2PEAK_PEAKPICKERCWT_H
00028 #define OPENMS_TRANSFORMATIONS_RAW2PEAK_PEAKPICKERCWT_H
00029 
00030 #include <OpenMS/TRANSFORMATIONS/RAW2PEAK/PeakPicker.h>
00031 #include <OpenMS/KERNEL/PickedPeak1D.h>
00032 #include <OpenMS/KERNEL/MSExperiment.h>
00033 #include <OpenMS/TRANSFORMATIONS/RAW2PEAK/PeakShape.h>
00034 #include <OpenMS/FILTERING/NOISEESTIMATION/SignalToNoiseEstimatorMeanIterative.h>
00035 #include <OpenMS/TRANSFORMATIONS/RAW2PEAK/ContinuousWaveletTransformNumIntegration.h>
00036 #include <OpenMS/TRANSFORMATIONS/RAW2PEAK/OptimizePeakDeconvolution.h>
00037 #include <OpenMS/TRANSFORMATIONS/RAW2PEAK/TwoDOptimization.h>
00038 #include <OpenMS/TRANSFORMATIONS/RAW2PEAK/OptimizePick.h>
00039 
00040 #include <math.h>
00041 #include <vector>
00042 #include <algorithm>
00043 
00044 
00045 #define DEBUG_PEAK_PICKING
00046 #undef DEBUG_PEAK_PICKING
00047 //#undef DEBUG_DECONV
00048 namespace OpenMS
00049 {
00062   class PeakPickerCWT : public PeakPicker
00063   {
00064   public:
00065 
00067     typedef RawDataPoint1D RawDataPointType;
00069     typedef DPeakArray<RawDataPointType > RawDataArrayType;
00071     typedef RawDataArrayType::iterator RawDataPointIterator;
00073     typedef DPosition<1> PositionType;
00074 
00075 
00076     using PeakPicker::peak_bound_;
00077     using PeakPicker::peak_bound_ms2_level_;
00078     using PeakPicker::signal_to_noise_;
00079     using PeakPicker::fwhm_bound_;
00080 
00082     PeakPickerCWT();
00083 
00085     virtual ~PeakPickerCWT();
00086 
00102     template <typename InputPeakIterator, typename OutputPeakContainer  >
00103     void pick(InputPeakIterator first, InputPeakIterator last, OutputPeakContainer& picked_peak_container, int ms_level = 1)
00104     {
00105       if (peak_bound_cwt_==0.0 || peak_bound_ms2_level_cwt_==0.0)
00106       {
00107         initializeWT_();
00108       }
00109 
00110       // empty spectra shouldn't be picked
00111       if(first == last)
00112       {
00113         return;
00114       }
00115       else
00116       {
00117         // only one "peak"
00118         if (last - first == 1)
00119         {
00121           return;
00122         }
00123       }
00124       typedef typename OutputPeakContainer::value_type OutputPeakType;
00125 
00126       //clear the peak shapes vector
00127       peak_shapes_.clear();
00128 
00129 #ifdef DEBUG_PEAK_PICKING
00130 
00131       std::cout << "****************** PICK ******************" << std::endl;
00132 #endif
00133 
00134       // vector of peak endpoint positions
00135       std::vector<double> peak_endpoints;
00136 
00137       // copy the raw data into a DPeakArray<DRawDataPoint<D> >
00138       //  raw_peak_array_original is needed for the separation of overlapping peaks
00139       RawDataArrayType raw_peak_array, raw_peak_array_original;
00140       // signal to noise estimator
00141       SignalToNoiseEstimatorMeanIterative< RawDataArrayType > sne;      
00142       Param sne_param(param_.copy("SignalToNoiseEstimationParameter:",true));
00143       if(sne_param.empty()) sne.setParameters(Param());
00144       else sne.setParameters(sne_param);
00145       UInt n = distance(first, last);
00146       raw_peak_array.resize(n);
00147       raw_peak_array_original.resize(n);
00148       
00149       for (UInt i = 0; i < n; ++i)
00150       {
00151         RawDataPointType raw_data_point;
00152         raw_data_point.setIntensity((first + i)->getIntensity());
00153         raw_data_point.setPosition((first + i)->getPosition());
00154         raw_peak_array[i] = raw_data_point;
00155         raw_peak_array_original[i] = raw_data_point;
00156       }
00157 
00158 
00159       RawDataPointIterator it_pick_begin = raw_peak_array.begin();
00160       RawDataPointIterator it_pick_end   = raw_peak_array.end();
00161 
00162       if(it_pick_begin == it_pick_end)
00163         return;
00164       sne.init(it_pick_begin,it_pick_end);
00165 
00166       // thresholds for deconvolution
00167       double fwhm_threshold = (float)param_.getValue("deconvolution:fitting:fwhm_threshold");
00168       double symm_threshold = (float)param_.getValue("deconvolution:asym_threshold");
00169 
00170 
00171       // Points to the actual maximum position in the raw data
00172       RawDataPointIterator it_max_pos;
00173 
00174       // start the peak picking until no more maxima can be found in the wavelet transform
00175       UInt number_of_peaks = 0;
00176 
00177       do
00178       {
00179         number_of_peaks = 0;
00180         int peak_left_index, peak_right_index;
00181 
00182         // compute the continious wavelet transform with resolution 1
00183         DoubleReal resolution = 1;
00184         wt_.transform(it_pick_begin, it_pick_end,resolution);
00185         PeakArea_ area;
00186         bool centroid_fit=false;
00187         bool regular_endpoints=true;
00188 
00189         // search for maximum positions in the cwt and extract potential peaks
00190         int direction=1;
00191         int distance_from_scan_border = 0;
00192         while ((distance(it_pick_begin, it_pick_end) > 3)
00193                && getMaxPosition_(it_pick_begin,
00194                                   it_pick_end,
00195                                   wt_,
00196                                   area,
00197                                   distance_from_scan_border,
00198                                   ms_level,
00199                                   direction))
00200         {
00201           // if the signal to noise ratio at the max position is too small
00202           // the peak isn't considered
00203 
00204           if((area.max  != it_pick_end) && (sne.getSignalToNoise(area.max) < signal_to_noise_) )
00205           {
00206             it_pick_begin = area.max;
00207             distance_from_scan_border = distance(raw_peak_array.begin(),it_pick_begin);
00208             
00209             continue;
00210           }
00211           else if(area.max >= it_pick_end) break;
00212 
00213           //search for the endpoints of the peak
00214           regular_endpoints = getPeakEndPoints_(it_pick_begin,
00215                                                 it_pick_end,
00216                                                 area,
00217                                                 distance_from_scan_border,
00218                                                 peak_left_index,
00219                                                 peak_right_index);
00220 
00221           // compute the centroid position
00222           getPeakCentroid_(area);
00223 
00224           // if the peak achieves a minimal width, start the peak fitting
00225           if (regular_endpoints)
00226           {
00227 #ifdef DEBUG_PEAK_PICKING
00228             std::cout << "The endpoints are "
00229             << area.left->getPosition()
00230             << " and "
00231             << area.right->getPosition()
00232             << std::endl;
00233 #endif
00234             // determine the best fitting lorezian or sech2 function
00235             PeakShape shape = fitPeakShape_(area,centroid_fit);
00236             shape.left_endpoint = (raw_peak_array_original.begin() + distance(raw_peak_array.begin(), area.left));
00237             shape.right_endpoint = (raw_peak_array_original.begin() + distance(raw_peak_array.begin(), area.right));
00238             if(shape.right_endpoint == raw_peak_array_original.end()) --shape.right_endpoint; 
00239             // Use the centroid for Optimization
00240             shape.mz_position=area.centroid_position[0];
00241             if ( (shape.r_value > peak_corr_bound_)
00242                  && (shape.getFWHM() >= fwhm_bound_))
00243             {
00244               shape.signal_to_noise = sne.getSignalToNoise(area.max);
00245               peak_shapes_.push_back(shape);
00246               ++number_of_peaks;
00247             }
00248 
00249             else
00250             {
00251 #ifdef DEBUG_PEAK_PICKING
00252               std::cout << "Corr: " << shape.r_value << " SN: " << sne.getSignalToNoise(area.max) << " FWHM: " << shape.getFWHM() << std::endl;
00253               std::cout << "Bad fitting peak "<< std::endl;
00254 #endif
00255 
00256             }
00257           }
00258 
00259           // remove the peak from the signal
00260           // TODO: does this work as expected???
00261           for (RawDataPointIterator pi=area.left; pi!=area.right+1; pi++)
00262           {
00263             pi->setIntensity(0.);
00264           }
00265 
00266           // search for the next peak
00267           it_pick_begin = area.right;
00268           distance_from_scan_border = distance(raw_peak_array.begin(),it_pick_begin);
00269 
00270         } //end while (getMaxPosition_(it_pick_begin, it_pick_end, wt_, area, distance_from_scan_border, ms_level, direction))
00271         it_pick_begin = raw_peak_array.begin();
00272       }
00273       while (number_of_peaks != 0);
00274 
00275       // start the nonlinear optimization for all peaks in split
00276 #ifdef DEBUG_PEAK_PICKING
00277 
00278       std::cout << "Try the optimization run... with " << peak_shapes_.size() << std::endl;
00279 #endif
00280 
00281       if (peak_shapes_.size() > 0)
00282       {
00283         // overlapping peaks are mostly broad or asymmetric
00284         // we distinguish them from broad or asymmetric isotopic peaks 
00285         // (e.g. charge one peaks, or peaks in the high mass range)
00286         // by a simple heuristic: if the distances to adjacent peaks
00287         // are dissimilar, the fhwm is much broader than the fhwm of
00288         // adjacent peaks or if the peak has no near neighbors
00289         // we assume a convolved peak pattern and start the deconvolution.
00290         // sort the peaks according to their positions
00291         sort(peak_shapes_.begin(), peak_shapes_.end(), PeakShape::PositionLess());
00292         std::vector<UInt> peaks_to_skip;
00293         // search for broad or asymmetric peaks
00294         UInt n = peak_shapes_.size();
00295         if( deconvolution_)
00296         {
00297           for (UInt i = 0; i < n; ++i)
00298             {
00299               if ((peak_shapes_[i].getFWHM() > fwhm_threshold) 
00300                   || (peak_shapes_[i].getSymmetricMeasure() < symm_threshold))
00301                 {
00302 #ifdef DEBUG_DECONV
00303                   std::cout << "check " << peak_shapes_[i].mz_position 
00304                             << " with fwhm: " << peak_shapes_[i].getFWHM() 
00305                             << " and " << peak_shapes_[i].left_width 
00306                             << ' ' << peak_shapes_[i].right_width 
00307                             << ' ' << peak_shapes_[i].left_endpoint->getMZ()
00308                             << ' ' << peak_shapes_[i].right_endpoint->getMZ()
00309                             << std::endl;
00310 #endif
00311                   // this might be a convolved peak pattern
00312                   // and we check the distance to the neighboring peaks as well as their fwhm values:
00313                   //float max_distance = 1.1;
00314                   float dist_left = ((i > 0) && (fabs(peak_shapes_[i].mz_position-peak_shapes_[i-1].mz_position) < 1.2)) ? fabs(peak_shapes_[i].mz_position-peak_shapes_[i-1].mz_position) : -1;
00315                   float dist_right = ((i < (n-1)) && (fabs(peak_shapes_[i].mz_position-peak_shapes_[i+1].mz_position) < 1.2)) ? fabs(peak_shapes_[i].mz_position-peak_shapes_[i+1].mz_position) : -1;
00316           
00317                   // left and right neighbor
00318                   if ((dist_left > 0) && (dist_right > 0))
00319                     {
00320                       // if distances to left and right adjacent peaks is dissimilar deconvolute
00321                       DoubleReal ratio = (dist_left > dist_right) ? dist_right/dist_left : dist_left/dist_right;
00322 #ifdef DEBUG_DECONV
00323                       std::cout << "Ratio " << ratio << std::endl;
00324 #endif
00325                       if (ratio < 0.6)
00326                         {
00327 #ifdef DEBUG_DECONV
00328                           std::cout << "deconvolute: dissimilar left and right neighbor "  << peak_shapes_[i-1].mz_position << ' ' << peak_shapes_[i+1].mz_position << std::endl;
00329 #endif
00330                           if(deconvolutePeak_(peak_shapes_[i])) peaks_to_skip.push_back(i);
00331                         }
00332                     }
00333                   // has only one or no neighbor peak
00334                   else
00335                     {
00336                       // only left neighbor
00337                       if (dist_left > 0)
00338                         {
00339                           // check distance and compare fwhm
00340                           DoubleReal dist = 1.00235;
00341                           //check charge 1 or 2
00342                           bool dist_ok = ((fabs(dist-dist_left) < 0.21) || (fabs(dist/2.-dist_left) < 0.11)) ? true : false ;  
00343                           // distance complies peptide mass rule
00344                           if (dist_ok)
00345                             {
00346 #ifdef DEBUG_DECONV
00347                               std::cout << "left neighbor " << peak_shapes_[i-1].mz_position << ' ' << peak_shapes_[i-1].getFWHM() << std::endl;
00348 #endif
00349                               // if the left peak has a fwhm which is smaller than 60% of the fwhm of the broad peak deconvolute
00350                               if ((peak_shapes_[i-1].getFWHM()/peak_shapes_[i].getFWHM()) < 0.6)
00351                                 {
00352 #ifdef DEBUG_DECONV
00353                                   std::cout << " too small fwhm" << std::endl;
00354 #endif
00355                                   if(deconvolutePeak_(peak_shapes_[i])) peaks_to_skip.push_back(i);
00356                                 }
00357                             }
00358                           else
00359                             {
00360 #ifdef DEBUG_DECONV
00361                               std::cout << "distance not ok" << dist_left << ' ' << peak_shapes_[i-1].mz_position << std::endl;
00362 #endif
00363                               if(deconvolutePeak_(peak_shapes_[i])) peaks_to_skip.push_back(i);
00364                             }
00365                         }
00366                       else
00367                         { 
00368                           // only right neighbor 
00369                           if (dist_right > 0)
00370                             {
00371                               // check distance and compare fwhm
00372                               DoubleReal dist = 1.00235;
00373                               //check charge 1 or 2
00374                               bool dist_ok = ((fabs(dist-dist_right) < 0.21) || (fabs(dist/2.-dist_right) < 0.11)) ? true : false ;  
00375                               // distance complies peptide mass rule
00376                               if (dist_ok)
00377                                 {
00378 #ifdef DEBUG_DECONV
00379                                   std::cout << "right neighbor " << peak_shapes_[i+1].mz_position << ' ' << peak_shapes_[i+1].getFWHM() << std::endl;
00380 #endif
00381                                   // if the left peak has a fwhm which is smaller than 60% of the fwhm of the broad peak deconvolute
00382                                   if ((peak_shapes_[i+1].getFWHM()/peak_shapes_[i].getFWHM()) < 0.6)
00383                                     {
00384 #ifdef DEBUG_DECONV
00385                                       std::cout << "too small fwhm"  << std::endl;
00386 #endif
00387                                       if(deconvolutePeak_(peak_shapes_[i])) peaks_to_skip.push_back(i);
00388                                     }
00389                                 }
00390                               else
00391                                 {
00392 #ifdef DEBUG_DECONV
00393                                   std::cout << "distance not ok" << dist_right << ' ' << peak_shapes_[i+1].mz_position << std::endl;
00394 #endif
00395                                   if(deconvolutePeak_(peak_shapes_[i])) peaks_to_skip.push_back(i);
00396                                 }
00397                             }
00398                           // no neighbor
00399                           else
00400                             {
00401 #ifdef DEBUG_DECONV
00402                               std::cout << "no neighbor" << std::endl;
00403 #endif
00404                               if(deconvolutePeak_(peak_shapes_[i])) peaks_to_skip.push_back(i);
00405                             } 
00406                         }
00407                     }
00408                 }
00409             }
00410         }
00411         
00412         // write the picked peaks to the outputcontainer
00413         for (UInt i = 0; i < peak_shapes_.size(); ++i)
00414         {
00415           // put it out only if the peak was not deconvoluted
00416           if(find(peaks_to_skip.begin(),peaks_to_skip.end(),i) == peaks_to_skip.end() )
00417           {
00418             OutputPeakType picked_peak;
00419             
00420             picked_peak.setIntensity(peak_shapes_[i].height);
00421             picked_peak.setMZ(peak_shapes_[i].mz_position);
00422             
00423             fillPeak_(peak_shapes_[i],picked_peak);
00424             picked_peak_container.push_back(picked_peak);
00425           }
00426         }
00427       } // if (peak_shapes_.size() > 0)
00428     }
00429 
00446     template <typename InputPeakContainer, typename OutputPeakContainer >
00447     void pick(const InputPeakContainer& input_peak_container, OutputPeakContainer& picked_peaks_container, int ms_level = 1)
00448     {
00449       // copy the spectrum settings
00450       static_cast<SpectrumSettings&>(picked_peaks_container) = input_peak_container;
00451       
00452       pick(input_peak_container.begin(), input_peak_container.end(), picked_peaks_container, ms_level);
00453     }
00454 
00455 
00469     template <typename InputSpectrumIterator, typename OutputPeakType >
00470     void pickExperiment(InputSpectrumIterator first, InputSpectrumIterator last, MSExperiment<OutputPeakType>& ms_exp_peaks)
00471     {
00472       UInt n = distance(first,last);
00473       ms_exp_peaks.reserve(n);
00474       startProgress(0,n,"pick peaks in mzData file");
00475       // pick peaks on each scan
00476       for (UInt i = 0; i < n; ++i)
00477       {
00478         setProgress(i);
00479         MSSpectrum< OutputPeakType > spectrum;
00480         InputSpectrumIterator input_it(first+i);
00481 #ifdef DEBUG_PEAK_PICKING
00482         std::cout << "PeakPicker: Picking Scan " << input_it->getRT()<< std::endl;
00483 #endif
00484         // pick the peaks in scan i
00485         pick(*input_it,spectrum,input_it->getMSLevel());
00486         setProgress(i);
00487 
00488         // if any peaks are found copy the spectrum settings
00489         if (spectrum.size() > 0)
00490         {
00491           // copy the spectrum settings
00492           static_cast<SpectrumSettings&>(spectrum) = *input_it;
00493           spectrum.setType(SpectrumSettings::PEAKS);
00494 
00495           // copy the spectrum information
00496           spectrum.setPrecursorPeak(input_it->getPrecursorPeak());
00497           spectrum.setRT(input_it->getRT());
00498           spectrum.setMSLevel(input_it->getMSLevel());
00499           spectrum.getName() = input_it->getName();
00500 
00501           ms_exp_peaks.push_back(spectrum);
00502         }
00503       }
00504       // sort spectra
00505       ms_exp_peaks.sortSpectra(true);
00506 
00507       if(two_d_optimization_ || optimization_)
00508       {
00509         Param two_d_param(param_.copy("optimization:",true));
00510       
00511         TwoDOptimization my_2d;
00512         my_2d.setParameters(two_d_param);
00513         my_2d.optimize(first,last,ms_exp_peaks,two_d_optimization_);
00514       }
00515       // sort spectra
00516       ms_exp_peaks.sortSpectra(true);
00517       endProgress();
00518     }
00519 
00530     template <typename InputPeakType, typename OutputPeakType >
00531     void pickExperiment(const MSExperiment< InputPeakType >& ms_exp_raw, MSExperiment<OutputPeakType>& ms_exp_peaks)
00532     {
00533       // copy the experimental settings
00534       static_cast<ExperimentalSettings&>(ms_exp_peaks) = ms_exp_raw;
00535 
00536       pickExperiment(ms_exp_raw.begin(),ms_exp_raw.end(),ms_exp_peaks);
00537     }
00538 
00539   protected:
00541     std::vector<PeakShape> peak_shapes_;
00542 
00544     ContinuousWaveletTransformNumIntegration wt_;
00545 
00547     ContinuousWaveletTransformNumIntegration wtDC_;
00548 
00550     UInt radius_;
00551 
00553     float scale_;
00554 
00556     float peak_bound_cwt_;
00557 
00559     float peak_bound_ms2_level_cwt_;
00560 
00562     float peak_corr_bound_;
00563 
00565     float noise_level_;
00566 
00568     bool optimization_;
00569 
00571     bool deconvolution_;
00572 
00574     bool two_d_optimization_;
00575 
00576 
00577     void updateMembers_();
00578 
00580     void init_();
00581 
00582 
00589     class PeakArea_
00590     {
00591       typedef std::vector<RawDataPointType>::iterator RawDataPointIterator;
00592 
00593     public:
00594       PeakArea_() : left(0), max(0), right(0), left_behind_centroid(0)
00595       {
00596       }
00597 
00607       RawDataPointIterator left;
00608       RawDataPointIterator max;
00609       RawDataPointIterator right;
00610       RawDataPointIterator left_behind_centroid;
00612       DPosition<1> centroid_position;
00613     };
00614 
00616     template <typename OutputPeakType>
00617     void fillPeak_(const PeakShape& /* peak_shape */, OutputPeakType& /* picked_peak */)
00618     {
00619     }
00620 
00622     void getPeakArea_(const PeakArea_& area, double &area_left, double &area_right);
00623 
00625     PeakShape fitPeakShape_(const PeakArea_& area, bool enable_centroid_fit);
00626 
00633     double correlate_(const PeakShape& peak, const PeakArea_& area, int direction=0) const;
00634 
00635 
00644     bool getMaxPosition_(RawDataPointIterator first, RawDataPointIterator last, const ContinuousWaveletTransform& wt, PeakArea_& area, int distance_from_scan_border, int ms_level, int direction=1);
00645 
00646 
00669     bool getPeakEndPoints_(RawDataPointIterator first, RawDataPointIterator last,  PeakArea_ &area, int distance_from_scan_border, int& peak_left_index, int& peak_right_index);
00670 
00671 
00678     void getPeakCentroid_(PeakArea_& area);
00679 
00681     double lorentz_(double height, double lambda, double pos, double x);
00682 
00692     void initializeWT_();
00693 
00697     
00704     bool deconvolutePeak_(PeakShape& shape);
00705 
00707     int getNumberOfPeaks_(RawDataPointIterator& first,RawDataPointIterator& last, std::vector<double>& peak_values,
00708                           int direction,DoubleReal resolution, ContinuousWaveletTransformNumIntegration& wt);
00709 
00711     int determineChargeState_(std::vector<double>& peak_values);
00712 
00714     void addPeak_(std::vector<PeakShape>& peaks_DC,PeakArea_& area,double left_width,double right_width);
00716   }
00717   ; // end PeakPickerCWT
00718 
00719 
00721   template <>
00722   void PeakPickerCWT::fillPeak_< PickedPeak1D >(const PeakShape& peak_shape, PickedPeak1D& picked_peak);
00723 
00724 
00725 }// namespace OpenMS
00726 
00727 #endif

Generated Tue Apr 1 15:36:36 2008 -- using doxygen 1.5.4 OpenMS / TOPP 1.1