Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages

IsotopeWaveletTransform.h (Maintainer: Rene Hussong)

Go to the documentation of this file.
00001 // -*- Mode: C++; tab-width: 2; -*-
00002 // vi: set ts=2:
00003 //
00004 // --------------------------------------------------------------------------
00005 //                   OpenMS Mass Spectrometry Framework
00006 // --------------------------------------------------------------------------
00007 //  Copyright (C) 2003-2008 -- Oliver Kohlbacher, Knut Reinert
00008 //
00009 //  This library is free software; you can redistribute it and/or
00010 //  modify it under the terms of the GNU Lesser General Public
00011 //  License as published by the Free Software Foundation; either
00012 //  version 2.1 of the License, or (at your option) any later version.
00013 //
00014 //  This library is distributed in the hope that it will be useful,
00015 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 //  Lesser General Public License for more details.
00018 //
00019 //  You should have received a copy of the GNU Lesser General Public
00020 //  License along with this library; if not, write to the Free Software
00021 //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 //
00023 // --------------------------------------------------------------------------
00024 // $Maintainer: Rene Hussong$
00025 // --------------------------------------------------------------------------
00026 
00027 #ifndef OPENMS_TRANSFORMATIONS_FEATUREFINDER_ISOTOPEWAVELETTRANSFORM_H
00028 #define OPENMS_TRANSFORMATIONS_FEATUREFINDER_ISOTOPEWAVELETTRANSFORM_H
00029 
00030 
00031 #include <OpenMS/KERNEL/MSSpectrum.h>
00032 #include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/IsotopeWavelet.h>
00033 #include <OpenMS/KERNEL/FeatureMap.h>
00034 #include <OpenMS/KERNEL/MSExperiment.h>
00035 #include <OpenMS/CONCEPT/Exception.h>
00036 #include <OpenMS/MATH/STATISTICS/LinearRegression.h>
00037 #include <math.h>
00038 #include <gsl/gsl_spline.h>
00039 #include <gsl/gsl_statistics_double.h>
00040 #include <vector>
00041 #include <map>
00042 #include <sstream>
00043 #include <fstream>
00044 #include <iomanip>
00045 
00046 #ifndef DEFAULT_NUM_OF_INTERPOLATION_POINTS
00047 #define DEFAULT_NUM_OF_INTERPOLATION_POINTS 3 
00048 #endif
00049 
00050 #ifndef EPSILON_ION_COUNTS
00051 #define EPSILON_ION_COUNTS 0 
00052 #endif
00053 
00054 
00055 namespace OpenMS
00056 {
00062   template <typename PeakType>
00063   class IsotopeWaveletTransform
00064   {
00065     public:
00066         
00068       struct BoxElement_
00069       {     
00070         DoubleReal mz;
00071         UInt c; //<Note, this is not the charge (it is charge-1!!!)
00072         DoubleReal score;
00073         DoubleReal intens;
00074         DoubleReal max_intens;
00075         DoubleReal RT; //<The elution time (not the scan index)
00076         UInt RT_index;
00077         UInt MZ_begin; //<Index 
00078         UInt MZ_end; //<Index
00079       };
00080 
00081       typedef std::multimap<UInt, BoxElement_> Box_; 
00082 
00083 
00089       IsotopeWaveletTransform (const DoubleReal min_mz, const DoubleReal max_mz, const UInt max_charge) throw();
00090 
00092       virtual ~IsotopeWaveletTransform () throw();
00093     
00106       virtual void getTransforms (const MSSpectrum<PeakType>& scan, 
00107         std::vector<MSSpectrum<PeakType> > &transforms, const UInt max_charge, const Int mode) throw ();
00108 
00109 
00126       virtual void identifyCharges (const std::vector<MSSpectrum<PeakType> >& candidates,
00127         const MSSpectrum<PeakType>& ref, const UInt scan_index, const DoubleReal ampl_cutoff=0) throw ();
00128       
00129 
00136       void updateBoxStates (const UInt scan_index, const UInt RT_interleave, 
00137         const UInt RT_votes_cutoff) throw ();
00138 
00139 
00144       FeatureMap<Feature> mapSeeds2Features (const MSExperiment<PeakType>& map, const UInt max_charge, const UInt RT_votes_cutoff) throw ();
00145 
00146 
00148       virtual std::multimap<DoubleReal, Box_> getClosedBoxes () throw ()
00149         { return (closed_boxes_); };
00150 
00151 
00152     protected:            
00153 
00154 
00157       IsotopeWaveletTransform () throw();
00158 
00159 
00160       void estimatePeakCutOffs_ (const DoubleReal min_mz, const DoubleReal max_mz, const UInt max_charge) throw ();
00161       
00162 
00173       inline void sampleTheWavelet_ (const MSSpectrum<PeakType>& scan, const UInt wavelet_length, const UInt mz_index, 
00174         const DoubleReal offset, const UInt charge, const UInt peak_cutoff, const Int mode=+1) throw ();    
00175 
00183       virtual DoubleReal scoreThis_ (const MSSpectrum<PeakType>& candidate, const UInt peak_cutoff, 
00184         const DoubleReal seed_mz, const UInt c, const DoubleReal intens, const DoubleReal ampl_cutoff=0) throw ();
00185 
00193       virtual void checkPosition_ (const MSSpectrum<PeakType>& candidate, const MSSpectrum<PeakType>& ref, const DoubleReal seed_mz, 
00194         const UInt c, const UInt scan_index) throw ();
00195 
00196 
00198       inline DoubleReal getAvIntens_ (const MSSpectrum<PeakType>& scan) throw ();     
00199       
00201       inline DoubleReal getSdIntens_ (const MSSpectrum<PeakType>& scan, const DoubleReal mean) throw ();
00202 
00204       DoubleReal getCubicInterpolatedValue_ (const std::vector<DoubleReal>& x, const DoubleReal xi, const std::vector<DoubleReal>& y) throw ();
00205 
00208       inline std::pair<Int, Int> getNearBys_ (const MSSpectrum<PeakType>& signal, const DoubleReal mz, 
00209         const UInt start=0) const throw ();
00210 
00221       virtual void push2Box_ (const DoubleReal mz, const UInt scan, UInt charge, const DoubleReal score, 
00222         const DoubleReal intens, const DoubleReal rt, const UInt MZ_begin, const UInt MZ_end) throw ();
00223 
00239       virtual void push2TmpBox_ (const DoubleReal mz, const UInt scan, UInt charge, const DoubleReal score, 
00240         const DoubleReal intens, const DoubleReal rt, const UInt MZ_begin, const UInt MZ_end) throw ();
00241 
00242 
00248       inline DoubleReal getAvMZSpacing_ (const MSSpectrum<PeakType>& scan, Int start_index=0, Int end_index=-1) throw ();
00249  
00250         
00256       inline DoubleReal chordTrapezoidRule_ (const DoubleReal a, const DoubleReal b, const DoubleReal fa, const DoubleReal fb) throw ()
00257       { 
00258         return ((fb+fa)*0.5*(b-a)); 
00259       };
00260     
00264       inline DoubleReal chordTrapezoidRule_ (const std::vector<DoubleReal>& x, const std::vector<DoubleReal>& y) throw ()
00265       {
00266         DoubleReal res=0;
00267         for (UInt i=0; i<x.size()-1; ++i)
00268         {
00269           res += (x[i+1]-x[i])*(y[i+1]+y[i]);
00270         };
00271         return (0.5*res); 
00272       };
00273 
00278       inline UInt getPeakCutOff_ (const DoubleReal mass, const UInt z)
00279       { 
00280         return ((UInt) ceil(peak_cutoff_intercept_+peak_cutoff_slope_*mass*z)); 
00281       };    
00282 
00283 
00289       void clusterSeeds_ (const std::vector<MSSpectrum<PeakType> >& candidates, 
00290         const MSSpectrum<PeakType>& ref, const UInt scan_index, const UInt max_charge) throw ();
00291 
00292 
00293       //internally used data structures for the sweep line algorithm  
00294       std::multimap<DoubleReal, Box_> open_boxes_, closed_boxes_; //DoubleReal = average m/z position
00295       std::vector<std::multimap<DoubleReal, Box_> >* tmp_boxes_; //for each charge we need a separate container
00296     
00297       gsl_interp_accel* acc_;
00298       gsl_spline* spline_; 
00299       DoubleReal av_MZ_spacing_, peak_cutoff_intercept_, peak_cutoff_slope_;  
00300       std::vector<DoubleReal> c_mzs_, c_spacings_, psi_, prod_, xs_;
00301   };
00302 
00303 
00304 
00305 
00306   template <typename PeakType>  
00307   bool comparator (const PeakType& a, const PeakType& b)
00308   {
00309     return (a.getIntensity() > b.getIntensity());
00310   }   
00311 
00312 
00313   template <typename PeakType>
00314   IsotopeWaveletTransform<PeakType>::IsotopeWaveletTransform () throw()
00315   {
00316     acc_ = gsl_interp_accel_alloc ();
00317     spline_ = gsl_spline_alloc (gsl_interp_cspline, DEFAULT_NUM_OF_INTERPOLATION_POINTS); 
00318     tmp_boxes_ = new std::vector<std::multimap<DoubleReal, Box_> > (1);
00319     av_MZ_spacing_=1;
00320   }
00321 
00322   template <typename PeakType>
00323   IsotopeWaveletTransform<PeakType>::IsotopeWaveletTransform (const DoubleReal min_mz, const DoubleReal max_mz, const UInt max_charge) throw()
00324   { 
00325     acc_ = gsl_interp_accel_alloc ();
00326     spline_ = gsl_spline_alloc (gsl_interp_cspline, DEFAULT_NUM_OF_INTERPOLATION_POINTS); 
00327     tmp_boxes_ = new std::vector<std::multimap<DoubleReal, Box_> > (max_charge);
00328     IsotopeWavelet::init (max_mz, max_charge);    
00329     av_MZ_spacing_=1;
00330     estimatePeakCutOffs_ (min_mz, max_mz, max_charge);    
00331     UInt max_cutoff (getPeakCutOff_(max_mz, max_charge));
00332     psi_.reserve (max_cutoff); //The wavelet
00333     prod_.reserve (max_cutoff); 
00334     xs_.reserve (max_cutoff); 
00335 
00336   }
00337 
00338 
00339   template <typename PeakType>
00340   IsotopeWaveletTransform<PeakType>::~IsotopeWaveletTransform () throw()
00341   {
00342     gsl_interp_accel_free (acc_);
00343     gsl_spline_free (spline_);
00344     delete (tmp_boxes_);
00345   }
00346 
00347 
00348   template <typename PeakType>
00349   void IsotopeWaveletTransform<PeakType>::getTransforms (const MSSpectrum<PeakType>& scan, 
00350     std::vector<MSSpectrum<PeakType> > &transforms, const UInt max_charge, const Int mode) throw ()
00351   {
00352     UInt scan_size = scan.size(), wavelet_length=0, old_length=0, peak_cutoff=0;
00353     av_MZ_spacing_ = getAvMZSpacing_(scan);
00354     
00355     DoubleReal cum_spacing, c_spacing, //Helping variables
00356       max_w_monoi_intens=QUARTER_NEUTRON_MASS, //The position of the monoisotopic peak within the coordinate sys. of the wavelet 
00357       sums=0, //Helping variables
00358       max_position_scan=0, //The position of the data point (within the scan) we want to align with
00359       align_offset, //Correction term; shifts the wavelet to get the desired alignment
00360       last;
00361     UInt c=0, k=0, j=0;
00362     DoubleReal c_charge; //DoubleReal, since we will oven divide by c_charge 
00363     
00364     //The upcoming variable is necessary to capture strange effects in special types of unequally spaced data sets.
00365     //Imagine some wholes in the m/z range (points the mass spectrometer did not sample). If they become larger than 
00366     //0.25*NEUTRON_MASS (considering the case of charge 1), several data points will share the same max_position, 
00367     //causing the upcoming code to crash since suddenly some m/z positions will occur twice. The interval of multiple 
00368     //occurring points is stored by multiple_s and implicitly by i.
00369     std::vector<int> multiple_s (max_charge,-1);
00370     std::vector<DoubleReal> last_max_position_scan (max_charge, -1);
00371     bool repair=false;
00372 
00373     //Starting convolution
00374     for (UInt i=0; i<scan_size; ++i)
00375     {
00376       //Now, let's sample the wavelets
00377       for (c=0; c<max_charge; ++c)
00378       { 
00379         c_charge=c+1;
00380         cum_spacing=0;        
00381         max_w_monoi_intens=QUARTER_NEUTRON_MASS/c_charge; //This is the position of the monoisotopic peak (centered)
00382         
00383         //Align the maximum monoisotopic peak of the wavelet with some scan point. This is step is critical, since
00384         //otherwise we might - especially in the case of badly resolved data - miss patterns, since scan maxima and
00385         //wavelet maxima are "anticorrelated"
00386         j=0; last=0;
00387         while (cum_spacing < max_w_monoi_intens)
00388         {
00389           c_spacing = scan[(i+j+1)%scan_size].getMZ() - scan[(i+j)%scan_size].getMZ();
00390           last=cum_spacing; 
00391           if (c_spacing < 0) //I.e. we are at the end of the scan
00392           {
00393             cum_spacing += av_MZ_spacing_;
00394           }
00395           else //The "normal" case
00396           {
00397             cum_spacing += c_spacing; 
00398           };
00399           ++j;
00400         };
00401           
00402         align_offset = max_w_monoi_intens - last; //I.e. we have to shift the wavelet by this amount to align the data
00403         --j;        
00404 
00405         //The upcoming variable holds the position of the spectrum that is aligned with the monoisotopic 
00406         //maximum of the wavelet. We do not add the overall correction term for the left shift at this point, 
00407         //since we will get trouble by the NEUTRON_MASS and the resulting numerical instabilities. 
00408         //We will add this correcting term at the end of the whole processing.
00409         max_position_scan = scan[(i+j)%scan_size].getMZ();
00410         if (max_position_scan == last_max_position_scan[c]) //Uuups, multiple times the same m/z coordinate
00411         {
00412           if (multiple_s[c] < 0) //This is the first entry where this artifact occurred
00413           {
00414             multiple_s[c] = i-1;
00415           }; 
00416           //Notice that the problematic case of multiple_s being at the end of the spectrum (this might happen for 
00417           //the overlapping part of the transform) can be ignored.        
00418           //The special case if we are the boundary (exactly the last point in the spectrum).
00419           if (i == scan_size-1)
00420           { 
00421             repair = true;
00422           };
00423         }
00424         else //Denotes the end of the multiple pos interval and triggers a repair.
00425         {
00426           if (multiple_s[c] >= 0)
00427           {
00428             repair=true; //We cannot do this now. Just after the transform at the actual point is completed.
00429           };
00430         };
00431 
00432         last_max_position_scan[c] = max_position_scan;
00433         cum_spacing = align_offset;
00434         
00435         peak_cutoff = getPeakCutOff_ (scan[i].getMZ(), (UInt) c_charge);
00436         //IsotopeWavelet::getAveragine (scan[i].getMZ()*c_charge, &peak_cutoff);
00437         
00438         wavelet_length = (UInt) floor(peak_cutoff/av_MZ_spacing_);
00439         if (wavelet_length > scan_size)
00440         {   
00441           return;
00442         };
00443 
00444         if (wavelet_length != old_length)
00445         {
00446           psi_.resize (wavelet_length, 0);
00447           prod_.resize (wavelet_length, 0);
00448           xs_.resize (wavelet_length, 0);
00449           c_mzs_.resize (wavelet_length+1, 0);
00450           c_spacings_.resize (wavelet_length, 0);
00451           old_length = wavelet_length;
00452         };
00453         
00454         //Sampling the wavelet 
00455         sampleTheWavelet_ (scan, wavelet_length, i, cum_spacing, (UInt) c_charge, peak_cutoff, mode);
00456         k=0; 
00457         for (UInt j=i; j<scan_size && k<wavelet_length; ++j, ++k)
00458         {
00459           prod_[k] = scan[j].getIntensity()*psi_[k];
00460           xs_[k] = scan[j].getMZ();
00461         };
00462 
00463         if (k< wavelet_length) // I.e. we have an overlapping wavelet
00464         {
00465           sums = 0;
00466           max_position_scan = transforms[c][i-1].getMZ()+av_MZ_spacing_;
00467         }
00468         else
00469         {
00470           sums = chordTrapezoidRule_ (xs_, prod_);
00471         };
00472 
00473         //Store the current convolution result
00474         PeakType& c_peak1 (transforms[c][i]);
00475         c_peak1.setIntensity(sums);
00476         c_peak1.setMZ(max_position_scan);
00477         transforms[c][i].setIntensity(sums);
00478         transforms[c][i].setMZ(max_position_scan);  
00479         if (repair)
00480         {   
00481           UInt noi2interpol = i - multiple_s[c]; //NOT +1
00482 
00483           //The special case if we are the boundary (exactly the last point in the spectrum)
00484           if (i == scan_size-1)
00485           {
00486             //We do not care about the intensities, since we will set them to zero anyway.
00487             //We would just like to avoid multiple positions to occur in the transform
00488             for (UInt ii=0; ii<=noi2interpol; ++ii) 
00489             //it must be "<=noi..." !!! not "<", since in this case we do not want to keep the last multiple  
00490             //the same holds for "ii=0"
00491             {
00492               transforms[c][multiple_s[c]+ii].setMZ(transforms[c][multiple_s[c]-1].getMZ() + (ii+1)*av_MZ_spacing_);    
00493             };
00494 
00495             last_max_position_scan[c] = max_position_scan; //Reset
00496             multiple_s[c]=-1; //Reset
00497             repair=false;
00498             continue;
00499           }
00500 
00501           PeakType& c_peak2 (transforms[c][multiple_s[c]]);
00502           DoubleReal x1 = c_peak2.getMZ(); 
00503           DoubleReal y1 = c_peak2.getIntensity();         
00504           DoubleReal x2 = c_peak1.getMZ();
00505           DoubleReal y2 = c_peak1.getIntensity();
00506           DoubleReal dx = (x2-x1)/(DoubleReal)(noi2interpol);
00507           for (UInt ii=1; ii<noi2interpol; ++ii) //ii=1, not 0, since we want to keep the first of the multiples
00508           { 
00509             transforms[c][multiple_s[c]+ii].setMZ(c_peak2.getMZ()+ii*dx);
00510             transforms[c][multiple_s[c]+ii].setIntensity(y1 + (y2-y1)/(x2-x1)*(c_peak2.getMZ()+ii*dx-x1));
00511           };
00512 
00513           last_max_position_scan[c] = max_position_scan; //Reset
00514           multiple_s[c]=-1; //Reset
00515           repair=false;
00516         }     
00517       }
00518     }
00519 
00520     #ifdef DEBUG_FEATUREFINDER
00521       for (c=0; c<max_charge; ++c)
00522       {
00523         std::stringstream name; name << "trans_" << c+1 << ".dat\0"; 
00524         std::ofstream ofile (name.str().c_str());
00525         for (unsigned int i=0; i<transforms[c].size(); ++i)
00526           ofile << transforms[c][i].getMZ() << "\t" <<  transforms[c][i].getIntensity() << std::endl;
00527         ofile.close();
00528       };
00529     #endif
00530 
00531     return;
00532   }
00533 
00534 
00535   template <typename PeakType>
00536   void IsotopeWaveletTransform<PeakType>::identifyCharges (const std::vector<MSSpectrum<PeakType> >& candidates,
00537     const MSSpectrum<PeakType>& ref, const UInt scan_index, const DoubleReal ampl_cutoff) throw ()
00538   {
00539     UInt peak_cutoff=0;   
00540     UInt cands_size=candidates.size();
00541     UInt signal_size=candidates[0].size(), i_iter; 
00542     typename MSSpectrum<PeakType>::iterator iter, iter2;
00543     typename MSSpectrum<PeakType>::const_iterator iter_start, iter_end, iter_p;
00544     DoubleReal seed_mz, c_av_intens=0, c_score=0, c_sd_intens=0, threshold=0, help_mz;
00545     UInt help_dist, MZ_start, MZ_end;
00546 
00547     //For all charges do ...
00548     for (UInt c=0; c<cands_size; ++c)   
00549     {
00550       MSSpectrum<PeakType> c_sorted_candidate (candidates[c]); 
00551       std::vector<DoubleReal> processed (signal_size, 0);
00552 
00553       //Sort the transform in descending order according to the intensities present in the transform  
00554       sort (c_sorted_candidate.begin(), c_sorted_candidate.end(), comparator<PeakType>); 
00555       
00556       if (ampl_cutoff < 0)
00557       {
00558         threshold=EPSILON_ION_COUNTS;
00559       }
00560       else
00561       {       
00562         c_av_intens = getAvIntens_ (c_sorted_candidate);
00563         c_sd_intens = getSdIntens_ (c_sorted_candidate, c_av_intens);
00564         threshold=ampl_cutoff*c_sd_intens + c_av_intens;
00565       };    
00566         
00567       //Eliminate uninteresting regions
00568       for (iter=c_sorted_candidate.begin(); iter != c_sorted_candidate.end(); ++iter)
00569       {
00570         if (iter->getIntensity() < c_av_intens)
00571         { 
00572           break;
00573         };
00574       };
00575 
00576       i_iter=0;
00577       for (iter=c_sorted_candidate.begin(); iter != c_sorted_candidate.end(); ++iter, ++i_iter)
00578       {         
00579         seed_mz=iter->getMZ();
00580        
00581         if (processed[distance(candidates[c].begin(), candidates[c].MZBegin(seed_mz))] > iter->getIntensity())
00582         {   
00583           continue;
00584         }
00585 
00586         peak_cutoff = getPeakCutOff_ (seed_mz, c+1);
00587         //IsotopeWavelet::getAveragine(seed_mz*(c+1), &peak_cutoff);
00588         //Mark the region as processed
00589         //Do not move this further down, since we have to mark this as processed in any case, 
00590         //even when score <=0; otherwise we would look around the maximum's position unless 
00591         //any significant point is found
00592         iter_start = candidates[c].MZBegin(seed_mz-QUARTER_NEUTRON_MASS/(c+1.));
00593         iter_end = candidates[c].MZEnd(seed_mz+(peak_cutoff-1)-QUARTER_NEUTRON_MASS/(c+1.));
00594         
00595       
00596         for (iter_p=iter_start; iter_p!=iter_end; ++iter_p)
00597         {
00598           help_dist = distance(candidates[c].begin(), iter_p);
00599           processed[help_dist] = processed[help_dist] >= iter->getIntensity() ?  processed[help_dist] : iter->getIntensity();
00600         };      
00601 
00602 
00603         c_score = scoreThis_ (candidates[c], peak_cutoff, seed_mz, c, iter->getIntensity(), threshold);
00604 
00605         if (c_score <= 0)
00606         {
00607           continue;
00608         };
00609 
00610         MZ_start = distance (candidates[c].begin(), iter_start);
00611         MZ_end = distance (candidates[c].begin(), iter_end);
00612 
00613         //Push the seed into its corresponding box (or create a new one, if necessary)
00614         //Do ***NOT*** move this further down!
00615         push2TmpBox_ (seed_mz, scan_index, c, c_score, iter->getIntensity(), candidates[0].getRT(), MZ_start, MZ_end);
00616         
00617         for (Int h=-2; h<=2; ++h)
00618         {
00619           if (h!=0)
00620           {
00621             help_mz = seed_mz + h*NEUTRON_MASS/(c+1.);
00622             iter2 = c_sorted_candidate.MZBegin (help_mz);
00623             if (fabs(iter2->getMZ()-help_mz) > fabs(h)*NEUTRON_MASS/(2*(c+1.)))
00624             {
00625               typename MSSpectrum<PeakType>::const_iterator iter3 (candidates[c].MZBegin(help_mz));
00626               if (iter3 != candidates[c].end())
00627               {
00628                 push2TmpBox_ (iter3->getMZ(), scan_index, c, 0, iter3->getIntensity(), candidates[0].getRT(), MZ_start, MZ_end);
00629               };
00630             };
00631           };
00632         };
00633       };  
00634     };
00635 
00636     clusterSeeds_(candidates, ref, scan_index, candidates.size());
00637   }
00638     
00639   
00640   template <typename PeakType>
00641   void IsotopeWaveletTransform<PeakType>::estimatePeakCutOffs_ (const DoubleReal min_mz, const DoubleReal max_mz, const UInt max_charge) throw () 
00642   {   
00643     std::vector<DoubleReal> x, y;
00644     UInt peak_cutoff=0;
00645     for (DoubleReal i=min_mz; i<max_mz*max_charge; i+=100)
00646     {
00647       IsotopeWavelet::getAveragine (i, &peak_cutoff);
00648       x.push_back (i);
00649       y.push_back (peak_cutoff);
00650     };
00651 
00652 
00653     Math::LinearRegression<std::vector<DoubleReal>::iterator > regress;
00654     regress.computeInterceptXAxis (0.95, x.begin(), x.end(), y.begin());
00655     peak_cutoff_intercept_ = regress.getIntercept();
00656     peak_cutoff_slope_ = regress.getSlope();
00657   }
00658 
00659 
00660   template <typename PeakType>
00661   void IsotopeWaveletTransform<PeakType>::sampleTheWavelet_ (const MSSpectrum<PeakType>& scan, const UInt wavelet_length, 
00662     const UInt mz_index, const DoubleReal offset, const UInt charge, const UInt peak_cutoff, const Int mode) throw ()
00663   {
00664     UInt scan_size = scan.size();
00665     DoubleReal c_pos=scan[mz_index].getMZ(), lambda=IsotopeWavelet::getLambdaQ(c_pos*charge-mode*charge*PROTON_MASS);
00666 
00667     if (mz_index+wavelet_length >= scan_size)
00668     {
00669       psi_ = std::vector<double> (wavelet_length, 0);
00670       return;
00671     }
00672 
00673     DoubleReal cum_spacing=offset;
00674     c_mzs_[0] = scan[mz_index].getMZ();
00675     for (UInt j=1; j<wavelet_length+1; ++j)
00676     {
00677       c_mzs_[j] = scan[mz_index+j].getMZ();
00678       c_spacings_[j-1] = c_mzs_[j]-c_mzs_[j-1];
00679       c_spacings_[j-1] = (c_spacings_[j-1] > 0) ? c_spacings_[j-1] : av_MZ_spacing_;
00680     }
00681 
00682     //Building up (sampling) the wavelet
00683     DoubleReal tz1;
00684     UInt j=0;
00685     for (; j<wavelet_length && cum_spacing<2*peak_cutoff; ++j)
00686     {
00687       tz1=cum_spacing*charge+1;
00688       psi_[j] = (cum_spacing > 0) ? IsotopeWavelet::getValueByLambda (lambda, tz1) : 0;
00689       cum_spacing += c_spacings_[j];
00690     }
00691     for (; j<wavelet_length; ++j)
00692     {
00693       tz1=cum_spacing*charge+1;
00694       psi_[j] = (cum_spacing > 0) ? IsotopeWavelet::getValueByLambdaExtrapol (lambda, tz1) : 0;
00695       cum_spacing += c_spacings_[j];
00696     };
00697 
00698     DoubleReal mean=0;
00699     for (UInt j=0; j<wavelet_length-1; ++j)
00700     {
00701       mean += chordTrapezoidRule_ (scan[(mz_index+j)%scan_size].getMZ(), scan[(mz_index+j+1)%scan_size].getMZ(), psi_[j], psi_[j+1]);
00702     }
00703 
00704     mean /= peak_cutoff;
00705     for (UInt j=0; j<wavelet_length; ++j)
00706     {
00707       psi_[j] -= mean;
00708     }
00709 
00710     #ifdef DEBUG_FEATUREFINDER
00711       if (trunc(c_mzs_[0]) == 1000 || trunc(c_mzs_[0]) == 1700 || trunc(c_mzs_[0]) == 2000 || trunc(c_mzs_[0]) == 3000)
00712       {
00713         std::stringstream stream; stream << "wavelet_" << c_mzs_[0] << "_" << charge+1 << ".dat\0"; 
00714         std::ofstream ofile (stream.str().c_str());
00715         for (unsigned int i=0; i<wavelet_length; ++i)
00716         {
00717           ofile << scan[mz_index+i].getMZ() << "\t" << psi[i] << std::endl;
00718         }
00719         ofile.close();
00720       };
00721     #endif
00722 
00723   }
00724 
00725   
00726   template<typename PeakType>
00727   DoubleReal IsotopeWaveletTransform<PeakType>::scoreThis_ (const MSSpectrum<PeakType>& candidate, 
00728     const UInt peak_cutoff, const DoubleReal seed_mz, const UInt c, const DoubleReal intens, const DoubleReal ampl_cutoff) throw ()
00729   { 
00730     DoubleReal c_score=0, c_check_point=-1, c_val;
00731     std::pair<int, int> c_between_left, c_between_right;        
00732     typename MSSpectrum<PeakType>::const_iterator c_left_iter, c_right_iter, c_iter;
00733 
00734     DoubleReal leftBound, rightBound;
00735     //p_h_ind indicates if we are looking for a whole or a peak
00736     Int p_h_ind=1, end=4*(peak_cutoff-1) -1; //4 times and not 2 times, since we move by 0.5 m/z entities
00737 
00738     std::vector<DoubleReal> xvec, yvec, weights;
00739     std::vector<DoubleReal> xs (DEFAULT_NUM_OF_INTERPOLATION_POINTS), ys(DEFAULT_NUM_OF_INTERPOLATION_POINTS);
00740     UInt i=0;
00741     
00742     for (Int v=1; v<end; ++v, ++p_h_ind)
00743     {   
00744       c_check_point = seed_mz-((peak_cutoff-1)*NEUTRON_MASS-v*0.5*NEUTRON_MASS)/((DoubleReal)c+1);
00745       
00746       leftBound = c_check_point;
00747       rightBound = c_check_point;   
00748 
00749       c_left_iter = candidate.MZBegin (c_check_point);
00750       c_right_iter = c_left_iter;
00751 
00752       //ugly, but the only way to check it I guess
00753       if (c_left_iter == candidate.begin()) continue;   
00754       --c_left_iter;
00755       if (c_right_iter == candidate.end()) continue;
00756       if (++c_right_iter == candidate.end()) continue;
00757     
00758       for (c_iter=c_left_iter, i=0; c_iter!=c_right_iter; ++c_iter, ++i)
00759       {
00760         xs[i] = c_iter->getMZ();  
00761         ys[i] = c_iter->getIntensity();
00762       }
00763       xs[i] = c_iter->getMZ();  
00764       ys[i] = c_iter->getIntensity();
00765 
00766       c_val = getCubicInterpolatedValue_ (xs, c_check_point, ys);
00767 
00768       if (p_h_ind%2 == 1) //I.e. a whole
00769       {
00770         c_score -= c_val;
00771       }
00772       else
00773       {
00774         c_score +=c_val;
00775       };      
00776     };
00777 
00778     #ifdef DEBUG_FEATUREFINDER  
00779       std::ofstream ofile_score ("scores.dat", ios::app);
00780       std::ofstream ofile_check_score ("check_scores.dat", ios::app);
00781       ofile_score << c_check_point << "\t" << c_score << std::endl;
00782       ofile_check_score << c_check_point << "\t" << c_check_score << std::endl;
00783       ofile_score.close();
00784       ofile_check_score.close();
00785     #endif
00786 
00787     if (c_score <= ampl_cutoff+intens)
00788     {
00789       return(0);
00790     };
00791 
00792     return (c_score);
00793   }
00794 
00795 
00796   template <typename PeakType>
00797   DoubleReal IsotopeWaveletTransform<PeakType>::getAvMZSpacing_ (const MSSpectrum<PeakType>& scan, Int start_index, Int end_index) throw ()
00798   { 
00799     DoubleReal av_MZ_spacing=0;
00800     if (end_index < 0)
00801     {
00802       end_index = scan.size();
00803     };
00804     for (Int i=start_index; i<end_index-1; ++i)
00805     {
00806        av_MZ_spacing += scan[i+1].getMZ() - scan[i].getMZ();
00807     };
00808     return (av_MZ_spacing / (DoubleReal) (end_index-1-start_index));
00809   }
00810 
00811 
00812   template <typename PeakType>
00813   DoubleReal IsotopeWaveletTransform<PeakType>::getAvIntens_ (const MSSpectrum<PeakType>& scan) throw ()
00814   { 
00815     DoubleReal av_intens=0;
00816     for (UInt i=0; i<scan.size(); ++i)
00817     {
00818       if (scan[i].getIntensity() >= 0)
00819       {
00820         av_intens += scan[i].getIntensity();
00821       }
00822     };
00823     return (av_intens / (double)scan.size());
00824   }
00825   
00826   template <typename PeakType>
00827   DoubleReal IsotopeWaveletTransform<PeakType>::getSdIntens_ (const MSSpectrum<PeakType>& scan, const DoubleReal mean) throw ()
00828   {
00829     DoubleReal res=0, intens;
00830     for (UInt i=0; i<scan.size(); ++i)
00831     {   
00832       if (scan[i].getIntensity() >= 0)
00833       {
00834         intens = scan[i].getIntensity();
00835         res += (intens-mean)*(intens-mean);
00836       };
00837     };
00838     return (sqrt(res/(double)(scan.size()-1)));
00839   }
00840 
00841 
00842   template <typename PeakType>
00843   DoubleReal IsotopeWaveletTransform<PeakType>::getCubicInterpolatedValue_ (const std::vector<DoubleReal>& x, 
00844     const DoubleReal xi, const std::vector<DoubleReal>& y) throw ()
00845   {
00846     gsl_spline_init (spline_, &x[0], &y[0], x.size());
00847     DoubleReal yi = gsl_spline_eval (spline_, xi, acc_);
00848     return (yi);  
00849   }
00850 
00851 
00852   template <typename PeakType>
00853   std::pair<int, int> IsotopeWaveletTransform<PeakType>::getNearBys_ (const MSSpectrum<PeakType>& signal, const DoubleReal mz, 
00854     const UInt start) const throw ()
00855   {
00856     for (UInt i=start; i<signal.size(); ++i)
00857     {
00858       if (signal[i].getMZ() > mz)
00859       {
00860         if (i>start) //everything's fine
00861         {
00862           return (std::pair<int, int> (i-1, i));
00863         }
00864         else //wrong boundaries!
00865         {
00866           break;
00867         };
00868       };
00869     };
00870 
00871     //not found
00872     return (std::pair<int, int> (-1, -1));
00873   }
00874 
00875 
00876   template <typename PeakType>
00877   void IsotopeWaveletTransform<PeakType>::push2Box_ (const DoubleReal mz, const UInt scan, UInt charge, 
00878     const DoubleReal score, const DoubleReal intens, const DoubleReal rt, const UInt MZ_begin, const UInt MZ_end) throw ()
00879   { 
00880     if (intens <= 0)
00881     {   
00882       #ifdef DEBUG_FEATUREFINDER
00883         std::cout << "Warning: detected candidate with zero ion counts at m/z: " << mz << std::endl;
00884       #endif
00885       return;
00886     };
00887 
00888     typename std::map<DoubleReal, Box_>::iterator upper_iter = open_boxes_.upper_bound(mz);
00889     typename std::map<DoubleReal, Box_>::iterator lower_iter; 
00890     
00891     lower_iter = open_boxes_.lower_bound(mz);
00892     if (lower_iter != open_boxes_.end())
00893     {
00894       //Ugly, but necessary due to the implementation of STL lower_bound
00895       if (mz != lower_iter->first)
00896       {
00897         lower_iter = --(open_boxes_.lower_bound(mz));
00898       };
00899     };
00900     
00901     typename std::map<DoubleReal, Box_>::iterator insert_iter;
00902     bool create_new_box=true;
00903     if (lower_iter == open_boxes_.end()) //I.e. there is no open Box for that mz position
00904     {
00905       //There is another special case to be considered here:
00906       //Assume that the current box contains only a single element that is (slightly) smaller than the new mz value, 
00907       //then the lower bound for the new mz value is box.end and this would usually force a new entry
00908       if (!open_boxes_.empty())
00909       {
00910         if (fabs((--lower_iter)->first - mz) < 0.5*NEUTRON_MASS/(/*charge+*/1.0)) //matching box
00911         {
00912           create_new_box=false;
00913           insert_iter = lower_iter;
00914         };
00915       }
00916       else
00917       {
00918         create_new_box=true;
00919       }
00920     }
00921     else
00922     {
00923       if (upper_iter == open_boxes_.end() && fabs(lower_iter->first - mz) < 0.5*NEUTRON_MASS/(/*charge+*/1.0)) //Found matching Box
00924       {
00925         insert_iter = lower_iter;
00926         create_new_box=false;
00927       }
00928       else
00929       {
00930         create_new_box=true;
00931       };
00932     };
00933 
00934 
00935     if (upper_iter != open_boxes_.end() && lower_iter != open_boxes_.end())
00936     { 
00937       //Here is the question if you should figure out the smallest charge .... and then
00938 
00939       //Figure out which entry is closer to m/z
00940       DoubleReal dist_lower = fabs(lower_iter->first - mz);
00941       DoubleReal dist_upper = fabs(upper_iter->first - mz);
00942       dist_lower = (dist_lower < 0.5*NEUTRON_MASS/(/*charge+*/1.0)) ? dist_lower : INT_MAX;
00943       dist_upper = (dist_upper < 0.5*NEUTRON_MASS/(/*charge+*/1.0)) ? dist_upper : INT_MAX;
00944 
00945       if (dist_lower>=0.5*NEUTRON_MASS/(/*charge+*/1.0) && dist_upper>=0.5*NEUTRON_MASS/(/*charge+*/1.0)) // they are both too far away
00946       {
00947         create_new_box=true;
00948       }
00949       else
00950       {
00951         insert_iter = (dist_lower < dist_upper) ? lower_iter : upper_iter;  
00952         create_new_box=false;
00953       };
00954     }; 
00955   
00956     BoxElement_ element; 
00957     element.c = charge; element.mz = mz; element.score = score; element.RT = rt; element.intens=intens;
00958     element.RT_index = scan; element.MZ_begin = MZ_begin; element.MZ_end = MZ_end; 
00959 
00960 
00961     if (create_new_box == false)
00962     {           
00963       double max=intens;
00964       typename Box_::iterator box_iter;
00965       for (box_iter=insert_iter->second.begin(); box_iter != insert_iter->second.end(); ++box_iter)
00966       {
00967         if (box_iter->second.max_intens > max)
00968         {
00969           max=box_iter->second.max_intens;
00970         };
00971       };
00972       if (max==intens)
00973       {
00974         for (box_iter=insert_iter->second.begin(); box_iter != insert_iter->second.end(); ++box_iter)
00975         {
00976           box_iter->second.max_intens=intens;
00977         };
00978       };
00979       element.max_intens=max;     
00980       
00981       std::pair<UInt, BoxElement_> help2 (scan, element);
00982       insert_iter->second.insert (help2); 
00983 
00984       //Unfortunately, we need to change the m/z key to the average of all keys inserted in that box.
00985       Box_ replacement (insert_iter->second); 
00986 
00987       //We cannot divide both m/z by 2, since we already inserted some m/zs whose weight would be lowered.
00988       //Also note that we already inserted the new entry, leading to size-1.
00989       DoubleReal c_mz = insert_iter->first * (insert_iter->second.size()-1) + mz; 
00990       c_mz /= ((DoubleReal) insert_iter->second.size());    
00991 
00992       //Now let's remove the old and insert the new one
00993       open_boxes_.erase (insert_iter);  
00994       std::pair<DoubleReal, std::multimap<UInt, BoxElement_> > help3 (c_mz, replacement);
00995       open_boxes_.insert (help3);       
00996     }
00997     else
00998     {     
00999       element.max_intens=intens;      
01000       std::pair<UInt, BoxElement_> help2 (scan, element);
01001       std::multimap<UInt, BoxElement_> help3;
01002       help3.insert (help2);
01003       std::pair<DoubleReal, std::multimap<UInt, BoxElement_> > help4 (mz, help3);
01004       open_boxes_.insert (help4);
01005     };
01006   }
01007 
01008 
01009   template <typename PeakType>
01010   void IsotopeWaveletTransform<PeakType>::push2TmpBox_ (const DoubleReal mz, const UInt scan, UInt charge, 
01011     const DoubleReal score, const DoubleReal intens, const DoubleReal rt, const UInt MZ_begin, const UInt MZ_end) throw ()
01012   { 
01013     std::multimap<DoubleReal, Box_>& tmp_box (tmp_boxes_->at(charge));
01014     typename std::map<DoubleReal, Box_>::iterator upper_iter = tmp_box.upper_bound(mz);
01015     typename std::map<DoubleReal, Box_>::iterator lower_iter; 
01016   
01017     lower_iter = tmp_box.lower_bound(mz);
01018     if (lower_iter != tmp_box.end())
01019     {
01020       //Ugly, but necessary due to the implementation of STL lower_bound
01021       if (mz != lower_iter->first)
01022       {
01023         lower_iter = --(tmp_box.lower_bound(mz));
01024       };
01025     };
01026     
01027     typename std::map<DoubleReal, Box_>::iterator insert_iter;
01028     bool create_new_box=true;
01029     if (lower_iter == tmp_box.end()) //I.e. there is no tmp Box for that mz position
01030     {
01031       //There is another special case to be considered here:
01032       //Assume that the current box contains only a single element that is (slightly) smaller than the new mz value, 
01033       //then the lower bound for the new mz value is box.end and this would usually force a new entry
01034       if (!tmp_box.empty())
01035       {
01036         if (fabs((--lower_iter)->first - mz) < 0.5*NEUTRON_MASS/(/*charge+*/1.0)) //matching box
01037         {
01038           create_new_box=false;
01039           insert_iter = lower_iter;
01040         };
01041       }
01042       else
01043       {
01044         create_new_box=true;
01045       }
01046     }
01047     else
01048     {
01049       if (upper_iter == tmp_box.end() && fabs(lower_iter->first - mz) < 0.5*NEUTRON_MASS/(/*charge+*/1.0)) //Found matching Box
01050       {
01051         insert_iter = lower_iter;
01052         create_new_box=false;
01053       }
01054       else
01055       {
01056         create_new_box=true;
01057       };
01058     };
01059 
01060   
01061     if (upper_iter != tmp_box.end() && lower_iter != tmp_box.end())
01062     { 
01063       //Figure out which entry is closer to m/z
01064       DoubleReal dist_lower = fabs(lower_iter->first - mz);
01065       DoubleReal dist_upper = fabs(upper_iter->first - mz);
01066       dist_lower = (dist_lower < 0.5*NEUTRON_MASS/(charge+1.0)) ? dist_lower : INT_MAX;
01067       dist_upper = (dist_upper < 0.5*NEUTRON_MASS/(charge+1.0)) ? dist_upper : INT_MAX;
01068 
01069       if (dist_lower>=0.5*NEUTRON_MASS/(charge+1.0) && dist_upper>=0.5*NEUTRON_MASS/(charge+1.0)) // they are both too far away
01070       {
01071         create_new_box=true;
01072       }
01073       else
01074       {
01075         insert_iter = (dist_lower < dist_upper) ? lower_iter : upper_iter;  
01076         create_new_box=false;
01077       };
01078     }; 
01079 
01080     BoxElement_ element; 
01081     element.c = charge; element.mz = mz; element.score = score; element.RT = rt; element.intens=intens;
01082     element.RT_index = scan; element.MZ_begin = MZ_begin; element.MZ_end = MZ_end; 
01083     
01084     if (create_new_box == false)
01085     {     
01086       double max=intens;
01087       typename Box_::iterator box_iter;
01088       for (box_iter=insert_iter->second.begin(); box_iter != insert_iter->second.end(); ++box_iter)
01089         if (box_iter->second.max_intens > max)
01090           max=box_iter->second.max_intens;
01091       if (max==intens)
01092       for (box_iter=insert_iter->second.begin(); box_iter != insert_iter->second.end(); ++box_iter)
01093         box_iter->second.max_intens=intens;
01094       element.max_intens=max;     
01095       
01096       std::pair<UInt, BoxElement_> help2 (scan, element);
01097       insert_iter->second.insert (help2); 
01098 
01099       //Unfortunately, we need to change the m/z key to the average of all keys inserted in that box.
01100       Box_ replacement (insert_iter->second); 
01101 
01102       //We cannot divide both m/z by 2, since we already inserted some m/zs whose weight would be lowered.
01103       //Also note that we already inserted the new entry, leading to size-1.
01104       DoubleReal c_mz = insert_iter->first * (insert_iter->second.size()-1) + mz; 
01105       c_mz /= ((DoubleReal) insert_iter->second.size());    
01106 
01107       //Now let's remove the old and insert the new one
01108       tmp_box.erase (insert_iter);  
01109       std::pair<DoubleReal, std::multimap<UInt, BoxElement_> > help3 (c_mz, replacement); 
01110       tmp_box.insert (help3);       
01111     }
01112     else
01113     {     
01114       element.max_intens=intens;      
01115       std::pair<UInt, BoxElement_> help2 (scan, element);
01116       std::multimap<UInt, BoxElement_> help3;
01117       help3.insert (help2);
01118       std::pair<DoubleReal, std::multimap<UInt, BoxElement_> > help4 (mz, help3);
01119       tmp_box.insert (help4);
01120     };
01121   }
01122 
01123 
01124 
01125   template <typename PeakType>
01126   void IsotopeWaveletTransform<PeakType>::updateBoxStates (const UInt scan_index, const UInt RT_interleave, 
01127     const UInt RT_votes_cutoff) throw ()
01128   {
01129     typename std::map<DoubleReal, Box_>::iterator iter, iter2;
01130     for (iter=open_boxes_.begin(); iter!=open_boxes_.end(); )
01131     {
01132       //For each Box we need to figure out, if and when the last RT value has been inserted
01133       //If the Box his unchanged since RT_interleave_ scans, we will close the Box.
01134       UInt lastScan = (--(iter->second.end()))->first;
01135       if (scan_index - lastScan > RT_interleave) //I.e. close the box!
01136       {
01137         iter2 = iter;
01138         ++iter2;
01139         if (iter->second.size() >= RT_votes_cutoff)
01140         {
01141           closed_boxes_.insert (*iter);
01142         }
01143         open_boxes_.erase (iter);
01144         iter=iter2;
01145       }
01146       else
01147       {
01148         ++iter;
01149       };
01150     };
01151   }
01152   
01153   
01154   template <typename PeakType>
01155   void IsotopeWaveletTransform<PeakType>::clusterSeeds_ (const std::vector<MSSpectrum<PeakType> >& candidates, 
01156     const MSSpectrum<PeakType>& ref,  const UInt scan_index, const UInt max_charge) throw ()
01157   {
01158     typename std::map<DoubleReal, Box_>::iterator iter;
01159     typename Box_::iterator box_iter;
01160     std::vector<BoxElement_> final_box;
01161     DoubleReal c_mz, c_RT, av_max_intens=0, av_score=0, av_mz=0, av_RT=0, av_intens=0, count=0;
01162     UInt num_o_feature, l_mz, r_mz;
01163 
01164     typename std::pair<DoubleReal, DoubleReal> c_extend;
01165     for (unsigned int c=0; c<max_charge; ++c)
01166     {
01167       std::vector<BoxElement_> final_box;
01168       for (iter=tmp_boxes_->at(c).begin(); iter!=tmp_boxes_->at(c).end(); ++iter)
01169       { 
01170         Box_& c_box = iter->second;
01171         std::vector<DoubleReal> charge_votes (max_charge, 0), charge_binary_votes (max_charge, 0);
01172       
01173         av_max_intens=0, av_score=0, av_mz=0, av_RT=0, av_intens=0, count=0, l_mz=INT_MAX, r_mz=0;
01174         //Now, let's get the RT boundaries for the box
01175         for (box_iter=c_box.begin(); box_iter!=c_box.end(); ++box_iter)
01176         {
01177           c_mz = box_iter->second.mz;
01178           c_RT = box_iter->second.RT;
01179           av_score += box_iter->second.score;
01180           av_intens += box_iter->second.intens;
01181           av_mz += c_mz*box_iter->second.intens;
01182 
01183           if (l_mz > box_iter->second.MZ_begin) l_mz=box_iter->second.MZ_begin;
01184           if (r_mz < box_iter->second.MZ_end) r_mz=box_iter->second.MZ_end;
01185 
01186           ++count;
01187         };
01188 
01189         av_max_intens = c_box.begin()->second.max_intens;
01190         av_intens /= count; 
01191         //in contrast to the key entry of tmp_box_, this mz average is weighted by intensity
01192         av_mz /= count*av_intens;
01193         av_score /= count;
01194         av_RT = c_box.begin()->second.RT;
01195 
01196         BoxElement_ c_box_element;
01197         c_box_element.mz = av_mz;
01198         c_box_element.c = c;
01199         c_box_element.score = av_score;
01200         c_box_element.intens = av_intens;
01201         c_box_element.max_intens = av_max_intens;
01202         c_box_element.RT=av_RT;
01203         final_box.push_back(c_box_element);
01204       };  
01205 
01206       num_o_feature = final_box.size();
01207       if (num_o_feature == 0)
01208       {
01209         tmp_boxes_->at(c).clear();
01210         return;
01211       };
01212 
01213       //Computing the derivatives
01214       std::vector<DoubleReal> bwd_diffs(num_o_feature, 0), fwd_diffs(num_o_feature, 0); //, c_diffs (num_o_feature);
01215       /*c_diffs[0]=0; if (num_o_feature >= 1) c_diffs[num_o_feature-1]=0; 
01216       for (UInt i=1; i<num_o_feature-1; ++i)
01217       {
01218         c_diffs[i] = (final_box[i+1].max_intens-final_box[i-1].max_intens)/(final_box[i+1].mz-final_box[i+1].mz);
01219       };*/
01220 
01221       bwd_diffs[0]=0;
01222       for (UInt i=1; i<num_o_feature; ++i)
01223       {
01224         bwd_diffs[i] = (final_box[i].max_intens-final_box[i-1].max_intens)/(final_box[i].mz-final_box[i-1].mz);
01225       };    
01226 
01227       if (num_o_feature >= 1) fwd_diffs[num_o_feature-1]=0;
01228       for (UInt i=0; i<num_o_feature-1; ++i)
01229       {         
01230         fwd_diffs[i] = (final_box[i+1].max_intens-final_box[i].max_intens)/(final_box[i+1].mz-final_box[i].mz);
01231       };
01232 
01233       #ifdef DEBUG_FEATUREFINDER
01234         std::ofstream ofile_bwd ("bwd.dat"), ofile_fwd ("fwd.dat");
01235         for (unsigned int i=0; i<num_o_feature; ++i)
01236         {
01237           ofile_fwd << final_box[i].mz << "\t" << fwd_diffs[i] << std::endl;
01238           ofile_bwd << final_box[i].mz << "\t" << bwd_diffs[i] << std::endl;
01239         };
01240         ofile_bwd.close(); ofile_fwd.close();
01241       #endif
01242 
01243       for (UInt i=0; i<num_o_feature; ++i)
01244       { 
01245         while (i<num_o_feature-1)
01246         {
01247           if(final_box[i].score>0) //this has been an helping point
01248             break;
01249           ++i;
01250         };
01251 
01252         if (bwd_diffs[i]>0 && fwd_diffs[i]<0) //at the moment we will only use the forward and the backward differences
01253         {
01254           checkPosition_ (candidates[c], ref, final_box[i].mz, final_box[i].c, scan_index); 
01255           continue;
01256         };
01257       };
01258       tmp_boxes_->at(c).clear();
01259     };
01260   }
01261 
01262 
01263   template <typename PeakType>
01264   FeatureMap<Feature> IsotopeWaveletTransform<PeakType>::mapSeeds2Features 
01265     (const MSExperiment<PeakType>& map, const UInt max_charge, const UInt RT_votes_cutoff) throw ()
01266   {
01267     FeatureMap<Feature> feature_map;
01268     typename std::map<DoubleReal, Box_>::iterator iter;
01269     typename Box_::iterator box_iter;
01270     UInt best_charge_index; DoubleReal best_charge_score, c_mz, c_RT; UInt c_charge;  
01271     DoubleReal av_intens=0, av_score=0, av_mz=0, av_RT=0, av_max_intens=0; UInt peak_cutoff;
01272     ConvexHull2D c_conv_hull;
01273 
01274     typename std::pair<DoubleReal, DoubleReal> c_extend;
01275     for (iter=closed_boxes_.begin(); iter!=closed_boxes_.end(); ++iter)
01276     {   
01277       Box_& c_box = iter->second;
01278       std::vector<DoubleReal> charge_votes (max_charge, 0), charge_binary_votes (max_charge, 0);
01279     
01280       //Let's first determine the charge
01281       //Therefor, we can use two types of votes: qualitative ones (charge_binary_votes) or quantitative ones (charge_votes)
01282       for (box_iter=c_box.begin(); box_iter!=c_box.end(); ++box_iter)
01283       {
01284         charge_votes[box_iter->second.c] += box_iter->second.score;
01285         ++charge_binary_votes[box_iter->second.c];
01286       };
01287       
01288       //... determining the best fitting charge
01289       best_charge_index=0; best_charge_score=0; 
01290       for (UInt i=0; i<max_charge; ++i)
01291       {
01292         if (charge_votes[i] > best_charge_score)
01293         {
01294           best_charge_index = i;
01295           best_charge_score = charge_votes[i];
01296         };  
01297       };  
01298       
01299       //Pattern found in too few RT scan 
01300       if (charge_binary_votes[best_charge_index] < RT_votes_cutoff && RT_votes_cutoff <= map.size())
01301       {
01302         continue;
01303       }
01304 
01305       c_charge = best_charge_index + 1; //that's the finally predicted charge state for the pattern
01306 
01307       av_intens=0, av_score=0, av_mz=0, av_RT=0, av_max_intens=0;
01308       //Now, let's get the RT boundaries for the box
01309       std::vector<DPosition<2> > point_set;
01310       for (box_iter=c_box.begin(); box_iter!=c_box.end(); ++box_iter)
01311       {
01312         c_mz = box_iter->second.mz;
01313         c_RT = box_iter->second.RT;
01314         
01315         IsotopeWavelet::getAveragine (c_mz*c_charge, &peak_cutoff);
01316         peak_cutoff = getPeakCutOff_ (c_mz, c_charge);
01317         
01318         point_set.push_back (DPosition<2> (c_RT, c_mz - QUARTER_NEUTRON_MASS/(DoubleReal)c_charge)); 
01319         point_set.push_back (DPosition<2> (c_RT, c_mz + ((peak_cutoff+0.5)*NEUTRON_MASS)/(DoubleReal)c_charge)); 
01320         if (best_charge_index == box_iter->second.c)
01321         {       
01322           av_max_intens += box_iter->second.max_intens;
01323           av_score += box_iter->second.score;
01324           av_intens += box_iter->second.intens;
01325           av_mz += c_mz*box_iter->second.intens;
01326         };
01327         av_RT += c_RT;
01328       };
01329       av_intens /= (DoubleReal)charge_binary_votes[best_charge_index];
01330       av_max_intens /= (DoubleReal)charge_binary_votes[best_charge_index];
01331 
01332       av_mz /= av_intens*(DoubleReal)charge_binary_votes[best_charge_index];
01333       av_score /= (DoubleReal)charge_binary_votes[best_charge_index];
01334       av_RT /= (DoubleReal)c_box.size();
01335 
01336       Feature c_feature;
01337       c_conv_hull = point_set;
01338       c_feature.setCharge (c_charge);
01339       c_feature.setConvexHulls (std::vector<ConvexHull2D> (1, c_conv_hull));
01340       c_feature.setMZ (av_mz);
01341       c_feature.setIntensity (av_max_intens);
01342       c_feature.setRT (av_RT);
01343       c_feature.setQuality (1, av_score);
01344       feature_map.push_back (c_feature);
01345     };    
01346 
01347     return (feature_map);
01348   }
01349 
01350 
01351   template <typename PeakType>
01352   void IsotopeWaveletTransform<PeakType>::checkPosition_ (const MSSpectrum<PeakType>& candidate,
01353     const MSSpectrum<PeakType>& ref, const DoubleReal seed_mz, const UInt c, const UInt scan_index) throw ()
01354   {
01355     typename MSSpectrum<PeakType>::const_iterator right_cutoff, seed, iter, pre_iter, post_iter;
01356     UInt peak_cutoff;
01357     IsotopeWavelet::getAveragine(seed_mz*(c+1), &peak_cutoff);
01358 
01359     right_cutoff = candidate.MZBegin(seed_mz+(peak_cutoff-1)*NEUTRON_MASS/(c+1.));
01360     pre_iter = candidate.MZBegin(seed_mz-NEUTRON_MASS/(c+1.));
01361     seed = candidate.MZBegin(seed_mz);
01362     post_iter = candidate.MZBegin(seed_mz+NEUTRON_MASS/(c+1.));
01363     iter=seed; 
01364     //we can ignore those cases
01365     if (iter==candidate.begin() || iter==candidate.end() || pre_iter==candidate.begin() || post_iter == candidate.end()) 
01366     {
01367       return;
01368     };
01369 
01370     DoubleReal normal = pre_iter->getIntensity() / post_iter->getIntensity();
01371     DoubleReal pre = pre_iter->getIntensity() / iter->getIntensity();
01372 
01373     if (fabs(1-pre) <= fabs(1-normal)) //okay, let's move this peak by 1 Da to the left ...
01374     {
01375       //... but first check if the signal might be caused by an overlapping effect
01376       if ((candidate.MZBegin(pre_iter->getMZ()-NEUTRON_MASS/(c+1.)))->getIntensity() < pre_iter->getIntensity())
01377       { 
01378         //okay, these hard coded values should be checked again, but they definitely cover the *first* critical range
01379         if (seed_mz > 1500 && seed_mz < 2500)
01380         {
01381           iter = candidate.MZBegin(iter->getMZ()-NEUTRON_MASS/(c+1.));
01382         };
01383       };
01384     };
01385 
01386     if (iter->getIntensity() < 1)
01387     {
01388       return;
01389     };
01390 
01391     DoubleReal c_score = scoreThis_ (candidate, peak_cutoff, iter->getMZ(), c, iter->getIntensity(), 0);
01392     
01393     //Correct the position
01394     DoubleReal real_MZ = iter->getMZ();
01395     typename MSSpectrum<PeakType>::const_iterator real_l_MZ_iter = ref.MZBegin(real_MZ-QUARTER_NEUTRON_MASS/(c+1.));    
01396     typename MSSpectrum<PeakType>::const_iterator real_r_MZ_iter = ref.MZBegin(real_MZ+(peak_cutoff-1)*NEUTRON_MASS/(c+1.));
01397 
01398     UInt real_MZ_begin = distance (ref.begin(), real_l_MZ_iter);
01399     UInt real_MZ_end = distance (ref.begin(), real_r_MZ_iter);
01400 
01401     push2Box_ (real_MZ, scan_index, c, c_score, iter->getIntensity(), ref.getRT(), real_MZ_begin, real_MZ_end);
01402   }
01403 
01404 
01405 } //namespace
01406 
01407 #endif 

Generated Tue Apr 1 15:36:35 2008 -- using doxygen 1.5.4 OpenMS / TOPP 1.1