Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages

FeatureFinderAlgorithmPicked.h (Maintainer: Marc Sturm)

Go to the documentation of this file.
00001 // -*- Mode: C++; tab-width: 2; -*-
00002 // vi: set ts=2:
00003 //
00004 // --------------------------------------------------------------------------
00005 //                   OpenMS Mass Spectrometry Framework
00006 // --------------------------------------------------------------------------
00007 //  Copyright (C) 2003-2008 -- Oliver Kohlbacher, Knut Reinert
00008 //
00009 //  This library is free software; you can redistribute it and/or
00010 //  modify it under the terms of the GNU Lesser General Public
00011 //  License as published by the Free Software Foundation; either
00012 //  version 2.1 of the License, or (at your option) any later version.
00013 //
00014 //  This library is distributed in the hope that it will be useful,
00015 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 //  Lesser General Public License for more details.
00018 //
00019 //  You should have received a copy of the GNU Lesser General Public
00020 //  License along with this library; if not, write to the Free Software
00021 //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 //
00023 // --------------------------------------------------------------------------
00024 // $Maintainer: Marc Sturm$
00025 // --------------------------------------------------------------------------
00026 
00027 #ifndef OPENMS_TRANSFORMATIONS_FEATUREFINDER_FEATUREFINDERALGORITHMPICKED_H
00028 #define OPENMS_TRANSFORMATIONS_FEATUREFINDER_FEATUREFINDERALGORITHMPICKED_H
00029 
00030 #include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/FeatureFinderAlgorithm.h>
00031 #include <OpenMS/FORMAT/MzDataFile.h>
00032 #include <OpenMS/FORMAT/FeatureXMLFile.h>
00033 #include <OpenMS/FORMAT/TextFile.h>
00034 #include <OpenMS/CHEMISTRY/IsotopeDistribution.h>
00035 #include <OpenMS/MATH/STATISTICS/Histogram.h>
00036 #include <OpenMS/MATH/STATISTICS/StatisticFunctions.h>
00037 
00038 #include <numeric>
00039 #include <fstream>
00040 
00041 #include <gsl/gsl_rng.h>
00042 #include <gsl/gsl_vector.h>
00043 #include <gsl/gsl_multifit_nlin.h>
00044 #include <gsl/gsl_blas.h>
00045 
00046 namespace OpenMS
00047 {
00063   template<class PeakType, class FeatureType> class FeatureFinderAlgorithmPicked 
00064     : public FeatureFinderAlgorithm<PeakType, FeatureType>,
00065       public FeatureFinderDefs
00066   {
00067     public:
00069 
00070       typedef typename FeatureFinderAlgorithm<PeakType, FeatureType>::MapType MapType;
00071       typedef typename MapType::SpectrumType SpectrumType;
00072       typedef typename PeakType::CoordinateType CoordinateType;
00073       typedef typename PeakType::IntensityType IntensityType;
00075       
00076       using FeatureFinderAlgorithm<PeakType, FeatureType>::map_;
00077       using FeatureFinderAlgorithm<PeakType, FeatureType>::param_;
00078         
00079     protected:
00081       struct PeakInfo
00082       {
00083         Real trace_score; 
00084         Real intensity_score; 
00085         Real pattern_score; 
00086         Real overall_score; 
00087         bool local_max; 
00088         
00090         PeakInfo()
00091           : trace_score(0.0),
00092             intensity_score(0.0),
00093             pattern_score(0.0),
00094             overall_score(0.0),
00095             local_max(false)
00096         {
00097         }
00098       };
00099 
00101       struct Seed
00102       {
00104         UInt spectrum;
00106         UInt peak;
00108         Real intensity;
00109         
00111         bool operator<(const Seed& rhs) const
00112         {
00113           return intensity<rhs.intensity;
00114         }
00115       };
00116       
00118       struct MassTrace
00119       {
00121         const PeakType* max_peak;
00123         DoubleReal max_rt;
00124 
00126         DoubleReal theoretical_int;
00127         
00129         std::vector<std::pair<DoubleReal, const PeakType*> > peaks;
00130         
00132         ConvexHull2D getConvexhull() const
00133         {
00134           ConvexHull2D::PointArrayType hull_points(peaks.size());
00135           for (UInt i=0; i<peaks.size(); ++i)
00136           {
00137             hull_points[i][0] = peaks[i].first;
00138             hull_points[i][1] = peaks[i].second->getMZ();
00139           }
00140           return hull_points;
00141         }
00142         
00144         void updateMaximum()
00145         {
00146           if (peaks.size()==0) return;
00147 
00148           max_rt = peaks.begin()->first;          
00149           max_peak = peaks.begin()->second;
00150           
00151           for (UInt i=1; i<peaks.size(); ++i)
00152           {
00153             if (peaks[i].second->getIntensity()>max_peak->getIntensity())
00154             {
00155               max_rt = peaks[i].first;          
00156               max_peak = peaks[i].second;
00157             }
00158           }
00159         }
00160 
00162         DoubleReal getAvgMZ() const
00163         {
00164           DoubleReal sum = 0.0;
00165           for (UInt i=0; i<peaks.size(); ++i)
00166           {
00167             sum += peaks[i].second->getMZ();
00168           }
00169           return sum / peaks.size();
00170         }
00171         
00173         bool isValid() const
00174         {
00175           return (peaks.size()>=3);
00176         }
00177         
00178       };
00179       
00181       struct MassTraces
00182         : public std::vector<MassTrace>
00183       {
00185         MassTraces()
00186           : max_trace(0)
00187         {
00188         }
00189         
00191         UInt getPeakCount() const
00192         {
00193           UInt sum = 0;
00194           for (UInt i=0; i<this->size(); ++i)
00195           {
00196             sum += this->at(i).peaks.size();
00197           }
00198           return sum;
00199         }
00200         
00202         bool isValid(DoubleReal seed_mz, DoubleReal trace_tolerance)
00203         {
00204           //Abort if too few traces were found
00205           if (this->size()<2) return false;
00206 
00207           //Abort if the seed was removed
00208           for (UInt j=0; j<this->size(); ++j)
00209           {
00210             if (std::fabs(seed_mz-this->at(j).getAvgMZ())<=trace_tolerance)
00211             {
00212               return true;
00213             }
00214           }
00215           return false;
00216         }
00217         
00219         UInt getTheoreticalMax() const throw (Exception::Precondition)
00220         {
00221           if (!this->size())
00222           {
00223             throw Exception::Precondition(__FILE__,__LINE__,__PRETTY_FUNCTION__,"There must be at least one trace to determine the theoretical maximum trace!");
00224           }
00225           
00226           UInt max=0;
00227           DoubleReal max_int=this->at(0).theoretical_int;
00228           for (UInt i=1; i<this->size(); ++i)
00229           {
00230             if (this->at(i).theoretical_int>max_int)
00231             {
00232               max_int = this->at(i).theoretical_int;
00233               max = i;
00234             }
00235           }
00236           return max;
00237         }
00238 
00240         void updateBaseline()
00241         {
00242           if (this->size()==0)
00243           {
00244             baseline = 0.0;
00245             return;
00246           }
00247           bool first = true;          
00248           for (UInt i=0; i<this->size(); ++i)
00249           {
00250             for (UInt j=0; j<this->at(i).peaks.size(); ++j)
00251             {
00252               if (first)
00253               {
00254                 baseline = baseline = this->at(i).peaks[j].second->getIntensity();
00255                 first = false;
00256               }
00257               if (this->at(i).peaks[j].second->getIntensity()<baseline)
00258               {
00259                 baseline = this->at(i).peaks[j].second->getIntensity();
00260               }
00261             }
00262           }
00263         }
00264 
00266         std::pair<DoubleReal,DoubleReal> getRTBounds() const throw (Exception::Precondition)
00267         {
00268           if (!this->size())
00269           {
00270             throw Exception::Precondition(__FILE__,__LINE__,__PRETTY_FUNCTION__,"There must be at least one trace to determine the RT boundaries!");
00271           }
00272           
00273           DoubleReal min = std::numeric_limits<DoubleReal>::max();
00274           DoubleReal max = -std::numeric_limits<DoubleReal>::max();
00275           //Abort if the seed was removed
00276           for (UInt i=0; i<this->size(); ++i)
00277           {
00278             for (UInt j=0; j<this->at(i).peaks.size(); ++j)
00279             {
00280               DoubleReal rt = this->at(i).peaks[j].first;
00281               if (rt>max) max = rt;
00282               if (rt<min) min = rt;
00283             }
00284           }
00285           return std::make_pair(min,max);
00286         }
00287 
00289         UInt max_trace;
00291         DoubleReal baseline;
00292       };
00293       
00295       struct TheoreticalIsotopePattern
00296       {
00298         std::vector<DoubleReal> intensity;
00300         UInt optional_begin;
00302         UInt optional_end;
00304         DoubleReal max;
00306         UInt size() const
00307         {
00308           return intensity.size();
00309         }
00310       };
00311 
00313       struct IsotopePattern
00314       {
00316         std::vector<Int> peak;
00318         std::vector<UInt> spectrum;
00320         std::vector<DoubleReal> intensity;
00322         std::vector<DoubleReal> mz_score;
00324         std::vector<DoubleReal> theoretical_mz;
00326         TheoreticalIsotopePattern theoretical_ints;
00327         
00329         IsotopePattern(UInt size)
00330           : peak(size),
00331             spectrum(size),
00332             intensity(size),
00333             mz_score(size),
00334             theoretical_mz(size)
00335         {
00336         }
00337       };
00338       
00339 
00340     public:     
00342       FeatureFinderAlgorithmPicked() 
00343         : FeatureFinderAlgorithm<PeakType,FeatureType>(),
00344           log_("featurefinder.log")
00345       {
00346         //debugging
00347         this->defaults_.setValue("debug",0,"If not 0 debug mode is activated. Then several files with intermediate results are written.");
00348         this->defaults_.setMinInt("debug",0);
00349         this->defaults_.setMaxInt("debug",1);
00350         //intensity
00351         this->defaults_.setValue("intensity:bins",10,"Number of bins per dimension (RT and m/z).");
00352         this->defaults_.setMinInt("intensity:bins",1);
00353         this->defaults_.setSectionDescription("intensity","Settings for the calculation of a score indicating if a peak's intensity is significant in the local environment (between 0 and 1)");
00354         //mass trace search parameters
00355         this->defaults_.setValue("mass_trace:mz_tolerance",0.06,"m/z difference tolerance of peaks belonging to the same mass trace.");
00356         this->defaults_.setMinFloat("mass_trace:mz_tolerance",0.0);
00357         this->defaults_.setValue("mass_trace:min_spectra",14,"Number of spectra the have to show the same peak mass for a mass trace.");
00358         this->defaults_.setMinInt("mass_trace:min_spectra",1);
00359         this->defaults_.setValue("mass_trace:max_missing",4,"Number of spectra where a high mass deviation or missing peak is acceptable.");
00360         this->defaults_.setMinInt("mass_trace:max_missing",0);
00361         this->defaults_.setValue("mass_trace:slope_bound",0.1,"The maximum slope of mass trace intensities when extending from the highest peak", true);
00362         this->defaults_.setMinFloat("mass_trace:slope_bound",0.0);
00363         this->defaults_.setSectionDescription("mass_trace","Settings for the calculation of a score indicating if a peak is part of a mass trace (between 0 and 1).");
00364         //Isotopic pattern search paramters
00365         this->defaults_.setValue("isotopic_pattern:charge_low",1,"Lowest charge to search for.");
00366         this->defaults_.setMinInt("isotopic_pattern:charge_low",1);
00367         this->defaults_.setValue("isotopic_pattern:charge_high",4,"Highest charge to search for.");
00368         this->defaults_.setMinInt("isotopic_pattern:charge_high",1);
00369         this->defaults_.setValue("isotopic_pattern:mz_tolerance",0.06,"Tolerated mass deviation from the theoretical isotopic pattern.");   
00370         this->defaults_.setMinFloat("isotopic_pattern:mz_tolerance",0.0);
00371         this->defaults_.setValue("isotopic_pattern:intensity_percentage",10.0,"Isotopic peaks that contribute more than this percentage to the overall isotope pattern intensity must be present.", true);
00372         this->defaults_.setMinFloat("isotopic_pattern:intensity_percentage",0.0);
00373         this->defaults_.setMaxFloat("isotopic_pattern:intensity_percentage",100.0);
00374         this->defaults_.setValue("isotopic_pattern:intensity_percentage_optional",0.1,"Isotopic peaks that contribute more than this percentage to the overall isotope pattern intensity can be missing.", true);
00375         this->defaults_.setMinFloat("isotopic_pattern:intensity_percentage_optional",0.0);
00376         this->defaults_.setMaxFloat("isotopic_pattern:intensity_percentage_optional",100.0);
00377         this->defaults_.setValue("isotopic_pattern:optional_fit_improvement",2.0,"Minimal percental improvement of isotope fit to allow leaving out an optional peak.", true);
00378         this->defaults_.setMinFloat("isotopic_pattern:optional_fit_improvement",0.0);
00379         this->defaults_.setMaxFloat("isotopic_pattern:optional_fit_improvement",100.0);
00380         this->defaults_.setValue("isotopic_pattern:mass_window_width",100.0,"Window width in Dalton for precalcuation of estimated isotope distribtions.", true);
00381         this->defaults_.setMinFloat("isotopic_pattern:mass_window_width",1.0);
00382         this->defaults_.setMaxFloat("isotopic_pattern:mass_window_width",200.0);
00383         this->defaults_.setSectionDescription("isotopic_pattern","Settings for the calculation of a score indicating if a peak is part of a isotoipic pattern (between 0 and 1).");
00384         //Feature settings
00385         this->defaults_.setValue("feature:min_feature_score",0.7, "Overall score threshold for a feature to be reported.");
00386         this->defaults_.setMinFloat("feature:min_feature_score",0.0);
00387         this->defaults_.setMaxFloat("feature:min_feature_score",1.0);
00388         this->defaults_.setValue("feature:min_seed_score",0.8,"Minimum score a peak has to have to be used as seed.\nThe score is calculated as the geometric mean of intensity, mass trace score and isotope pattern score.");
00389         this->defaults_.setMinFloat("feature:min_seed_score",0.0);
00390         this->defaults_.setMaxFloat("feature:min_seed_score",1.0);
00391         this->defaults_.setValue("feature:min_isotope_fit",0.8,"Minimum isotope fit quality.", true);
00392         this->defaults_.setMinFloat("feature:min_isotope_fit",0.0);
00393         this->defaults_.setMaxFloat("feature:min_isotope_fit",1.0);
00394         this->defaults_.setValue("feature:min_trace_score",0.5, "Trace score threshold.\nTraces below this threshold are removed after the fit.", true);
00395         this->defaults_.setMinFloat("feature:min_trace_score",0.0);
00396         this->defaults_.setMaxFloat("feature:min_trace_score",1.0);
00397         this->defaults_.setSectionDescription("feature","Settings for the features (intensity, quality assessment, ...)");
00398 
00399         this->defaultsToParam_();
00400       }
00401       
00403       virtual void run()
00404       {
00405         //-------------------------------------------------------------------------
00406         //General initialization
00407         this->features_->reserve(100);
00408         //quality estimation
00409         DoubleReal min_feature_score = param_.getValue("feature:min_feature_score");
00410         //initialize info_
00411         info_.resize(map_->size());
00412         for (UInt s=0; s<map_->size(); ++s)
00413         {
00414           info_[s].resize(map_->at(s).size());
00415         }
00416         bool debug = ((UInt)(param_.getValue("debug"))!=0);
00417         
00418         //---------------------------------------------------------------------------
00419         //Step 1:
00420         //Precalculate intensity scores for peaks
00421         //---------------------------------------------------------------------------
00422         log_ << "Precalculating intensity thresholds ..." << std::endl;
00423         //new scope to make local variables disappear
00424         {
00425           //TODO interplolate scores to avoid edges
00426           this->ff_->startProgress(0, intensity_bins_*intensity_bins_, "Precalculating intensity scores");
00427           DoubleReal rt_start = map_->getMinRT();
00428           DoubleReal mz_start = map_->getMinMZ();
00429           intensity_rt_step_ = (map_->getMaxRT() - rt_start ) / (DoubleReal)intensity_bins_;
00430           intensity_mz_step_ = (map_->getMaxMZ() - mz_start ) / (DoubleReal)intensity_bins_;
00431           intensity_thresholds_.resize(intensity_bins_);
00432           for (UInt rt=0; rt<intensity_bins_; ++rt)
00433           {
00434             intensity_thresholds_[rt].resize(intensity_bins_);
00435             DoubleReal min_rt = rt_start + rt * intensity_rt_step_;
00436             DoubleReal max_rt = rt_start + ( rt + 1 ) * intensity_rt_step_;
00437             std::vector<DoubleReal> tmp;
00438             for (UInt mz=0; mz<intensity_bins_; ++mz)
00439             {
00440               this->ff_->setProgress(rt*intensity_bins_+ mz);
00441               DoubleReal min_mz = mz_start + mz * intensity_mz_step_;
00442               DoubleReal max_mz = mz_start + ( mz + 1 ) * intensity_mz_step_;
00443               //std::cout << "rt range: " << min_rt << " - " << max_rt << std::endl;
00444               //std::cout << "mz range: " << min_mz << " - " << max_mz << std::endl;
00445               tmp.clear();
00446               for (typename MapType::ConstAreaIterator it = map_->areaBeginConst(min_rt,max_rt,min_mz,max_mz); it!=map_->areaEndConst(); ++it)
00447               {
00448                 tmp.push_back(it->getIntensity());
00449               }
00450               intensity_thresholds_[rt][mz] = std::vector<DoubleReal>(21);
00451               if (tmp.size()!=0)
00452               {
00453                 //store quantiles (20)
00454                 std::sort(tmp.begin(), tmp.end());
00455                 for (UInt i=0;i<21;++i)
00456                 {
00457                   UInt index = (UInt)std::floor(0.05*i*(tmp.size()-1));
00458                   intensity_thresholds_[rt][mz][i] = tmp[index];
00459                 }
00460               }
00461             }
00462           }
00463           //store intensity score in PeakInfo
00464           for (UInt s=0; s<map_->size(); ++s)
00465           {
00466             for (UInt p=0; p<map_->at(s).size(); ++p)
00467             {
00468               info_[s][p].intensity_score = intensityScore_(map_->at(s)[p].getIntensity(),s,p);
00469             }
00470           }
00471           //Store intensity score map
00472           if (debug)
00473           {
00474             MapType int_map;
00475             int_map.resize(map_->size());
00476             for (UInt s=0; s<map_->size(); ++s)
00477             {
00478               int_map[s].setRT(map_->at(s).getRT());
00479               for (UInt p=0; p<map_->at(s).size(); ++p)
00480               {
00481                 if (info_[s][p].intensity_score>0)
00482                 {
00483                   PeakType tmp;
00484                   tmp.setPos(map_->at(s)[p].getMZ());
00485                   tmp.setIntensity(info_[s][p].intensity_score);
00486                   int_map[s].push_back(tmp);
00487                 }
00488               }
00489             }
00490             MzDataFile().store("intensity_scores.mzData",int_map);
00491           }
00492           this->ff_->endProgress();
00493         }
00494 
00495         //---------------------------------------------------------------------------
00496         //Step 2:
00497         //Prealculate mass trace scores and local trace maximum for each peak
00498         //---------------------------------------------------------------------------
00499         //new scope to make local variables disappear
00500         {
00501           this->ff_->startProgress(0, map_->size(), "Precalculating mass trace scores");
00502           for (UInt s=0; s<map_->size(); ++s)
00503           {
00504             this->ff_->setProgress(s);
00505             //do nothing for the first few and last few spectra as the scans required to search for traces are missing
00506             if (s<min_spectra_ || s>=map_->size()-min_spectra_)
00507             {
00508               continue;
00509             }
00510             
00511             const SpectrumType& spectrum = map_->at(s);
00512             //iterate over all peaks of the scan
00513             for (UInt p=0; p<spectrum.size(); ++p)
00514             {
00515               std::vector<DoubleReal> scores;
00516               scores.reserve(2*min_spectra_);
00517               
00518               CoordinateType pos = spectrum[p].getMZ();
00519               IntensityType inte = spectrum[p].getIntensity();
00520               
00521               //log_ << std::endl << "Peak: " << pos << std::endl;
00522               bool is_max_peak = true; //checking the maximum intensity peaks -> use them later as feature seeds.
00523               for (UInt i=1; i<=min_spectra_; ++i)
00524               {
00525                 const SpectrumType& spec = map_->at(s+i);
00526                 UInt spec_index = spec.findNearest(pos);
00527                 DoubleReal position_score = positionScore_(pos, spec[spec_index].getMZ(), trace_tolerance_);
00528                 if (position_score >0 && spec[spec_index].getIntensity()>inte) is_max_peak = false;
00529                 scores.push_back(position_score);
00530               }
00531               for (UInt i=1; i<=min_spectra_; ++i)
00532               {
00533                 const SpectrumType& spec = map_->at(s-i);
00534                 UInt spec_index = spec.findNearest(pos);
00535                 DoubleReal position_score = positionScore_(pos, spec[spec_index].getMZ(), trace_tolerance_);
00536                 if (position_score>0 && spec[spec_index].getIntensity()>inte) is_max_peak = false;
00537                 scores.push_back(position_score);
00538               }
00539               //Calculate a consensus score out of the scores calculated before
00540               DoubleReal trace_score = std::accumulate(scores.begin(), scores.end(),0.0) / scores.size();
00541               
00542               //store final score for later use
00543               info_[s][p].trace_score = trace_score;
00544               info_[s][p].local_max = is_max_peak;
00545             }
00546           }
00547           //store mass trace score map
00548           if (debug)
00549           {
00550             MapType trace_map;
00551             trace_map.resize(map_->size());
00552             for (UInt s=0; s<map_->size(); ++s)
00553             {
00554               trace_map[s].setRT(map_->at(s).getRT());
00555               for (UInt p=0; p<map_->at(s).size(); ++p)
00556               {
00557                 if (info_[s][p].trace_score>0.0)
00558                 {
00559                   PeakType tmp;
00560                   tmp.setMZ(map_->at(s)[p].getMZ());
00561                   tmp.setIntensity(info_[s][p].trace_score);
00562                   trace_map[s].push_back(tmp);
00563                 }
00564               }
00565             }
00566             MzDataFile().store("trace_scores.mzData",trace_map);
00567           }
00568           this->ff_->endProgress();
00569         }
00570 
00571         //-------------------------------------------------------------------------
00572         //Step 3:
00573         //Charge loop (create seeds and features for each charge separately)
00574         //-------------------------------------------------------------------------
00575         UInt plot_nr = 0; //counter for the number of plots (debug info)
00576         for (UInt c=(UInt)param_.getValue("isotopic_pattern:charge_low"); c<=(UInt)param_.getValue("isotopic_pattern:charge_high"); ++c)
00577         {
00578           UInt feature_count_before = this->features_->size();
00579           std::vector<Seed> seeds;
00580           //initialize pattern scores
00581           for (UInt s=0; s<map_->size(); ++s)
00582           {
00583             for (UInt p=0; p<map_->at(s).size(); ++p)
00584             {
00585               info_[s][p].pattern_score = 0.0;
00586               info_[s][p].overall_score = 0.0;
00587             }
00588           }
00589           
00590           //-----------------------------------------------------------
00591           //Step 3.1: Precalculate IsotopePattern score
00592           //-----------------------------------------------------------
00593           this->ff_->startProgress(0, map_->size(), String("Calculating isotope pattern scores for charge ")+c);
00594           for (UInt s=0; s<map_->size(); ++s)
00595           {
00596             this->ff_->setProgress(s);
00597             const SpectrumType& spectrum = map_->at(s);
00598             for (UInt p=0; p<spectrum.size(); ++p)
00599             {
00600               DoubleReal mz = spectrum[p].getMZ();
00601               
00602               //get isotope distribution for this mass
00603               const TheoreticalIsotopePattern& isotopes = getIsotopeDistribution_(mz*c);
00604               //determine highest peak in isopope distribution
00605               UInt max_isotope = std::max_element(isotopes.intensity.begin(), isotopes.intensity.end()) - isotopes.intensity.begin();
00606               //Look up expected isotopic peaks (in the current spectrum or adjacent spectra)
00607               UInt peak_index = (UInt)spectrum.findNearest(mz-((DoubleReal)(isotopes.size()+1)/c));
00608               IsotopePattern pattern(isotopes.size());
00609               for (UInt i=0; i<isotopes.size(); ++i)
00610               {
00611                 CoordinateType isotope_pos = mz + ((DoubleReal)i-max_isotope)/c;
00612                 findIsotope_(isotope_pos, s, pattern, i, false, peak_index);
00613               }
00614               DoubleReal pattern_score = isotopeScore_(isotopes, pattern, true, false);
00615               
00616               //update pattern scores of all contained peaks (if necessary)
00617               if (pattern_score > 0.0)
00618               {
00619                 for (UInt i=0; i<pattern.peak.size(); ++i)
00620                 {
00621                   if (pattern.peak[i]>=0 && pattern_score>info_[pattern.spectrum[i]][pattern.peak[i]].pattern_score)
00622                   {
00623                     info_[pattern.spectrum[i]][pattern.peak[i]].pattern_score = pattern_score;
00624                   }
00625                 }
00626               }
00627             }
00628           }
00629           //store mass trace score map
00630           if (debug)
00631           {
00632             MapType pattern_map;
00633             pattern_map.resize(map_->size());
00634             for (UInt s=0; s<map_->size(); ++s)
00635             {
00636               pattern_map[s].setRT(map_->at(s).getRT());
00637               for (UInt p=0; p<map_->at(s).size(); ++p)
00638               {
00639                 if (info_[s][p].pattern_score>0.0)
00640                 {
00641                   Peak1D tmp;
00642                   tmp.setIntensity(info_[s][p].pattern_score);
00643                   tmp.setMZ(map_->at(s)[p].getMZ());
00644                   pattern_map[s].push_back(tmp);
00645                 }
00646               }
00647             }
00648             MzDataFile().store(String("pattern_scores_")+c+".mzData",pattern_map);
00649           }
00650           this->ff_->endProgress();
00651           
00652           //-----------------------------------------------------------
00653           //Step 3.2:
00654           //Find seeds for this charge
00655           //-----------------------------------------------------------   
00656           this->ff_->startProgress(0, map_->size(), String("Finding seeds for charge ")+c);
00657           DoubleReal min_seed_score = param_.getValue("feature:min_seed_score");
00658           for (UInt s=0; s<map_->size(); ++s)
00659           {
00660             this->ff_->setProgress(s);
00661             //do nothing for the first few and last few spectra as the scans required to search for traces are missing
00662             if (s<min_spectra_ || s>=map_->size()-min_spectra_)
00663             {
00664               continue;
00665             }
00666             const SpectrumType& spectrum = map_->at(s);
00667             //iterate over peaks
00668             for (UInt p=0; p<spectrum.size(); ++p)
00669             { 
00670               PeakInfo& info = info_[s][p];
00671               info.overall_score = std::pow(info.intensity_score*info.trace_score*info.pattern_score, 1.0f/3.0f);
00672               //add seed to vector if certain conditions are fullfilled
00673               if (info.local_max && info.overall_score>min_seed_score)
00674               {
00675                 Seed seed;
00676                 seed.spectrum = s;
00677                 seed.peak = p;
00678                 seed.intensity = map_->at(s)[p].getIntensity();               
00679                 seeds.push_back(seed);
00680               }
00681             }
00682           }
00683           //sort seeds according to intensity
00684           std::sort(seeds.rbegin(),seeds.rend());
00685           //create and store seeds map and selected peak map
00686           if (debug)
00687           {
00688             //seeds
00689             FeatureMap<> seed_map;
00690             seed_map.reserve(seeds.size());
00691             for (UInt i=0; i<seeds.size(); ++i)
00692             {
00693               UInt spectrum = seeds[i].spectrum;
00694               UInt peak = seeds[i].peak;
00695               Feature tmp;
00696               tmp.setIntensity(seeds[i].intensity);
00697               tmp.setOverallQuality(info_[spectrum][peak].overall_score);
00698               tmp.setRT(map_->at(spectrum).getRT());
00699               tmp.setMZ(map_->at(spectrum)[peak].getMZ());
00700               tmp.setMetaValue("intensity_score", info_[spectrum][peak].intensity_score);
00701               tmp.setMetaValue("pattern_score", info_[spectrum][peak].pattern_score);
00702               tmp.setMetaValue("trace_score", info_[spectrum][peak].trace_score);
00703               seed_map.push_back(tmp);
00704             }
00705             FeatureXMLFile().store(String("seeds_")+c+".featureXML", seed_map);
00706             
00707             //selected peaks
00708             MapType selected_map;
00709             selected_map.resize(map_->size());
00710             for (UInt s=0; s<map_->size(); ++s)
00711             {
00712               selected_map[s].setRT(map_->at(s).getRT());
00713               if (s<min_spectra_ || s>=map_->size()-min_spectra_)
00714               {
00715                 continue;
00716               }
00717               const SpectrumType& spectrum = map_->at(s);
00718               for (UInt p=0; p<spectrum.size(); ++p)
00719               { 
00720                 if (info_[s][p].overall_score>0.0)
00721                 {
00722                   PeakType tmp;
00723                   tmp.setPos(map_->at(s)[p].getMZ());
00724                   tmp.setIntensity(info_[s][p].overall_score);
00725                   selected_map[s].push_back(tmp);
00726                 }
00727               }
00728             }
00729             MzDataFile().store(String("selected_peaks_")+c+".mzData", selected_map);
00730           }
00731           this->ff_->endProgress();
00732           std::cout << "Found " << seeds.size() << " seeds for charge " << c << "." << std::endl;
00733           
00734           //------------------------------------------------------------------
00735           //Step 3.3:
00736           //Extension of seeds
00737           //------------------------------------------------------------------
00738           this->ff_->startProgress(0,seeds.size(), String("Extending seeds for charge ")+c);
00739           for (UInt i=0; i<seeds.size(); ++i)
00740           {
00741             //------------------------------------------------------------------
00742             //Step 3.3.1:
00743             //Extend all mass traces
00744             //------------------------------------------------------------------
00745             this->ff_->setProgress(i);
00746             log_ << std::endl << "Seed " << i+1 << ":" << std::endl;
00747             //If the intensity is zero this seed is already uses in another feature
00748             const SpectrumType& spectrum = map_->at(seeds[i].spectrum);
00749             const PeakType peak = spectrum[seeds[i].peak];
00750             log_ << " - Int: " << peak.getIntensity() << std::endl;
00751             log_ << " - RT: " << spectrum.getRT() << std::endl;
00752             log_ << " - MZ: " << peak.getMZ() << std::endl;
00753             if (seeds[i].intensity == 0.0)
00754             {
00755               log_ << "Already used in another feature => aborting!" << std::endl;
00756               continue;
00757             }
00758             
00759             //----------------------------------------------------------------
00760             //Find best fitting isotope pattern for this charge (using averagene)
00761             IsotopePattern best_pattern(0);
00762             DoubleReal isotope_fit_quality = findBestIsotopeFit_(seeds[i], c, best_pattern);
00763             if (isotope_fit_quality<min_isotope_fit_)
00764             {
00765               abort_("Could not find good enough isotope pattern containing the seed");
00766               continue;
00767             }
00768             
00769             //extend the convex hull in RT dimension (starting from the trace peaks)
00770             log_ << "Collecting mass traces" << std::endl;
00771             MassTraces traces;
00772             traces.reserve(best_pattern.peak.size());
00773             extendMassTraces_(best_pattern, traces);
00774 
00775             //check if the traces are still valid
00776             DoubleReal seed_mz = map_->at(seeds[i].spectrum)[seeds[i].peak].getMZ();
00777             if (!traces.isValid(seed_mz, trace_tolerance_))
00778             {
00779               abort_("Could not extend seed");
00780               continue;
00781             }
00782             
00783             //------------------------------------------------------------------
00784             //Step 3.3.2:
00785             //Gauss fit (first fit to find the feature boundaries)
00786             //------------------------------------------------------------------
00787             ++plot_nr;
00788             log_ << "Fitting model" << std::endl;
00789             const gsl_multifit_fdfsolver_type *T;
00790             gsl_multifit_fdfsolver *s;
00791             int status;
00792             const size_t param_count = 3;
00793             const size_t data_count = traces.getPeakCount();
00794             gsl_multifit_function_fdf func;
00795 
00796             //paramter estimates (height, x0, sigma)
00797             traces[traces.max_trace].updateMaximum();
00798             DoubleReal height = traces[traces.max_trace].max_peak->getIntensity();
00799             DoubleReal x0 = traces[traces.max_trace].max_rt;
00800             DoubleReal sigma = (traces[traces.max_trace].peaks.back().first-traces[traces.max_trace].peaks[0].first)/4.0;     
00801             double x_init[param_count] = {height, x0, sigma};
00802             log_ << " - estimates - height: " << height << " x0: " << x0 <<  " sigma: " << sigma  << std::endl;
00803 
00804             //baseline estimate
00805             traces.updateBaseline();
00806             traces.baseline = 0.75 * traces.baseline;
00807 
00808             //fit           
00809             gsl_vector_view x = gsl_vector_view_array(x_init, param_count); 
00810             const gsl_rng_type * type;
00811             gsl_rng * r;
00812             gsl_rng_env_setup();
00813             type = gsl_rng_default;
00814             r = gsl_rng_alloc(type);
00815             func.f = &gaussF_;
00816             func.df = &gaussDF_;
00817             func.fdf = &gaussFDF_;
00818             func.n = data_count;
00819             func.p = param_count;
00820             func.params = &traces;
00821             T = gsl_multifit_fdfsolver_lmsder;
00822             s = gsl_multifit_fdfsolver_alloc(T, data_count, param_count);
00823             gsl_multifit_fdfsolver_set(s, &func, &x.vector);
00824             size_t iter = 0;          
00825             do
00826             {
00827               iter++;
00828               status = gsl_multifit_fdfsolver_iterate(s);
00829               if (status) break;
00830               status = gsl_multifit_test_delta(s->dx, s->x, 0.0001, 0.0001);
00831             } 
00832             while (status == GSL_CONTINUE && iter < 5000);
00833             
00834             height = gsl_vector_get(s->x, 0);
00835             x0 = gsl_vector_get(s->x, 1);
00836             sigma = std::fabs(gsl_vector_get(s->x, 2));           
00837             gsl_multifit_fdfsolver_free(s);
00838             log_ << " - fit - height: " << height  << " x0: " << x0 << " sigma: " << sigma << std::endl;
00839             
00840             //------------------------------------------------------------------
00841             //Step 3.3.3:
00842             //Crop feature according to RT fit (2.5*sigma) and remove badly fitting traces
00843             //------------------------------------------------------------------
00844             MassTraces new_traces;
00845             DoubleReal low_bound = x0 - 2.5 * sigma;
00846             DoubleReal high_bound = x0 + 2.5 * sigma;
00847             log_ << "    => RT bounds: " << low_bound << " - " << high_bound << std::endl;
00848             for (UInt t=0; t< traces.size(); ++t)
00849             {
00850               MassTrace& trace = traces[t];
00851               log_ << "   - Trace " << t << ": (" << trace.theoretical_int << ")" << std::endl;
00852 
00853               MassTrace new_trace;
00854               //compute average relative deviation and correlation
00855               DoubleReal deviation = 0.0;
00856               std::vector<DoubleReal> v_theo, v_real;
00857               for (UInt k=0; k<trace.peaks.size(); ++k)
00858               {
00859                 //consider peaks when inside RT bounds only
00860                 if (trace.peaks[k].first>=low_bound && trace.peaks[k].first<= high_bound)
00861                 {
00862                   new_trace.peaks.push_back(trace.peaks[k]);
00863                   DoubleReal theo = traces.baseline + trace.theoretical_int *  height * exp(-0.5 * pow(trace.peaks[k].first - x0, 2) / pow(sigma, 2) );
00864                   v_theo.push_back(theo);
00865                   DoubleReal real = trace.peaks[k].second->getIntensity();
00866                   v_real.push_back(real);
00867                   deviation += std::fabs(real-theo)/theo;
00868                 }
00869               }
00870               DoubleReal fit_score = 0.0;
00871               DoubleReal correlation = 0.0;             
00872               DoubleReal final_score = 0.0;
00873               if (new_trace.peaks.size()!=0)
00874               {
00875                 fit_score = deviation / new_trace.peaks.size();
00876                 correlation = std::max(0.0, Math::pearsonCorrelationCoefficient(v_theo.begin(),v_theo.end(),v_real.begin(), v_real.end()));
00877                 final_score = std::sqrt(correlation * std::max(0.0, 1.0-fit_score));
00878               }
00879               log_ << "     - peaks: " << new_trace.peaks.size() << " / " << trace.peaks.size() << " - relative deviation: " << fit_score << " - correlation: " << correlation << " - final score: " << correlation << std::endl;
00880               //remove badly fitting traces
00881               if ( !new_trace.isValid() || final_score < min_trace_score_)
00882               {
00883                 if (t<traces.max_trace)
00884                 {
00885                   new_traces = MassTraces();
00886                   log_ << "     - removed this and previous traces due to bad fit" << std::endl;
00887                   new_traces.clear(); //remove earlier traces
00888                   continue;
00889                 }
00890                 else if (t==traces.max_trace)
00891                 {
00892                   new_traces = MassTraces();
00893                   log_ << "     - aborting (max trace was removed)" << std::endl;
00894                   break;
00895                 }
00896                 else if (t>traces.max_trace)
00897                 {
00898                   log_ << "     - removed due to bad fit => omitting the rest" << std::endl;
00899                   break; //no more traces are possible
00900                 }
00901               }
00902               //add new trace
00903               else
00904               {
00905                 new_trace.theoretical_int = trace.theoretical_int;
00906                 new_traces.push_back(new_trace);
00907                 if (t==traces.max_trace)
00908                 {
00909                   new_traces.max_trace = new_traces.size()-1;
00910                 }
00911               }
00912             }
00913             new_traces.baseline = traces.baseline;      
00914 
00915             //------------------------------------------------------------------
00916             //Step 3.3.4:
00917             //Check if feature is ok
00918             //------------------------------------------------------------------
00919             bool feature_ok = true;
00920             String error_msg = "";
00921             //check if the feature is valid
00922             if (!new_traces.isValid(seed_mz, trace_tolerance_))
00923             {
00924               feature_ok = false;
00925               error_msg = "Invalid feature after fit";
00926             }
00927             //check if x0 is inside feature bounds
00928             if (feature_ok)
00929             {
00930               std::pair<DoubleReal,DoubleReal> rt_bounds = new_traces.getRTBounds();
00931               if (x0<rt_bounds.first || x0>rt_bounds.second)
00932               {
00933                 feature_ok = false;
00934                 error_msg = "Invalid fit: Center outside of feature bounds";
00935               }
00936             }
00937             //check if the remaining traces fill out at least a third of the RT span (2*2.5 sigma)
00938             //TODO remove magic constant
00939             if (feature_ok)
00940             {
00941               std::pair<DoubleReal,DoubleReal> rt_bounds = new_traces.getRTBounds();
00942               if ((rt_bounds.second-rt_bounds.first)<0.33333*5.0*sigma )
00943               {
00944                 feature_ok = false;
00945                 error_msg = "Invalid fit: Sigma too large";
00946               }
00947             }
00948             //check if feature quality is high enough (average relative deviation and correlation of the whole feature)
00949             DoubleReal fit_score = 0.0;
00950             DoubleReal correlation = 0.0;
00951             DoubleReal final_score = 0.0;
00952             if(feature_ok)
00953             {
00954               std::vector<DoubleReal> v_theo, v_real;
00955               DoubleReal deviation = 0.0;
00956               for (UInt t=0; t< new_traces.size(); ++t)
00957               {
00958                 MassTrace& trace = new_traces[t];
00959                 for (UInt k=0; k<trace.peaks.size(); ++k)
00960                 {
00961                   DoubleReal theo = new_traces.baseline + trace.theoretical_int *  height * exp(-0.5 * pow(trace.peaks[k].first - x0, 2) / pow(sigma, 2) );
00962                   v_theo.push_back(theo);
00963                   DoubleReal real = trace.peaks[k].second->getIntensity();
00964                   v_real.push_back(real);
00965                   deviation += std::fabs(real-theo)/theo;
00966                 }
00967               }
00968               fit_score = std::max(0.0, 1.0 - (deviation / new_traces.getPeakCount()));
00969               correlation = std::max(0.0,Math::pearsonCorrelationCoefficient(v_theo.begin(),v_theo.end(),v_real.begin(), v_real.end()));
00970               final_score = std::sqrt(correlation * fit_score);
00971               if (final_score<min_feature_score)
00972               {
00973                 feature_ok = false;
00974                 error_msg = "Feature quality too low after fit";
00975               }
00976               //quality output
00977               log_ << "Quality estimation:" << std::endl;
00978               log_ << " - relative deviation: " << fit_score << std::endl;
00979               log_ << " - correlation: " << correlation << std::endl;
00980               log_ << " => final score: " << final_score << std::endl;
00981             }
00982                       
00983             //write debug output of feature
00984             if (debug)
00985             {
00986               TextFile tf;
00987               //gnuplot script  
00988               String script = String("plot \"features/") + plot_nr + ".dta\" title 'before fit (RT:" + x0 + " m/z:" + peak.getMZ() + ")' with points 1";
00989               //feature before fit
00990               for (UInt k=0; k<traces.size(); ++k)
00991               {
00992                 for (UInt j=0; j<traces[k].peaks.size(); ++j)
00993                 {
00994                   tf.push_back(String(500.0*k+traces[k].peaks[j].first) + " " + traces[k].peaks[j].second->getIntensity());
00995                 }
00996               }
00997               tf.store(String("features/") + plot_nr + ".dta");
00998               //fitted feature
00999               if (new_traces.getPeakCount()!=0)
01000               {
01001                 tf.clear();
01002                 for (UInt k=0; k<new_traces.size(); ++k)
01003                 {
01004                   for (UInt j=0; j<new_traces[k].peaks.size(); ++j)
01005                   {
01006                     tf.push_back(String(500.0*k+new_traces[k].peaks[j].first) + " " + new_traces[k].peaks[j].second->getIntensity());
01007                   }
01008                 }
01009                 tf.store(String("features/") + plot_nr + "_cropped.dta");
01010                 script = script + ", \"features/" + plot_nr + "_cropped.dta\" title 'feature ";
01011                 if (!feature_ok)
01012                 {
01013                   script = script + " - " + error_msg;
01014                 }
01015                 else
01016                 {
01017                   script = script + (this->features_->size()+1) + " (score: " + final_score + ")";
01018                 }
01019                 script = script + "' with points 3";
01020               }
01021               //fitted functions
01022               tf.clear();
01023               for (UInt k=0; k<traces.size(); ++k)
01024               {
01025                 char fun = 'f';
01026                 fun += k;
01027                 tf.push_back(String(fun)+"(x)= " + traces.baseline + " + " + (traces[k].theoretical_int*height) + " * exp(-0.5*(x-" + (500.0*k+x0) + ")**2/(" + sigma + ")**2)");
01028                 script =  script + ", " + fun + "(x) title 'Trace " + k + " (m/z:" + traces[k].getAvgMZ() + ")'";
01029               }
01030               //output
01031               tf.push_back("set xlabel \"pseudo RT (mass traces side-by-side)\"");
01032               tf.push_back("set ylabel \"intensity\"");
01033               tf.push_back(script);
01034               tf.push_back("pause -1");
01035               tf.store(String("features/") + plot_nr + ".plot");
01036             }
01037             traces = new_traces;
01038             
01039             log_ << "Plot: " << plot_nr << std::endl;
01040             
01041             //validity output
01042             if (!feature_ok)
01043             {
01044               abort_(error_msg);
01045               continue;
01046             }
01047             
01048             //------------------------------------------------------------------
01049             //Step 3.3.5:
01050             //Feature creation
01051             //------------------------------------------------------------------
01052             Feature f;
01053             f.setCharge(c);
01054             //TODO add signal-to-noise (to score?)
01055             f.setOverallQuality(final_score);
01056             if (debug)
01057             {
01058               f.setMetaValue("plot_nr",plot_nr);
01059               f.setMetaValue("score_fit",fit_score);
01060               f.setMetaValue("score_correlation",correlation);
01061             }
01062             f.setRT(x0);
01063             f.setMZ(traces[traces.getTheoreticalMax()].getAvgMZ());
01064             //Calculate intensity based on model only
01065             // - the model does not include the baseline, so we ignore it here
01066             // - as we scaled the isotope distribution to 
01067             f.setIntensity(2.5 * height * sigma / getIsotopeDistribution_(f.getMZ()).max);
01068             //add convex hulls of mass traces
01069             for (UInt j=0; j<traces.size(); ++j)
01070             {
01071               f.getConvexHulls().push_back(traces[j].getConvexhull());
01072             }
01073             this->features_->push_back(f);
01074             log_ << "Feature number: " << this->features_->size() << std::endl;
01075   
01076             //----------------------------------------------------------------
01077             //Remove all seeds that lie inside the convex hull of the new feature
01078             DBoundingBox<2> bb = f.getConvexHull().getBoundingBox();
01079             for (UInt j=i+1; j<seeds.size(); ++j)
01080             {
01081               DoubleReal rt = map_->at(seeds[j].spectrum).getRT();
01082               DoubleReal mz = map_->at(seeds[j].spectrum)[seeds[j].peak].getMZ();
01083               if (bb.encloses(rt,mz) && f.encloses(rt,mz))
01084               {
01085                 //set intensity to zero => the peak will be skipped!
01086                 seeds[j].intensity = 0.0;
01087               }
01088             }
01089           }
01090           this->ff_->endProgress();
01091           std::cout << "Found " << this->features_->size()-feature_count_before << " features candidates for charge " << c << "." << std::endl;
01092         }
01093           
01094         //------------------------------------------------------------------
01095         //Step 4:
01096         //Resolve contradicting and overlapping features
01097         //------------------------------------------------------------------
01098         this->ff_->startProgress(0, this->features_->size()*this->features_->size(), "Resolving overlapping features");
01099         log_ << "Resolving intersecting features" << std::endl;
01100         for (UInt i=0; i<this->features_->size(); ++i)
01101         {
01102           Feature& f1(this->features_->at(i));
01103           for (UInt j=i+1; j<this->features_->size(); ++j)
01104           {
01105             this->ff_->setProgress(i*this->features_->size()+j);
01106             Feature& f2(this->features_->at(j));
01107             if (f1.getIntensity()==0.0 || f2.getIntensity()==0.0) continue;
01108             //act depending on the intersection
01109             DoubleReal intersection = intersection_(f1, f2);
01110             //TODO remove magic constant
01111             if (intersection>=0.35)
01112             {
01113               log_ << " - Intersection (" << (i+1) << "/" << (j+1) << "): " << intersection << std::endl;
01114               if (f1.getCharge()==f2.getCharge())
01115               {
01116                 if (f1.getIntensity()*f1.getOverallQuality()>f2.getIntensity()*f2.getOverallQuality())
01117                 {
01118                   log_ << "   - same charge -> removing duplicate " << (j+1) << std::endl;
01119                   f2.setIntensity(0.0);
01120                 }
01121                 else
01122                 {
01123                   log_ << "   - same charge -> removing duplicate " << (i+1) << std::endl;
01124                   f1.setIntensity(0.0);
01125                 }
01126               }
01127               else if (f2.getCharge()%f1.getCharge()==0)
01128               {
01129                 log_ << "   - different charge -> removing lower charge " << (i+1) << std::endl;
01130                 f1.setIntensity(0.0);
01131               }
01132               else
01133               {
01134                 log_ << "   - contradicting features -> ??? " << std::endl;
01135               }
01136             }
01137           }
01138         }
01139         this->ff_->endProgress();
01140 
01141         std::cout << std::endl;
01142         std::cout << "Abort reasons during feature construction:" << std::endl;
01143         for (std::map<String,UInt>::const_iterator it=aborts_.begin(); it!=aborts_.end(); ++it)
01144         {
01145           std::cout << "- " << it->first << ": " << it->second << std::endl;
01146         }
01147       }
01148       
01149       static FeatureFinderAlgorithm<PeakType,FeatureType>* create()
01150       {
01151         return new FeatureFinderAlgorithmPicked();
01152       }
01153 
01154       static const String getProductName()
01155       {
01156         return "picked_peak";
01157       }
01158   
01159     protected:
01161       std::ofstream log_; 
01163       std::map<String, UInt> aborts_;
01164             
01166 
01167       DoubleReal pattern_tolerance_; 
01168       DoubleReal trace_tolerance_; 
01169       UInt min_spectra_; 
01170       UInt max_missing_trace_peaks_; 
01171       DoubleReal slope_bound_; 
01172       DoubleReal intensity_percentage_; 
01173       DoubleReal intensity_percentage_optional_; 
01174       DoubleReal optional_fit_improvement_; 
01175       DoubleReal mass_window_width_; 
01176       UInt intensity_bins_; 
01177       DoubleReal min_isotope_fit_; 
01178       DoubleReal min_trace_score_; 
01179 
01180 
01182 
01183 
01184       DoubleReal intensity_rt_step_;
01186       DoubleReal intensity_mz_step_;
01188       std::vector< std::vector< std::vector<DoubleReal> > > intensity_thresholds_;
01190 
01192       std::vector< std::vector<PeakInfo> > info_;
01193       
01195       std::vector< TheoreticalIsotopePattern > isotope_distributions_;
01196 
01197       //Docu in base class
01198       virtual void updateMembers_()
01199       {
01200         pattern_tolerance_ = param_.getValue("mass_trace:mz_tolerance");
01201         trace_tolerance_ = param_.getValue("isotopic_pattern:mz_tolerance");
01202         min_spectra_ = (UInt)std::floor((DoubleReal)param_.getValue("mass_trace:min_spectra")*0.5);
01203         max_missing_trace_peaks_ = param_.getValue("mass_trace:max_missing");
01204         slope_bound_ = param_.getValue("mass_trace:slope_bound");
01205         intensity_percentage_ = (DoubleReal)param_.getValue("isotopic_pattern:intensity_percentage")/100.0;
01206         intensity_percentage_optional_ = (DoubleReal)param_.getValue("isotopic_pattern:intensity_percentage_optional")/100.0;
01207         optional_fit_improvement_ = (DoubleReal)param_.getValue("isotopic_pattern:optional_fit_improvement")/100.0;
01208         mass_window_width_ = param_.getValue("isotopic_pattern:mass_window_width");
01209         intensity_bins_ =  param_.getValue("intensity:bins");
01210         min_isotope_fit_ = param_.getValue("feature:min_isotope_fit");
01211         min_trace_score_ = param_.getValue("feature:min_trace_score");
01212       }
01213       
01215       void abort_(const String& reason)
01216       {
01217         log_ << "Abort: " << reason << std::endl;
01218         aborts_[reason]++;
01219       }
01220 
01223       DoubleReal intersection_(const Feature& f1, const Feature& f2)
01224       {
01225         //calculate the RT range sum of feature 1
01226         DoubleReal s1 = 0.0;
01227         std::vector<ConvexHull2D> hulls1 = f1.getConvexHulls();
01228         for (UInt i=0; i<hulls1.size(); ++i)
01229         {
01230           s1 += hulls1[i].getBoundingBox().width();
01231         }
01232         
01233         //calculate the RT range sum of feature 2
01234         DoubleReal s2 = 0.0;
01235         std::vector<ConvexHull2D> hulls2 = f2.getConvexHulls();
01236         for (UInt j=0; j<hulls2.size(); ++j)
01237         {
01238           s2 += hulls2[j].getBoundingBox().width();
01239         }
01240         
01241         //calculate overlap
01242         DoubleReal overlap = 0.0;
01243         for (UInt i=0; i<hulls1.size(); ++i)
01244         {
01245           DBoundingBox<2> bb1 = hulls1[i].getBoundingBox();
01246           for (UInt j=0; j<hulls2.size(); ++j)
01247           {
01248             DBoundingBox<2> bb2 = hulls2[j].getBoundingBox();
01249             if (bb1.intersects(bb2))
01250             {
01251               if (bb1.min()[0]<=bb2.min()[0] && bb1.max()[0]>=bb2.max()[0]) //bb1 contains bb2
01252               {
01253                 overlap += bb2.width();
01254               }
01255               else if (bb2.min()[0]<=bb1.min()[0] && bb2.max()[0]>=bb1.max()[0]) //bb2 contains bb1
01256               {
01257                 overlap += bb1.width();
01258               }
01259               else if (bb1.min()[0]<=bb2.min()[0] && bb1.max()[0]<=bb2.max()[0]) //the end of bb1 overlaps with bb2
01260               {
01261                 overlap += bb1.max()[0]-bb2.min()[0];
01262               }
01263               else if (bb2.min()[0]<=bb1.min()[0] && bb2.max()[0]<=bb1.max()[0]) //the end of bb2 overlaps with bb1
01264               {
01265                 overlap += bb2.max()[0]-bb1.min()[0];
01266               }
01267             }
01268           }
01269         }
01270         
01271         return overlap/std::min(s1,s2);
01272       }
01273       
01275       const TheoreticalIsotopePattern& getIsotopeDistribution_(DoubleReal mass)
01276       {
01277         //calculate index in the vector
01278         UInt index = (UInt)std::floor(mass/mass_window_width_);
01279         
01280         //enlarge vector if necessary
01281         if (index>=isotope_distributions_.size())
01282         {
01283           isotope_distributions_.resize(index+1);
01284         }
01285         
01286         //calculate distribution if necessary
01287         if (isotope_distributions_[index].intensity.size()==0)
01288         {
01289           //log_ << "Calculating iso dist for mass: " << 0.5*mass_window_width_ + index * mass_window_width_ << std::endl;
01290           IsotopeDistribution d;
01291           d.setMaxIsotope(20);
01292           d.estimateFromPeptideWeight(0.5*mass_window_width_ + index * mass_window_width_);
01293           d.trimLeft(intensity_percentage_optional_);
01294           d.trimRight(intensity_percentage_optional_);
01295           for (IsotopeDistribution::Iterator it=d.begin(); it!=d.end(); ++it)
01296           {
01297             isotope_distributions_[index].intensity.push_back(it->second);
01298             //log_ << " - " << it->second << std::endl;
01299           }
01300           //determine the number of optional peaks at the beginning/end
01301           UInt begin = 0;
01302           UInt end = 0;
01303           bool is_begin = true;
01304           bool is_end = false;
01305           for (UInt i=0; i<isotope_distributions_[index].intensity.size(); ++i)
01306           {
01307             if (isotope_distributions_[index].intensity[i]<intensity_percentage_)
01308             {
01309               if (!is_end && !is_begin) is_end = true;
01310               if (is_begin) ++begin;
01311               else if (is_end) ++end;
01312             }
01313             else if (is_begin)
01314             {
01315               is_begin = false;
01316             }
01317           }
01318           isotope_distributions_[index].optional_begin = begin;
01319           isotope_distributions_[index].optional_end = end;
01320           //scale the distibution to a maximum of 1
01321           DoubleReal max = 0.0;
01322           for (UInt i=0; i<isotope_distributions_[index].intensity.size(); ++i)
01323           {
01324             if (isotope_distributions_[index].intensity[i]>max) max = isotope_distributions_[index].intensity[i];
01325           }
01326           isotope_distributions_[index].max = max;
01327           for (UInt i=0; i<isotope_distributions_[index].intensity.size(); ++i)
01328           {
01329             isotope_distributions_[index].intensity[i] /= max;
01330           }
01331           
01332           //log_ << " - optinal begin/end:" << begin << " / " << end << std::endl;
01333         }
01334         //Return distribution
01335         return isotope_distributions_[index];
01336       }
01337             
01345       DoubleReal findBestIsotopeFit_(const Seed& center, UInt charge, IsotopePattern& best_pattern)
01346       {
01347         log_ << "Testing isotope patterns for charge " << charge << ": " << std::endl;      
01348         const SpectrumType& spectrum = map_->at(center.spectrum);
01349         const TheoreticalIsotopePattern& isotopes = getIsotopeDistribution_(spectrum[center.peak].getMZ()*charge);  
01350         log_ << " - Seed: " << center.peak << " (mz:" << spectrum[center.peak].getMZ()<< ")" << std::endl;
01351         
01352         //Find m/z boundaries of search space (linear search as this is local and we have the center already)
01353         DoubleReal mass_window = (DoubleReal)(isotopes.size()+1) / (DoubleReal)charge;
01354         log_ << " - Mass window: " << mass_window << std::endl;
01355         UInt end = center.peak;
01356         while(end<spectrum.size() && spectrum[end].getMZ()<spectrum[center.peak].getMZ()+mass_window)
01357         {
01358           ++end;
01359         }
01360         --end;
01361         //search begin
01362         Int begin = center.peak;
01363         while(begin>=0 && spectrum[begin].getMZ()>spectrum[center.peak].getMZ()-mass_window)
01364         {
01365           --begin;
01366         }
01367         ++begin;
01368         log_ << " - Begin: " << begin << " (mz:" << spectrum[begin].getMZ()<< ")" << std::endl;
01369         log_ << " - End: " << end << " (mz:" << spectrum[end].getMZ()<< ")" << std::endl;
01370 
01371         //fit isotope distribution to peaks
01372         DoubleReal max_score = 0.0;
01373         for (UInt start=begin; start<=end; ++start)
01374         {
01375           //find isotope peaks for the current start peak
01376           UInt peak_index = start;
01377           IsotopePattern pattern(isotopes.size());
01378           pattern.intensity[0] = spectrum[start].getIntensity();
01379           pattern.peak[0] = start;
01380           pattern.spectrum[0] = center.spectrum;
01381           pattern.mz_score[0] = 1.0;
01382           pattern.theoretical_mz[0] = spectrum[start].getMZ();
01383           log_ << " - Fitting at " << start << " (mz:" << spectrum[start].getMZ() << ")" << std::endl;
01384           log_ << "   - Isotope 0: " << pattern.intensity[0] << std::endl;
01385           for (UInt iso=1; iso<isotopes.size(); ++iso)
01386           {
01387             DoubleReal pos = spectrum[start].getMZ() + iso/(DoubleReal)charge;
01388             findIsotope_(pos, center.spectrum, pattern, iso, true, peak_index);
01389           }
01390           
01391           //check if the seed is contained, otherwise abort
01392           bool seed_contained = false;
01393           for (UInt iso=0; iso<pattern.peak.size(); ++iso)
01394           {
01395             if (pattern.peak[iso]==(Int)center.peak && pattern.spectrum[iso]==center.spectrum)
01396             {
01397               seed_contained = true;
01398               break;
01399             }
01400           }
01401           if(!seed_contained)
01402           {
01403             log_ << "   - aborting: seed is not contained!" << std::endl;
01404             continue;
01405           }
01406 
01407           DoubleReal score = isotopeScore_(isotopes, pattern, false, true);
01408   
01409           //check if the seed is still contained, otherwise abort
01410           seed_contained = false;
01411           for (UInt iso=0; iso<pattern.peak.size(); ++iso)
01412           {
01413             if (pattern.peak[iso]==(Int)center.peak && pattern.spectrum[iso]==center.spectrum)
01414             {
01415               seed_contained = true;
01416               break;
01417             }
01418           }
01419           if(!seed_contained)
01420           {
01421             log_ << "   - aborting: seed was removed during isotope fit!" << std::endl;
01422             continue;
01423           }
01424           
01425           log_ << "   - final score: " << score << std::endl;
01426           if (score>max_score)
01427           {
01428             max_score = score;
01429             best_pattern = pattern;
01430           }
01431         }
01432         log_ << " - best score: " << max_score << std::endl;
01433         best_pattern.theoretical_ints = isotopes;
01434         return max_score;
01435       }
01436       
01438       void extendMassTraces_(const IsotopePattern& pattern, MassTraces& traces)
01439       {
01440         //find index of the trace with the maximum intensity
01441         DoubleReal max_int =  0.0;
01442         UInt max_trace_index = 0;
01443         for (UInt p=0; p<pattern.peak.size(); ++p)
01444         {
01445           if (pattern.peak[p]<0) continue; //skip missing and removed traces
01446           if (map_->at(pattern.spectrum[p])[pattern.peak[p]].getIntensity()>max_int)
01447           {
01448             max_int = map_->at(pattern.spectrum[p])[pattern.peak[p]].getIntensity();
01449             max_trace_index = p;
01450           }
01451         }
01452         
01453         //extend the maximum intensity trace to determine the boundaries in RT dimension
01454         UInt start_index = pattern.spectrum[max_trace_index];
01455         const PeakType* start_peak = &(map_->at(pattern.spectrum[max_trace_index])[pattern.peak[max_trace_index]]);
01456         DoubleReal start_mz = start_peak->getMZ();
01457         DoubleReal start_rt = map_->at(start_index).getRT();
01458         log_ << " - Trace " << max_trace_index << " (maximum intensity)" << std::endl;
01459         log_ << "   - extending from: " << map_->at(start_index).getRT() << " / " << start_mz << " (int: " << start_peak->getIntensity() << ")" << std::endl;
01460         //initialize the trace and extend
01461         MassTrace max_trace;
01462         max_trace.peaks.push_back(std::make_pair(start_rt,start_peak));
01463         extendMassTrace_(max_trace, start_index, start_mz, false);
01464         extendMassTrace_(max_trace, start_index, start_mz, true);
01465 
01466         DoubleReal rt_max = max_trace.peaks.back().first;
01467         DoubleReal rt_min = max_trace.peaks.begin()->first;
01468         log_ << "   - rt bounds: " << rt_min << "-" << rt_max << std::endl;
01469         //Abort if too few peak were found
01470         if (max_trace.peaks.size()<2*min_spectra_-max_missing_trace_peaks_)
01471         {
01472           log_ << "   - could not extend trace with maximum intensity => abort" << std::endl;
01473           return;
01474         }
01475         for (UInt p=0; p<pattern.peak.size(); ++p)
01476         {
01477           log_ << " - Trace " << p << std::endl;
01478           if (p==max_trace_index)
01479           {
01480             log_ << "   - previously extended maximum trace" << std::endl;
01481             traces.push_back(max_trace);
01482             traces.back().theoretical_int = pattern.theoretical_ints.intensity[p];
01483             traces.max_trace = traces.size()-1;
01484             continue;
01485           }
01486           Seed starting_peak;
01487           starting_peak.spectrum = pattern.spectrum[p];
01488           starting_peak.peak = pattern.peak[p];
01489           if (pattern.peak[p]==-2)
01490           {
01491             log_ << "   - removed during isotope fit" << std::endl;
01492             continue;
01493           }
01494           else if (pattern.peak[p]==-1)
01495           {
01496             log_ << "   - missing" << std::endl;
01497             continue;
01498           }
01499           starting_peak.intensity = map_->at(starting_peak.spectrum)[starting_peak.peak].getIntensity();
01500           log_ << "   - trace seed: " << map_->at(starting_peak.spectrum).getRT() << " / " << map_->at(starting_peak.spectrum)[starting_peak.peak].getMZ() << " (int: " << map_->at(starting_peak.spectrum)[starting_peak.peak].getIntensity() << ")" << std::endl;
01501           
01502           //search for nearby maximum of the mass trace as the extension assumes that it starts at the maximum
01503           UInt begin = std::max(0u,starting_peak.spectrum-min_spectra_);
01504           UInt end = std::min(starting_peak.spectrum+min_spectra_,(UInt)map_->size());
01505           DoubleReal mz = map_->at(starting_peak.spectrum)[starting_peak.peak].getMZ();
01506           DoubleReal inte = map_->at(starting_peak.spectrum)[starting_peak.peak].getIntensity();
01507           for (UInt spectrum_index=begin; spectrum_index<end; ++spectrum_index)
01508           {
01509             //find better seeds (no-empty scan/low mz diff/higher intensity)
01510             Int peak_index = map_->at(spectrum_index).findNearest(map_->at(starting_peak.spectrum)[starting_peak.peak].getMZ());
01511             if (peak_index==-1 ||
01512                 map_->at(spectrum_index)[peak_index].getIntensity()<=inte ||
01513                 std::fabs(mz-map_->at(spectrum_index)[peak_index].getMZ())>=pattern_tolerance_
01514                ) continue;
01515             starting_peak.spectrum = spectrum_index;
01516             starting_peak.peak = peak_index;
01517             inte = map_->at(spectrum_index)[peak_index].getIntensity();
01518           }
01519           log_ << "   - extending from: " << map_->at(starting_peak.spectrum).getRT() << " / " << map_->at(starting_peak.spectrum)[starting_peak.peak].getMZ() << " (int: " << map_->at(starting_peak.spectrum)[starting_peak.peak].getIntensity() << ")" << std::endl;
01520           
01521           //------------------------------------------------------------------
01522           //Extend seed to a mass trace
01523           MassTrace trace;
01524           const PeakType* seed = &(map_->at(starting_peak.spectrum)[starting_peak.peak]);
01525           //initialize trace with seed data and extend
01526           trace.peaks.push_back(std::make_pair(map_->at(starting_peak.spectrum).getRT(),seed));
01527           extendMassTrace_(trace, starting_peak.spectrum, seed->getMZ(), false, rt_min, rt_max);
01528           extendMassTrace_(trace, starting_peak.spectrum, seed->getMZ(), true, rt_min, rt_max);
01529           
01530           //check if enough peaks were found
01531           if (!trace.isValid())
01532           {
01533             log_ << "   - could not extend trace " << std::endl;
01534             //Missing traces in the middle of a pattern are not acceptable => fix this
01535             if (p<traces.max_trace)
01536             {
01537               traces.clear(); //remove earlier traces
01538               continue;
01539             }
01540             else if (p>traces.max_trace)
01541             {
01542               break; //no more traces are possible
01543             }
01544           }
01545           traces.push_back(trace);
01546           traces.back().theoretical_int = pattern.theoretical_ints.intensity[p];
01547         }
01548       }
01549 
01560       void extendMassTrace_(MassTrace& trace, Int spectrum_index, DoubleReal mz, bool inc_rt, DoubleReal min_rt=0.0, DoubleReal max_rt = 0.0)
01561       {
01562         //Reverse peaks if we run the method for the second time (to keep them in chronological order)
01563         if (inc_rt)
01564         {
01565           ++spectrum_index;
01566           std::reverse(trace.peaks.begin(), trace.peaks.end());
01567         }
01568         else
01569         {
01570           --spectrum_index;
01571         }
01572         //check if boundaries are set
01573         bool boundaries = false;
01574         if (max_rt != min_rt)
01575         {
01576           boundaries = true;
01577         }
01578         //Relax slope theshold if there is a hard boundary for the extension
01579         DoubleReal current_slope_bound = (1.0 + (DoubleReal)boundaries) * slope_bound_;
01580         UInt delta_count = min_spectra_;
01581         DoubleReal last_int = trace.peaks.back().second->getIntensity();
01582         std::vector<DoubleReal> deltas(delta_count-1, 0);
01583         //deltas.reserve(2*delta_count);
01584         UInt missing_peaks = 0;
01585         UInt peaks_before = trace.peaks.size();
01586         String abort_reason = "";
01587         while((!inc_rt && spectrum_index>=0) || (inc_rt && spectrum_index<(Int)map_->size()))
01588         {
01589           if(boundaries && ((!inc_rt && map_->at(spectrum_index).getRT()<min_rt) || (inc_rt && map_->at(spectrum_index).getRT()>max_rt)) )
01590           {
01591             abort_reason = "Hit upper/lower boundary";
01592             break;
01593           }
01594           Int peak_index = map_->at(spectrum_index).findNearest(mz);
01595           if (peak_index<0 || info_[spectrum_index][peak_index].overall_score<0.01 || positionScore_( mz, map_->at(spectrum_index)[peak_index].getMZ(), trace_tolerance_)==0.0)
01596           {
01597             ++missing_peaks;
01598             if(missing_peaks>max_missing_trace_peaks_)
01599             {
01600               abort_reason = "too many peaks missing";
01601               break;
01602             }
01603           }
01604           else
01605           {
01606             missing_peaks = 0;
01607             //add last peak to trace
01608             trace.peaks.push_back(std::make_pair(map_->at(spectrum_index).getRT(),&(map_->at(spectrum_index)[peak_index])));
01609             //update ints and deltas 
01610             deltas.push_back( (map_->at(spectrum_index)[peak_index].getIntensity() - last_int) / last_int);
01611             last_int = map_->at(spectrum_index)[peak_index].getIntensity();
01612 
01613             //Abort if the average delta is too big (as intensity increases then)
01614             DoubleReal average_delta = std::accumulate(deltas.end()-delta_count,deltas.end(),0.0) / (DoubleReal)delta_count;
01615             if ( average_delta > current_slope_bound)
01616             {
01617               abort_reason = String("Average delta above threshold: ")+average_delta+"/"+current_slope_bound;
01618               //remove last peaks as we extended too far
01619               UInt remove = std::min((UInt)(trace.peaks.size()-peaks_before),delta_count-1);
01620               trace.peaks.erase(trace.peaks.end()-remove,trace.peaks.end());
01621               break;
01622             }
01623           }
01624           //increase/decrease scan index
01625           if (inc_rt) ++spectrum_index; else --spectrum_index;
01626         }
01627         log_ << "   - Added " << (trace.peaks.size()-peaks_before) << " peaks (abort: " << abort_reason << ")" << std::endl;
01628       }
01629 
01631       template <typename SpectrumType>
01632       UInt nearest_(CoordinateType pos, const SpectrumType& spec, UInt start) const
01633       {
01634         UInt index = start;
01635         CoordinateType dist = std::fabs(pos-spec[index].getMZ());
01636         ++index;
01637         while (index < spec.size())
01638         {
01639           CoordinateType new_dist = std::fabs(pos-spec[index].getMZ());
01640           if (new_dist<dist)
01641           {
01642             dist = new_dist;
01643             ++index;  
01644           }
01645           else
01646           {
01647             break;
01648           }
01649         }
01650         return --index; 
01651       }
01652       
01663       void findIsotope_(CoordinateType pos, UInt spectrum_index, IsotopePattern& pattern, UInt pattern_index, bool debug, UInt& peak_index)
01664       {
01665         //search in the center spectrum
01666         const SpectrumType& spectrum = map_->at(spectrum_index);
01667         peak_index = nearest_(pos, spectrum, peak_index);
01668         DoubleReal mz_score = positionScore_(pos, spectrum[peak_index].getMZ(), pattern_tolerance_);
01669         pattern.theoretical_mz[pattern_index] = pos;
01670         if (mz_score!=0.0)
01671         {
01672           if (debug) log_ << "   - Isotope " << pattern_index << ": " << spectrum[peak_index].getIntensity() << std::endl;
01673           pattern.peak[pattern_index] = peak_index;
01674           pattern.spectrum[pattern_index] = spectrum_index;
01675           pattern.mz_score[pattern_index] = mz_score;
01676           pattern.intensity[pattern_index] = spectrum[peak_index].getIntensity();
01677           return;
01678         }
01679         //try to find the mass in the previous spectrum
01680         if (spectrum_index!=0)
01681         {
01682           const SpectrumType& spectrum_before = map_->at(spectrum_index-1);
01683           UInt index_before = spectrum_before.findNearest(pos);
01684           if (positionScore_(pos, spectrum_before[index_before].getMZ(), pattern_tolerance_)!=0.0)
01685           {
01686             if (debug) log_ << "   - Isotope " << pattern_index << ": " << spectrum_before[index_before].getIntensity() << " - previous spectrum" << std::endl;
01687             pattern.peak[pattern_index] = index_before;
01688             pattern.spectrum[pattern_index] = spectrum_index-1;
01689             pattern.mz_score[pattern_index] = positionScore_(pos, spectrum_before[index_before].getMZ(), pattern_tolerance_);
01690             pattern.intensity[pattern_index] = spectrum_before[index_before].getIntensity();
01691             return;
01692           }
01693         }
01694         //try to find the mass in the next spectrum
01695         if (spectrum_index!=map_->size()-1)
01696         {
01697           const SpectrumType& spectrum_after = map_->at(spectrum_index+1);
01698           UInt index_after = spectrum_after.findNearest(pos);
01699           if (positionScore_(pos, spectrum_after[index_after].getMZ(), pattern_tolerance_)!=0.0)
01700           {
01701             if (debug) if (debug) log_ << "   - Isotope " << pattern_index << ": " << spectrum_after[index_after].getIntensity() << " - next spectrum" << std::endl;
01702             pattern.peak[pattern_index] = index_after;
01703             pattern.spectrum[pattern_index] = spectrum_index+1;
01704             pattern.mz_score[pattern_index] = positionScore_(pos, spectrum_after[index_after].getMZ(), pattern_tolerance_);
01705             pattern.intensity[pattern_index] = spectrum_after[index_after].getIntensity();
01706             return;
01707           }
01708         }
01709         //no isotope found
01710         if (debug) log_ << "   - Isotope " << pattern_index << ": missing" << std::endl;
01711         pattern.peak[pattern_index] = -1;
01712         pattern.mz_score[pattern_index] = 0.0;
01713         pattern.intensity[pattern_index] = 0.0;
01714       }
01715 
01717       DoubleReal positionScore_(CoordinateType pos1, CoordinateType pos2, DoubleReal allowed_deviation) const
01718       {
01719         DoubleReal diff = fabs(pos1 - pos2);
01720         if (diff <= 0.5*allowed_deviation)
01721         {
01722           return 0.1*(0.5*allowed_deviation-diff)/(0.5*allowed_deviation)+0.9;
01723         }
01724         else if (diff <= allowed_deviation)
01725         {
01726           return 0.9*(allowed_deviation-diff)/(0.5*allowed_deviation);
01727         }
01728         return 0.0;
01729       }
01730 
01732       DoubleReal isotopeScore_(const TheoreticalIsotopePattern& isotopes, IsotopePattern& pattern, bool consider_mz_distances, bool debug)
01733       {
01734         //Abort if a core peak is missing
01735         for (UInt iso=0+isotopes.optional_begin; iso<pattern.peak.size()-isotopes.optional_end; ++iso)
01736         {
01737           if (pattern.peak[iso]==-1)
01738           {
01739             if (debug) log_ << "   - aborting: core peak is missing" << std::endl;
01740             return 0.0;
01741           }
01742         }
01743         //Find best isotope fit
01744         // - try to leave out optional isotope peaks to improve the fit
01745         // - do not allow gaps inside the pattern
01746         DoubleReal best_int_score = 0.01; //Not 0 as this would result in problems when checking for the percental improvement
01747         UInt best_begin = 0;
01748         for (UInt i=isotopes.optional_begin; i>0; --i)
01749         {
01750           if (pattern.peak[i-1]==-1)
01751           {
01752             best_begin = i;
01753             break;
01754           }
01755         }
01756         UInt best_end = 0;
01757         for (UInt i=isotopes.optional_end; i>0; --i)
01758         {
01759           if (pattern.peak[pattern.peak.size()-i]==-1)
01760           {
01761             best_end = i;
01762             break;
01763           }
01764         }
01765         if (debug) log_ << "   - best_begin/end: " << best_begin << "/" << best_end << std::endl;
01766         for (UInt b=best_begin; b<=isotopes.optional_begin; ++b)
01767         {
01768           for (UInt e=best_end; e<=isotopes.optional_end; ++e)
01769           {
01770             //Make sure we have more than 2 peaks (unless in the first loop interation) 
01771             if (isotopes.size()-b-e>2 || (b==best_begin && e==best_end))
01772             {
01773               DoubleReal int_score = Math::pearsonCorrelationCoefficient(isotopes.intensity.begin()+b, isotopes.intensity.end()-e, pattern.intensity.begin()+b, pattern.intensity.end()-e); 
01774               if (isnan(int_score)) int_score = 0.0;
01775               if (isotopes.size()-b-e==2 && int_score>min_isotope_fit_) int_score = min_isotope_fit_; //special case for the first loop iteration (otherwise the score is 1)
01776               if (debug) log_ << "   - fit (" << b << "/" << e << "): " << int_score;
01777               if (int_score/best_int_score>=1.0+optional_fit_improvement_)
01778               {
01779                 if (debug) log_ << " - new best fit ";
01780                 best_int_score = int_score;
01781                 best_begin = b;
01782                 best_end = e;
01783               }
01784               if (debug) log_ << std::endl;
01785             }
01786           }
01787         }
01788         //remove left out peaks from the beginning
01789         for (UInt i=0; i<best_begin; ++i)
01790         {
01791           pattern.peak[i] = -2;
01792           pattern.intensity[i] = 0.0;
01793           pattern.mz_score[i] = 0.0;
01794         }
01795         //remove left out peaks from the end
01796         for (UInt i=0; i<best_end; ++i)
01797         {
01798           pattern.peak[isotopes.size()-1-i] = -2;
01799           pattern.intensity[isotopes.size()-1-i] = 0.0;
01800           pattern.mz_score[isotopes.size()-1-i] = 0.0;
01801         }
01802         //calculate m/z score (if required)
01803         if (consider_mz_distances)
01804         {
01805           best_int_score *= std::accumulate(pattern.mz_score.begin()+best_begin, pattern.mz_score.end()-best_end,0.0) / (pattern.mz_score.size()-best_begin-best_end);
01806         }
01807         //return final score
01808         return best_int_score;
01809       }
01810       
01811       DoubleReal intensityScore_(DoubleReal intensity, UInt spectrum, UInt peak)
01812       {
01813         //calculate (half) bin numbers
01814         DoubleReal rt = map_->at(spectrum).getRT();
01815         DoubleReal mz = map_->at(spectrum)[peak].getMZ();
01816         DoubleReal rt_min = map_->getMinRT();
01817         DoubleReal mz_min = map_->getMinMZ();
01818         UInt rt_bin = std::min(2*intensity_bins_-1,(UInt)std::floor((rt - rt_min) / intensity_rt_step_ * 2.0));
01819         UInt mz_bin = std::min(2*intensity_bins_-1,(UInt)std::floor((mz - mz_min) / intensity_mz_step_ * 2.0));
01820         //determine mz bins
01821         UInt ml,mh;
01822         if (mz_bin==0 || mz_bin==2*intensity_bins_-1)
01823         {
01824           ml = mz_bin/2;
01825           mh = mz_bin/2;
01826         }
01827         else if (Math::isOdd(mz_bin))
01828         {
01829           ml = mz_bin/2;
01830           mh = mz_bin/2+1;
01831         }
01832         else
01833         {
01834           ml = mz_bin/2-1; 
01835           mh = mz_bin/2;
01836         }
01837         //determine rt bins
01838         UInt rl,rh;
01839         if (rt_bin==0 || rt_bin==2*intensity_bins_-1)
01840         {
01841           rl = rt_bin/2;
01842           rh = rt_bin/2;
01843         }
01844         else if (Math::isOdd(rt_bin))
01845         {
01846           rl = rt_bin/2;
01847           rh = rt_bin/2+1;
01848         }
01849         else
01850         {
01851           rl = rt_bin/2-1; 
01852           rh = rt_bin/2;
01853         }
01854         //calculate scores and distances
01855         DoubleReal d1 = 1.0 / (0.01 + std::fabs((rt_min+(0.5+rl)*intensity_rt_step_-rt)*(mz_min+(0.5+ml)*intensity_mz_step_-mz)));
01856         DoubleReal d2 = 1.0 / (0.01 + std::fabs((rt_min+(0.5+rh)*intensity_rt_step_-rt)*(mz_min+(0.5+ml)*intensity_mz_step_-mz)));
01857         DoubleReal d3 = 1.0 / (0.01 + std::fabs((rt_min+(0.5+rl)*intensity_rt_step_-rt)*(mz_min+(0.5+mh)*intensity_mz_step_-mz)));
01858         DoubleReal d4 = 1.0 / (0.01 + std::fabs((rt_min+(0.5+rh)*intensity_rt_step_-rt)*(mz_min+(0.5+mh)*intensity_mz_step_-mz)));
01859         DoubleReal d_sum = d1 + d2 + d3 + d4;
01860         //return weighted score
01861         return intensityScore_(rl, ml, intensity)*d1/d_sum + intensityScore_(rh, ml, intensity)*d2/d_sum + intensityScore_(rl, mh, intensity)*d3/d_sum + intensityScore_(rh, mh, intensity)*d4/d_sum;
01862       }
01863 
01864       DoubleReal intensityScore_(UInt rt_bin, UInt mz_bin, DoubleReal intensity)
01865       {
01866         //interpolate score value according to quantiles(20)
01867         std::vector<DoubleReal>& quantiles20 = intensity_thresholds_[rt_bin][mz_bin];
01868         std::vector<DoubleReal>::const_iterator it = std::lower_bound(quantiles20.begin(),quantiles20.end(),intensity);
01869         if (it==quantiles20.begin()) ++it;
01870         else if (it==quantiles20.end()) --it;
01871         std::vector<DoubleReal>::const_iterator it_before = it-1;
01872         
01873         return std::min(1.0,0.05*(intensity-*it_before)/(*it-*it_before) + 0.05*(it_before - quantiles20.begin()));
01874       }
01875 
01876       static int gaussF_(const gsl_vector* param, void* data, gsl_vector* f)
01877       {
01878         MassTraces* traces = static_cast<MassTraces*>(data);
01879         double height = gsl_vector_get (param, 0);
01880         double x0 = gsl_vector_get (param, 1);
01881         double sig = gsl_vector_get (param, 2);
01882         
01883         UInt count = 0;
01884         for (UInt t=0; t< traces->size(); ++t)
01885         {
01886           MassTrace& trace = traces->at(t);     
01887           for (UInt i=0; i<trace.peaks.size(); ++i)
01888           {
01889             gsl_vector_set(f, count, traces->baseline + trace.theoretical_int * height * exp(-0.5 * pow(trace.peaks[i].first - x0, 2)  / pow(sig, 2)) - trace.peaks[i].second->getIntensity() );
01890             ++count;
01891           }
01892         }
01893         return GSL_SUCCESS;
01894       }
01895     
01896       static int gaussDF_(const gsl_vector* param, void* data, gsl_matrix* J)
01897       {
01898         MassTraces* traces = static_cast<MassTraces*>(data);
01899         double height = gsl_vector_get (param, 0);
01900         double x0 = gsl_vector_get (param, 1);
01901         double sig = gsl_vector_get (param, 2);
01902         
01903         UInt count = 0;
01904         for (UInt t=0; t<traces->size(); ++t)
01905         {
01906           MassTrace& trace = traces->at(t);
01907           for (UInt i=0; i< trace.peaks.size(); ++i)
01908           {
01909             DoubleReal rt = trace.peaks[i].first;
01910             gsl_matrix_set (J, count, 0, trace.theoretical_int * exp(-0.5 * pow(rt-x0,2) / pow(sig,2)));
01911             gsl_matrix_set (J, count, 1, trace.theoretical_int * height * exp(-0.5 * pow(rt-x0,2) / pow(sig,2)) * (rt-x0) / pow(sig,2));
01912             gsl_matrix_set (J, count, 2, 0.125 * trace.theoretical_int * height * exp(-0.5 * pow(rt-x0,2) / pow(sig,2)) * pow(rt-x0,2) / pow(sig,3));       
01913             ++count;
01914           }
01915         }
01916         return GSL_SUCCESS;
01917       }
01918     
01919       static int gaussFDF_(const gsl_vector* param, void* data, gsl_vector* f, gsl_matrix* J)
01920       {
01921         gaussF_(param, data, f);
01922         gaussDF_(param, data, J);
01923         return GSL_SUCCESS;
01924       }
01925 
01926     private:
01927       
01929       FeatureFinderAlgorithmPicked& operator=(const FeatureFinderAlgorithmPicked&);
01931       FeatureFinderAlgorithmPicked(const FeatureFinderAlgorithmPicked&);
01932 
01933   };
01934 
01935 } // namespace OpenMS
01936 
01937 #endif // OPENMS_TRANSFORMATIONS_FEATUREFINDER_FEATUREFINDERALGORITHMPICKED_H

Generated Tue Apr 1 15:36:34 2008 -- using doxygen 1.5.4 OpenMS / TOPP 1.1