Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages

SimpleExtender.h (Maintainer: Clemens Groepl)

Go to the documentation of this file.
00001 // -*- Mode: C++; tab-width: 2; -*-
00002 // vi: set ts=2:
00003 //
00004 // --------------------------------------------------------------------------
00005 //                   OpenMS Mass Spectrometry Framework
00006 // --------------------------------------------------------------------------
00007 //  Copyright (C) 2003-2008 -- Oliver Kohlbacher, Knut Reinert
00008 //
00009 //  This library is free software; you can redistribute it and/or
00010 //  modify it under the terms of the GNU Lesser General Public
00011 //  License as published by the Free Software Foundation; either
00012 //  version 2.1 of the License, or (at your option) any later version.
00013 //
00014 //  This library is distributed in the hope that it will be useful,
00015 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 //  Lesser General Public License for more details.
00018 //
00019 //  You should have received a copy of the GNU Lesser General Public
00020 //  License along with this library; if not, write to the Free Software
00021 //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 //
00023 // --------------------------------------------------------------------------
00024 // $Maintainer: Clemens Groepl $
00025 // --------------------------------------------------------------------------
00026 
00027 #ifndef OPENMS_TRANSFORMATIONS_FEATUREFINDER_SIMPLEEXTENDER_H
00028 #define OPENMS_TRANSFORMATIONS_FEATUREFINDER_SIMPLEEXTENDER_H
00029 
00030 #include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/FeaFiModule.h>
00031 #include <OpenMS/MATH/STATISTICS/AveragePosition.h>
00032 #include <OpenMS/MATH/MISC/LinearInterpolation.h>
00033 
00034 #include <queue>
00035 #include <iostream>
00036 #include <fstream>
00037 
00038 namespace OpenMS
00039 {
00040 
00070   template<class PeakType,class FeatureType>
00071   class SimpleExtender
00072     : public FeaFiModule<PeakType,FeatureType>,
00073       public FeatureFinderDefs
00074   {
00075     public:
00076     typedef FeaFiModule<PeakType,FeatureType> Base;
00077 
00079     typedef typename Base::IntensityType IntensityType;
00081     typedef typename Base::CoordinateType CoordinateType;
00083     typedef DoubleReal ProbabilityType;
00084 
00086     SimpleExtender(const MSExperiment<PeakType>* map, FeatureMap<FeatureType>* features, FeatureFinder* ff)
00087     : Base(map,features,ff),
00088       last_pos_extracted_()
00089       //tolerance_rt_(0),
00090       //tolerance_mz_(0)
00091     {
00092       this->setName("SimpleExtender");
00093       /*
00094       this->defaults_.setValue("tolerance_rt",2.0f,"Boundary width in RT dimension (used for local extension of the region)", false);
00095       this->defaults_.setMinFloat("tolerance_rt",0.0);
00096       this->defaults_.setValue("tolerance_mz",0.5f,"Boundary width in m/z dimension (used for local extension of the region)", false);
00097       this->defaults_.setMinFloat("tolerance_mz",0.0);
00098       */
00099       this->defaults_.setValue("dist_mz_up",6.0f,"Maximum high m/z distance of peak in the region/boundary from the seed.", false);
00100       this->defaults_.setMinFloat("dist_mz_up",0.0);
00101       this->defaults_.setValue("dist_mz_down",2.0f,"Maximum low m/z distance of peak in the region/boundary from the seed.", false);
00102       this->defaults_.setMinFloat("dist_mz_down",0.0);
00103       this->defaults_.setValue("dist_rt_up",5.0f,"Maximum high RT distance of peak in the region/boundary from the seed.", false);
00104       this->defaults_.setMinFloat("dist_rt_up",0.0);
00105       this->defaults_.setValue("dist_rt_down",5.0f,"Maximum low RT distance of peak in the region/boundary from the seed.", false);
00106       this->defaults_.setMinFloat("dist_rt_down",0.0);
00107 
00108       // priority check is per default switched off
00109       // these values were used for the Myoglobin quantification project
00110       // DON'T REMOVE THIS
00111       this->defaults_.setValue("priority_thr",-0.1f,"Minimum priority for data points to be included into the boundary of the feature (default 0.0). The priority of a data point is a function of its intensity and its distance to the last point included into the feature region. Setting this threshold to zero or a very small value is usually a good idea.", true);
00112      
00113       this->defaults_.setValue("intensity_factor",0.03f,"Influences for intensity (ion count) threshold in the feature extension. We include only raw data points into this region if their intensity is larger than [intensity_factor * (intensity of the seed)].", false);
00114       this->defaults_.setMinFloat("intensity_factor",0.0);
00115       this->defaults_.setMaxFloat("intensity_factor",1.0);
00116       
00117       this->defaultsToParam_();
00118     }
00119 
00121     virtual ~SimpleExtender()
00122     {
00123     }
00124 
00126     void extend(const ChargedIndexSet& seed_region, ChargedIndexSet& result_region)
00127     {
00128       // empty region and boundary datastructures
00129       result_region.clear();
00130       priorities_.clear();
00131       running_avg_.clear();
00132       boundary_ = std::priority_queue< IndexWithPriority, std::vector<IndexWithPriority>, typename IndexWithPriority::PriorityLess>();
00133 
00134 #ifdef DEBUG_FEATUREFINDER
00135       std::vector<IndexPair> debug_vector;
00136 #endif
00137 
00138       // find maximum of region (seed)
00139       CoordinateType max_intensity = 0.0;
00140       IndexPair seed;
00141 
00142       for (IndexSet::const_iterator citer = seed_region.begin(); citer != seed_region.end(); ++citer)
00143       {
00144         if (this->getPeakIntensity(*citer) > max_intensity)
00145         {
00146           seed = *citer;
00147           max_intensity = this->getPeakIntensity(seed);
00148         }
00149       }
00150 
00151       // remember last extracted point (in this case the seed !)
00152       last_pos_extracted_[RawDataPoint2D::RT] = this->getPeakRt(seed);
00153       last_pos_extracted_[RawDataPoint2D::MZ] = this->getPeakMz(seed);
00154 
00155       // Add peaks received from seeder directly to boundary
00156       for (IndexSet::const_iterator citer = seed_region.begin(); citer != seed_region.end(); ++citer)
00157       {
00158         ProbabilityType priority = computePeakPriority_(*citer);
00159         priorities_[*citer] = priority;
00160         boundary_.push(IndexWithPriority(*citer,priority));
00161       }
00162       // pass on charge information
00163       result_region.charge_ = seed_region.charge_;
00164 
00165       // re-compute intensity threshold
00166       intensity_threshold_ = (DoubleReal)(this->param_).getValue("intensity_factor") * this->getPeakIntensity(seed);
00167 
00168       std::cout << "Extending from " << this->getPeakRt(seed) << "/" << this->getPeakMz(seed);
00169       std::cout << " (" << seed.first << "/" << seed.second << ")" << std::endl;
00170       std::cout << "Intensity of seed " << this->getPeakIntensity(seed) << " intensity_threshold: " << intensity_threshold_ << std::endl;
00171 
00172       while (!boundary_.empty())
00173       {
00174         // remove peak with highest priority
00175         const IndexPair  current_index = boundary_.top().index;
00176         boundary_.pop();
00177 
00178         //  check for corrupt index
00179         if ( current_index.first >= (*this->map_).size()) std::cout << "Scan index outside of map!" << std::endl;
00180         if ( current_index.second >= (*this->map_)[current_index.first].size() ) std::cout << "Peak index outside of scan!" << std::endl;
00181 
00182         OPENMS_PRECONDITION(current_index.first<(*this->map_).size(), "Scan index outside of map!");
00183         OPENMS_PRECONDITION(current_index.second<(*this->map_)[current_index.first].size(), "Peak index outside of scan!");
00184 
00185         // remember last extracted peak
00186         last_pos_extracted_[RawDataPoint2D::RT] = this->getPeakRt(current_index);
00187         last_pos_extracted_[RawDataPoint2D::MZ] = this->getPeakMz(current_index);
00188 
00189         // Now we explore the neighbourhood of the current peak. Points in this area are included
00190         // into the boundary if their intensity is not too low and they are not too
00191         // far away from the seed.
00192         // Add position to the current average of positions weighted by intensity
00193         running_avg_.add(last_pos_extracted_,this->getPeakIntensity(current_index));
00194 
00195         // explore neighbourhood of current peak
00196         moveMzUp_(current_index);
00197         moveMzDown_(current_index);
00198         moveRtUp_(current_index);
00199         moveRtDown_(current_index);
00200 
00201         // set peak flags and add to boundary
00202         this->ff_->getPeakFlag(current_index) = USED;
00203 #ifdef DEBUG_FEATUREFINDER
00204         debug_vector.push_back(current_index);
00205 #endif
00206         result_region.insert(current_index);
00207 
00208       } // end of while ( !boundary_.empty() )
00209 
00210       std::cout << "Feature region size: " << result_region.size() << std::endl;
00211 
00212 #ifdef DEBUG_FEATUREFINDER
00213       static UInt number=1;
00214       writeDebugFile_(debug_vector,number++);
00215       debug_vector.clear();
00216 #endif
00217 
00218       return;
00219     } // end of extend
00220 
00233     struct IndexWithPriority
00234     {
00235       IndexWithPriority(const FeatureFinderDefs::IndexPair& i, DoubleReal p) 
00236         : index(i), 
00237           priority(p)
00238       {
00239       }
00240 
00241       IndexPair index;
00242       ProbabilityType priority;
00243 
00245       struct PriorityLess
00246       {
00247         inline bool operator() (const IndexWithPriority& x, const IndexWithPriority& y) const
00248         {
00249           return x.priority < y.priority;
00250         }
00251       };
00252     };
00253 
00254   protected:
00255     virtual void updateMembers_()
00256     {
00257       dist_mz_up_ = this->param_.getValue("dist_mz_up");
00258       dist_mz_down_ = this->param_.getValue("dist_mz_down");
00259       dist_rt_up_ = this->param_.getValue("dist_rt_up");
00260       dist_rt_down_ = this->param_.getValue("dist_rt_down");
00261       priority_threshold_ = this->param_.getValue("priority_thr");
00262       //tolerance_rt_ = this->param_.getValue("tolerance_rt");
00263       //tolerance_mz_ = this->param_.getValue("tolerance_mz");
00264     }
00265 
00267     void writeDebugFile_(const std::vector<IndexPair>& peaks, UInt nr_feat)
00268     {
00269       String filename = String(nr_feat).fillLeft('0',4) + "_Extension.dta2d";
00270       std::ofstream file(filename.c_str());
00271       for(UInt i=0; i<peaks.size(); ++i)
00272       {
00273         file << this->getPeakRt(peaks[i]) << " " << this->getPeakMz(peaks[i]) << " " << peaks.size()-i << std::endl;
00274       }
00275       file.close();
00276     }
00277 
00279     bool isTooFarFromCentroid_(const IndexPair& index)
00280     {
00281 
00282       if ( index.first >= (*this->map_).size()) std::cout << "Scan index outside of map!" << std::endl;
00283       if ( index.second >= (*this->map_)[index.first].size() ) std::cout << "Peak index outside of scan!" << std::endl;
00284 
00285       //Corrupt index
00286       OPENMS_PRECONDITION(index.first<(*this->map_).size(), "Scan index outside of map!");
00287       OPENMS_PRECONDITION(index.second<(*this->map_)[index.first].size() , "Peak index outside of scan!");
00288 
00289        const DPosition<2>& curr_mean = running_avg_.getPosition();
00290 
00291       if ( this->getPeakMz(index) > curr_mean[RawDataPoint2D::MZ] + dist_mz_up_   ||
00292            this->getPeakMz(index) < curr_mean[RawDataPoint2D::MZ] - dist_mz_down_ ||
00293            this->getPeakRt(index) > curr_mean[RawDataPoint2D::RT] + dist_rt_up_   ||
00294            this->getPeakRt(index) < curr_mean[RawDataPoint2D::RT] - dist_rt_down_ )
00295       {
00296         //too far
00297         return true;
00298       }
00299 
00300       //close enough
00301       return false;
00302     }
00303 
00305     void moveMzUp_(const IndexPair& index)
00306     {
00307       try
00308       {
00309         IndexPair tmp = index;
00310         while (true)
00311         {
00312           this->getNextMz(tmp);
00313           if (isTooFarFromCentroid_(tmp)) break;
00314           checkNeighbour_(tmp);
00315         }
00316       }
00317       catch(NoSuccessor)
00318       {
00319       }
00320     }
00321 
00323     void moveMzDown_(const IndexPair& index)
00324     {
00325       try
00326       {
00327         IndexPair tmp = index;
00328         while (true)
00329         {
00330           this->getPrevMz(tmp);
00331           if (isTooFarFromCentroid_(tmp)) break;
00332           checkNeighbour_(tmp);
00333         }
00334       }
00335       catch(NoSuccessor)
00336       {
00337       }
00338     }
00339 
00341     void moveRtUp_(const IndexPair& index)
00342     {
00343       try
00344       {
00345         IndexPair tmp = index;
00346 
00347         while (true)
00348         {
00349           this->getNextRt(tmp);
00350           if (isTooFarFromCentroid_(tmp)) break;
00351           checkNeighbour_(tmp);
00352         }
00353       }
00354       catch(NoSuccessor)
00355       {
00356       }
00357     }
00358 
00359 
00361     void moveRtDown_(const IndexPair& index)
00362     {
00363       try
00364       {
00365         IndexPair tmp = index;
00366         while (true)
00367         {
00368           this->getPrevRt(tmp);
00369           if (isTooFarFromCentroid_(tmp)) break;
00370           checkNeighbour_(tmp);
00371         }
00372       }
00373       catch(NoSuccessor)
00374       {
00375       }
00376     }
00377 
00379     ProbabilityType computePeakPriority_(const IndexPair& index)
00380     {
00381 
00382       return (*this->map_)[index.first][index.second].getIntensity();
00383           
00384       // usage of tolerance_rt_ and tolerance_mz_ apparently leads to undesirable priority values, so this is disabled for now
00385         
00386       /*      
00387         *
00388         std::max(0.,1-std::abs( (*this->map_)[index.first].getRT() - last_pos_extracted_[RawDataPoint2D::RT] )/tolerance_rt_ )
00389         *
00390         std::max(0.,1-std::abs( (*this->map_)[index.first][index.second].getMZ() - last_pos_extracted_[RawDataPoint2D::MZ] )/tolerance_mz_ )
00391       ;
00392       */
00393     }
00394 
00396     void checkNeighbour_(const IndexPair& index)
00397     {
00398       //Corrupt index
00399       OPENMS_PRECONDITION(index.first<(*this->map_).size(), "Scan index outside of map!");
00400       OPENMS_PRECONDITION(index.second<(*this->map_)[index.first].size(), "Peak index outside of scan!");
00401 
00402       // skip this point if its intensity is too low
00403       if (this->getPeakIntensity(index) <= intensity_threshold_)
00404       {
00405        return;
00406       }
00407       if ( this->ff_->getPeakFlag(index) == UNUSED)
00408       {
00409         DoubleReal pr_new = computePeakPriority_(index);
00410 
00411         if (pr_new > priority_threshold_)
00412         {
00413           std::map<IndexPair, DoubleReal>::iterator piter = priorities_.find(index);
00414           this->ff_->getPeakFlag(index) = USED;
00415           priorities_[index] = pr_new;
00416           boundary_.push(IndexWithPriority(index,pr_new));
00417         }
00418       }
00419     }
00420 
00422     Math::AveragePosition<2> running_avg_;
00423 
00425     std::map<IndexPair, ProbabilityType> priorities_;
00426 
00428     DPosition<2> last_pos_extracted_;
00429 
00431     std::priority_queue< IndexWithPriority, std::vector < IndexWithPriority > , typename IndexWithPriority::PriorityLess > boundary_;
00432 
00433     //Real tolerance_rt_;
00434     //Real tolerance_mz_;
00435 
00437     IntensityType intensity_threshold_;
00438 
00440     CoordinateType dist_mz_up_;
00442     CoordinateType dist_mz_down_;
00444     CoordinateType dist_rt_up_;
00446     CoordinateType dist_rt_down_;
00447 
00449     ProbabilityType priority_threshold_;
00450     
00452     ChargedIndexSet region_;
00453 
00454     private:
00456       SimpleExtender();
00458       SimpleExtender& operator=(const SimpleExtender&);
00460       SimpleExtender(const SimpleExtender&);
00461 
00462   };
00463 }
00464 #endif // OPENMS_TRANSFORMATIONS_FEATUREFINDER_SIMPLEEXTENDER_H

Generated Tue Apr 1 15:36:37 2008 -- using doxygen 1.5.4 OpenMS / TOPP 1.1