Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages

ModelFitter.h (Maintainer: Marcel Grunert)

Go to the documentation of this file.
00001 // -*- Mode: C++; tab-width: 2; -*-
00002 // vi: set ts=2:
00003 //
00004 // --------------------------------------------------------------------------
00005 //                   OpenMS Mass Spectrometry Framework
00006 // --------------------------------------------------------------------------
00007 //  Copyright (C) 2003-2008 -- Oliver Kohlbacher, Knut Reinert
00008 //
00009 //  This library is free software; you can redistribute it and/or
00010 //  modify it under the terms of the GNU Lesser General Public
00011 //  License as published by the Free Software Foundation; either
00012 //  version 2.1 of the License, or (at your option) any later version.
00013 //
00014 //  This library is distributed in the hope that it will be useful,
00015 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 //  Lesser General Public License for more details.
00018 //
00019 //  You should have received a copy of the GNU Lesser General Public
00020 //  License along with this library; if not, write to the Free Software
00021 //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 //
00023 // --------------------------------------------------------------------------
00024 // $Maintainer: Marcel Grunert $
00025 // --------------------------------------------------------------------------
00026 
00027 
00028 #ifndef OPENMS_TRANSFORMATIONS_FEATUREFINDER_MODELFITTER_H
00029 #define OPENMS_TRANSFORMATIONS_FEATUREFINDER_MODELFITTER_H
00030 
00031 #include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/FeaFiModule.h>
00032 #include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/ProductModel.h>
00033 #include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/IsotopeModel.h>
00034 #include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/ExtendedIsotopeModel.h>
00035 #include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/LmaIsotopeModel.h>
00036 #include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/InterpolationModel.h>
00037 #include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/Fitter1D.h>
00038 #include <OpenMS/MATH/STATISTICS/AsymmetricStatistics.h>
00039 #include <OpenMS/MATH/STATISTICS/StatisticFunctions.h>
00040 #include <OpenMS/MATH/MISC/MathFunctions.h>
00041 #include <OpenMS/CONCEPT/Factory.h>
00042 
00043 #include <iostream>
00044 #include <fstream>
00045 #include <numeric>
00046 #include <math.h>
00047 #include <vector>
00048 #include <set>
00049 
00050 namespace OpenMS
00051 {
00052     
00066   template <class PeakType, class FeatureType> class ModelFitter :
00067     public FeaFiModule<PeakType,FeatureType>,
00068     public FeatureFinderDefs
00069   {
00070    public:
00071           
00073     typedef IndexSet::const_iterator IndexSetIter;
00075     typedef Feature::QualityType QualityType;
00077     typedef Feature::CoordinateType CoordinateType;
00079     typedef Feature::IntensityType IntensityType;
00081     typedef Feature::ChargeType ChargeType;
00083     typedef FeaFiModule<PeakType,FeatureType> Base;
00085     typedef RawDataPoint1D RawDataPointType;
00087     typedef DPeakArray<RawDataPointType > RawDataArrayType;
00088     
00089     enum 
00090       {
00091         RT = RawDataPoint2D::RT,
00092         MZ = RawDataPoint2D::MZ
00093       };
00094 
00096     ModelFitter(const MSExperiment<PeakType>* map, FeatureMap<FeatureType>* features, FeatureFinder* ff) :
00097       Base(map,features,ff),
00098       model2D_(),
00099       mz_stat_(),
00100       rt_stat_(),
00101       monoisotopic_mz_( 0 ),
00102       counter_( 1 ),
00103       iso_stdev_first_( 0 ),
00104       iso_stdev_last_( 0 ),
00105       iso_stdev_stepsize_( 0 ),
00106       first_mz_model_( 0 ),
00107       last_mz_model_( 0 )
00108     {
00109       this->setName("ModelFitter");
00110                 
00111       this->defaults_.setValue("fit_algorithm", "simple", "Fitting algorithm type (internal parameter).", true);
00112       std::vector<String> fit_opts;
00113       fit_opts.push_back("simple");
00114       fit_opts.push_back("simplest");
00115       fit_opts.push_back("wavelet");
00116       this->defaults_.setValidStrings("fit_algorithm", fit_opts);
00117                 
00118       this->defaults_.setValue( "max_iteration", 500, "Maximum number of iterations for fitting with Levenberg-Marquardt algorithm.", true );
00119       this->defaults_.setMinInt("max_iteration", 1);
00120       this->defaults_.setValue( "deltaAbsError", 0.0001, "Absolute error used by the Levenberg-Marquardt algorithm.", true );
00121       this->defaults_.setMinFloat("deltaAbsError", 0.0);
00122       this->defaults_.setValue( "deltaRelError", 0.0001, "Relative error used by the Levenberg-Marquardt algorithm.", true );
00123       this->defaults_.setMinFloat("deltaRelError", 0.0);
00124                 
00125       this->defaults_.setValue( "tolerance_stdev_bounding_box", 3.0f, "Bounding box has range [minimim of data, maximum of data] enlarged by tolerance_stdev_bounding_box times the standard deviation of the data", true );
00126       this->defaults_.setMinFloat("tolerance_stdev_bounding_box", 0.0);
00127                 
00128       this->defaults_.setValue( "intensity_cutoff_factor", 0.05f, "Cutoff peaks with a predicted intensity below intensity_cutoff_factor times the maximal intensity of the model", false );
00129       this->defaults_.setMinFloat("intensity_cutoff_factor", 0.0);
00130       this->defaults_.setMaxFloat("intensity_cutoff_factor", 1.0);
00131                 
00132       this->defaults_.setValue( "feature_intensity_sum", 1, "Determines what is reported as feature intensity.\n1: the sum of peak intensities;\n0: the maximum intensity of all peaks" , true);
00133       this->defaults_.setMinInt("feature_intensity_sum", 0);
00134       this->defaults_.setMaxInt("feature_intensity_sum", 1);
00135                 
00136       this->defaults_.setValue( "min_num_peaks:final", 5, "Minimum number of peaks left after cutoff. If smaller, feature will be discarded." , false);
00137       this->defaults_.setMinInt("min_num_peaks:final", 1);
00138       this->defaults_.setValue( "min_num_peaks:extended", 10, "Minimum number of peaks after extension. If smaller, feature will be discarded." , false);
00139       this->defaults_.setMinInt("min_num_peaks:extended", 1);
00140       this->defaults_.setSectionDescription( "min_num_peaks", "Required number of peaks for a feature." );
00141                 
00142       this->defaults_.setValue( "rt:interpolation_step", 0.2f, "Step size in seconds used to interpolate model for RT." , false);
00143       this->defaults_.setMinFloat("rt:interpolation_step", 0.0);
00144       this->defaults_.setSectionDescription( "rt", "Model settings in RT dimension." );
00145                 
00146       this->defaults_.setValue( "mz:interpolation_step", 0.03f, "Interpolation step size for m/z.", false );
00147       this->defaults_.setMinFloat("mz:interpolation_step", 0.001);
00148       this->defaults_.setValue( "mz:model_type:first", 0, "Numeric id of first m/z model fitted (usually indicating the charge state), 0 = no isotope pattern (fit a single gaussian).", false );
00149       this->defaults_.setMinInt("mz:model_type:first", 0);
00150       this->defaults_.setValue( "mz:model_type:last", 4, "Numeric id of last m/z model fitted (usually indicating the charge state), 0 = no isotope pattern (fit a single gaussian).", false );
00151       this->defaults_.setMinInt("mz:model_type:last", 0);
00152       this->defaults_.setSectionDescription( "mz", "Model settings in m/z dimension." );
00153                 
00154       this->defaults_.setValue( "quality:type", "Correlation", "Type of the quality measure used to assess the fit of model vs data.", true );
00155       std::vector<String> quality_opts;
00156       quality_opts.push_back("Correlation");
00157       this->defaults_.setValidStrings("quality:type", quality_opts);
00158       this->defaults_.setValue( "quality:minimum", 0.65f, "Minimum quality of fit, features below this threshold are discarded." , false);
00159       this->defaults_.setMinFloat("quality:minimum", 0.0);
00160       this->defaults_.setMaxFloat("quality:minimum", 1.0);
00161       this->defaults_.setSectionDescription( "quality", "Fitting quality settings." );
00162                 
00163       this->defaults_.setValue( "isotope_model:stdev:first", 0.04f, "First standard deviation to be considered for isotope model.", false );
00164       this->defaults_.setMinFloat("isotope_model:stdev:first", 0.0);
00165       this->defaults_.setValue( "isotope_model:stdev:last", 0.12f, "Last standard deviation to be considered for isotope model.", false );
00166       this->defaults_.setMinFloat("isotope_model:stdev:last", 0.0);
00167       this->defaults_.setValue( "isotope_model:stdev:step", 0.04f, "Step size for standard deviations considered for isotope model.", false );
00168       this->defaults_.setMinFloat("isotope_model:stdev:step", 0.0);
00169       this->defaults_.setSectionDescription( "isotope_model:stdev", "Instrument resolution settings for m/z dimension." );
00170                 
00171       this->defaults_.setValue( "isotope_model:averagines:C", 0.0443f, "Number of C atoms per Dalton of the mass.", true );
00172       this->defaults_.setMinFloat("isotope_model:averagines:C", 0.0);
00173       this->defaults_.setValue( "isotope_model:averagines:H", 0.007f, "Number of H atoms per Dalton of the mass.", true );
00174       this->defaults_.setMinFloat("isotope_model:averagines:H", 0.0);
00175       this->defaults_.setValue( "isotope_model:averagines:N", 0.0012f, "Number of N atoms per Dalton of the mass.", true );
00176       this->defaults_.setMinFloat("isotope_model:averagines:N", 0.0);
00177       this->defaults_.setValue( "isotope_model:averagines:O", 0.013f, "Number of O atoms per Dalton of the mass.", true );
00178       this->defaults_.setMinFloat("isotope_model:averagines:O", 0.0);
00179       this->defaults_.setValue( "isotope_model:averagines:S", 0.00037f, "Number of S atoms per Dalton of the mass.", true);
00180       this->defaults_.setMinFloat("isotope_model:averagines:S", 0.0);
00181       this->defaults_.setSectionDescription( "isotope_model:averagines", "Averagines are used to approximate the number of atoms (C,H,N,O,S) which a peptide of a given mass contains." ); 
00182                 
00183       this->defaults_.setValue( "isotope_model:isotope:trim_right_cutoff", 0.001f, "Cutoff for averagine distribution, trailing isotopes below this relative intensity are not considered.", true );
00184       this->defaults_.setMinFloat("isotope_model:isotope:trim_right_cutoff", 0.0);
00185       this->defaults_.setValue( "isotope_model:isotope:maximum", 100, "Maximum number of isotopes being used for the IsotopeModel.", true );
00186       this->defaults_.setMinInt("isotope_model:isotope:maximum", 1);
00187       this->defaults_.setValue( "isotope_model:isotope:distance", 1.000495f, "Distance between consecutive isotopic peaks.", true );
00188       this->defaults_.setMinFloat("isotope_model:isotope:distance", 0.0);
00189       this->defaults_.setSectionDescription( "isotope_model", "Settings of the isotope model (m/z)." );
00190     
00191       this->defaultsToParam_();
00192     }
00193 
00195     virtual ~ModelFitter()
00196     {
00197     }
00198             
00201     void setMonoIsotopicMass(CoordinateType mz)
00202     {
00203       monoisotopic_mz_ = mz;
00204     }
00205             
00207     Feature fit(const ChargedIndexSet& index_set) throw (UnableToFit)
00208     {
00209       // Test the number of peaks (not enough peaks to fit)
00210       if ( index_set.size() < ( UInt ) ( this->param_.getValue( "min_num_peaks:extended" ) ) )
00211       {
00212         String mess = String( "Skipping feature, IndexSet size too small: " ) + index_set.size();
00213         throw UnableToFit( __FILE__, __LINE__, __PRETTY_FUNCTION__, "UnableToFit-IndexSet", mess.c_str() );
00214       }
00215                 
00216       // Calculate statistics for mz and rt
00217       mz_stat_.update
00218         (
00219          Internal::IntensityIterator<ModelFitter>(index_set.begin(), this),
00220          Internal::IntensityIterator<ModelFitter>(index_set.end(), this),
00221          Internal::MzIterator<ModelFitter>(index_set.begin(), this)
00222         );
00223       rt_stat_.update
00224         (
00225          Internal::IntensityIterator<ModelFitter>(index_set.begin(), this),
00226          Internal::IntensityIterator<ModelFitter>(index_set.end(), this), 
00227          Internal::RtIterator<ModelFitter>( index_set.begin(), this)
00228         );
00229                 
00230       // set charge
00231       if (index_set.charge_ != 0)
00232       {
00233         first_mz_model_ = index_set.charge_;
00234         last_mz_model_ = index_set.charge_;
00235       }
00236                 
00237       // Check charge estimate if charge is not specified by user
00238       std::cout << "Checking charge state from " << first_mz_model_ << " to " << last_mz_model_ << std::endl;
00239                 
00240       // Compute model with the best correlation
00241       ProductModel<2>* final = 0;
00242       QualityType max_quality = fitLoop_(index_set, first_mz_model_, last_mz_model_, final);
00243                 
00244       // model_desc.createModel() returns 0 if class model_desc is not initialized
00245       // in this case something went wrong during the model fitting and we stop.
00246       if ( ! final )
00247       {
00248         throw UnableToFit( __FILE__, __LINE__, __PRETTY_FUNCTION__, "UnableToFit-BadQuality", "Zero quality after fitting. Skipping this feature" );
00249         delete final;
00250       }
00251                 
00252       // find peak with highest predicted intensity to use as cutoff
00253       IntensityType model_max = 0;
00254       for ( IndexSetIter it = index_set.begin(); it != index_set.end(); ++it )
00255       {
00256         IntensityType model_int = final->getIntensity( DPosition<2>(this->getPeakRt(*it),this->getPeakMz(*it)) );
00257         if ( model_int > model_max ) model_max = model_int;
00258       }
00259       final->setCutOff( model_max * Real( this->param_.getValue( "intensity_cutoff_factor" ) ) );
00260 
00261       // Cutoff low intensities wrt to model maximum -> cutoff independent of scaling
00262       IndexSet model_set;
00263       for ( IndexSetIter it = index_set.begin(); it != index_set.end(); ++it )
00264       {
00265         if ( final->isContained( DPosition<2>(this->getPeakRt(*it),this->getPeakMz(*it)) ) )
00266         {
00267           model_set.insert( *it );
00268         }
00269         else    // free dismissed peak via setting the appropriate flag
00270         {
00271           this->ff_->getPeakFlag( *it ) = UNUSED;
00272         }
00273       }
00274                     
00275       // Print number of selected peaks after cutoff
00276       std::cout << " Selected " << model_set.size() << " from " << index_set.size() << " peaks.\n";
00277 
00278       // not enough peaks left for feature
00279       if ( model_set.size() < ( UInt ) ( this->param_.getValue( "min_num_peaks:final" ) ) )
00280       {
00281         delete final;
00282         throw UnableToFit( __FILE__, __LINE__, __PRETTY_FUNCTION__,"UnableToFit-FinalSet",String( "Skipping feature, IndexSet size after cutoff too small: " ) + model_set.size() );
00283       }
00284 
00285       std::vector<Real> data;
00286       data.reserve(model_set.size());
00287       std::vector<Real> model;
00288       model.reserve(model_set.size());
00289 
00290       for (IndexSet::iterator it=model_set.begin();it!=model_set.end();++it)
00291       {
00292         data.push_back(this->getPeakIntensity(*it));
00293         model.push_back(final->getIntensity(DPosition<2>(this->getPeakRt(*it),this->getPeakMz(*it))));
00294       }
00295 
00296       // fit has too low quality or fit was not possible i.e. because of zero stdev
00297       if ( max_quality < ( Real ) ( this->param_.getValue( "quality:minimum" ) ) )
00298       {
00299         delete final;
00300         String mess = String( "Skipping feature, correlation too small: " ) + max_quality;
00301         throw UnableToFit( __FILE__, __LINE__, __PRETTY_FUNCTION__, "UnableToFit-Correlation", mess.c_str() );
00302       }
00303 
00304       // Calculate intensity scaling
00305       IntensityType model_sum = 0;
00306       IntensityType data_sum = 0;
00307       IntensityType data_max = 0;
00308       for ( IndexSetIter it = model_set.begin(); it != model_set.end(); ++it )
00309       {
00310         IntensityType model_int = final->getIntensity( DPosition<2>(this->getPeakRt(*it),this->getPeakMz(*it)) );
00311         model_sum += model_int;
00312         data_sum += this->getPeakIntensity( *it );
00313         if ( this->getPeakIntensity( *it ) > data_max ) data_max = this->getPeakIntensity( *it );
00314       }
00315                 
00316       // fit has too low quality or fit was not possible i.e. because of zero stdev
00317       if ( model_sum == 0 )
00318       {
00319         delete final;
00320         throw UnableToFit( __FILE__, __LINE__, __PRETTY_FUNCTION__,"UnableToFit-ZeroSum", "Skipping feature, model_sum zero." );
00321       }
00322 
00323       final->setScale( data_max / model_max );  // use max quotient instead of sum quotient
00324 
00325       // Build Feature
00326       // The feature coordinate in rt dimension is given
00327       // by the centroid of the rt model whereas the coordinate
00328       // in mz dimension is equal to the monoisotopic peak.
00329       Feature f;
00330       f.setModelDescription( ModelDescription<2>( final ) );
00331       f.setOverallQuality( max_quality );
00332       f.setRT( static_cast<InterpolationModel*>( final->getModel( RT ) ) ->getCenter() );
00333       f.setMZ( static_cast<InterpolationModel*>( final->getModel( MZ ) ) ->getCenter() );
00334                                                 
00335       // feature charge ... 
00336       // if we used a simple Gaussian model to fit the feature, we can't say anything about
00337       // its charge state. The value 0 indicates that charge state is undetermined.
00338       if ( final->getModel( MZ ) ->getName() == "LmaIsotopeModel" )
00339       {
00340         f.setCharge( static_cast<LmaIsotopeModel*>( final->getModel( MZ ) ) ->getCharge() );
00341       }
00342       else if (final->getModel( MZ ) ->getName() == "IsotopeModel")
00343       {
00344         f.setCharge( static_cast<IsotopeModel*>( final->getModel( MZ ) ) ->getCharge() );
00345       }
00346       else if (final->getModel( MZ ) ->getName() == "ExtendedIsotopeModel")
00347       {
00348         f.setCharge( static_cast<ExtendedIsotopeModel*>( final->getModel( MZ ) ) ->getCharge() );
00349       }
00350       else
00351       {
00352         f.setCharge( 0 );
00353       }
00354             
00355       // feature intensity
00356       Int const intensity_choice = this->param_.getValue( "feature_intensity_sum" );
00357       IntensityType feature_intensity = 0.0;
00358       if ( intensity_choice == 1 )
00359       {
00360         // intensity of the feature is the sum of all included data points
00361         for ( IndexSetIter it = model_set.begin(); it != model_set.end(); ++it )
00362         {
00363           feature_intensity += this->getPeakIntensity( *it );
00364         }
00365       }
00366       else
00367       {
00368         // feature intensity is the maximum intensity of all peaks
00369         for ( IndexSetIter it = model_set.begin(); it != model_set.end(); ++it )
00370         {
00371           if ( this->getPeakIntensity( *it ) > feature_intensity )
00372           {
00373             feature_intensity = this->getPeakIntensity( *it );
00374           }
00375         }
00376       }
00377                               
00378       f.setIntensity( feature_intensity );
00379       this->addConvexHull( model_set, f );
00380 
00381       if (this->param_.getValue( "fit_algorithm" ) != "wavelet")
00382       {
00383         std::cout << __FILE__ << ':' << __LINE__ << ": " << QDateTime::currentDateTime().toString( "yyyy-MM-dd hh:mm:ss" ).toStdString() << " Feature " << counter_
00384                   << ": (" << f.getRT()
00385                   << "," << f.getMZ() << ") Qual.:"
00386                   << max_quality << std::endl;
00387       }
00388 
00389       // Calculate quality  
00390       //  RT fit
00391       data.clear();
00392       model.clear();
00393       for (IndexSet::iterator it=model_set.begin();it!=model_set.end();++it)
00394       {
00395         data.push_back(this->getPeakIntensity(*it));
00396         model.push_back((final->getModel(RT))->getIntensity(this->getPeakRt(*it)));
00397       }
00398       f.setQuality( RT, Math::pearsonCorrelationCoefficient(data.begin(), data.end(), model.begin(), model.end()));
00399                 
00400       //MZ fit
00401       data.clear();
00402       model.clear();
00403       for (IndexSet::iterator it=model_set.begin();it!=model_set.end();++it)
00404       {
00405         data.push_back(this->getPeakIntensity(*it));
00406         model.push_back((final->getModel(MZ))->getIntensity(this->getPeakMz(*it)));
00407       }
00408       f.setQuality( MZ, Math::pearsonCorrelationCoefficient(data.begin(), data.end(), model.begin(), model.end()));
00409 
00410       // Save meta data in feature for TOPPView
00411       std::stringstream meta ;
00412       meta << "Feature #" << counter_ << ", +"  << f.getCharge() << ", " << index_set.size() << "->" << model_set.size()
00413            << ", Corr: (" << max_quality << "," << f.getQuality( RT ) << "," << f.getQuality( MZ ) << ")";
00414       f.setMetaValue( 3, String( meta.str() ) );
00415 
00416                 
00417 #ifdef DEBUG_FEATUREFINDER
00418       std::cout << "Feature charge: " << f.getCharge() << std::endl;
00419       std::cout << "Feature quality in mz: " << f.getQuality( MZ ) << std::endl;
00420 #endif
00421         
00422 #ifdef DEBUG_FEATUREFINDER
00423       // write debug output
00424       CoordinateType rt = f.getRT();
00425       CoordinateType mz = f.getMZ();
00426         
00427       // write feature model
00428       String fname = String( "model" ) + counter_ + "_" + rt + "_" + mz;
00429       std::ofstream file( fname.c_str() );
00430       for ( IndexSetIter it = model_set.begin(); it != model_set.end(); ++it )
00431       {
00432         DPosition<2> pos = DPosition<2>(this->getPeakRt(*it),this->getPeakMz(*it));
00433         if ( final->isContained( pos ) )
00434         {
00435           file << pos[ RT ] << " " << pos[ MZ ] << " " << final->getIntensity( DPosition<2>(this->getPeakRt(*it),this->getPeakMz(*it)) ) << "\n";
00436         }
00437       }
00438       file.close();
00439         
00440       // wrote peaks remaining after model fit
00441       fname = String( "feature" ) + counter_ + "_" + rt + "_" + mz;
00442       std::ofstream file2( fname.c_str() );
00443       for ( IndexSetIter it = model_set.begin(); it != model_set.end(); ++it )
00444       {
00445         DPosition<2> pos = DPosition<2>(this->getPeakRt(*it),this->getPeakMz(*it));
00446         if ( final->isContained( pos ) )
00447         {
00448           file2 << pos[ RT ] << " " << pos[ MZ ] << " " << this->getPeakIntensity( *it ) << "\n";
00449         }
00450       }
00451       file2.close();
00452 #endif                
00453       // Count features
00454       ++counter_;
00455                 
00456       delete final;
00457 
00458       return f;
00459     }
00460             
00461    protected:
00462 
00463     virtual void updateMembers_()
00464     {
00465       algorithm_ = this->param_.getValue( "fit_algorithm" );
00466                 
00467       max_iteration_ = this->param_.getValue("max_iteration");
00468       deltaAbsError_ = this->param_.getValue("deltaAbsError");
00469       deltaRelError_ = this->param_.getValue("deltaRelError");
00470                 
00471       tolerance_stdev_box_ = this->param_.getValue( "tolerance_stdev_bounding_box" );
00472       max_isotope_ = this->param_.getValue("isotope_model:isotope:maximum");
00473                 
00474       interpolation_step_mz_ = this->param_.getValue( "mz:interpolation_step" );
00475       interpolation_step_rt_ = this->param_.getValue( "rt:interpolation_step" );
00476     
00477       iso_stdev_first_ = this->param_.getValue( "isotope_model:stdev:first" );
00478       iso_stdev_last_ = this->param_.getValue( "isotope_model:stdev:last" );
00479       iso_stdev_stepsize_ = this->param_.getValue( "isotope_model:stdev:step" );
00480     
00481       first_mz_model_ = ( Int ) this->param_.getValue( "mz:model_type:first" );
00482       last_mz_model_ = ( Int ) this->param_.getValue( "mz:model_type:last" );
00483     }
00484              
00486     QualityType fitLoop_(const ChargedIndexSet& set, Int& first_mz, Int& last_mz, ProductModel<2>*& final) 
00487     {
00488       // Projection    
00489       doProjectionDim_(set, rt_input_data_, RT, algorithm_);
00490       total_intensity_mz_ = doProjectionDim_(set, mz_input_data_, MZ, algorithm_);
00491             
00492       // Fit rt model
00493               
00494       QualityType quality_rt; 
00495       quality_rt = fitDim_(RT, algorithm_);
00496               
00497       // Fit mz model ... test different charge states and stdevs
00498       QualityType quality_mz = 0.0;
00499       QualityType max_quality_mz = -std::numeric_limits<QualityType>::max();
00500       
00501       std::map<QualityType,ProductModel<2> > model_map;
00502       for ( Real stdev = iso_stdev_first_; stdev <= iso_stdev_last_; stdev += iso_stdev_stepsize_)
00503       {
00504         for (Int mz_fit_type = first_mz; mz_fit_type <= last_mz; ++mz_fit_type)
00505         {
00506           charge_ = mz_fit_type;
00507           isotope_stdev_ = stdev;
00508           quality_mz = fitDim_(MZ, algorithm_);
00509                     
00510           if (quality_mz > max_quality_mz)
00511           {
00512             max_quality_mz = quality_mz;
00513             model_map.insert( std::make_pair( quality_mz, model2D_) ); 
00514           }
00515         }
00516       }
00517               
00518       std::map<QualityType,ProductModel<2> >::iterator it_map = model_map.find(max_quality_mz);
00519       final = new ProductModel<2>((*it_map).second);
00520               
00521       // Compute overall quality
00522       QualityType max_quality = 0.0;
00523       max_quality = evaluate_(set, final, algorithm_);
00524             
00525       return max_quality;
00526     }
00527             
00529     QualityType evaluate_(const IndexSet& set, ProductModel<2>*& final, String algorithm)
00530     {
00531       QualityType quality = 1.0;
00532               
00533       // Calculate the pearson correlation coefficient for the values in [begin_a, end_a) and [begin_b, end_b)
00534       if (algorithm!="")
00535       {
00536         std::vector<Real> real_data;
00537         real_data.reserve(set.size());
00538         std::vector<Real> model_data;
00539         model_data.reserve(set.size());
00540           
00541         for (IndexSet::iterator it=set.begin(); it != set.end(); ++it)
00542         {
00543           real_data.push_back(this->getPeakIntensity(*it));
00544           model_data.push_back(final->getIntensity(DPosition<2>(this->getPeakRt(*it),this->getPeakMz(*it))));
00545         }
00546                     
00547         quality = Math::pearsonCorrelationCoefficient(real_data.begin(), real_data.end(), model_data.begin(), model_data.end());
00548       }
00549               
00550       if (isnan(quality)) quality = -1.0; 
00551     
00552       return quality;
00553     }
00554             
00556     QualityType fitDim_(Int dim, String algorithm)  
00557     {
00558       QualityType quality;
00559       Param param;
00560       Fitter1D* fitter;
00561       InterpolationModel* model = 0;
00562               
00563       if (dim==RT) // RT dimension
00564       {
00565         if (algorithm=="simplest") // Fit with BiGauss
00566         {
00567           param.setValue( "tolerance_stdev_bounding_box", tolerance_stdev_box_);
00568           param.setValue( "statistics:mean", rt_stat_.mean() );
00569           param.setValue( "statistics:variance", rt_stat_.variance() );
00570           param.setValue( "statistics:variance1", rt_stat_.variance1() );
00571           param.setValue( "statistics:variance2", rt_stat_.variance2() );
00572           param.setValue( "interpolation_step", interpolation_step_rt_ );
00573                     
00574           fitter = Factory<Fitter1D >::create("BiGaussFitter1D");
00575         }
00576         else // Fit with EMG (LM optimization)
00577         {
00578           param.setValue( "tolerance_stdev_bounding_box", tolerance_stdev_box_);
00579           param.setValue( "statistics:mean", rt_stat_.mean() );
00580           param.setValue( "statistics:variance", rt_stat_.variance() );
00581           param.setValue( "interpolation_step", interpolation_step_rt_ );
00582           param.setValue( "max_iteration", max_iteration_);
00583           param.setValue( "deltaAbsError", deltaAbsError_);
00584           param.setValue( "deltaRelError", deltaRelError_);
00585                       
00586           fitter = Factory<Fitter1D >::create("EmgFitter1D");
00587         }
00588 
00589         // Set parameter for fitter                
00590         fitter->setParameters( param );
00591       
00592         // Construct model for rt
00593         quality = fitter->fit1d(rt_input_data_, model);
00594       }
00595       else // MZ dimension
00596       {
00597         param.setValue( "tolerance_stdev_bounding_box", tolerance_stdev_box_);
00598         param.setValue( "statistics:mean", mz_stat_.mean() );
00599         param.setValue( "statistics:variance", mz_stat_.variance() );
00600         param.setValue( "interpolation_step", interpolation_step_mz_ );
00601                 
00602         if ( monoisotopic_mz_ != 0 ) // monnoisotopic mz is known
00603         {
00604           param.setValue( "statistics:mean",monoisotopic_mz_ );
00605         }
00606                 
00607         if (charge_ != 0) // charge is not zero
00608         {
00609           param.setValue( "charge", charge_ );
00610           param.setValue( "isotope:stdev", isotope_stdev_ );
00611           param.setValue( "isotope:maximum", max_isotope_ );
00612           fitter = Factory<Fitter1D >::create("IsotopeFitter1D");
00613         }
00614         else // charge is zero
00615         {
00616           if (algorithm=="simplest") // Fit with GaussModel
00617           {
00618             param.setValue( "charge", charge_ );
00619             param.setValue( "isotope:stdev", isotope_stdev_ );
00620             param.setValue( "isotope:maximum", max_isotope_ );
00621             fitter = Factory<Fitter1D >::create("IsotopeFitter1D");
00622           }
00623           else // Fit with LmaGaussModel
00624           {
00625             param.setValue( "max_iteration", max_iteration_);
00626             param.setValue( "deltaAbsError", deltaAbsError_);
00627             param.setValue( "deltaRelError", deltaRelError_);
00628                  
00629             fitter = Factory<Fitter1D >::create("LmaGaussFitter1D");
00630           }
00631         }
00632 
00633         // Set parameter for fitter                
00634         fitter->setParameters( param );
00635                   
00636         // Construct model for mz
00637         quality = fitter->fit1d(mz_input_data_, model);
00638       }
00639 
00640       // Check quality
00641       if (isnan(quality) ) quality = -1.0;
00642               
00643       // Set model in 2D-model            
00644       model2D_.setModel(dim, model);
00645               
00646       delete(fitter);
00647               
00648       return quality;
00649     }
00650            
00652     CoordinateType doProjectionDim_(const ChargedIndexSet& index_set, RawDataArrayType& set, Int dim, String algorithm)
00653     {
00654       CoordinateType total_intensity = 0;
00655          
00656       if (algorithm!="")
00657       {
00658         std::map<CoordinateType,CoordinateType> data_map;
00659                  
00660         if (dim==MZ)
00661         {
00662           for ( IndexSet::const_iterator it = index_set.begin(); it != index_set.end(); ++it)
00663           {   
00664             data_map[this->getPeakMz(*it)] += this->getPeakIntensity(*it);
00665           }  
00666         }
00667         else
00668         {
00669           for ( IndexSet::const_iterator it = index_set.begin(); it != index_set.end(); ++it)
00670           { 
00671             data_map[this->getPeakRt(*it)] += this->getPeakIntensity(*it);
00672           }  
00673         }
00674                  
00675         // Copy the raw data into a DPeakArray<DRawDataPoint<D> >
00676         set.resize(data_map.size());
00677         std::map<CoordinateType,CoordinateType>::iterator it;
00678         UInt i=0;
00679         for ( it=data_map.begin() ; it != data_map.end(); ++it, ++i )
00680         {
00681           set[i].setPosition((*it).first);
00682           set[i].setIntensity((*it).second);
00683           total_intensity += (*it).second;
00684         }   
00685                 
00686         data_map.clear();
00687       }            
00688               
00689       return total_intensity;
00690     }
00691             
00693     ProductModel<2> model2D_;
00695     Math::BasicStatistics<> mz_stat_;
00697     Math::AsymmetricStatistics<> rt_stat_;
00699     RawDataArrayType mz_input_data_;
00701     RawDataArrayType rt_input_data_;
00703     CoordinateType tolerance_stdev_box_;
00705     CoordinateType monoisotopic_mz_;
00707     UInt counter_;
00709     CoordinateType interpolation_step_mz_;
00711     CoordinateType interpolation_step_rt_;
00713     Int max_isotope_;
00715     CoordinateType iso_stdev_first_;
00717     CoordinateType iso_stdev_last_;
00719     CoordinateType iso_stdev_stepsize_;
00721     Int first_mz_model_;      
00723     Int last_mz_model_;
00725     ChargeType charge_;
00727     CoordinateType isotope_stdev_;
00729     String algorithm_;
00731     Int max_iteration_;
00733 
00734     CoordinateType deltaAbsError_;
00736     CoordinateType deltaRelError_;
00738     Math::BasicStatistics<> basic_stat_;
00740     CoordinateType total_intensity_mz_;
00741 
00742    private:
00743 
00745     ModelFitter();
00747     ModelFitter& operator=(const ModelFitter&);
00749     ModelFitter(const ModelFitter&);
00750 
00751   };
00752  
00753 }
00754 #endif // OPENMS_TRANSFORMATIONS_FEATUREFINDER_MODELFITTER_H

Generated Tue Apr 1 15:36:36 2008 -- using doxygen 1.5.4 OpenMS / TOPP 1.1