Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages

FeatureDecharger.h (Maintainer: Chris Bielow)

Go to the documentation of this file.
00001 // -*- Mode: C++; tab-width: 2; -*-
00002 // vi: set ts=2:
00003 //
00004 // --------------------------------------------------------------------------
00005 //                   OpenMS Mass Spectrometry Framework
00006 // --------------------------------------------------------------------------
00007 //  Copyright (C) 2003-2008 -- Oliver Kohlbacher, Knut Reinert
00008 //
00009 //  This library is free software; you can redistribute it and/or
00010 //  modify it under the terms of the GNU Lesser General Public
00011 //  License as published by the Free Software Foundation; either
00012 //  version 2.1 of the License, or (at your option) any later version.
00013 //
00014 //  This library is distributed in the hope that it will be useful,
00015 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 //  Lesser General Public License for more details.
00018 //
00019 //  You should have received a copy of the GNU Lesser General Public
00020 //  License along with this library; if not, write to the Free Software
00021 //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 //
00023 // --------------------------------------------------------------------------
00024 // $Maintainer: Chris Bielow $
00025 // --------------------------------------------------------------------------
00026 
00027 #ifndef OPENMS_ANALYSIS_DECHARGING_FEATUREDECHARGER_H
00028 #define OPENMS_ANALYSIS_DECHARGING_FEATUREDECHARGER_H
00029 
// OpenMS
#include <OpenMS/CONCEPT/Exception.h>
#include <OpenMS/KERNEL/Feature.h>
#include <OpenMS/KERNEL/FeatureMap.h>
#include <OpenMS/COMPARISON/CLUSTERING/HierarchicalClustering.h>
#include <OpenMS/DATASTRUCTURES/DPosition.h>
#include <OpenMS/DATASTRUCTURES/DefaultParamHandler.h>

// STL
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>
00040 
00041 namespace OpenMS
00042 {
00061   class FeatureDecharger : public DefaultParamHandler
00062   {
00063     public:
00064     
00065       typedef FeatureMap<> FeatureMapType;
00066       typedef Feature FeatureType;
00067       typedef DPosition<2> ClusterPointType;
00068       using DefaultParamHandler::param_;
00069       using DefaultParamHandler::defaults_;
00070           
00074 
00075       FeatureDecharger()
00076       : DefaultParamHandler("FeatureDecharger")
00077       {
00078         defaults_.setValue("cluster_rt_mz_relation", 100.0, "Multiplication factor for m/z coordinates used to balance the dimension differences of RT and m/z.", false);
00079         
00080         HierarchicalClustering<> hc;
00081         defaults_.insert("hierarchical_clustering:",hc.getParameters());
00082         
00083         defaultsToParam_();
00084       }
00085 
00087       inline FeatureDecharger(const FeatureDecharger& source)
00088           : DefaultParamHandler(source),
00089           featuremap_dc_(source.featuremap_dc_)
00090       {}
00091 
00093       inline FeatureDecharger& operator=(const FeatureDecharger& source)
00094       {
00095         if (&source==this)
00096         {
00097           return *this;
00098         }
00099 
00100         DefaultParamHandler::operator=(source);
00101         featuremap_dc_ = source.featuremap_dc_;
00102 
00103         return *this;          
00104       };
00105 
00106         
00107 
00109       virtual ~FeatureDecharger() 
00110       {};
00112 
00116 
00118       const FeatureMapType& getFeatureMap() const
00119       {
00120         return featuremap_dc_; 
00121       }
00122 
00124       
00126       void compute(FeatureMapType &map) 
00127       {
00128 
00129         std::vector<ClusterPointType> feature_stripped;
00130 
00131         // remove charge
00132         
00133         double RT_MZ_relation = param_.getValue("cluster_rt_mz_relation");
00134         
00135         //std::cout << "creating initial clusters ... ";
00136         for (FeatureMapType::iterator iter = map.begin(); iter!=map.end(); ++iter)
00137         {
00138           ClusterPointType point;
00139           double mass = iter->getMZ() * iter->getCharge() - iter->getCharge();   //TODO: rectify by real Proton weight (1.07...)
00140           iter->setMZ(mass);
00141           point.setX( iter->getRT() );
00142           point.setY( mass*RT_MZ_relation );
00143           //std::cout << "x:y " << point.getX() << " " << point.getY() << "\n";
00144           feature_stripped.push_back(point);
00145         }
00146 
00147         // cluster
00148         
00149         HierarchicalClustering<ClusterPointType> hierclust;
00150         Param mod_param = param_.copy("hierarchical_clustering:",true);
00151         if (mod_param.empty()) 
00152         {
00153           std::cout << "HierarchicalClustering: param is emtpy. Using defaults!\n";
00154         }
00155         else
00156         {
00157           hierclust.setParameters(mod_param);
00158         }
00159         
00160         hierclust.compute(feature_stripped);
00161     
00162         HierarchicalClustering<ClusterPointType>::ClusterIdxVectorType clusters = hierclust.getClusters();
00163 
00164         hierclust.printStatistics(std::cout);
00165         
00166         // combine all features which belong to the same cluster
00167         FeatureType feature;
00168         featuremap_dc_.clear();
00169         featuremap_dc_.assign(clusters.size(), feature);
00170 
00171         uint idx_validCluster = 0;
00172         bool is_bad_cluster = false;
00173         uint bad_clusters = 0;
00174         uint bad_clusters_resolved = 0;                
00175         for (uint i=0; i<clusters.size(); ++i)
00176         {
00177           
00178           double rt_avg = 0;
00179           double m_avg = 0;
00180           double int_sum = 0;
00181           
00182           is_bad_cluster = false;
00183           // 
00184           std::vector < int > charge_variants(clusters[i].size());
00185           //std::cout << "cluster size: " << clusters[i].size() << "\n";
00186           for (uint j=0; j<clusters[i].size() ;++j)
00187           {
00188             rt_avg += map[clusters[i][j]].getRT();
00189             m_avg += map[clusters[i][j]].getMZ();
00190             int_sum += map[clusters[i][j]].getIntensity();
00191             
00192             //std::cout << "  adding " << map[clusters[i][j]].getRT() << "   " << map[clusters[i][j]].getMZ() << "   "<< map[clusters[i][j]].getIntensity() << "\n";
00193             
00194             // store charge of current feature
00195             charge_variants[j] = map[clusters[i][j]].getCharge();
00196           }
00197           
00198           // check if charges within current cluster are unique
00199           std::sort(charge_variants.begin(), charge_variants.end());
00200           for (uint j = 1; j<clusters[i].size() ;++j)
00201           {
00202             // check if a charge variant appears more than once
00203             if (charge_variants[j-1]==charge_variants[j])
00204             {
00205               is_bad_cluster = true;
00206               ++bad_clusters;
00207               // if the cluster has only one charge variant ...
00208               if (charge_variants[0]==charge_variants[clusters[i].size()-1])
00209               {
00210                 ++bad_clusters_resolved;
00211                 // ... we can split it
00212                 FeatureType feature;
00213                 featuremap_dc_.insert(featuremap_dc_.end(), clusters[i].size(), feature);
00214                 
00215                 // append single elements of current cluster
00216                 // --> does not work? why
00217                 //std::vector < ClusterPointType > newCluster(1); 
00218                 //clusters.insert(clusters.end(), clusters[i].size(), newCluster);
00219                 
00220                 for (uint new_cl = 0; new_cl<clusters[i].size(); ++new_cl)
00221                 {
00222                   featuremap_dc_[idx_validCluster].setRT (map[clusters[i][new_cl]].getRT() );
00223                   featuremap_dc_[idx_validCluster].setMZ ( map[clusters[i][new_cl]].getMZ() );
00224                   featuremap_dc_[idx_validCluster].setIntensity ( map[clusters[i][new_cl]].getIntensity() );
00225                   featuremap_dc_[idx_validCluster].setCharge ( 0 );        
00226                   ++idx_validCluster;          
00227                 }
00228               }  
00229             }               
00230           }
00231           
00232           if (is_bad_cluster == false)
00233           {
00234             featuremap_dc_[idx_validCluster].setRT (rt_avg / clusters[i].size() );
00235             featuremap_dc_[idx_validCluster].setMZ ( m_avg / clusters[i].size() );
00236             featuremap_dc_[idx_validCluster].setIntensity ( int_sum );
00237             featuremap_dc_[idx_validCluster].setCharge ( 0 );
00238             
00239             //TODO average over quality as well?
00240             //feature.setQuality(0,1); // override default
00241             //feature.setQuality(1,1); // override default
00242             //feature.setOverallQuality(1); // override default
00243             ++idx_validCluster;
00244           }
00245           
00246         }
00247 
00248         // erase all elements past idx_validCluster-1
00249         featuremap_dc_.erase(featuremap_dc_.begin()+idx_validCluster, featuremap_dc_.end());    
00250         
00251   
00252         std::cout << "STATISTICS:\n  #valid cluster (incl. recovered):" << idx_validCluster << "\n  #badCluster:" << bad_clusters << "\n  #Cluster recovered from bad:" << bad_clusters_resolved << "\n";
00253         
00254         featuremap_dc_.updateRanges();
00255         
00256         return;
00257       }
00258 
00259     protected:
00261       FeatureMapType featuremap_dc_;     
00262   };
00263 } // namespace OpenMS
00264 
00265 #endif // OPENMS_ANALYSIS_DECHARGING_FEATUREDECHARGER_H

Generated Tue Apr 1 15:36:34 2008 -- using doxygen 1.5.4 OpenMS / TOPP 1.1