Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages

IDFilter.h (Maintainer: Nico Pfeifer)

Go to the documentation of this file.
00001 // -*- Mode: C++; tab-width: 2; -*-
00002 // vi: set ts=2:
00003 //
00004 // --------------------------------------------------------------------------
00005 //                   OpenMS Mass Spectrometry Framework
00006 // --------------------------------------------------------------------------
00007 //  Copyright (C) 2003-2008 -- Oliver Kohlbacher, Knut Reinert
00008 //
00009 //  This library is free software; you can redistribute it and/or
00010 //  modify it under the terms of the GNU Lesser General Public
00011 //  License as published by the Free Software Foundation; either
00012 //  version 2.1 of the License, or (at your option) any later version.
00013 // 
00014 //  This library is distributed in the hope that it will be useful,
00015 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 //  Lesser General Public License for more details.
00018 //
00019 //  You should have received a copy of the GNU Lesser General Public
00020 //  License along with this library; if not, write to the Free Software
00021 //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 //
00023 // --------------------------------------------------------------------------
00024 // $Maintainer: Nico Pfeifer $
00025 // --------------------------------------------------------------------------
00026 
00027 #ifndef OPENMS_FILTERING_ID_IDFILTER_H
00028 #define OPENMS_FILTERING_ID_IDFILTER_H
00029 
00030 #include <OpenMS/DATASTRUCTURES/String.h>
00031 #include <OpenMS/METADATA/ProteinIdentification.h>
00032 #include <OpenMS/KERNEL/MSExperiment.h>
00033 
00034 #include <vector>
00035 
00036 namespace OpenMS 
00037 {
00048   class IDFilter
00049   {
00050     public:
00051 
00053       IDFilter();
00054       
00056       ~IDFilter();
00057 
00059       template <class IdentificationType>
00060       void filterIdentificationsByThreshold(const IdentificationType& identification, DoubleReal threshold_fraction, IdentificationType& filtered_identification)
00061       {
00062         typedef typename IdentificationType::HitType HitType;
00063         std::vector<HitType> temp_hits;
00064         std::vector<HitType> filtered_hits;
00065 
00066         filtered_identification = identification;
00067         filtered_identification.setHits(std::vector<HitType>());
00068         
00069         for(typename std::vector<HitType>::const_iterator it = identification.getHits().begin();
00070           it != identification.getHits().end(); 
00071           ++it)
00072         {
00073           if (it->getScore() >= threshold_fraction * identification.getSignificanceThreshold())
00074           { 
00075             filtered_hits.push_back(*it);
00076           } 
00077         }
00078 
00079         if (filtered_hits.size() > 0)
00080         {
00081           filtered_identification.setHits(filtered_hits);                                           
00082           filtered_identification.assignRanks();                                                  
00083         }
00084       }
00085       
00093       template <class IdentificationType>
00094       void filterIdentificationsByScore(const IdentificationType& identification, DoubleReal threshold_score, IdentificationType& filtered_identification)
00095       {
00096         typedef typename IdentificationType::HitType HitType;
00097         std::vector<HitType> temp_hits;
00098         std::vector<HitType> filtered_hits;
00099 
00100         filtered_identification = identification;
00101         filtered_identification.setHits(std::vector<HitType>());
00102         
00103         for(typename std::vector<HitType>::const_iterator it = identification.getHits().begin();
00104           it != identification.getHits().end(); 
00105           ++it)
00106         {
00107           if (identification.isHigherScoreBetter())
00108           {
00109             if (it->getScore() >= threshold_score)
00110             { 
00111               filtered_hits.push_back(*it);
00112             }
00113           }
00114           else
00115           {
00116             if (it->getScore() <= threshold_score)
00117             { 
00118               filtered_hits.push_back(*it);
00119             }
00120           } 
00121         }
00122 
00123         if (filtered_hits.size() > 0)
00124         {
00125           filtered_identification.setHits(filtered_hits);                                           
00126           filtered_identification.assignRanks();                                                  
00127         }
00128       }
00129       
00136       template <class IdentificationType>
00137       void filterIdentificationsByBestNHits(const IdentificationType& identification, UInt n, IdentificationType& filtered_identification)
00138       {
00139         typedef typename IdentificationType::HitType HitType;
00140         std::vector<HitType> temp_hits;
00141         std::vector<HitType> filtered_hits;
00142         IdentificationType temp_identification;
00143         UInt count = 0;
00144         
00145         temp_identification = identification;
00146         filtered_identification = identification;
00147         filtered_identification.setHits(std::vector<HitType>());
00148         
00149         temp_identification.sort();
00150         
00151         typename std::vector<HitType>::const_iterator it = temp_identification.getHits().begin();
00152         while(it != temp_identification.getHits().end()
00153               && count < n)
00154         {
00155           filtered_hits.push_back(*it);
00156           ++it;
00157           ++count;  
00158         }
00159 
00160         if (filtered_hits.size() > 0)
00161         {
00162           filtered_identification.setHits(filtered_hits);                                           
00163           filtered_identification.assignRanks();                                                  
00164         }
00165       }
00166       
00168       void filterIdentificationsByBestHits(const PeptideIdentification& identification, PeptideIdentification& filtered_identification, bool strict = false);
00169 
00171       void filterIdentificationsByProteins(const PeptideIdentification& identification, std::vector< std::pair<String, String> > proteins, PeptideIdentification& filtered_identification);
00172 
00174       void filterIdentificationsByProteins(const ProteinIdentification& identification, std::vector< std::pair<String, String> > proteins, ProteinIdentification& filtered_identification);
00175                                                             
00177       void filterIdentificationsByExclusionPeptides(const PeptideIdentification& identification, std::vector<String> peptides, PeptideIdentification& filtered_identification);
00178                                                             
00188       void filterIdentificationsByRTPValues(const PeptideIdentification& identification, PeptideIdentification&         filtered_identification, DoubleReal p_value = 0.05);
00189 
00191       template <class PeakT>
00192       void filterIdentificationsByThresholds(MSExperiment< PeakT >& experiment, DoubleReal peptide_threshold_fraction, DoubleReal protein_threshold_fraction)
00193       {
00194         //filter protein hits
00195         ProteinIdentification temp_protein_identification;        
00196         std::vector<ProteinIdentification> filtered_protein_identifications;
00197           
00198         for(UInt j = 0; j < experiment.getProteinIdentifications().size(); j++)
00199         {
00200           filterIdentificationsByThreshold(experiment.getProteinIdentifications()[j], protein_threshold_fraction, temp_protein_identification);
00201           if (!temp_protein_identification.getHits().empty())
00202           {
00203             filtered_protein_identifications.push_back(temp_protein_identification);
00204           }
00205         }
00206         experiment.setProteinIdentifications(filtered_protein_identifications);
00207         
00208         //filter peptide hits
00209         PeptideIdentification temp_identification;
00210         std::vector<PeptideIdentification> filtered_identifications;
00211         
00212         for(UInt i = 0; i < experiment.size(); i++)
00213         {
00214           for(UInt j = 0; j < experiment[i].getPeptideIdentifications().size(); j++)
00215           {
00216             filterIdentificationsByThreshold(experiment[i].getPeptideIdentifications()[j], peptide_threshold_fraction, temp_identification);
00217             if (!temp_identification.getHits().empty())
00218             {
00219               filtered_identifications.push_back(temp_identification);
00220             }
00221           }
00222           experiment[i].setPeptideIdentifications(filtered_identifications);
00223           filtered_identifications.clear();         
00224         }       
00225       }
00226                                                                     
00228       template <class PeakT>
00229       void filterIdentificationsByScores(MSExperiment< PeakT >& experiment, DoubleReal peptide_threshold_score, DoubleReal protein_threshold_score)
00230       {
00231         //filter protein hits
00232         ProteinIdentification temp_protein_identification;        
00233         std::vector<ProteinIdentification> filtered_protein_identifications;
00234           
00235         for(UInt j = 0; j < experiment.getProteinIdentifications().size(); j++)
00236         {
00237           filterIdentificationsByScore(experiment.getProteinIdentifications()[j], protein_threshold_score, temp_protein_identification);
00238           if (!temp_protein_identification.getHits().empty())
00239           {
00240             filtered_protein_identifications.push_back(temp_protein_identification);
00241           }
00242         }
00243         experiment.setProteinIdentifications(filtered_protein_identifications);
00244         
00245         //filter peptide hits
00246         PeptideIdentification temp_identification;
00247         std::vector<PeptideIdentification> filtered_identifications;
00248         
00249         for(UInt i = 0; i < experiment.size(); i++)
00250         {
00251           for(UInt j = 0; j < experiment[i].getPeptideIdentifications().size(); j++)
00252           {
00253             filterIdentificationsByScore(experiment[i].getPeptideIdentifications()[j], peptide_threshold_score, temp_identification);
00254             if (!temp_identification.getHits().empty())
00255             {
00256               filtered_identifications.push_back(temp_identification);
00257             }
00258           }
00259           experiment[i].setPeptideIdentifications(filtered_identifications);
00260           filtered_identifications.clear();         
00261         }       
00262       }
00263                                                                     
00265       template <class PeakT>
00266       void filterIdentificationsByBestNHits(MSExperiment< PeakT >& experiment, UInt n)
00267       {
00268         //filter protein hits
00269         ProteinIdentification temp_protein_identification;        
00270         std::vector<ProteinIdentification> filtered_protein_identifications;
00271           
00272         for(UInt j = 0; j < experiment.getProteinIdentifications().size(); j++)
00273         {
00274           filterIdentificationsByBestNHits(experiment.getProteinIdentifications()[j], n, temp_protein_identification);
00275           if (!temp_protein_identification.getHits().empty())
00276           {
00277             filtered_protein_identifications.push_back(temp_protein_identification);
00278           }
00279         }
00280         experiment.setProteinIdentifications(filtered_protein_identifications);
00281         
00282         //filter peptide hits
00283         PeptideIdentification temp_identification;
00284         std::vector<PeptideIdentification> filtered_identifications;
00285         
00286         for(UInt i = 0; i < experiment.size(); i++)
00287         {
00288           for(UInt j = 0; j < experiment[i].getPeptideIdentifications().size(); j++)
00289           {
00290             filterIdentificationsByBestNHits(experiment[i].getPeptideIdentifications()[j], n, temp_identification);
00291             if (!temp_identification.getHits().empty())
00292             {
00293               filtered_identifications.push_back(temp_identification);
00294             }
00295           }
00296           experiment[i].setPeptideIdentifications(filtered_identifications);
00297           filtered_identifications.clear();         
00298         }       
00299       }
00300                                                                     
00302       template <class PeakT>
00303       void filterIdentificationsByProteins(MSExperiment< PeakT >& experiment, 
00304                                            std::vector< std::pair<String, String> >proteins)
00305       {
00306         std::vector<PeptideIdentification> temp_identifications;
00307         std::vector<PeptideIdentification> filtered_identifications;
00308         PeptideIdentification temp_identification;
00309 
00310         for(UInt i = 0; i < experiment.size(); i++)
00311         {
00312           if (experiment[i].getMSLevel() == 2)
00313           {
00314             temp_identifications = experiment[i].getPeptideIdentifications();
00315             for(UInt j = 0; j < temp_identifications.size(); j++)
00316             {
00317               filterIdentificationsByProteins(temp_identifications[j], proteins, temp_identification);
00318               if (!temp_identification.getHits().empty())
00319               {
00320                 filtered_identifications.push_back(temp_identification);
00321               }
00322             }
00323             experiment[i].setPeptideIdentifications(filtered_identifications);          
00324             filtered_identifications.clear();         
00325           }
00326         }       
00327       }
00328   };
00329  
00330 } // namespace OpenMS
00331 
00332 #endif // OPENMS_FILTERING_ID_IDFILTER_H

Generated Tue Apr 1 15:36:35 2008 -- using doxygen 1.5.4 OpenMS / TOPP 1.1