00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #ifndef OPENMS_FILTERING_ID_IDFILTER_H
00028 #define OPENMS_FILTERING_ID_IDFILTER_H
00029
00030 #include <OpenMS/DATASTRUCTURES/String.h>
00031 #include <OpenMS/METADATA/ProteinIdentification.h>
00032 #include <OpenMS/KERNEL/MSExperiment.h>
00033
00034 #include <vector>
00035
00036 namespace OpenMS
00037 {
00048 class IDFilter
00049 {
00050 public:
00051
00053 IDFilter();
00054
00056 ~IDFilter();
00057
00059 template <class IdentificationType>
00060 void filterIdentificationsByThreshold(const IdentificationType& identification, DoubleReal threshold_fraction, IdentificationType& filtered_identification)
00061 {
00062 typedef typename IdentificationType::HitType HitType;
00063 std::vector<HitType> temp_hits;
00064 std::vector<HitType> filtered_hits;
00065
00066 filtered_identification = identification;
00067 filtered_identification.setHits(std::vector<HitType>());
00068
00069 for(typename std::vector<HitType>::const_iterator it = identification.getHits().begin();
00070 it != identification.getHits().end();
00071 ++it)
00072 {
00073 if (it->getScore() >= threshold_fraction * identification.getSignificanceThreshold())
00074 {
00075 filtered_hits.push_back(*it);
00076 }
00077 }
00078
00079 if (filtered_hits.size() > 0)
00080 {
00081 filtered_identification.setHits(filtered_hits);
00082 filtered_identification.assignRanks();
00083 }
00084 }
00085
00093 template <class IdentificationType>
00094 void filterIdentificationsByScore(const IdentificationType& identification, DoubleReal threshold_score, IdentificationType& filtered_identification)
00095 {
00096 typedef typename IdentificationType::HitType HitType;
00097 std::vector<HitType> temp_hits;
00098 std::vector<HitType> filtered_hits;
00099
00100 filtered_identification = identification;
00101 filtered_identification.setHits(std::vector<HitType>());
00102
00103 for(typename std::vector<HitType>::const_iterator it = identification.getHits().begin();
00104 it != identification.getHits().end();
00105 ++it)
00106 {
00107 if (identification.isHigherScoreBetter())
00108 {
00109 if (it->getScore() >= threshold_score)
00110 {
00111 filtered_hits.push_back(*it);
00112 }
00113 }
00114 else
00115 {
00116 if (it->getScore() <= threshold_score)
00117 {
00118 filtered_hits.push_back(*it);
00119 }
00120 }
00121 }
00122
00123 if (filtered_hits.size() > 0)
00124 {
00125 filtered_identification.setHits(filtered_hits);
00126 filtered_identification.assignRanks();
00127 }
00128 }
00129
00136 template <class IdentificationType>
00137 void filterIdentificationsByBestNHits(const IdentificationType& identification, UInt n, IdentificationType& filtered_identification)
00138 {
00139 typedef typename IdentificationType::HitType HitType;
00140 std::vector<HitType> temp_hits;
00141 std::vector<HitType> filtered_hits;
00142 IdentificationType temp_identification;
00143 UInt count = 0;
00144
00145 temp_identification = identification;
00146 filtered_identification = identification;
00147 filtered_identification.setHits(std::vector<HitType>());
00148
00149 temp_identification.sort();
00150
00151 typename std::vector<HitType>::const_iterator it = temp_identification.getHits().begin();
00152 while(it != temp_identification.getHits().end()
00153 && count < n)
00154 {
00155 filtered_hits.push_back(*it);
00156 ++it;
00157 ++count;
00158 }
00159
00160 if (filtered_hits.size() > 0)
00161 {
00162 filtered_identification.setHits(filtered_hits);
00163 filtered_identification.assignRanks();
00164 }
00165 }
00166
00168 void filterIdentificationsByBestHits(const PeptideIdentification& identification, PeptideIdentification& filtered_identification, bool strict = false);
00169
00171 void filterIdentificationsByProteins(const PeptideIdentification& identification, std::vector< std::pair<String, String> > proteins, PeptideIdentification& filtered_identification);
00172
00174 void filterIdentificationsByProteins(const ProteinIdentification& identification, std::vector< std::pair<String, String> > proteins, ProteinIdentification& filtered_identification);
00175
00177 void filterIdentificationsByExclusionPeptides(const PeptideIdentification& identification, std::vector<String> peptides, PeptideIdentification& filtered_identification);
00178
00188 void filterIdentificationsByRTPValues(const PeptideIdentification& identification, PeptideIdentification& filtered_identification, DoubleReal p_value = 0.05);
00189
00191 template <class PeakT>
00192 void filterIdentificationsByThresholds(MSExperiment< PeakT >& experiment, DoubleReal peptide_threshold_fraction, DoubleReal protein_threshold_fraction)
00193 {
00194
00195 ProteinIdentification temp_protein_identification;
00196 std::vector<ProteinIdentification> filtered_protein_identifications;
00197
00198 for(UInt j = 0; j < experiment.getProteinIdentifications().size(); j++)
00199 {
00200 filterIdentificationsByThreshold(experiment.getProteinIdentifications()[j], protein_threshold_fraction, temp_protein_identification);
00201 if (!temp_protein_identification.getHits().empty())
00202 {
00203 filtered_protein_identifications.push_back(temp_protein_identification);
00204 }
00205 }
00206 experiment.setProteinIdentifications(filtered_protein_identifications);
00207
00208
00209 PeptideIdentification temp_identification;
00210 std::vector<PeptideIdentification> filtered_identifications;
00211
00212 for(UInt i = 0; i < experiment.size(); i++)
00213 {
00214 for(UInt j = 0; j < experiment[i].getPeptideIdentifications().size(); j++)
00215 {
00216 filterIdentificationsByThreshold(experiment[i].getPeptideIdentifications()[j], peptide_threshold_fraction, temp_identification);
00217 if (!temp_identification.getHits().empty())
00218 {
00219 filtered_identifications.push_back(temp_identification);
00220 }
00221 }
00222 experiment[i].setPeptideIdentifications(filtered_identifications);
00223 filtered_identifications.clear();
00224 }
00225 }
00226
00228 template <class PeakT>
00229 void filterIdentificationsByScores(MSExperiment< PeakT >& experiment, DoubleReal peptide_threshold_score, DoubleReal protein_threshold_score)
00230 {
00231
00232 ProteinIdentification temp_protein_identification;
00233 std::vector<ProteinIdentification> filtered_protein_identifications;
00234
00235 for(UInt j = 0; j < experiment.getProteinIdentifications().size(); j++)
00236 {
00237 filterIdentificationsByScore(experiment.getProteinIdentifications()[j], protein_threshold_score, temp_protein_identification);
00238 if (!temp_protein_identification.getHits().empty())
00239 {
00240 filtered_protein_identifications.push_back(temp_protein_identification);
00241 }
00242 }
00243 experiment.setProteinIdentifications(filtered_protein_identifications);
00244
00245
00246 PeptideIdentification temp_identification;
00247 std::vector<PeptideIdentification> filtered_identifications;
00248
00249 for(UInt i = 0; i < experiment.size(); i++)
00250 {
00251 for(UInt j = 0; j < experiment[i].getPeptideIdentifications().size(); j++)
00252 {
00253 filterIdentificationsByScore(experiment[i].getPeptideIdentifications()[j], peptide_threshold_score, temp_identification);
00254 if (!temp_identification.getHits().empty())
00255 {
00256 filtered_identifications.push_back(temp_identification);
00257 }
00258 }
00259 experiment[i].setPeptideIdentifications(filtered_identifications);
00260 filtered_identifications.clear();
00261 }
00262 }
00263
00265 template <class PeakT>
00266 void filterIdentificationsByBestNHits(MSExperiment< PeakT >& experiment, UInt n)
00267 {
00268
00269 ProteinIdentification temp_protein_identification;
00270 std::vector<ProteinIdentification> filtered_protein_identifications;
00271
00272 for(UInt j = 0; j < experiment.getProteinIdentifications().size(); j++)
00273 {
00274 filterIdentificationsByBestNHits(experiment.getProteinIdentifications()[j], n, temp_protein_identification);
00275 if (!temp_protein_identification.getHits().empty())
00276 {
00277 filtered_protein_identifications.push_back(temp_protein_identification);
00278 }
00279 }
00280 experiment.setProteinIdentifications(filtered_protein_identifications);
00281
00282
00283 PeptideIdentification temp_identification;
00284 std::vector<PeptideIdentification> filtered_identifications;
00285
00286 for(UInt i = 0; i < experiment.size(); i++)
00287 {
00288 for(UInt j = 0; j < experiment[i].getPeptideIdentifications().size(); j++)
00289 {
00290 filterIdentificationsByBestNHits(experiment[i].getPeptideIdentifications()[j], n, temp_identification);
00291 if (!temp_identification.getHits().empty())
00292 {
00293 filtered_identifications.push_back(temp_identification);
00294 }
00295 }
00296 experiment[i].setPeptideIdentifications(filtered_identifications);
00297 filtered_identifications.clear();
00298 }
00299 }
00300
00302 template <class PeakT>
00303 void filterIdentificationsByProteins(MSExperiment< PeakT >& experiment,
00304 std::vector< std::pair<String, String> >proteins)
00305 {
00306 std::vector<PeptideIdentification> temp_identifications;
00307 std::vector<PeptideIdentification> filtered_identifications;
00308 PeptideIdentification temp_identification;
00309
00310 for(UInt i = 0; i < experiment.size(); i++)
00311 {
00312 if (experiment[i].getMSLevel() == 2)
00313 {
00314 temp_identifications = experiment[i].getPeptideIdentifications();
00315 for(UInt j = 0; j < temp_identifications.size(); j++)
00316 {
00317 filterIdentificationsByProteins(temp_identifications[j], proteins, temp_identification);
00318 if (!temp_identification.getHits().empty())
00319 {
00320 filtered_identifications.push_back(temp_identification);
00321 }
00322 }
00323 experiment[i].setPeptideIdentifications(filtered_identifications);
00324 filtered_identifications.clear();
00325 }
00326 }
00327 }
00328 };
00329
00330 }
00331
00332 #endif // OPENMS_FILTERING_ID_IDFILTER_H