Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages

MzXMLHandler.h (Maintainer: Marc Sturm)

Go to the documentation of this file.
00001 // -*- Mode: C++; tab-width: 2; -*-
00002 // vi: set ts=2:
00003 //
00004 // --------------------------------------------------------------------------
00005 //                   OpenMS Mass Spectrometry Framework
00006 // --------------------------------------------------------------------------
00007 //  Copyright (C) 2003-2008 -- Oliver Kohlbacher, Knut Reinert
00008 //
00009 //  This library is free software; you can redistribute it and/or
00010 //  modify it under the terms of the GNU Lesser General Public
00011 //  License as published by the Free Software Foundation; either
00012 //  version 2.1 of the License, or (at your option) any later version.
00013 //
00014 //  This library is distributed in the hope that it will be useful,
00015 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 //  Lesser General Public License for more details.
00018 //
00019 //  You should have received a copy of the GNU Lesser General Public
00020 //  License along with this library; if not, write to the Free Software
00021 //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 //
00023 // --------------------------------------------------------------------------
00024 // $Maintainer: Marc Sturm $
00025 // --------------------------------------------------------------------------
00026 
00027 #ifndef OPENMS_FORMAT_HANDLERS_MZXMLHANDLER_H
00028 #define OPENMS_FORMAT_HANDLERS_MZXMLHANDLER_H
00029 
00030 #include <OpenMS/CONCEPT/ProgressLogger.h>
00031 #include <OpenMS/FORMAT/Base64.h>
00032 #include <OpenMS/FORMAT/PeakFileOptions.h>
00033 #include <OpenMS/FORMAT/HANDLERS/XMLHandler.h>
00034 #include <OpenMS/DATASTRUCTURES/String.h>
00035 #include <OpenMS/KERNEL/MSExperiment.h>
00036 
00037 #include <stack>
00038 
00039 namespace OpenMS
00040 {
00041   class MetaInfoInterface;
00042   
00043   namespace Internal
00044   {
00053     template <typename MapType>
00054     class MzXMLHandler
00055       : public XMLHandler
00056     {
00057       public:
00060 
00061         MzXMLHandler(MapType& exp, const String& filename, const String& version, ProgressLogger& logger)
00062         : XMLHandler(filename,version),
00063           exp_(&exp), 
00064           cexp_(0),
00065           decoder_(),
00066           peak_count_(0),
00067           char_rest_(),
00068           skip_spectrum_(false),
00069           spec_write_counter_(1),
00070           logger_(logger)
00071         {
00072           cv_terms_.resize(6);
00073           //Polarity
00074           String("any;+;-").split(';',cv_terms_[0]);
00075           //Scan type
00076           String(";zoom;Full").split(';',cv_terms_[1]);
00077           //Ionization method
00078           String(";ESI;EI;CI;FAB;TSP;MALDI;FD;FI;PD;SI;TI;API;ISI;CID;CAD;HN;APCI;APPI;ICP").split(';',cv_terms_[2]);
00079           //Mass analyzer
00080           String(";Quadrupole;Quadrupole Ion Trap;;;TOF;Magnetic Sector;FT-ICR;").split(';',cv_terms_[3]);
00081           //Detector
00082           String(";EMT;Daly;;Faraday Cup;;;;Channeltron").split(';',cv_terms_[4]);
00083           //Resolution method
00084           String(";FWHM;TenPercentValley;Baseline").split(';',cv_terms_[5]);
00085         }
00086   
00088         MzXMLHandler(const MapType& exp, const String& filename, const String& version, const ProgressLogger& logger)
00089         : XMLHandler(filename,version),
00090           exp_(0), 
00091           cexp_(&exp),
00092           decoder_(),
00093           peak_count_(0),
00094           char_rest_(),
00095           skip_spectrum_(false),
00096           spec_write_counter_(1),
00097           logger_(logger)
00098         {
00099           cv_terms_.resize(6);
00100           //Polarity
00101           String("any;+;-").split(';',cv_terms_[0]);
00102           //Scan type
00103           String(";zoom;Full").split(';',cv_terms_[1]);
00104           //Ionization method
00105           String(";ESI;EI;CI;FAB;TSP;MALDI;FD;FI;PD;SI;TI;API;ISI;CID;CAD;HN;APCI;APPI;ICP").split(';',cv_terms_[2]);
00106           //Mass analyzer
00107           String(";Quadrupole;Quadrupole Ion Trap;;;TOF;Magnetic Sector;FT-ICR;").split(';',cv_terms_[3]);
00108           //Detector
00109           String(";EMT;Daly;;Faraday Cup;;;;Channeltron").split(';',cv_terms_[4]);
00110           //Resolution method
00111           String(";FWHM;TenPercentValley;Baseline").split(';',cv_terms_[5]);
00112         }
00113   
00115         virtual ~MzXMLHandler(){}
00117         
00118         // Docu in base class
00119         virtual void endElement( const XMLCh* const uri, const XMLCh* const local_name, const XMLCh* const qname);
00120         
00121         // Docu in base class
00122         virtual void startElement(const XMLCh* const uri, const XMLCh* const local_name, const XMLCh* const qname, const xercesc::Attributes& attributes);
00123         
00124         // Docu in base class
00125         virtual void characters(const XMLCh* const chars, const unsigned int length);
00126   
00128         void writeTo(std::ostream& os);
00129         
00131         void setOptions(const PeakFileOptions& opt) { options_ = opt; }
00132   
00133       protected:
00134         
00136         typedef typename MapType::PeakType PeakType;
00138         typedef MSSpectrum<PeakType, std::allocator<PeakType> > SpectrumType;        
00139         
00140         typedef typename SpectrumType::Iterator  PeakIterator;
00141         typedef typename SpectrumType::PrecursorPeakType PrecursorPeakType;
00142         
00144         MapType* exp_;
00146         const MapType* cexp_;
00147         
00149         PeakFileOptions options_;
00150         
00153         Base64 decoder_;
00154         UInt peak_count_;
00155         String precision_;
00156         String char_rest_;
00158         
00160         bool skip_spectrum_;
00161         
00163         UInt spec_write_counter_;
00164         
00166         const ProgressLogger& logger_;
00167     
00169         inline void writeUserParam_(std::ostream& os, const MetaInfoInterface& meta, int indent=4, String tag="nameValue")
00170         {
00171           std::vector<String> keys;  // Vector to hold keys to meta info
00172           meta.getKeys(keys);
00173     
00174           for (std::vector<String>::const_iterator it = keys.begin(); it!=keys.end(); ++it)
00175             if ( (*it)[0] != '#')  // internally used meta info start with '#'
00176           {
00177             String name = *it;
00178             os << String(indent,'\t') << "<" << tag << " name=\"";
00179             if (tag=="processingOperation" && name.find('#')!=std::string::npos)
00180             {
00181               std::vector<String> parts;
00182               name.split('#',parts);
00183               os << parts[0] << "\" type=\"" << parts[1];
00184             }
00185             else
00186             {
00187               os << name;
00188             }
00189             os << "\" value=\""
00190                << meta.getMetaValue(*it) << "\"/>\n";
00191           }
00192         }
00193       
00194       private:
00196         MzXMLHandler();
00197     };
00198   
00199     //--------------------------------------------------------------------------------
00200   
00201     template <typename MapType>
00202     void MzXMLHandler<MapType>::startElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname, const xercesc::Attributes& attributes)
00203     {
00204       static const XMLCh* s_value = xercesc::XMLString::transcode("value");
00205       static const XMLCh* s_count = xercesc::XMLString::transcode("scanCount");
00206       static const XMLCh* s_type = xercesc::XMLString::transcode("type");
00207       static const XMLCh* s_name = xercesc::XMLString::transcode("name");
00208       static const XMLCh* s_version = xercesc::XMLString::transcode("version");
00209       static const XMLCh* s_filename = xercesc::XMLString::transcode("fileName");
00210       static const XMLCh* s_filetype = xercesc::XMLString::transcode("fileType");
00211       static const XMLCh* s_filesha1 = xercesc::XMLString::transcode("fileSha1");
00212       static const XMLCh* s_completiontime = xercesc::XMLString::transcode("completionTime");
00213       static const XMLCh* s_precision = xercesc::XMLString::transcode("precision");
00214       static const XMLCh* s_byteorder = xercesc::XMLString::transcode("byteOrder");
00215       static const XMLCh* s_pairorder = xercesc::XMLString::transcode("pairOrder");
00216       static const XMLCh* s_precursorintensity = xercesc::XMLString::transcode("precursorIntensity");
00217       static const XMLCh* s_precursorcharge = xercesc::XMLString::transcode("precursorCharge");
00218       static const XMLCh* s_windowwideness = xercesc::XMLString::transcode("windowWideness");
00219       static const XMLCh* s_mslevel = xercesc::XMLString::transcode("msLevel");
00220       static const XMLCh* s_peakscount = xercesc::XMLString::transcode("peaksCount");
00221       static const XMLCh* s_polarity = xercesc::XMLString::transcode("polarity");
00222       static const XMLCh* s_scantype = xercesc::XMLString::transcode("scanType");
00223       static const XMLCh* s_retentiontime = xercesc::XMLString::transcode("retentionTime");
00224       static const XMLCh* s_collisionenergy = xercesc::XMLString::transcode("collisionEnergy");
00225       static const XMLCh* s_startmz = xercesc::XMLString::transcode("startMz");
00226       static const XMLCh* s_endmz = xercesc::XMLString::transcode("endMz");
00227       static const XMLCh* s_first = xercesc::XMLString::transcode("first");
00228       static const XMLCh* s_last = xercesc::XMLString::transcode("last");
00229       static const XMLCh* s_phone = xercesc::XMLString::transcode("phone");
00230       static const XMLCh* s_email = xercesc::XMLString::transcode("email");
00231       static const XMLCh* s_uri = xercesc::XMLString::transcode("URI");
00232       static const XMLCh* s_intensitycutoff = xercesc::XMLString::transcode("intensityCutoff");
00233       static const XMLCh* s_centroided = xercesc::XMLString::transcode("centroided");
00234       static const XMLCh* s_deisotoped = xercesc::XMLString::transcode("deisotoped");
00235       static const XMLCh* s_chargedeconvoluted = xercesc::XMLString::transcode("chargeDeconvoluted");
00236       
00237       
00238       String tag = sm_.convert(qname);
00239       open_tags_.push_back(tag);
00240       //std::cout << " -- Start -- "<< tag << " -- " << std::endl;
00241       
00242       //Skip all tags until the the next scan
00243       if (skip_spectrum_ && tag!="scan") return;
00244       
00245       if (tag=="msRun")
00246       {
00247         Int count = 0;
00248         optionalAttributeAsInt_(count, attributes, s_count);
00249         exp_->reserve(count);
00250         logger_.startProgress(0,count,"loading mzXML file");
00251       }
00252       else if (tag=="parentFile")
00253       {
00254         exp_->getSourceFile().setNameOfFile(attributeAsString_(attributes, s_filename));
00255         exp_->getSourceFile().setFileType(attributeAsString_(attributes, s_filetype));
00256         exp_->getSourceFile().setSha1(attributeAsString_(attributes, s_filesha1));          
00257       }
00258       else if (tag=="software")
00259       {
00260         String& parent_tag = *(open_tags_.end()-2);
00261         //TODO dataProcessing - software can occur several times. Can we store that?
00262         //     Perhaps we need to adjust our model!
00263         if (parent_tag=="dataProcessing")
00264         {
00265           exp_->getSoftware().setVersion(attributeAsString_(attributes, s_version));
00266           exp_->getSoftware().setName(attributeAsString_(attributes, s_name));
00267           //TODO Software type can be aquisition/conversion/processing
00268           //     Should we store information like that?
00269           exp_->getSoftware().setComment(attributeAsString_(attributes, s_type));
00270           
00271           String time;
00272           optionalAttributeAsString_(time,attributes,s_completiontime);
00273           exp_->getSoftware().setCompletionTime( asDateTime_(time) );
00274         }
00275         else if (parent_tag=="msInstrument")
00276         {
00277           // not part of METADATA -> putting it into MetaInfo
00278           MetaInfo().registry().registerName("#InstSoftware","Instrument software name");
00279           exp_->getInstrument().setMetaValue("#InstSoftware", (String)attributeAsString_(attributes, s_name));
00280           
00281           MetaInfo().registry().registerName("#InstSoftwareVersion","Instrument software version");
00282           exp_->getInstrument().setMetaValue("#InstSoftwareVersion", (String)attributeAsString_(attributes, s_version));
00283           
00284           MetaInfo().registry().registerName("#InstSoftwareType","Instrument software type");
00285           exp_->getInstrument().setMetaValue("#InstSoftwareType", (String)attributeAsString_(attributes, s_type));
00286           
00287           String time;
00288           optionalAttributeAsString_(time,attributes,s_completiontime);
00289           if (time!="")
00290           {
00291             MetaInfo().registry().registerName("#InstSoftwareTime","Instrument software completion time");
00292             exp_->getInstrument().setMetaValue("#InstSoftwareTime",time);
00293           }
00294         }
00295       }
00296       else if (tag=="peaks")
00297       {
00298         precision_ = attributeAsString_(attributes, s_precision);
00299         if (precision_!="32" && precision_!="64")
00300         {
00301           error(String("Invalid precision '") + precision_ + "' in element 'peaks'");
00302         }
00303         String tmp = attributeAsString_(attributes, s_byteorder);
00304         if (tmp!="network")
00305         {
00306           error(String("Invalid byte order '") + tmp + "' in element 'peaks'. Must be 'network'!");
00307         }
00308         tmp = attributeAsString_(attributes, s_pairorder);
00309         if (tmp!="m/z-int")
00310         {
00311           error(String("Invalid pair order '") + tmp + "' in element 'peaks'. Must be 'm/z-int'!");
00312         }
00313       }
00314       else if (tag=="precursorMz")
00315       {
00316         exp_->back().getPrecursorPeak().setIntensity( attributeAsDouble_(attributes, s_precursorintensity) );
00317         
00318         Int charge = 0;
00319         optionalAttributeAsInt_(charge, attributes, s_precursorcharge);
00320         exp_->back().getPrecursorPeak().setCharge(charge);
00321         
00322         DoubleReal window = 0;
00323         optionalAttributeAsDouble_(window, attributes, s_windowwideness);
00324         exp_->back().getPrecursor().setWindowSize(window);
00325       }
00326       else if (tag=="scan")
00327       {
00328         skip_spectrum_ = false;
00329         
00330         if (options_.getMetadataOnly()) throw EndParsingSoftly(__FILE__,__LINE__,__PRETTY_FUNCTION__);
00331         
00332         // check if the scan is in the desired MS / RT range
00333         UInt ms_level = attributeAsInt_(attributes, s_mslevel);
00334         //parse retention time and convert it from xs:duration to seconds
00335         DoubleReal retention_time = 0.0;
00336         String time_string = "";
00337         optionalAttributeAsString_(time_string, attributes, s_retentiontime);
00338         time_string = time_string.suffix('T');
00339         //std::cout << "Initial trim: " << time_string << std::endl;
00340         if (time_string.has('H'))
00341         {
00342           retention_time += 3600*asDouble_(time_string.prefix('H'));
00343           time_string = time_string.suffix('H');
00344           //std::cout << "After H: " << time_string << std::endl;
00345         }
00346         if (time_string.has('M'))
00347         {
00348           retention_time += 60*asDouble_(time_string.prefix('M'));
00349           time_string = time_string.suffix('M');
00350           //std::cout << "After M: " << time_string << std::endl;
00351         }
00352         if (time_string.has('S'))
00353         {
00354           retention_time += asDouble_(time_string.prefix('S'));
00355           time_string = time_string.suffix('S');
00356           //std::cout << "After S: " << time_string << std::endl;
00357         }
00358         
00359         if (options_.hasRTRange() && !options_.getRTRange().encloses(DPosition<1>(retention_time))
00360          || options_.hasMSLevels() && !options_.containsMSLevel(ms_level))
00361         {
00362           // skip this tag
00363           skip_spectrum_ = true;          
00364           return;
00365         }
00366         
00367         logger_.setProgress(exp_->size());
00368         //Add a new spectrum and set MS level and RT
00369         exp_->resize(exp_->size()+1);
00370         exp_->back().setMSLevel(ms_level);
00371         exp_->back().setRT(retention_time);
00372         
00373         //peak count == twice the scan size
00374         peak_count_ = attributeAsInt_(attributes, s_peakscount);
00375         exp_->back().getContainer().reserve(peak_count_);
00376         
00377         //TODO centroided, chargeDeconvoluted, deisotoped are ignored.
00378         //     Should we include them into our model?
00379 
00380         //other optional attributes
00381         DoubleReal tmp = 0.0;
00382         optionalAttributeAsDouble_(tmp, attributes, s_startmz);
00383         exp_->back().getInstrumentSettings().setMzRangeStart(tmp);
00384         
00385         tmp = 0.0;
00386         optionalAttributeAsDouble_(tmp, attributes, s_endmz);
00387         exp_->back().getInstrumentSettings().setMzRangeStop(tmp);
00388 
00389         tmp = 0.0;
00390         optionalAttributeAsDouble_(tmp, attributes, s_collisionenergy);
00391         exp_->back().getPrecursor().setActivationEnergy(tmp);
00392         
00393         String polarity = "any";
00394         optionalAttributeAsString_(polarity, attributes, s_polarity);
00395         exp_->back().getInstrumentSettings().setPolarity( (IonSource::Polarity) cvStringToEnum_(0,polarity,"polarity") );
00396         
00397         String type = "";
00398         optionalAttributeAsString_(type, attributes, s_scantype);
00399         exp_->back().getInstrumentSettings().setScanMode( (InstrumentSettings::ScanMode) cvStringToEnum_(1,type,"scanType") );
00400       }
00401       else if (tag=="operator")
00402       {
00403         exp_->getContacts().resize(1);
00404         exp_->getContacts().back().setFirstName(attributeAsString_(attributes, s_first));
00405         exp_->getContacts().back().setLastName(attributeAsString_(attributes, s_last));
00406         
00407         String tmp = "";
00408         optionalAttributeAsString_(tmp, attributes,s_email);
00409         exp_->getContacts().back().setEmail(tmp);
00410         
00411         //TODO all other info has to go into misc info field
00412         String contact_info;
00413         tmp = "";
00414         optionalAttributeAsString_(tmp, attributes,s_phone);
00415         if (tmp != "") 
00416         {
00417           contact_info = "PHONE: " + tmp;
00418         }
00419         tmp = "";
00420         optionalAttributeAsString_(tmp, attributes,s_uri);
00421         if (tmp != "") 
00422         {
00423           contact_info += String(contact_info == "" ? "" : " ") + "URI: " + tmp;
00424         }
00425         if (contact_info != "")
00426         {
00427           exp_->getContacts().back().setContactInfo(contact_info);
00428         }
00429       }
00430       else if (tag=="msManufacturer")
00431       {
00432         exp_->getInstrument().setVendor(attributeAsString_(attributes, s_value));
00433       }
00434       else if (tag=="msModel")
00435       {
00436         exp_->getInstrument().setModel(attributeAsString_(attributes, s_value));
00437       }
00438       else if (tag=="msIonisation")
00439       {
00440         exp_->getInstrument().getIonSource().setIonizationMethod((IonSource::IonizationMethod) cvStringToEnum_(2, attributeAsString_(attributes, s_value), "msIonization") );
00441       }
00442       else if (tag=="msMassAnalyzer")
00443       {
00444         exp_->getInstrument().getMassAnalyzers().resize(1);
00445         exp_->getInstrument().getMassAnalyzers()[0].setType( (MassAnalyzer::AnalyzerType) cvStringToEnum_(3, attributeAsString_(attributes, s_value), "msMassAnalyzer") );
00446       }
00447       else if (tag=="msDetector")
00448       {
00449         exp_->getInstrument().getIonDetector().setType( (IonDetector::Type) cvStringToEnum_(4, attributeAsString_(attributes, s_value), "msDetector") );
00450       }
00451       else if (tag=="msResolution")
00452       {
00453         exp_->getInstrument().getMassAnalyzers()[0].setResolutionMethod( (MassAnalyzer::ResolutionMethod) cvStringToEnum_(5, attributeAsString_(attributes, s_value), "msResolution"));
00454       }
00455       //TODO dataProcessing can occur several times. Can we store that?
00456       //     Perhaps we need to adjust our model!
00457       else if (tag=="dataProcessing")
00458       {
00459         String boolean = "";
00460         optionalAttributeAsString_(boolean, attributes, s_deisotoped);
00461         if (boolean == "true" || boolean == "1")
00462         {
00463           exp_->getProcessingMethod().setDeisotoping(true);
00464         }
00465         
00466         boolean = "";
00467         optionalAttributeAsString_(boolean, attributes, s_chargedeconvoluted);
00468         if (boolean == "true" || boolean == "1")
00469         {
00470           exp_->getProcessingMethod().setChargeDeconvolution(true);
00471         }
00472         
00473         DoubleReal cutoff = 0.0;
00474         optionalAttributeAsDouble_(cutoff, attributes, s_intensitycutoff);
00475         exp_->getProcessingMethod().setIntensityCutoff(cutoff);
00476         
00477         String peaks = "";
00478         optionalAttributeAsString_(peaks, attributes, s_centroided);
00479         if (peaks == "true" || peaks == "1")
00480         {
00481           exp_->getProcessingMethod().setSpectrumType(SpectrumSettings::PEAKS);
00482         }
00483       }
00484       else if (tag=="nameValue")
00485       {
00486         String name = "";
00487         optionalAttributeAsString_(name, attributes, s_name);
00488         if (name == "") return;
00489 
00490         String value = "";
00491         optionalAttributeAsString_(value, attributes, s_value);
00492         
00493         String& parent_tag = *(open_tags_.end()-2);
00494                 
00495         if (parent_tag == "msInstrument")
00496         {
00497           exp_->getInstrument().setMetaValue(name, value);
00498         }
00499         else if (parent_tag == "scan")
00500         {
00501           exp_->back().setMetaValue(name, value);
00502         }
00503         else
00504         {
00505           std::cout << " Warning: Unexpected tag 'nameValue' in tag '" << parent_tag << "'" << std::endl;
00506         }
00507       }
00508       //TODO dataProcessing - processingOperation can occur several times. Can we store that?
00509       //     Perhaps we need to adjust our model!
00510       else if (tag=="processingOperation")
00511       {
00512         //TODO This is currently ignored
00513       }
00514       
00515       //std::cout << " -- !Start -- " << std::endl;
00516     }
00517   
00518   
00519     template <typename MapType>
00520     void MzXMLHandler<MapType>::endElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname)
00521     {
00522       //std::cout << " -- End -- " << sm_.convert(qname) << " -- " << std::endl;
00523       
00524       static const XMLCh* s_mzxml = xercesc::XMLString::transcode("mzXML");
00525       static const XMLCh* s_peaks = xercesc::XMLString::transcode("peaks");
00526       
00527       open_tags_.pop_back();
00528       
00529       //abort if this scan should be skipped
00530       if (skip_spectrum_) return;
00531       
00532       if (equal_(qname,s_mzxml))
00533       {
00534         logger_.endProgress();
00535       }
00536       else if (equal_(qname,s_peaks))
00537       {
00538         //std::cout << "reading scan" << std::endl;
00539         if (char_rest_=="") // no peaks
00540         {
00541           return;
00542         }
00543         if (precision_=="64")
00544         {
00545           std::vector<DoubleReal> data;
00546           decoder_.decode(char_rest_, Base64::BYTEORDER_BIGENDIAN, data);
00547           char_rest_ = "";
00548           PeakType peak;
00549           //push_back the peaks into the container
00550           for (UInt n = 0 ; n < ( 2 * peak_count_) ; n += 2)
00551           {
00552             // check if peak in in the specified m/z  and intensity range
00553             if ((!options_.hasMZRange() || options_.getMZRange().encloses(DPosition<1>(data[n])))
00554              && (!options_.hasIntensityRange() || options_.getIntensityRange().encloses(DPosition<1>(data[n+1]))))
00555             {
00556               peak.setPosition(data[n]);
00557               peak.setIntensity(data[n+1]);
00558               exp_->back().push_back(peak);
00559             }
00560           }
00561         }
00562         else  //precision 32
00563         {
00564           std::vector<Real> data;
00565           decoder_.decode(char_rest_, Base64::BYTEORDER_BIGENDIAN, data);
00566           char_rest_ = "";
00567           PeakType peak;
00568           //push_back the peaks into the container
00569           for (UInt n = 0 ; n < (2 * peak_count_) ; n += 2)
00570           {
00571             if ((!options_.hasMZRange() || options_.getMZRange().encloses(DPosition<1>(data[n])))
00572              && (!options_.hasIntensityRange() || options_.getIntensityRange().encloses(DPosition<1>(data[n+1]))))
00573             {
00574               peak.setPosition(data[n]);
00575               peak.setIntensity(data[n+1]);
00576               exp_->back().push_back(peak);
00577             }
00578           }
00579         }
00580       }
00581       //std::cout << " -- End -- " << std::endl;
00582       sm_.clear();
00583     }
00584   
00585     template <typename MapType>
00586     void MzXMLHandler<MapType>::characters(const XMLCh* const chars, unsigned int /*length*/)
00587     {
00588       //Abort if this spectrum should be skipped
00589       if (skip_spectrum_) return;
00590       
00591       char* transcoded_chars = sm_.convert(chars);
00592       
00593       if(open_tags_.back()=="peaks")
00594       {
00595         //chars may be split to several chunks => concatenate them
00596         char_rest_ += transcoded_chars;
00597       }
00598       else if ( open_tags_.back()=="offset" || open_tags_.back()=="indexOffset" || open_tags_.back()=="sha1")
00599       {
00600         
00601       }
00602       else if ( open_tags_.back()=="precursorMz")
00603       {
00604         exp_->back().getPrecursorPeak().getPosition()[0] = asFloat_(transcoded_chars);
00605       }
00606       else if ( open_tags_.back()=="comment")
00607       {
00608         String parent_tag = *(open_tags_.end()-2);
00609         //std::cout << "- Comment of parent " << parent_tag << std::endl;
00610           
00611         if (parent_tag=="msInstrument")
00612         {
00613           exp_->getInstrument().setMetaValue("#Comment" , String(transcoded_chars));
00614         }
00615         //TODO dataProcessing - comment can occur several times. Can we store that?
00616         //     Perhaps we need to adjust our model!
00617         else if (parent_tag=="dataProcessing")
00618         {
00619           //TODO this is currently ignored
00620         }
00621         else if (parent_tag=="scan")
00622         {
00623           exp_->back().setComment( transcoded_chars );
00624         }
00625         else if (String(transcoded_chars).trim()!="")
00626         {
00627           std::cerr << "Unhandled comment '" << transcoded_chars << "' in element '" << open_tags_.back() << "'" << std::endl;
00628         }
00629       }
00630       else if (String(transcoded_chars).trim()!="")
00631       {
00632           std::cerr << "Unhandled character content '" << transcoded_chars << "' in tag '" << open_tags_.back() << "'" << std::endl;
00633       }
00634       //std::cout << " -- !Chars -- " << std::endl;
00635     }
00636   
00637     template <typename MapType>
00638     void MzXMLHandler<MapType>::writeTo(std::ostream& os)
00639     {
00640       //determine how many spectra there are (count only those with peaks)
00641       UInt count_tmp_  = 0;
00642       for (UInt s=0; s<cexp_->size(); s++)
00643       {
00644         const SpectrumType& spec = (*cexp_)[s];
00645         if (spec.size()!=0) ++count_tmp_;
00646       }
00647       logger_.startProgress(0,cexp_->size(),"storing mzXML file");
00648       os  << "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n"
00649          << "<mzXML xmlns=\"http://sashimi.sourceforge.net/schema_revision/mzXML_2.1\" "
00650          << "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
00651          << "xsi:schemaLocation=\"http://sashimi.sourceforge.net/schema_revision/mzXML_2.1 "
00652          << "http://sashimi.sourceforge.net/schema_revision/mzXML_2.1/mzXML_idx_2.1.xsd\">\n"
00653          << "\t<msRun scanCount=\"" << count_tmp_ << "\">\n"
00654          << "\t\t<parentFile fileName=\"" << cexp_->getSourceFile().getNameOfFile()
00655          //file type is an enum in mzXML => search for 'raw' string
00656          << "\" fileType=\"";
00657          String tmp_string = cexp_->getSourceFile().getFileType();
00658          tmp_string.toLower();
00659          if (tmp_string.hasSubstring("raw"))
00660          {
00661           os << "RAWData";
00662          }
00663          else
00664          {
00665           os << "processedData";
00666          }
00667          //Sha1 checksum must have 40 characters => create a fake if it is unknown
00668          os << "\" fileSha1=\"";
00669          tmp_string = cexp_->getSourceFile().getSha1();
00670          if (cexp_->getSourceFile().getSha1().size()!=40)
00671          {
00672            os << "0000000000000000000000000000000000000000";
00673          }
00674          else
00675          {
00676            os << cexp_->getSourceFile().getSha1();
00677          }
00678          os  << "\"/>\n";
00679   
00680       if (cexp_->getInstrument() != Instrument())
00681       {
00682         const Instrument& inst = cexp_->getInstrument();
00683         os << "\t\t<msInstrument>\n"
00684            << "\t\t\t<msManufacturer category=\"msManufacturer\" value=\""
00685            << inst.getVendor() << "\"/>\n"
00686            << "\t\t\t<msModel category=\"msModel\" value=\""
00687            << inst.getModel() << "\"/>\n"
00688            << "\t\t\t<msIonisation category=\"msIonisation\" value=\""
00689            << cv_terms_[2][inst.getIonSource().getIonizationMethod()]
00690            << "\"/>\n";
00691   
00692         const std::vector<MassAnalyzer>& analyzers = inst.getMassAnalyzers();
00693         if ( analyzers.size()>0 )
00694         {
00695           os << "\t\t\t<msMassAnalyzer category=\"msMassAnalyzer\" value=\""
00696              << cv_terms_[3][analyzers[0].getType()]  << "\"/>\n";
00697         }
00698         else
00699         {
00700           std::cout << " Warning: mzXML supports only one analyzer! Skipping the other " << (analyzers.size()-1) << "mass analyzers." << std::endl;
00701         }
00702         os << "\t\t\t<msDetector category=\"msDetector\" value=\""
00703            << cv_terms_[4][inst.getIonDetector().getType()] << "\"/>\n";
00704         try
00705         {
00706           String type = inst.getMetaValue("#InstSoftwareType").toString();
00707           //invalid type is reset to 'processing' as it fits all actions
00708           if (type!="acquisition" && type!="conversion" && type!="processing")
00709           {
00710             type = "processing";
00711           }
00712           String name = inst.getMetaValue("#InstSoftware").toString();
00713           String version = inst.getMetaValue("#InstSoftwareVersion").toString();
00714           String str = inst.getMetaValue("#InstSoftwareTime").toString();
00715           String time(str);
00716           time.substitute(' ', 'T');
00717           os << "\t\t\t<software type=\"" << type
00718              << "\" name=\"" << name
00719              << "\" version=\"" << version << "\"";
00720           if (time != "")
00721           {
00722             os << " completionTime=\"" << time << "\"";
00723           }
00724           os << "/>\n";
00725         }
00726         catch(Exception::InvalidValue exception)
00727         {
00728   
00729         }
00730         
00731         if ( analyzers.size()>0 )
00732         {
00733           if (analyzers[0].getResolutionMethod())
00734             os << "\t\t\t<msResolution category=\"msResolution\" value=\""
00735                << cv_terms_[5][analyzers[0].getResolutionMethod()] << "\"/>\n";
00736         }
00737         else
00738         {
00739           std::cout << "Warning: mzXML supports only one analyzer! Skipping the other " << (analyzers.size()-1) << "mass analyzers." << std::endl;
00740         }
00741         
00742         if ( cexp_->getContacts().size()>0 )
00743         {
00744           const ContactPerson& cont = cexp_->getContacts()[0];
00745           
00746           os << "\t\t\t<operator first=\"" << cont.getFirstName() << "\" last=\"" << cont.getLastName();
00747           
00748           String info = cont.getContactInfo();
00749           std::string::size_type phone = info.find("PHONE:");
00750           std::string::size_type uri = info.find("URI:");
00751           if (phone != std::string::npos)
00752           {
00753             UInt end = uri != std::string::npos ? uri : info.size();
00754             os << "\" phone=\"" << info.substr(phone + 6, end - phone + 6);
00755           }
00756           
00757           if (cont.getEmail() != "")
00758           {
00759             os << "\" email=\"" << cont.getEmail();
00760           }
00761           
00762           if (uri != std::string::npos)
00763           {
00764             UInt uri = info.find("URI:");
00765             os << "\" URI=\"" << info.substr(uri+4).trim();
00766           }
00767           
00768           os << "\"/>\n";
00769         }
00770         writeUserParam_(os,inst,3);
00771         try
00772         {
00773           DataValue com = inst.getMetaValue("#Comment");
00774           if (!com.isEmpty()) os << "\t\t\t<comment>" << com << "</comment>\n";
00775         }
00776         catch(Exception::InvalidValue exception)
00777         {
00778   
00779         }
00780         os << "\t\t</msInstrument>\n";
00781       }
00782   
00783       const Software& software = cexp_->getSoftware();
00784       os << "\t\t<dataProcessing deisotoped=\""
00785          << cexp_->getProcessingMethod().getDeisotoping()
00786          << "\" chargeDeconvoluted=\""
00787          << cexp_->getProcessingMethod().getChargeDeconvolution()
00788          << "\" centroided=\"";
00789       if(cexp_->getProcessingMethod().getSpectrumType()==SpectrumSettings::PEAKS)
00790       {
00791         os << "1";
00792       }
00793       else
00794       {
00795         os << "0";
00796       }
00797       os << "\" intensityCutoff=\""
00798          << cexp_->getProcessingMethod().getIntensityCutoff()
00799          << "\">\n"
00800          << "\t\t\t<software type=\"processing\" name=\"" << software.getName()
00801          << "\" version=\"" << software.getVersion();
00802   
00803       if (software.getCompletionTime() != DateTime())
00804       {
00805         String tmp;
00806         software.getCompletionTime().get(tmp);
00807         String qtmp(tmp);
00808         qtmp.substitute(' ', 'T');
00809         os << "\" completionTime=\"" << qtmp;
00810       }
00811       os << "\"/>\n";
00812       writeUserParam_(os,cexp_->getProcessingMethod(),3,"processingOperation");
00813   
00814       os << "\t\t</dataProcessing>\n";
00815       
00816       std::stack<UInt> open_scans;
00817       
00818       // write scans
00819       for (UInt s=0; s<cexp_->size(); s++)
00820       {
00821         logger_.setProgress(s);
00822         const SpectrumType& spec = (*cexp_)[s];
00823               
00824         int ms_level = spec.getMSLevel();
00825         open_scans.push(ms_level);
00826         
00827         os << String(ms_level+1,'\t')
00828            << "<scan num=\"" << spec_write_counter_++ << "\" msLevel=\""
00829            << ms_level << "\" peaksCount=\""
00830            << spec.size() << "\" polarity=\"";
00831         if (spec.getInstrumentSettings().getPolarity()==IonSource::POSITIVE)
00832         {
00833           os << "+";
00834         }
00835         else if (spec.getInstrumentSettings().getPolarity()==IonSource::NEGATIVE)
00836         {
00837           os << "-";
00838         }
00839         else
00840         {
00841           os << "any";
00842         }
00843         
00844         if (spec.getInstrumentSettings().getScanMode())
00845         {
00846           os << "\" scanType=\""
00847              << cv_terms_[1][spec.getInstrumentSettings().getScanMode()];
00848         }
00849         os << "\" retentionTime=\"PT"
00850            << spec.getRT() << "S\"";
00851         if (spec.getInstrumentSettings().getMzRangeStart()!=0)
00852           os << " startMz=\"" << spec.getInstrumentSettings().getMzRangeStart() << "\"";
00853         if (spec.getInstrumentSettings().getMzRangeStop()!=0)
00854           os << " endMz=\"" << spec.getInstrumentSettings().getMzRangeStop() << "\"";
00855         os << ">\n";
00856   
00857         const PrecursorPeakType& peak = spec.getPrecursorPeak();
00858         if (peak!= PrecursorPeakType())
00859         {
00860           os << String(ms_level+2,'\t') << "<precursorMz precursorIntensity=\""
00861              << peak.getIntensity();
00862           if (peak.getCharge()!=0)
00863             os << "\" precursorCharge=\"" << peak.getCharge();
00864           os << "\">"
00865              << peak.getPosition()[0] << "</precursorMz>\n";
00866         }
00867   
00868         if (spec.size() > 0)
00869         {
00870           os << String(ms_level+2,'\t') << "<peaks precision=\"32\""
00871              << " byteOrder=\"network\" pairOrder=\"m/z-int\">";
00872           
00873           //std::cout << "Writing scan " << s << std::endl;
00874           std::vector<Real> tmp;
00875           for (UInt i=0; i<spec.size(); i++)
00876           {
00877             tmp.push_back(spec.getContainer()[i].getMZ());
00878             tmp.push_back(spec.getContainer()[i].getIntensity());
00879           }
00880           
00881           std::string encoded;
00882           decoder_.encode(tmp, Base64::BYTEORDER_BIGENDIAN, encoded);
00883           os << encoded << "</peaks>\n";
00884         }
00885         else
00886         {
00887           os << String(ms_level+2,'\t') << "<peaks precision=\"32\""
00888              << " byteOrder=\"network\" pairOrder=\"m/z-int\" xsi:nil=\"1\"/>\n";
00889         }
00890         
00891         writeUserParam_(os,spec,ms_level+2);
00892         if (spec.getComment() != "")
00893         {
00894           os << String(ms_level+2,'\t') << "<comment>" << spec.getComment() << "</comment>\n";
00895         }
00896         
00897         //check MS level of next scan and close scans (scans can be nested)
00898         int next_ms_level = 0;
00899         if (s < cexp_->size()-1)
00900         {
00901           next_ms_level = ((*cexp_)[s+1]).getMSLevel();
00902         }
00903         //std::cout << "scan: " << s << " this: " << ms_level << " next: " << next_ms_level << std::endl;
00904         if (next_ms_level <= ms_level)
00905         {
00906           for (Int i = 0; i<= ms_level-next_ms_level && !open_scans.empty(); ++i)
00907           {
00908             os << String(ms_level-i+1,'\t') << "</scan>\n";
00909             open_scans.pop();
00910           }
00911         }
00912       }
00913   
00914       os << "\t</msRun>\n"
00915          << "\t<indexOffset>0</indexOffset>\n"
00916          << "</mzXML>\n";
00917       
00918       logger_.endProgress();
00919       spec_write_counter_ = 1;
00920     }
00921 
00922   } // namespace Internal
00923 
00924 } // namespace OpenMS
00925 
00926 #endif

Generated Tue Apr 1 15:36:36 2008 -- using doxygen 1.5.4 OpenMS / TOPP 1.1