Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages

SuffixArraySeqan.h (Maintainer: Chris Bauer)

Go to the documentation of this file.
00001 // -*- Mode: C++; tab-width: 2; -*-
00002 // vi: set ts=2:
00003 //
00004 // --------------------------------------------------------------------------
00005 //                   OpenMS Mass Spectrometry Framework
00006 // --------------------------------------------------------------------------
00007 //  Copyright (C) 2003-2008 -- Oliver Kohlbacher, Knut Reinert
00008 //
00009 //  This library is free software; you can redistribute it and/or
00010 //  modify it under the terms of the GNU Lesser General Public
00011 //  License as published by the Free Software Foundation; either
00012 //  version 2.1 of the License, or (at your option) any later version.
00013 //
00014 //  This library is distributed in the hope that it will be useful,
00015 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 //  Lesser General Public License for more details.
00018 //
00019 //  You should have received a copy of the GNU Lesser General Public
00020 //  License along with this library; if not, write to the Free Software
00021 //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 //
00023 // --------------------------------------------------------------------------
00024 // $Maintainer: Chris Bauer$
00025 // --------------------------------------------------------------------------
00026 
00027 
00028 
00029 #ifndef OPENMS_DATASTRUCTURES_SUFFIXARRAYSEQAN_H
00030 #define OPENMS_DATASTRUCTURES_SUFFIXARRAYSEQAN_H
00031 
#include <map>
#include <stack>
#include <utility>
#include <vector>
#include <OpenMS/DATASTRUCTURES/String.h>
#include <seqan/index.h>
#include <OpenMS/DATASTRUCTURES/SuffixArray.h>
00036 
00037 namespace OpenMS {
00038 
00046 class SuffixArraySeqan : public SuffixArray {
00047   
00048   typedef seqan::Index<seqan::String<char>, seqan::Index_ESA<> > TIndex;
00049 
00050 public:
00051 
00057   SuffixArraySeqan(const String& st,const String& sa_file_name) throw (Exception::InvalidValue,Exception::FileNotFound);
00058 
00062   SuffixArraySeqan(const SuffixArraySeqan& source);
00063 
00067   virtual ~SuffixArraySeqan();
00068 
00072   String toString();
00073 
00081   void findSpec(std::vector<std::vector<std::pair<std::pair<int, int>, float > > >& candidates, const std::vector<double> & spec) throw (Exception::InvalidValue);
00082 
00088   bool save(const String& file_name) throw (Exception::UnableToCreateFile);
00089 
00095   bool open(const String& file_name) throw (Exception::FileNotFound);
00096 
00101   void setTolerance(double t) throw (Exception::InvalidValue);
00102 
00107   double getTolerance() const;
00108 
00115   bool isDigestingEnd(const char aa1, const char aa2) const;
00116 
00122   void setTags(const std::vector<OpenMS::String>& tags) throw (OpenMS::Exception::InvalidValue);
00123 
00128   const std::vector<OpenMS::String>& getTags ();
00129 
00134   void setUseTags(bool use_tags);
00135 
00140   bool getUseTags();
00141 
00146   void setNumberOfModifications(unsigned int number_of_mods);
00147   
00152   unsigned int getNumberOfModifications();
00153 
00165   template <typename TIndex, typename TSpec> 
00166   inline void goNextSubTree(seqan::Iter<TIndex, seqan::VSTree<seqan::TopDown<seqan::ParentLinks<TSpec> > > >& it, double& m, std::stack<double>& allm, std::stack<std::map<double, int> >& mod_map) 
00167   {
00168     // preorder dfs
00169     if (!goRight(it))
00170     {
00171       while (true)
00172       {
00173         if (goUp(it))
00174         {
00175           m -= allm.top();
00176           allm.pop();
00177           mod_map.pop();
00178         }
00179         else
00180         {
00181           break;
00182         }
00183 
00184         if (goRight(it))
00185         {
00186           m -= allm.top();
00187           allm.pop();
00188           mod_map.pop();
00189           break;
00190         }
00191       }
00192     }
00193     else
00194     {
00195       m -= allm.top();
00196       allm.pop();
00197       mod_map.pop();
00198     }
00199     if (isRoot(it))
00200     {
00201       clear(it);
00202     }
00203   }
00204 
00210   template <typename TIndex, typename TSpec> inline void goNextSubTree(seqan::Iter<TIndex, seqan::VSTree<seqan::TopDown<seqan::ParentLinks<TSpec> > > >& it)
00211   {
00212     // preorder dfs
00213     if (!goRight(it))
00214     {
00215       while (true)
00216       {
00217         if (!goUp(it))
00218         {
00219           break;
00220         }
00221         if (goRight(it))
00222         {
00223           break;
00224         }
00225       }
00226     }
00227     if (isRoot(it))
00228     {
00229       clear(it);
00230     }
00231   }
00232 
00244   template <typename TIndex, typename TSpec> 
00245   inline void goNext(seqan::Iter<TIndex, seqan::VSTree<seqan::TopDown<seqan::ParentLinks<TSpec> > > >& it, double& m, std::stack<double>& allm, std::stack<std::map<double, int> >& mod_map)
00246   {
00247     // preorder dfs
00248     if (!goDown(it))
00249     {
00250       goNextSubTree(it, m, allm, mod_map);
00251     }
00252   }
00253 
00254 
00255   
00256   template <typename TIndex, typename TSpec>
00257   inline void parseTree(seqan::Iter<TIndex, seqan::VSTree<seqan::TopDown<seqan::ParentLinks<TSpec> > > >& it, std::vector<std::pair<int, int> >& out_number, std::vector<std::pair<int, int> >& edge_length, std::vector<int>& leafe_depth)
00258   {
00259     int depth = 1;
00260     while (!atEnd(it))
00261     {
00262       int le = 0;
00263       bool isLeaf = false;
00264       if (length(parentEdgeLabel(it))>0){
00265         if (countChildren(it)>0)
00266         {
00267           edge_length.push_back(std::pair<int,int>(depth,length(parentEdgeLabel(it))));
00268         } else
00269         {
00270           //le <- length(representative(it));
00271           //isLeaf = true;
00272         }
00273       }
00274       if (countChildren(it)>0) {
00275         out_number.push_back(std::pair<int,int> (depth,countChildren(it)));
00276       } else {
00277         leafe_depth.push_back(depth);
00278       }
00279       if (goDown(it)){
00280         depth++;
00281       } else if (!goRight(it)) {
00282         while(!goRight(it)) {
00283           goUp(it);
00284           if (isLeaf) {
00285             edge_length.push_back(std::pair<int,int>(depth,le - length(parentEdgeLabel(it))));
00286             isLeaf = false;
00287           }
00288           depth--;
00289           if (isRoot(it)) return;
00290         }
00291       } 
00292       else 
00293       {
00294       }
00295     }
00296   }
00297 
00298   
00299   void printStatistic ();
00300   
00301 protected:
00302 
00303   TIndex index_; 
00304 
00305   seqan::Iter<TIndex, seqan::VSTree<seqan::TopDown<seqan::ParentLinks<seqan::Preorder> > > >* it_; 
00306 
00314   int findFirst_ (const std::vector<double> & spec, double & m);
00315 
00325   int findFirst_ (const std::vector<double> & spec, double & m,int start, int  end);
00326 
00327   const String& s_; 
00328 
00329   double masse_[255]; 
00330 
00331   int number_of_modifications_; 
00332 
00333   std::vector<String> tags_; 
00334 
00335   bool use_tags_; 
00336 
00337   double tol_; 
00338 };
00339 }
00340 
00341 #endif //OPENMS_DATASTRUCTURES_SUFFIXARRAYSEQAN_H

Generated Tue Apr 1 15:36:38 2008 -- using doxygen 1.5.4 OpenMS / TOPP 1.1