00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #ifndef OPENMS_DATASTRUCTURES_SUFFIXARRAYSEQAN_H
00030 #define OPENMS_DATASTRUCTURES_SUFFIXARRAYSEQAN_H
00031
#include <cstddef>
#include <map>
#include <stack>
#include <utility>
#include <vector>
#include <OpenMS/DATASTRUCTURES/String.h>
#include <seqan/index.h>
#include <OpenMS/DATASTRUCTURES/SuffixArray.h>
00036
00037 namespace OpenMS {
00038
00046 class SuffixArraySeqan : public SuffixArray {
00047
00048 typedef seqan::Index<seqan::String<char>, seqan::Index_ESA<> > TIndex;
00049
00050 public:
00051
00057 SuffixArraySeqan(const String& st,const String& sa_file_name) throw (Exception::InvalidValue,Exception::FileNotFound);
00058
00062 SuffixArraySeqan(const SuffixArraySeqan& source);
00063
00067 virtual ~SuffixArraySeqan();
00068
00072 String toString();
00073
00081 void findSpec(std::vector<std::vector<std::pair<std::pair<int, int>, float > > >& candidates, const std::vector<double> & spec) throw (Exception::InvalidValue);
00082
00088 bool save(const String& file_name) throw (Exception::UnableToCreateFile);
00089
00095 bool open(const String& file_name) throw (Exception::FileNotFound);
00096
00101 void setTolerance(double t) throw (Exception::InvalidValue);
00102
00107 double getTolerance() const;
00108
00115 bool isDigestingEnd(const char aa1, const char aa2) const;
00116
00122 void setTags(const std::vector<OpenMS::String>& tags) throw (OpenMS::Exception::InvalidValue);
00123
00128 const std::vector<OpenMS::String>& getTags ();
00129
00134 void setUseTags(bool use_tags);
00135
00140 bool getUseTags();
00141
00146 void setNumberOfModifications(unsigned int number_of_mods);
00147
00152 unsigned int getNumberOfModifications();
00153
00165 template <typename TIndex, typename TSpec>
00166 inline void goNextSubTree(seqan::Iter<TIndex, seqan::VSTree<seqan::TopDown<seqan::ParentLinks<TSpec> > > >& it, double& m, std::stack<double>& allm, std::stack<std::map<double, int> >& mod_map)
00167 {
00168
00169 if (!goRight(it))
00170 {
00171 while (true)
00172 {
00173 if (goUp(it))
00174 {
00175 m -= allm.top();
00176 allm.pop();
00177 mod_map.pop();
00178 }
00179 else
00180 {
00181 break;
00182 }
00183
00184 if (goRight(it))
00185 {
00186 m -= allm.top();
00187 allm.pop();
00188 mod_map.pop();
00189 break;
00190 }
00191 }
00192 }
00193 else
00194 {
00195 m -= allm.top();
00196 allm.pop();
00197 mod_map.pop();
00198 }
00199 if (isRoot(it))
00200 {
00201 clear(it);
00202 }
00203 }
00204
00210 template <typename TIndex, typename TSpec> inline void goNextSubTree(seqan::Iter<TIndex, seqan::VSTree<seqan::TopDown<seqan::ParentLinks<TSpec> > > >& it)
00211 {
00212
00213 if (!goRight(it))
00214 {
00215 while (true)
00216 {
00217 if (!goUp(it))
00218 {
00219 break;
00220 }
00221 if (goRight(it))
00222 {
00223 break;
00224 }
00225 }
00226 }
00227 if (isRoot(it))
00228 {
00229 clear(it);
00230 }
00231 }
00232
00244 template <typename TIndex, typename TSpec>
00245 inline void goNext(seqan::Iter<TIndex, seqan::VSTree<seqan::TopDown<seqan::ParentLinks<TSpec> > > >& it, double& m, std::stack<double>& allm, std::stack<std::map<double, int> >& mod_map)
00246 {
00247
00248 if (!goDown(it))
00249 {
00250 goNextSubTree(it, m, allm, mod_map);
00251 }
00252 }
00253
00254
00255
00256 template <typename TIndex, typename TSpec>
00257 inline void parseTree(seqan::Iter<TIndex, seqan::VSTree<seqan::TopDown<seqan::ParentLinks<TSpec> > > >& it, std::vector<std::pair<int, int> >& out_number, std::vector<std::pair<int, int> >& edge_length, std::vector<int>& leafe_depth)
00258 {
00259 int depth = 1;
00260 while (!atEnd(it))
00261 {
00262 int le = 0;
00263 bool isLeaf = false;
00264 if (length(parentEdgeLabel(it))>0){
00265 if (countChildren(it)>0)
00266 {
00267 edge_length.push_back(std::pair<int,int>(depth,length(parentEdgeLabel(it))));
00268 } else
00269 {
00270
00271
00272 }
00273 }
00274 if (countChildren(it)>0) {
00275 out_number.push_back(std::pair<int,int> (depth,countChildren(it)));
00276 } else {
00277 leafe_depth.push_back(depth);
00278 }
00279 if (goDown(it)){
00280 depth++;
00281 } else if (!goRight(it)) {
00282 while(!goRight(it)) {
00283 goUp(it);
00284 if (isLeaf) {
00285 edge_length.push_back(std::pair<int,int>(depth,le - length(parentEdgeLabel(it))));
00286 isLeaf = false;
00287 }
00288 depth--;
00289 if (isRoot(it)) return;
00290 }
00291 }
00292 else
00293 {
00294 }
00295 }
00296 }
00297
00298
00299 void printStatistic ();
00300
00301 protected:
00302
00303 TIndex index_;
00304
00305 seqan::Iter<TIndex, seqan::VSTree<seqan::TopDown<seqan::ParentLinks<seqan::Preorder> > > >* it_;
00306
00314 int findFirst_ (const std::vector<double> & spec, double & m);
00315
00325 int findFirst_ (const std::vector<double> & spec, double & m,int start, int end);
00326
00327 const String& s_;
00328
00329 double masse_[255];
00330
00331 int number_of_modifications_;
00332
00333 std::vector<String> tags_;
00334
00335 bool use_tags_;
00336
00337 double tol_;
00338 };
00339 }
00340
00341 #endif //OPENMS_DATASTRUCTURES_SUFFIXARRAYSEQAN_H