00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #ifndef OPENMS_FORMAT_LIBSVMENCODER_H
00028 #define OPENMS_FORMAT_LIBSVMENCODER_H
00029
00030 #include <OpenMS/DATASTRUCTURES/String.h>
00031 #include <OpenMS/CHEMISTRY/ResidueDB.h>
00032 #include <svm.h>
00033
00034 #include <vector>
00035 #include <utility>
00036
00037 namespace OpenMS
00038 {
/// Encoder that turns sequences (String) and feature vectors, given as lists of
/// (Int, DoubleReal) pairs, into the svm_node / svm_problem structures used
/// with the included <svm.h>, and converts such structures to strings/files.
///
/// Only the declarations are visible in this header. Statements below about
/// what a member does beyond its signature are inferred from names and are
/// marked as assumptions to be confirmed against the implementation.
///
/// NOTE(review): several members return raw svm_node* / svm_problem* pointers.
/// Who owns and frees them cannot be determined from this header - confirm.
00047 class LibSVMEncoder
00048 {
00049 public:
/// Default constructor.
00051 LibSVMEncoder();
/// Destructor (declared non-virtual).
00053 ~LibSVMEncoder();
00054
/// Encodes a single sequence as a composition vector.
/// Presumably counts the occurrences of each allowed character in the
/// sequence - TODO confirm exact encoding (counts vs. frequencies).
///
/// @param sequence            input sequence
/// @param encoded_vector      output parameter receiving the (Int, DoubleReal) pairs
/// @param allowed_characters  characters taken into account; defaults to
///                            "ACDEFGHIKLMNPQRSTVWY"
00064 void encodeCompositionVector(const String& sequence, std::vector< std::pair<Int, DoubleReal> >& encoded_vector, const String& allowed_characters = "ACDEFGHIKLMNPQRSTVWY");
00065
/// Batch variant of encodeCompositionVector() for several sequences.
/// Note that, unlike the single-sequence variant, allowed_characters precedes
/// the output parameter here and has no default value.
///
/// @param sequences            input sequences
/// @param allowed_characters   characters taken into account
/// @param composition_vectors  output parameter; presumably one vector per
///                             input sequence - TODO confirm
00075 void encodeCompositionVectors(const std::vector<String>& sequences, const String& allowed_characters, std::vector< std::vector< std::pair<Int, DoubleReal> > >& composition_vectors);
/// Converts one feature vector into an svm_node array.
///
/// @param feature_vector  (Int, DoubleReal) pairs, presumably (index, value)
/// @return pointer to the encoded svm_node data.
///         NOTE(review): ownership/lifetime not visible here - confirm.
00077 svm_node* encodeLibSVMVector(const std::vector< std::pair<Int, DoubleReal> >& feature_vector);
00078
/// Batch variant of encodeLibSVMVector().
///
/// @param feature_vectors  input feature vectors
/// @param libsvm_vectors   output parameter receiving svm_node pointers.
///                         NOTE(review): whether it is cleared first or
///                         appended to is not visible here - confirm.
00080 void encodeLibSVMVectors(const std::vector< std::vector< std::pair<Int, DoubleReal> > >& feature_vectors, std::vector<svm_node*>& libsvm_vectors);
00081
/// Builds an svm_problem from already encoded vectors and their labels.
///
/// @param vectors  encoded svm_node vectors
/// @param labels   label values; passed by non-const reference, whether they
///                 are modified is not visible here
/// @return pointer to the svm_problem.
///         NOTE(review): ownership and behaviour on size mismatch between
///         vectors and labels are not visible here - confirm.
00083 svm_problem* encodeLibSVMProblem(const std::vector<svm_node*>& vectors,
00084 std::vector<DoubleReal>& labels);
00085
/// Builds an svm_problem directly from sequences; judging by the name, via
/// composition vectors - TODO confirm.
///
/// @param sequences           input sequences
/// @param labels              label values (non-const reference)
/// @param allowed_characters  characters taken into account
/// @return pointer to the svm_problem (ownership not visible here).
00087 svm_problem* encodeLibSVMProblemWithCompositionVectors(const std::vector<String>& sequences,
00088 std::vector<DoubleReal>& labels,
00089 const String& allowed_characters);
00090
/// Like encodeLibSVMProblemWithCompositionVectors(), with an additional
/// maximum_sequence_length argument. Presumably the sequence length is added
/// as a feature scaled by that maximum - TODO confirm.
///
/// @param sequences                input sequences
/// @param labels                   label values (non-const reference)
/// @param allowed_characters       characters taken into account
/// @param maximum_sequence_length  maximum sequence length (exact use not
///                                 visible here)
/// @return pointer to the svm_problem (ownership not visible here).
00092 svm_problem* encodeLibSVMProblemWithCompositionAndLengthVectors(const std::vector<String>& sequences,
00093 std::vector<DoubleReal>& labels,
00094 const String& allowed_characters,
00095 UInt maximum_sequence_length);
00096
/// Writes an svm_problem to a file. Declared const, i.e. it does not modify
/// the encoder.
///
/// @param filename  target file name
/// @param problem   problem to store (not modified)
/// @return bool status; presumably true on success - TODO confirm.
00098 bool storeLibSVMProblem(const String& filename, const svm_problem* problem) const;
00099
/// Reads an svm_problem from a file.
///
/// @param filename  source file name
/// @return pointer to the loaded svm_problem.
///         NOTE(review): return value on failure and ownership are not
///         visible here - confirm.
00101 svm_problem* loadLibSVMProblem(const String& filename);
00102
/// Encodes the borders of a sequence using k-mers (per the name and
/// parameters; exact encoding not visible here - TODO confirm).
///
/// @param sequence            input sequence; taken by value, so the callee
///                            works on its own copy
/// @param k_mer_length        k-mer length
/// @param allowed_characters  characters taken into account
/// @param border_length       border length
/// @param libsvm_vector       output parameter receiving (Int, DoubleReal) pairs
/// @param strict              defaults to false; semantics not visible here
/// @param unpaired            defaults to false; semantics not visible here
/// @param length_encoding     defaults to false; semantics not visible here
00104 void encodeOligoBorders(String sequence,
00105 UInt k_mer_length,
00106 const String& allowed_characters,
00107 UInt border_length,
00108 std::vector< std::pair<Int, DoubleReal> >& libsvm_vector,
00109 bool strict = false,
00110 bool unpaired = false,
00111 bool length_encoding = false);
00112
/// Builds an svm_problem from sequences; judging by the name, using the
/// encoding of encodeOligoBorders() for each sequence - TODO confirm.
///
/// @param sequences           input sequences
/// @param labels              label values (non-const reference)
/// @param k_mer_length        k-mer length
/// @param allowed_characters  characters taken into account
/// @param border_length       border length
/// @param strict              defaults to false; semantics not visible here
/// @param unpaired            defaults to false; semantics not visible here
/// @param length_encoding     defaults to false; semantics not visible here
/// @return pointer to the svm_problem (ownership not visible here).
00114 svm_problem* encodeLibSVMProblemWithOligoBorderVectors(const std::vector<String>& sequences,
00115 std::vector<DoubleReal>& labels,
00116 UInt k_mer_length,
00117 const String& allowed_characters,
00118 UInt border_length,
00119 bool strict = false,
00120 bool unpaired = false,
00121 bool length_encoding = false);
00122
/// Produces a string representation of a single svm_node vector.
///
/// @param vector  svm_node data to convert
/// @param output  output parameter receiving the text.
///                NOTE(review): cleared vs. appended is not visible - confirm.
00129 void libSVMVectorToString(svm_node* vector, String& output);
00130
/// Produces a string representation of the vectors of an svm_problem.
/// Note that the parameter is named 'vector' although it is an svm_problem*.
///
/// @param vector  svm_problem whose vectors are converted
/// @param output  output parameter receiving the text.
///                NOTE(review): cleared vs. appended is not visible - confirm.
00137 void libSVMVectorsToString(svm_problem* vector, String& output);
00138
00139 };
00140
00141 }
00142
00143 #endif // OPENMS_FORMAT_LIBSVMENCODER_H