Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages

Base64.h (Maintainer: Marc Sturm)

Go to the documentation of this file.
00001 // -*- Mode: C++; tab-width: 2; -*-
00002 // vi: set ts=2:
00003 //
00004 // --------------------------------------------------------------------------
00005 //                   OpenMS Mass Spectrometry Framework
00006 // --------------------------------------------------------------------------
00007 //  Copyright (C) 2003-2008 -- Oliver Kohlbacher, Knut Reinert
00008 //
00009 //  This library is free software; you can redistribute it and/or
00010 //  modify it under the terms of the GNU Lesser General Public
00011 //  License as published by the Free Software Foundation; either
00012 //  version 2.1 of the License, or (at your option) any later version.
00013 //
00014 //  This library is distributed in the hope that it will be useful,
00015 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 //  Lesser General Public License for more details.
00018 //
00019 //  You should have received a copy of the GNU Lesser General Public
00020 //  License along with this library; if not, write to the Free Software
00021 //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 //
00023 // --------------------------------------------------------------------------
00024 // $Maintainer: Marc Sturm $
00025 // --------------------------------------------------------------------------
00026 
00027 #ifndef OPENMS_FORMAT_BASE64_H
00028 #define OPENMS_FORMAT_BASE64_H
00029 
// Host byte-order flag used by Base64: expands to true when the build defines
// OPENMS_BIG_ENDIAN and to false otherwise. A value that is already defined
// before this point is left untouched.
#if !defined(OPENMS_IS_BIG_ENDIAN)
#  ifdef OPENMS_BIG_ENDIAN
#    define OPENMS_IS_BIG_ENDIAN true
#  else
#    define OPENMS_IS_BIG_ENDIAN false
#  endif
#endif
00037 
00038 #include <cmath>
00039 #include <OpenMS/CONCEPT/Types.h>
00040 
00041 #include <string>
00042 #include <vector>
00043 
00044 namespace OpenMS
00045 {
00052   class Base64
00053   {
00054   public:
00056     Base64();
00057 
00059     virtual ~Base64();
00060 
00061     enum ByteOrder{BYTEORDER_BIGENDIAN,BYTEORDER_LITTLEENDIAN};
00062 
00068     template <typename FromType>
00069     void encode(std::vector<FromType>& in, ByteOrder to_byte_order, std::string& out);
00070 
00076     template <typename ToType>
00077     void decode(const std::string& in, ByteOrder from_byte_order, std::vector<ToType>& out);
00078 
00079   private:
00080     static const char encoder_[];
00081     static const char decoder_[];
00082   };
00083 
00084 
00085   template <typename FromType>
00086   void Base64::encode(std::vector<FromType>& in, ByteOrder to_byte_order, std::string& out)
00087   {
00088     bool convert = false;
00089     out = std::string();
00090     if (in.size() == 0) return;
00091 
00092     if ((OPENMS_IS_BIG_ENDIAN && to_byte_order == Base64::BYTEORDER_LITTLEENDIAN) ||
00093       (!OPENMS_IS_BIG_ENDIAN && to_byte_order == Base64::BYTEORDER_BIGENDIAN))
00094     {
00095       convert = true;
00096     }
00097   
00098     UInt element_size = sizeof (FromType);
00099     UInt size = element_size * in.size();
00100     UInt padding = 0;
00101     if (size%3 == 2) padding=1; 
00102     if (size%3 == 1) padding=2;
00103     
00104     register unsigned char a;
00105     register unsigned char b;
00106 
00107 /*
00108   Inline documentation:
00109 
00110   If you want to understand the following, this link might be crucial:
00111 
00112   http://babbage.cs.qc.edu/IEEE-754/32bit.html (and some links at the bottom of this document)
00113   (online converter - *extremely* helpful for debugging)
00114 
00115   3 bytes are encoded to 4 base64 chars after the following method:
00116   |7 6 5 4 3 2 1 0|7 6 5 4 3 2 1 0|7 6 5 4 3 2 1 0| 3 Bytes
00117   |5 4 3 2 1 0|5 4 3 2 1 0|5 4 3 2 1 0|5 4 3 2 1 0| 4 "Bytes"
00118 
00119   Each resulting byte is assigned a printable character based upon its
00120   bit value (0..(2^6)-1), according to a special encoding character list.
00121   See top.
00122 
00123   On LITTLE endian machines, a vector<Real> looks like this:
00124 
00125   Memory Bytes   0 1 2 3 4 5 6 7 ... 
00126   Byte of Real   4 3 2 1 4 3 2 1 ...
00127   Vector element 1       2       ...
00128 
00129   On BIG endian machines, a vector<Real> looks like this:
00130 
00131   Memory Bytes   0 1 2 3 4 5 6 7 ... 
00132   Byte of Real   1 2 3 4 1 2 3 4 ...
00133   Vector element 1       2       ...
00134 
00135   When encoding to the same ENDIAN as the host byte order, we don't need to
00136   do any conversion.
00137 
00138   When encoding to the other ENDIAN, we need to
00139   do the conversion as follows (for every value):
00140   - mirror the byte orders
00141   - go on as usual
00142 
00143   To encode a vector<Real>, we need bytewise access to the original vector.
00144   We accomplish that by interpreting the Real as char[] and accessing it
00145   by index. This index is also a method for converting ENDIAN methods.
00146   - Starting index =
00147     same endian:      0, increment 1
00148     different endian: element_size-1, increment -1
00149 */
00150 
00151     UInt i = 0;
00152     UInt pos = 0;       // position in vector
00153     UInt offset = 0;    // offset in Real
00154     int inc = 1;        // increment
00155 
00156     if (convert == false) inc = 1;
00157     else inc = -1;
00158 
00159     for (i=0; i<size-3; i+=3)  
00160     {
00161       pos = i / element_size;
00162 
00163       if (convert == false)
00164       {
00165         offset = i % element_size;      // same endian
00166       }
00167       else
00168       {
00169         offset = (element_size - 1) - (i % element_size);   // other endian
00170       }
00171 
00172 //      printf ("pos %d, offset %d\n", pos, offset);
00173 //      printf ("i= %d, read %d\n", i, ((char*) &(in[pos]))[offset]);
00174       
00175       // encode 3 Byte to 4 Base64-Chars
00176       // a = byte at position i
00177       a = ((char*) &(in[pos]))[offset];
00178 
00179       // b = byte at position i+1
00180       pos = (i+1) / element_size;
00181       offset = (offset+inc) % element_size;
00182 //      printf ("i+1: pos %d, offset %d\n", pos, offset);
00183       b = ((char*) &(in[pos]))[offset];
00184       out.push_back(encoder_[a>>2]);
00185       out.push_back(encoder_[((a&3)<<4) | (b>>4)]);
00186 
00187       // a = byte at position i + 2
00188       pos = (i+2) / element_size;
00189       offset = (offset+inc) % element_size;
00190 //      printf ("i+2: pos %d, offset %d\n", pos, offset);
00191       a = ((char*) &(in[pos]))[offset];
00192 
00193       out.push_back(encoder_[((b&15)<<2) | (a>>6)]);
00194       out.push_back(encoder_[a&63]);
00195     }
00196 
00197     // encode last 3 Byte (fill missing bits with 0)
00198     pos = i / element_size;
00199     offset = (offset+inc) % element_size;
00200 //    printf ("i: %d, last byte: pos %d, offset %d\n", i, pos, offset);
00201     a = ((char*) &(in[pos]))[offset];
00202     out.push_back(encoder_[a>>2]);
00203 
00204     if (padding == 2)
00205     {
00206 /*
00207       One overlapping byte in input (for example 4 bytes = 1 real => 8 chars)
00208       last sequence:
00209       8 7 6 5 4 3 2 1|0 0 0 0 0 0 0 0|0 0 0 0 0 0 0 0
00210       6 5 4 3 2 1|6 5 4 3 2 1|6 5 4 3 2 1|6 5 4 3 2 1
00211       X           X           =           =
00212 */
00213       out.push_back(encoder_[(a&3)<<4]);
00214       out.push_back('=');
00215       out.push_back('=');
00216     }
00217     else if (padding)
00218     {
00219 /*
00220       Two overlapping bytes in input (for example 8 bytes = 2 reals => 12 chars)
00221       last sequence:
00222       8 7 6 5 4 3 2 1|8 7 6 5 4 3 2 1|0 0 0 0 0 0 0 0
00223       6 5 4 3 2 1|6 5 4 3 2 1|6 5 4 3 2 1|6 5 4 3 2 1
00224       X           X           X           =
00225 */
00226 
00227       i++;
00228       pos = i / element_size;
00229       offset = (offset+inc) % element_size;
00230       b = ((char*) &(in[pos]))[offset];
00231       out.push_back(encoder_[((a&3)<<4) | (b>>4)]);
00232       out.push_back(encoder_[(b&15)<<2]);
00233       out.push_back('=');       
00234     } 
00235     else
00236     {
00237       i++;
00238       pos = i / element_size;
00239       offset = (offset+inc) % element_size;
00240       b = ((char*) &(in[pos]))[offset];
00241       out.push_back(encoder_[((a&3)<<4) | (b>>4)]);
00242 
00243       i++;
00244       pos = i / element_size;
00245       offset = (offset+inc) % element_size;
00246       a = ((char*) &(in[pos]))[offset];
00247       out.push_back(encoder_[((b&15)<<2) | (a>>6)]);
00248       out.push_back(encoder_[a&63]);
00249     }
00250   }
00251 
00253   template <typename ToType>
00254   void Base64::decode(const std::string& in, ByteOrder from_byte_order, std::vector<ToType>& out)
00255   {
00256     out = std::vector<ToType>();
00257     if (in == "") return;
00258   
00259     UInt src_size = in.size();
00260     // last one or two '=' are skipped if contained
00261     int padding = 0;
00262     if (in[src_size-1] == '=') padding++;
00263     if (in[src_size-2] == '=') padding++;
00264 
00265     src_size -= padding;
00266 
00267     register UInt a;
00268     register UInt b;
00269 
00270     UInt offset = 0;
00271     bool convert = false;
00272     int inc = 1;
00273     UInt written = 0;
00274 
00275     UInt element_size = sizeof(ToType);
00276 
00277     // enough for either float or double
00278     char element[8] = "\x00\x00\x00\x00\x00\x00\x00";
00279 
00280     if ((OPENMS_IS_BIG_ENDIAN && from_byte_order == Base64::BYTEORDER_LITTLEENDIAN) ||
00281       (!OPENMS_IS_BIG_ENDIAN && from_byte_order == Base64::BYTEORDER_BIGENDIAN))
00282     {
00283       convert = true;
00284       offset = (element_size - 1);    // other endian
00285       inc = -1;
00286     }
00287     else
00288     {
00289       offset = 0;
00290       inc = 1;
00291     }
00292 
00293     // sort all read bytes correctly into a char[4] (double) or
00294     // char[8] (Real) and push_back when necessary.
00295     for (UInt i=0; i<src_size; i+=4)
00296     {
00297 //    printf ("start: i=%d, offset %d\n", i, offset);
00298 
00299       // decode 4 Base64-Chars to 3 Byte
00300       a = decoder_[(int)in[i]-43]-62;
00301       b = decoder_[(int)in[i+1]-43]-62;
00302       if (i+1 >= src_size) b=0;
00303       element[offset] = (unsigned char) ((a<<2) | (b>>4));
00304       written++;
00305 //    printf ("1: i=%d, offset %d, wrote %d\n", i, offset, element[offset]);
00306       offset = (offset + inc) % element_size;
00307 
00308       if (written % sizeof(ToType) == 0)
00309       {
00310         out.push_back(((ToType*)element)[0]);
00311         strcpy(element, "");
00312       }
00313 
00314       a = decoder_[(int)in[i+2]-43]-62;
00315       if (i+2 >= src_size) a=0;
00316       element[offset] = (unsigned char) (((b&15)<<4) | (a>>2));
00317       written++;
00318 //    printf ("2: i=%d, offset %d, wrote %d\n", i, offset, element[offset]);
00319       offset = (offset + inc) % element_size;
00320 
00321       if (written % sizeof(ToType) == 0)
00322       {
00323         // debug: output float in binary
00324 /*
00325         for (int sl = 0; sl != sizeof(ToType); sl++)
00326         {
00327           for (int sl2 = 128; sl2 >= 1; sl2 /= 2)
00328           {
00329             std::cout << (element[sl] & sl2);
00330           }
00331           std::cout << " ";
00332         }
00333 */
00334         out.push_back(((ToType*)element)[0]);
00335         strcpy(element, "");
00336       }
00337 
00338       b = decoder_[(int)in[i+3]-43]-62;
00339       if (i+3 >= src_size) b=0;
00340       element[offset] = (unsigned char) (((a&3)<<6) | b);
00341       written++;
00342 //    printf ("3: i=%d, offset %d, wrote %d\n", i, offset, element[offset]);
00343       offset = (offset + inc) % element_size;
00344 
00345       if (written % sizeof(ToType) == 0)
00346       {
00347         out.push_back(((ToType*)element)[0]);
00348         strcpy(element, "");
00349       }
00350     }
00351   }
00352 
00353 } //namespace OpenMS
00354 
00355 #endif /* OPENMS_FORMAT_BASE64_H */

Generated Tue Apr 1 15:36:32 2008 -- using doxygen 1.5.4 OpenMS / TOPP 1.1