Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages

LinearRegression.h (Maintainer: Eva Lange)

Go to the documentation of this file.
00001 // -*- Mode: C++; tab-width: 2; -*-
00002 // vi: set ts=2:
00003 //
00004 // --------------------------------------------------------------------------
00005 //                   OpenMS Mass Spectrometry Framework
00006 // --------------------------------------------------------------------------
00007 //  Copyright (C) 2003-2008 -- Oliver Kohlbacher, Knut Reinert
00008 //
00009 //  This library is free software; you can redistribute it and/or
00010 //  modify it under the terms of the GNU Lesser General Public
00011 //  License as published by the Free Software Foundation; either
00012 //  version 2.1 of the License, or (at your option) any later version.
00013 //
00014 //  This library is distributed in the hope that it will be useful,
00015 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 //  Lesser General Public License for more details.
00018 //
00019 //  You should have received a copy of the GNU Lesser General Public
00020 //  License along with this library; if not, write to the Free Software
00021 //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 //
00023 // --------------------------------------------------------------------------
00024 // $Maintainer: Eva Lange $
00025 // --------------------------------------------------------------------------
00026 //
00027 
00028 #ifndef OPENMS_MATH_STATISTICS_LINEARREGRESSION_H
00029 #define OPENMS_MATH_STATISTICS_LINEARREGRESSION_H
00030 
00031 #include<OpenMS/CONCEPT/Types.h>
00032 
00033 #include<iostream>
00034 #include<vector>
00035 #include<gsl/gsl_fit.h>
00036 #include<gsl/gsl_statistics.h>
00037 #include<gsl/gsl_cdf.h>
00038 
00039 using namespace std;
00040 
00041 namespace OpenMS
00042 {
00043   namespace Math
00044   {
00065     template <typename Iterator>
00066     class LinearRegression
00067     {
00068       public:
00069 
00071         LinearRegression()
00072             : intercept_(0),
00073             slope_(0),
00074             x_intercept_(0),
00075             lower_(0),
00076             upper_(0),
00077             t_star_(0),
00078             r_squared_(0),
00079             stand_dev_residuals_(0),
00080             mean_residuals_(0),
00081             stand_error_slope_(0),
00082             chi_squared_(0),
00083             rsd_(0)
00084         {}
00085 
00087         LinearRegression ( LinearRegression const & arg )
00088             : intercept_(arg.intercept_),
00089             slope_(arg.slope_),
00090             x_intercept_(arg.x_intercept_),
00091             lower_(arg.lower_),
00092             upper_(arg.upper_),
00093             t_star_(arg.t_star_),
00094             r_squared_(arg.r_squared_),
00095             stand_dev_residuals_(arg.stand_dev_residuals_),
00096             mean_residuals_(arg.mean_residuals_),
00097             stand_error_slope_(arg.stand_error_slope_),
00098             chi_squared_(arg.chi_squared_),
00099             rsd_(arg.rsd_)
00100         {}
00101 
00103         LinearRegression & operator = ( LinearRegression const & arg )
00104         {
00105           // take care of self assignments
00106           if (this == &arg)
00107           {
00108             return *this;
00109           }
00110 
00111           intercept_ = arg.intercept_;
00112           slope_ = arg.slope_;
00113           x_intercept_ = arg.x_intercept_;
00114           lower_ = arg.lower_;
00115           upper_ = arg.upper_;
00116           t_star_ = arg.t_star_;
00117           r_squared_ = arg.r_squared_;
00118           stand_dev_residuals_ = arg.stand_dev_residuals_;
00119           mean_residuals_  = arg.mean_residuals_;
00120           stand_error_slope_ = arg.stand_error_slope_;
00121           chi_squared_ = arg.chi_squared_;
00122           rsd_ = arg.rsd_;
00123 
00124           return *this;
00125         }
00126 
00127 
00129         virtual ~LinearRegression()
00130         {}
00131 
00132 
00145         int computeInterceptXAxis
00146         (double confidence_interval_P,
00147          Iterator x_begin,
00148          Iterator x_end,
00149          Iterator y_begin);
00150 
00163         int computeInterceptXAxisWeighted
00164         (double confidence_interval_P,
00165          Iterator x_begin,
00166          Iterator x_end,
00167          Iterator y_begin,
00168          Iterator w_begin);
00169 
00170 
00172         inline DoubleReal getIntercept() const
00173         {
00174           return intercept_;
00175         }
00177         inline DoubleReal getSlope() const
00178         {
00179           return slope_;
00180         }
00182         inline DoubleReal getXIntercept() const
00183         {
00184           return x_intercept_;
00185         }
00187         inline DoubleReal getLower() const
00188         {
00189           return lower_;
00190         }
00192         inline DoubleReal getUpper() const
00193         {
00194           return upper_;
00195         }
00197         inline DoubleReal getTValue() const
00198         {
00199           return t_star_;
00200         }
00202         inline DoubleReal getRSquared() const
00203         {
00204           return r_squared_;
00205         }
00207         inline DoubleReal getStandDevRes() const
00208         {
00209           return stand_dev_residuals_;
00210         }
00211         
00213         inline DoubleReal getMeanRes() const
00214         {
00215           return mean_residuals_;
00216         }
00217         
00219         inline DoubleReal getStandErrSlope() const
00220         {
00221           return stand_error_slope_;
00222         }
00224         inline DoubleReal getChiSquared() const
00225         {
00226           return chi_squared_;
00227         }
00228         
00230         inline DoubleReal getRSD() const
00231         {
00232           return rsd_;
00233         }
00234 
00235 
00236       protected:
00237 
00239         double intercept_;
00241         double slope_;
00243         double x_intercept_;
00245         double lower_;
00247         double upper_;
00249         double t_star_;
00251         double r_squared_;
00253         double stand_dev_residuals_;
00255         double mean_residuals_;
00257         double stand_error_slope_;
00259         double chi_squared_;
00261         double rsd_;
00262 
00263 
00265         void computeGoodness_
00266         (double* X,
00267          double* Y,
00268          int N,
00269          double confidence_interval_P);
00270 
00272         void iteratorRange2Arrays_
00273         (Iterator x_begin,
00274          Iterator x_end,
00275          Iterator y_begin,
00276          double* x_array,
00277          double* y_array);
00278 
00283         void iteratorRange3Arrays_
00284         (Iterator x_begin,
00285          Iterator x_end,
00286          Iterator y_begin,
00287          Iterator w_begin,
00288          double* x_array,
00289          double* y_array,
00290          double* w_array);
00291 
00292     };
00293 
00294 
00295     template <typename Iterator>
00296     int LinearRegression<Iterator>::computeInterceptXAxis
00297     (double confidence_interval_P,
00298      Iterator x_begin,
00299      Iterator x_end,
00300      Iterator y_begin
00301     )
00302     {
00303       int N=distance(x_begin,x_end);
00304 
00305       double* X = new double[N];
00306       double* Y = new double[N];
00307       iteratorRange2Arrays_(x_begin,x_end,y_begin,X,Y);
00308 
00309       double cov00, cov01, cov11;
00310 
00311       // Compute the unweighted linear fit.
00312       // Get the intercept and the slope of the regression Y_hat=intercept_+slope_*X
00313       // and the value of Chi squared, the covariances of the intercept and the slope
00314       int error = gsl_fit_linear (X,1,Y,1,N,&intercept_, &slope_, &cov00, &cov01, &cov11, &chi_squared_);
00315 
00316       if (!error)
00317       {
00318         computeGoodness_(X,Y,N,confidence_interval_P);
00319       }
00320 
00321       delete [] X;
00322       delete [] Y;
00323 
00324       return error;
00325     }
00326 
00327 
00328     template <typename Iterator>
00329     int LinearRegression<Iterator>::computeInterceptXAxisWeighted
00330     (double confidence_interval_P,
00331      Iterator x_begin,
00332      Iterator x_end,
00333      Iterator y_begin,
00334      Iterator w_begin
00335     )
00336     {
00337       int N=distance(x_begin,x_end);
00338 
00339       double* X = new double[N];
00340       double* Y = new double[N];
00341       double* W = new double[N];
00342       iteratorRange3Arrays_(x_begin,x_end,y_begin,w_begin,X,Y,W);
00343 
00344       double cov00, cov01, cov11;
00345 
00346       // Compute the weighted linear fit.
00347       // Get the intercept and the slope of the regression Y_hat=intercept_+slope_*X
00348       // and the value of Chi squared, the covariances of the intercept and the slope
00349       int error = gsl_fit_wlinear (X,1,W,1,Y,1,N,&intercept_, &slope_, &cov00, &cov01, &cov11, &chi_squared_);
00350 
00351       if (!error)
00352       {
00353         computeGoodness_(X,Y,N,confidence_interval_P);
00354       }
00355 
00356       delete [] X;
00357       delete [] Y;
00358       delete [] W;
00359 
00360       return error;
00361     }
00362 
00363 
00364     template <typename Iterator>
00365     void LinearRegression<Iterator>::computeGoodness_(double* X, double* Y,int N, double confidence_interval_P)
00366     {
00367       // Variance and Covariances
00368       double var_X = gsl_stats_variance(X,1,N);
00369       double var_Y = gsl_stats_variance(Y,1,N);
00370       double cov_XY = gsl_stats_covariance(X,1,Y,1,N);
00371 
00372       // Mean of abscissa and ordinate values
00373       double x_mean = gsl_stats_mean(X,1,N);
00374       double y_mean = gsl_stats_mean(Y,1,N);
00375         
00376       // S_xx
00377       double s_XX = 0;
00378       for (int i=0; i<N; ++i)
00379       {
00380         double d=(X[i]-x_mean);
00381         s_XX +=d*d;
00382       }
00383 
00384       // Compute the squared Pearson coefficient
00385       r_squared_=(cov_XY*cov_XY)/(var_X*var_Y);
00386 
00387       // The standard deviation of the residuals
00388       double sum = 0;
00389       for (Int i = 0; i < N; ++i)
00390       {
00391         double x_i = fabs(Y[i] - (intercept_ + slope_ * X[i]));
00392         sum += x_i;
00393       }
00394       mean_residuals_       = sum / N;
00395       stand_dev_residuals_ = sqrt((chi_squared_ - (sum*sum)/N)/(N-1));
00396 
00397       // The Standard error of the slope
00398       stand_error_slope_=stand_dev_residuals_/sqrt(s_XX);
00399 
00400       // and the intersection of Y_hat with the x-axis
00401       x_intercept_= -(intercept_/slope_);
00402 
00403       double P=1-(1-confidence_interval_P)/2;
00404       t_star_=gsl_cdf_tdist_Pinv(P,N-2);
00405 
00406       //Compute the asymmetric 95% confidence intervall of around the X-intercept
00407       double g =(t_star_/(slope_/stand_error_slope_));
00408       g *= g;
00409       double left = (x_intercept_-x_mean)*g;
00410       double bottom = 1-g;
00411       double d=(x_intercept_-x_mean);
00412       double right = t_star_*(stand_dev_residuals_/slope_)*sqrt((d*d)/s_XX + (bottom/N));
00413 
00414       // Confidence intervall lower_ <= X_intercept <= upper_
00415       lower_ = x_intercept_+(left+right)/bottom;
00416       upper_ = x_intercept_+(left-right)/bottom;
00417 
00418       if (lower_ > upper_)
00419       {
00420         std::swap(lower_,upper_);
00421       }
00422       
00423       double tmp = 0;
00424       for (Int i = 0; i<N;++i)
00425       {
00426         tmp += (X[i] - x_mean) * (X[i] - x_mean);
00427       }
00428       
00429 //      cout << "100.0 / abs( x_intercept_ ) " << (100.0 / fabs( x_intercept_ )) << endl;
00430 //      cout << "tmp : " << tmp << endl;
00431 //      cout << "slope_ " << slope_ << endl;
00432 //      cout << "y_mean " << y_mean << endl;
00433 //      cout << "N " << N << endl;
00434 //      cout << "stand_dev_residuals_ " << stand_dev_residuals_ << endl;
00435 //      cout << " (1.0/ (double) N)  " <<  (1.0/ (double) N)  << endl;
00436 //      cout << "sx hat " << (stand_dev_residuals_ / slope_) * sqrt(  (1.0/ (double) N) * (y_mean / (slope_ * slope_ * tmp ) ) ) << endl;
00437   
00438       // compute relative standard deviation (non-standard formula, taken from Mayr et al. (2006) )
00439       rsd_ = (100.0 / fabs( x_intercept_ )) * (stand_dev_residuals_ / slope_) * sqrt(  (1.0/ (double) N) * (y_mean / (slope_ * slope_ * tmp ) ) ) ; 
00440       
00441       if (rsd_ < 0.0)
00442       {
00443       std::cout << "rsd < 0.0 " << std::endl;
00444       std::cout <<   "Intercept                                " << intercept_
00445                 << "\nSlope                                    " << slope_
00446                 << "\nSquared pearson coefficient              " << r_squared_
00447                 << "\nValue of the t-distribution              " << t_star_
00448                 << "\nStandard deviation of the residuals      " << stand_dev_residuals_ 
00449                 << "\nStandard error of the slope              " << stand_error_slope_
00450                 << "\nThe X intercept                          " << x_intercept_
00451                 << "\nThe lower border of confidence interval  " << lower_
00452                 << "\nThe higher border of confidence interval " << upper_
00453                 << "\nChi squared value                        " << chi_squared_
00454                 << "\nx mean                                   " << x_mean
00455                 << "\nstand_error_slope/slope_                 " << (stand_dev_residuals_/slope_)
00456                 << "\nCoefficient of Variation                 " << (stand_dev_residuals_/slope_)/x_mean*100  << std::endl
00457                 << "========================================="
00458                 << std::endl;   
00459       }
00460       
00461  
00462     }
00463 
00464 
00465 
00466     template <typename Iterator>
00467     void LinearRegression<Iterator>::iteratorRange2Arrays_
00468     (Iterator x_begin,
00469      Iterator x_end,
00470      Iterator y_begin,
00471      double* x_array,
00472      double* y_array)
00473     {
00474       int i=0;
00475       while (x_begin < x_end)
00476       {
00477         x_array[i]= *x_begin;
00478         y_array[i]= *y_begin;
00479         ++x_begin;
00480         ++y_begin;
00481         ++i;
00482       }
00483     }
00484 
00485 
00486     template <typename Iterator>
00487     void LinearRegression<Iterator>::iteratorRange3Arrays_
00488     (Iterator x_begin,
00489      Iterator x_end,
00490      Iterator y_begin,
00491      Iterator w_begin,
00492      double* x_array,
00493      double* y_array,
00494      double* w_array)
00495     {
00496       int i=0;
00497       while (x_begin < x_end)
00498       {
00499         x_array[i]=*x_begin;
00500         y_array[i]=*y_begin;
00501         w_array[i]=*w_begin;
00502         ++x_begin;
00503         ++y_begin;
00504         ++w_begin;
00505         ++i;
00506       }
00507     }
00508   } // namespace Math
00509 } // namespace OpenMS
00510 
00511 
00512 #endif

Generated Tue Apr 1 15:36:35 2008 -- using doxygen 1.5.4 OpenMS / TOPP 1.1