The Machine Perception Toolbox (MPT)

00001 /*
00002  *  eyefinder.cc
00003  *
00004  *  Author:Ian Fasel
00005  *  Fixes:
00006  *
00007  *  Copyright (c) 2003 Machine Perception Laboratory
00008  *  University of California San Diego.
00009  *
00010  * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
00011  *
00012  *    1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
00013  *    2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
00014  *    3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission.
00015  *
00016  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00017  *
00018  */
00019 
00020 #include "eyefinder.h"
00021 #include "eye_finder_ROI.h"
00022 #include <math.h>
00023 #include <string>
00024 #include <iostream>
00025 
00026 #define ROI_namespace eyefinder_ROI_low_thresh
00027 
00028 GPrior::GPrior(){};
00029 GPrior::GPrior(const vector< float > &mean_, const vector< vector< float > > &cov_, const vector< vector< float > > &invCov_){
00030   SetPrior(mean_, cov_, invCov_);
00031 }
00032 void GPrior::Release(){
00033   mean.clear();
00034   cov.clear();
00035   invCov.clear();
00036 }
00037 void GPrior::SetPrior(const vector< float > &mean_, const vector< vector< float > > &cov_, const vector< vector< float > > &invCov_){
00038   mean=mean_;
00039   cov=cov_;
00040   invCov=invCov_;
00041 }
00042 
00043 void ROIdata::Release(){
00044   mean.clear();
00045   cov.clear();
00046   invCov.clear();
00047   scales.clear();
00048   bounds.clear();
00049 }
00050 
00051 
00052 MPEyeFinder::MPEyeFinder ( ) : MPISearchFaceDetector(),
00053      m_rez(DEFAULT_PATCH_REZ), m_centering(DEFAULT_CENTERING){ 
00054   initROIdata();
00055 }
00056 MPEyeFinder::~MPEyeFinder ( ){ 
00057   releaseStream();
00058   leftROIdata.Release();
00059   rightROIdata.Release();
00060 
00061 }
00062 
00063 void MPEyeFinder::SetCentering(const centering_condition &c){m_centering = c;}
00064 void MPEyeFinder::SetRez(const patch_rez &r){m_rez = r;}
00065 
00066 void MPEyeFinder::initStream(const int width, const int height, double WINSHIFT){
00067   stream.init(width, height, data, WINSHIFT);
00068   left_eye_stream.init(stream, left_eye_data, 1);
00069   right_eye_stream.init(stream, right_eye_data, 1);
00070   //printData(left_eye_data,left_eye_data.numfeatures-1);
00071   //printData(right_eye_data,left_eye_data.numfeatures-1);
00072 }
00073 
00074 void MPEyeFinder::resetStream(const int width, const int height, double WINSHIFT){
00075   // Note: order matters! eye_streams are not owners of all their data, so they must be released first
00076   releaseStream();
00077   stream.init(width, height, data, WINSHIFT);
00078   left_eye_stream.init(stream, left_eye_data, 1);
00079   right_eye_stream.init(stream, right_eye_data, 1);
00080 }
00081 void MPEyeFinder::releaseStream(){
00082   // Note: order matters! eye_streams are not owners of all their data, so they must be released first
00083   left_eye_stream.release();
00084   right_eye_stream.release();
00085   stream.release();
00086 }
00087 
00088 void MPEyeFinder::initROIdata(){
00089   const int dim = 3; // 3-dimensional data
00090   int i;
00091 
00092   for(i = 0; i < dim; ++i)
00093     leftROIdata.mean.push_back(ROI_namespace::leftmean[i]);
00094   for(i = 0; i < dim; ++i){
00095     vector< float > v;
00096     vector< float > iv;
00097     for(int j = 0; j < dim; ++j){
00098       v.push_back(ROI_namespace::leftcov[i][j]);
00099       iv.push_back(ROI_namespace::leftInvCov[i][j]);
00100     }
00101     leftROIdata.cov.push_back(v);
00102     leftROIdata.invCov.push_back(iv);
00103   }
00104   leftROIdata.numscales = ROI_namespace::numleftscales;
00105   for(i = 0; i < leftROIdata.numscales; ++i)
00106     leftROIdata.scales.push_back(ROI_namespace::leftscales[i]);
00107   for(i = 0; i < leftROIdata.numscales; ++i){
00108     vector< float > v;
00109     for(int j = 0; j < 6; ++j)
00110       v.push_back(ROI_namespace::leftbounds[i][j]);
00111     leftROIdata.bounds.push_back(v);
00112   }
00113 
00114   for(i = 0; i < dim; ++i)
00115     rightROIdata.mean.push_back(ROI_namespace::rightmean[i]);
00116   for(i = 0; i < dim; ++i){
00117     vector< float > v;
00118     vector< float > iv;
00119     for(int j = 0; j < dim; ++j){
00120       v.push_back(ROI_namespace::rightcov[i][j]);
00121       iv.push_back(ROI_namespace::rightInvCov[i][j]);
00122     }
00123     rightROIdata.cov.push_back(v);
00124     rightROIdata.invCov.push_back(iv);
00125   }
00126   rightROIdata.numscales = ROI_namespace::numrightscales;
00127   for(i = 0; i < rightROIdata.numscales; ++i)
00128     rightROIdata.scales.push_back(ROI_namespace::rightscales[i]);
00129   for(i = 0; i < rightROIdata.numscales; ++i){
00130     vector< float > v;
00131     for(int j = 0; j < 6; ++j)
00132       v.push_back(ROI_namespace::rightbounds[i][j]);
00133     rightROIdata.bounds.push_back(v);
00134   }
00135 }
00136 
00137 GPrior MPEyeFinder::setROI(FaceObject* &face, feature_type eye_type){
00138   ROIdata *d;
00139   MPIImagePyramid<MPISEARCH_OBJECT_TYPE> *mpi;
00140   float xoff, yoff, patchWidthPct, patch_width, patch_height;
00141   int i;
00142 
00143   getEyeOffsets(m_rez, m_centering, xoff, yoff);
00144   getPatchWidth(m_rez, patchWidthPct);
00145 
00146   switch(eye_type){
00147   case lefteye:
00148     d = &leftROIdata;
00149     mpi = left_eye_stream.mpi;
00150     xoff = -xoff;
00151     patch_width = left_eye_stream.m_data->patch_width;
00152     patch_height = left_eye_stream.m_data->patch_width;
00153     break;
00154   case righteye:
00155     d = &rightROIdata;
00156     mpi = right_eye_stream.mpi;
00157     patch_width = right_eye_stream.m_data->patch_width;
00158     patch_height = right_eye_stream.m_data->patch_width;
00159     break;
00160   }
00161 
00162   // First, clear out the ROI in the current stream
00163   // mpi->InitROI();
00164   ROI roi = mpi->getROI();
00165   for(i = 0; i < roi.vmin_x.size(); ++i){
00166     roi.vmin_x[i] = 0; roi.vmax_x[i] = 0; roi.vmin_y[i] = 0; roi.vmax_y[i] = 0; 
00167   }
00168   
00169   // Get the multiplier for finding the scale_factor of an eyepatch given an eye distance
00170   float scale_factor_ratio = patchWidthPct * face->xSize * 2.0f / patch_width;
00171 
00172   // Now, loop through all the scales we would ideally search, and set
00173   // the bounds according to the actual scales we have access to, given
00174   // the size of the expected eye patch and the fact that we can only
00175   // access integer scales
00176 
00177   float scale_factor;
00178   int current_scale_ind;
00179   int last_scale_ind = -1;
00180   for(i = 0; i < d->numscales; ++i){
00181     scale_factor = scale_factor_ratio * (d->mean[2]+d->scales[i]);
00182     current_scale_ind = mpi->getClosestScale(scale_factor);
00183     //scale_factor = mpi->scale_factors[current_scale_ind];
00184     if(current_scale_ind != last_scale_ind){
00185       scale_factor = static_cast<int>(max(1.0f,scale_factor) * 2.0f + 1.0f)/ 2.0f; // grows tiny scales a bit
00186       float half_window_width = patch_width * scale_factor / (2.0f-max(0.0f,1.0f-2.0f*patchWidthPct)); // widens small scale detectors
00187       // Commentary:
00188       // (bounds[i][1] + d->mean[0] + xoff):  the ROI w.r.t the center of the search patch
00189       //             * face->xSize: convert to pixels
00190       //             +-  half_window_width: adjust so this ROI bounds searchable patches on left and right edge
00191       //             + face->x,y: make it with respect to the face location
00192 
00193       // POSSIBLE BUG! ERROR! Only uses window width, not height.  Check here if problems occur
00194       //cout << "d->bounds[i][1] + d->mean[0] + xoff" << d->bounds[i][1] + d->mean[0] + xoff << endl;
00195       roi.vmin_x[current_scale_ind] = (int)((d->bounds[i][1] + d->mean[0] + xoff) * face->xSize - half_window_width + face->x);
00196       roi.vmax_x[current_scale_ind] = (int)((d->bounds[i][2] + d->mean[0] + xoff) * face->xSize + half_window_width + face->x);
00197       roi.vmin_y[current_scale_ind] = (int)((d->bounds[i][4] + d->mean[1] + yoff) * face->ySize - half_window_width + face->y);
00198       roi.vmax_y[current_scale_ind] = (int)((d->bounds[i][5] + d->mean[1] + yoff) * face->ySize + half_window_width + face->y);
00199       last_scale_ind = current_scale_ind;
00200     }
00201   }
00202   // Now, set the current stream to use this ROI
00203   mpi->SetROI(roi);
00204   return GPrior(d->mean, d->cov, d->invCov);
00205 }
00206 
00207 
00208 int MPEyeFinder::findEyes(const RImage<MPISEARCH_OBJECT_TYPE> &pixels, VisualObject &mFaces,
00209                           float WINSHIFT, combine_mode mode){
00210   mFaces.clear();
00211   // First, find the faces using built-in face detector weights and search algorithm
00212   FaceBoxList faces;
00213   //pixels.print();
00214   int numwindows = search(pixels, faces, 1, WINSHIFT);
00215   faces.simplify(0.20f);
00216   std::cout << "Found " << faces.size() << " faces after simplification." << endl;
00217   //if(faces.size()==0){
00218   //  pixels.print(20);
00219   //  setDebug(true);
00220   //  numwindows = search(pixels, faces, 1, WINSHIFT);
00221   //}
00222   int totalRightEyes, totalLeftEyes;
00223   if(faces.size() != 0) {
00224     // for each found face, find eyes
00225     list<Square>::iterator face = faces.begin();
00226     list<Square>::iterator last_face = faces.end();
00227     for( ; face != last_face; ++face){
00228       //cout << "Found face at (x=" << face->x<<", y=" << face->y << ", size= "<< face->size << ")" << endl;
00229       FaceObject *current_face = new FaceObject(face);
00230       if (mode != face_only){
00231 
00232         // search for left and right eyes
00233         GPrior rightPrior = setROI(current_face,righteye);
00234         totalRightEyes = eyeSearch(right_eye_stream, right_eye_data, current_face->rightEyes, current_face, rightPrior, righteye, mode);
00235         std::cout << "totalRightEyes: " << totalRightEyes << ", rightEyes.size(): "<< current_face->rightEyes.size() << endl;
00236         GPrior leftPrior = setROI(current_face, lefteye);
00237         totalLeftEyes = eyeSearch(left_eye_stream, left_eye_data, current_face->leftEyes, current_face, leftPrior, lefteye, mode);
00238         std::cout << "totalLeftEyes: " << totalLeftEyes << ", leftEyes.size(): "<< current_face->leftEyes.size() << endl;
00239       }
00240 
00241       // make final hypothesis of best eyes and add face to list
00242       current_face->posterior(mode);
00243       mFaces.push_front(current_face);
00244     }
00245   }
00246   faces.clear();
00247   return numwindows;
00248 }
00249 
00250 
00251 // EyeSearch imitates the built-in search, but it doesn't need to re-integrate images,
00252 // and doesn't need to worry about so many variable options, such as block flags or
00253 // returning real valued activations.
00254 int MPEyeFinder::eyeSearch(MPISearchStream<MPISEARCH_OBJECT_TYPE> &thestream, FeatureData &thedata, vector< EyeObject > &eyelist, FaceObject *current_face, GPrior &gp, feature_type eye_type, combine_mode mode){
00255   int scale_index;
00256   float scale_factor;
00257   int x,y;
00258   int totalEyesFound = 0;
00259   double activation;
00260   double *activation_ptr = &activation;
00261   float xoff, yoff;
00262   float meanSub[3], rtnVector[3];
00263   float lp[1];
00264 
00265   // Compute the offset of the eye location from the ul corner of the patch
00266   // NOTE: I think this is correct, but it bears going over
00267   // It should reduce to an offset of exactly halfpatch in the eye_centered condition
00268   float patchWidthPct;
00269   getEyeOffsets(m_rez, m_centering, xoff, yoff);
00270   getPatchWidth(m_rez, patchWidthPct);
00271   float patch_ratio = patchWidthPct * leftROIdata.mean[2] * 2.0f; // get halfpatch size as proportion of face size
00272   float my_yoff = yoff / patch_ratio; // convert offset from face size ratio to halfpatch ratio
00273   float my_xoff = xoff / patch_ratio; // convert offset from face size ratio to halfpatch ratio
00274   if(eye_type == lefteye) my_xoff = -my_xoff; // note the mirror imaging for right eyes
00275   my_yoff = (0.5f-my_yoff) * (thedata.patch_width-1); // eye's offset from ul corner =  halfpatch-offset
00276   my_xoff = (0.5f-my_xoff) * (thedata.patch_width-1); // eye's offset from ul corner =  halfpatch-offset
00277 
00278   MPIImagePyramid<float>::const_iterator scale = thestream.mpi->begin(), last_scale = thestream.mpi->end();
00279   for( ; scale != last_scale; ++scale){ 
00280     scale_index = scale.getScale(scale_factor);
00281     int current_xoff = static_cast<int>(scale_factor * my_xoff);
00282     int current_yoff = static_cast<int>(scale_factor * my_yoff);
00283     //cout << "scale_factor: " << scale_factor << ", current_xoff: " << current_xoff << ", current_yoff: " << current_yoff << endl;
00284     // get pointers to cached values for this scale
00285     MPISEARCH_OBJECT_TYPE sf2 = scale_factor * scale_factor;
00286     CornerCache<float> **corners = thestream.corners[scale_index];
00287     MPISEARCH_OBJECT_TYPE *fns = thestream.fns[scale_index];
00288     CornerCache<float> *nw_c = thestream.nw_c[scale_index];
00289     MPISEARCH_OBJECT_TYPE nw_fn = thestream.nw_fn[scale_index];
00290     MPIScaledImage<float>::const_iterator window = (*scale).begin(), last_window = (*scale).end();
00291     for( ; window != last_window; ++window){
00292       // check the window.  If it passes, cascade_level will be 1.  Otherwise, it will be  <= 0.
00293       double cascade_level = classifyWindow(window, thedata, corners, fns, thestream.norm_window, nw_c,
00294                                             nw_fn, scale_factor, sf2, activation_ptr);
00295       switch(mode){
00296       case none:
00297 
00298         window.getCoords(x,y);
00299         eyelist.push_back(EyeObject(eye_type, x+current_xoff, y+current_yoff, scale_factor, activation, cascade_level));
00300         totalEyesFound++;
00301         //cout << "("<< x+current_xoff << "," << y+current_yoff << "): " << activation << endl; 
00302         break;
00303                         
00304       case wt_max:
00305       case wt_avg:
00306         if(cascade_level > 0){
00307           window.getCoords(x,y);
00308           x += current_xoff;
00309           y += current_yoff;
00310                                         
00311           // gaussian section
00312           meanSub[0] = ((x-current_face->x)/current_face->xSize)-gp.mean[0]; 
00313           meanSub[1] = ((y-current_face->y)/current_face->ySize)-gp.mean[1]; 
00314           int scale_size = scale_factor*(thedata.patch_width-1);
00315           meanSub[2] = ((scale_size*gp.mean[2])-(current_face->xSize*gp.mean[2]))/scale_size;
00316           matrixMult(1, 3, meanSub, 3, 3, gp.invCov, rtnVector);
00317           matrixMult(1, 3, rtnVector, 3, 1, meanSub, lp);
00318           activation = (2*activation) - (.5*(*lp));
00319 
00320           if(mode == wt_max){
00321             eyelist.push_back(EyeObject(eye_type, x, y, scale_factor, activation, cascade_level));
00322             totalEyesFound++;
00323           }
00324           else if(mode == wt_avg){
00325             if(eyelist.size() < 10){
00326               eyelist.push_back(EyeObject(eye_type, x, y, scale_factor, activation, cascade_level));
00327               totalEyesFound++;
00328               if (eyelist.size() == 10)
00329                 sort(eyelist.begin(), eyelist.end());
00330             }
00331             else {
00332               if (activation > eyelist[0].activation){
00333                 eyelist[0] = EyeObject(eye_type, x, y, scale_factor, activation, cascade_level);
00334                 sort(eyelist.begin(), eyelist.end());
00335               }
00336             }
00337           }
00338         }
00339         break;
00340       case average:
00341         window.getCoords(x,y);
00342         x += current_xoff;
00343         y += current_yoff;
00344         if(eyelist.size() < 10){
00345           eyelist.push_back(EyeObject(eye_type, x, y, scale_factor, activation, cascade_level));
00346           totalEyesFound++;
00347           if (eyelist.size() == 10)
00348             sort(eyelist.begin(), eyelist.end());
00349         }
00350         else {
00351           if (activation > eyelist[0].activation){
00352             eyelist[0] = EyeObject(eye_type, x, y, scale_factor, activation, cascade_level);
00353             sort(eyelist.begin(), eyelist.end());
00354           }
00355         }
00356         break;
00357       default:
00358         eyelist.push_back(EyeObject(eye_type, x, y, scale_factor, activation, cascade_level));
00359         break;
00360       };
00361     } // end window loop
00362   }//end scale loop
00363   //cout << endl;
00364   return totalEyesFound;
00365 }
00366 
00367 
00368 
00369 
00370 void MPEyeFinder::getPatchWidth(patch_rez p, float &patchWidthPct){
00371   switch(p){ // note: in near future, change this to look up p from FeatureData
00372   case largest_2:
00373     patchWidthPct = 3.4f; break;
00374   case largest:
00375     patchWidthPct = 1.5f; break;
00376   case eye_dist:
00377     patchWidthPct = 1; break;
00378   case half_dist:
00379     patchWidthPct = .5f; break;
00380   case eye_only:
00381     patchWidthPct = .22f; break;
00382   case smallest:
00383     patchWidthPct = .11f; break;
00384   }
00385 }
00386 
00387 void MPEyeFinder::getEyeOffsets(patch_rez p, centering_condition centering, float &xoff, float &yoff){
00388   switch(centering) {
00389   case eye_centered:
00390     xoff = 0;
00391     yoff = 0;
00392     break;
00393   case face_centered:
00394     switch(p) {
00395     case largest_2:
00396       xoff = 0;
00397       yoff = 0;
00398       break;
00399     case largest:
00400       xoff = 0.10451f;
00401       yoff = 0.09642f;
00402       break;
00403     case eye_dist:
00404       xoff = 0.090967f;
00405       yoff = 0.082883f;
00406       break;
00407     case half_dist:
00408       xoff = 0;
00409       yoff = 0;
00410       break;
00411     case eye_only:
00412       xoff = 0;
00413       yoff = 0;
00414       break;
00415     case smallest:
00416       xoff = 0;
00417       yoff = 0;
00418       break;
00419     }
00420     break;
00421   }
00422 }
00423 
00424 int MPEyeFinder::matrixMult(int row1,int col1, float *matrix1, int row2, int col2, float *matrix2, float *rtn_matrix){
00425   if(col1 != row2)
00426     return 0;
00427   float comb;
00428   for(int cur_row = 0; cur_row < row1; ++cur_row){
00429     for(int cur_col = 0; cur_col < col2; ++cur_col){
00430       comb = 0;
00431       for(int cur_pos = 0; cur_pos < col1; ++cur_pos){
00432         comb += matrix1[(cur_row*col1)+cur_pos] * matrix2[cur_col+(cur_pos*col2)];
00433       }
00434       int pos = cur_row*col2+cur_col;
00435       rtn_matrix[pos] = comb;
00436     }
00437   }
00438   return 1;
00439 }
00440 
00441 int MPEyeFinder::matrixMult(int row1,int col1, float *matrix1, int row2, int col2, vector< vector< float > > matrix2, float *rtn_matrix){
00442   if(col1 != row2)
00443     return 0;
00444   float comb;
00445   for(int cur_row = 0; cur_row < row1; ++cur_row){
00446     for(int cur_col = 0; cur_col < col2; ++cur_col){
00447       comb = 0;
00448       for(int cur_pos = 0; cur_pos < col1; ++cur_pos){
00449         comb += matrix1[(cur_row*col1)+cur_pos] * matrix2[cur_pos][cur_col];//[(cur_col*row2)+cur_pos];
00450       }
00451       rtn_matrix[(cur_row*col2)+cur_col] = comb;
00452     }
00453   }
00454   return 1;
00455 }
00456 
00457 /*
00458  * 
00459  * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
00460  * 
00461  *    1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
00462  *    2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
00463  *    3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission.
00464  * 
00465  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00466  * 
00467  */
00468