The Machine Perception Toolbox (MPT)

00001 
00019 #include <iostream.h>
00020 #include <mpisearch.h>
00021 #include <Magick++.h>
00022 
00023 using namespace Magick;
00024 using namespace std;
00025 
00026 
00027 
00084 class CenterSurround : public MPISearchObjectDetector< float > { 
00085 public:
00086   CenterSurround();
00087   ~CenterSurround() {};
00090   void search(RImage< float> &pixels); 
00091 };
00092 
00101 CenterSurround::CenterSurround(){
00102   // Because we are working with discrete pixels, not real-valued
00103   // points, we run into off-by-one issues.  To compute the integral
00104   // between points a and b in 1-D continuous space, given the
00105   // integrated function f(x), we would compute f(b) - f(a).  In
00106   // words, you want to look up the integral at b, then subtract off
00107   // everything to the left of a.  In discrete pixel space, since
00108   // pixels take up space, in order to subtract off everything to the
00109   // left of a, you have to go to the pixel at a-1 so you don't
00110   // subtract off the integral *at* pixel a.  In our 2-D case, this
00111   // means that we always have to widen our features by one, up and to
00112   // the left.  The result is that the total size of a patch is one
00113   // plus the width of the window of interest in each direction.  Lets
00114   // take an example: To get the sum of image pixels within the
00115   // rectangle with upper left hand corner at (1,1) and lower right
00116   // hand corner at (2,2), the corners used in the *integral-image*
00117   // feature are (0,0), (0,2), (2,0), and (2,2).  So the total extent
00118   // of the feature, in terms of the distance (inclusive) between the
00119   // pixels we will do lookups on in the *integral* image, is 3, not
00120   // 2.  It may be confusing at first to see that the patch width is
00121   // searchWidth +1, so until you get used to it, just keep in mind
00122   // that this off-by-one business can be a little confusing, so be
00123   // careful!
00124 
00125   // That said, we want our basic center-surround to be of size 5 x 5.
00126   // Thus, the smallest valid integral image patch would be 6 x 6 (if
00127   // we had logic to check if we went off the edge of the image at
00128   // every pixel access, we could make it 5 x 5, but that would really
00129   // slow things down).
00130   data.patch_width = 5 + 1;
00131   data.patch_height = 5 + 1;  
00132   int searchWidth = data.patch_width - 1, searchHeight = data.patch_height - 1;
00133   
00134   // Now, allocate space for the features
00135   data.numfeatures = 2;
00136   data.features = new Feature[data.numfeatures]; 
00137 
00138   // The first feature is a rectangle over the entire patch -- this is
00139   // used in DC subtraction Note that we're specifying indexes into
00140   // pixels from 0,0 to 5,5 -- which requires a 6x6 patch of valid
00141   // pixels.
00142   data.features[0].numcorners = 4;  
00143   data.features[0].corners = new Corner[data.features[0].numcorners]; 
00144   data.features[0].corners[0].x = 0;           data.features[0].corners[0].y = 0;            data.features[0].corners[0].value =  1;
00145   data.features[0].corners[1].x = searchWidth; data.features[0].corners[1].y = 0;            data.features[0].corners[1].value = -1;
00146   data.features[0].corners[2].x = 0;           data.features[0].corners[2].y = searchHeight; data.features[0].corners[2].value = -1;
00147   data.features[0].corners[3].x = searchWidth; data.features[0].corners[3].y = searchHeight; data.features[0].corners[3].value =  1;
00148   
00155   int innerWidth = (int)(searchWidth*.20), outerWidth = searchWidth-innerWidth;
00156   int innerHeight = (int)(searchWidth*.20), outerHeight = searchHeight-innerHeight;
00157   data.features[1].numcorners = 8;
00158   // Outer rect
00159   data.features[1].corners = new Corner[data.features[1].numcorners]; 
00160   data.features[1].corners[0].x = 0;           data.features[1].corners[0].y = 0;            data.features[1].corners[0].value =  1;
00161   data.features[1].corners[1].x = searchWidth; data.features[1].corners[1].y = 0;            data.features[1].corners[1].value = -1;
00162   data.features[1].corners[2].x = 0;           data.features[1].corners[2].y = searchHeight; data.features[1].corners[2].value = -1;
00163   data.features[1].corners[3].x = searchWidth; data.features[1].corners[3].y = searchHeight; data.features[1].corners[3].value =  1;
00164   // Inner rect. In LGN X cells, the gain of the inner rectangler is
00165   // about -3.52 times the gain of the outer rectangle. Since "value"
00166   // is an integer, we use -4
00167   data.features[1].corners[4].x = innerWidth; data.features[1].corners[4].y = innerHeight; data.features[1].corners[4].value = -4;
00168   data.features[1].corners[5].x = outerWidth; data.features[1].corners[5].y = innerHeight; data.features[1].corners[5].value =  4;
00169   data.features[1].corners[6].x = innerWidth; data.features[1].corners[6].y = outerHeight; data.features[1].corners[6].value =  4;
00170   data.features[1].corners[7].x = outerWidth; data.features[1].corners[7].y = outerHeight; data.features[1].corners[7].value = -4;
00171   
00172 }
00173 
00179 void CenterSurround::search(RImage< float> &pixels){
00180 
00181   // stream.images is an array of pointers to the RImage<T> base
00182   // class.  We cast stream.images[0] as a pointer to its derived
00183   // class RIntegral, and call the integrate method.  Javier: Why not
00184   // let RImage have an integrate method itself?  Ian: Because RImage
00185   // is the *base* class from which RIntegral and RDerivative are
00186   // derived.  We do this because we want to be able to use lists of
00187   // images of different types.  Thus we use a list of pointers to the
00188   // base class, even though in this case we are always looking at
00189   // RIntegrals.  In the future, I think a good idea might be to use
00190   // the mixed list type from the Boost Library, which allows mixed
00191   // types in the list.  Then we wouldn't have to typescast the
00192   // pointer.  If we do that, then perhaps we should also think about
00193   // using Boost shared_pointers, which are reference counted, so we
00194   // don't have to worry about memory management as much.  Currently
00195   // memory is managed pretty well by hand, but as libmpisearch
00196   // evolves, use of shared_pointers would make development of less
00197   // error-prone code a lot easier.  The only possible downside would
00198   // be a potential speed hit from shared_pointer, but I think this
00199   // could be minimized, even down to *no* effective penalty.
00200   static_cast<RIntegral< float >* >(stream.images[0])->integrate(pixels);
00201   
00202   float a,b,c,d,s,mean;
00203   int scale_index, x, y;
00204   float scale_factor, area;
00205   unsigned int rect_ind = 0, cent_surr_ind = 1; 
00206   int numrectcorners = data.features[rect_ind].numcorners;
00207   int numcent_surrcorners = data.features[cent_surr_ind].numcorners;
00208   int numWindows = 0;
00209 
00210 
00211     // An image pyramid consists of scales and scales consist of
00212     // windows.  The outer for loop below iterates through scales. The
00213     // inner loop iterates over windows within each scale.
00214 
00215 
00216 
00217 
00218   // Get begin and end iterators for the outer loop.  mpi is a
00219   // datamember of the class MPISearchStream and it is of type
00220   // MPIImagePyramid.  MPIImagePyramid is a container class
00221   // representing all the patches at all scales in an image.
00222 
00223    
00224   MPIImagePyramid< float >::const_iterator scale = stream.mpi->begin();
00225   MPIImagePyramid< float >::const_iterator last_scale = stream.mpi->end();
00226   
00227   for( ; scale != last_scale; ++scale){
00228 
00229     // Retreive the cached info about this scale
00230 
00231     scale_index = scale.getScale(scale_factor);
00232 
00233     //The class CornerCache has 4 members: scaledCornerX,
00234     //scaledCornerY, scaledIndex, value 
00235 
00236 
00237     // Javier: What are CornerCache
00238     //used for?  stream is of type MPISearchStream. corners is a
00239     //private datamember of MPISearchStream. Corners is of type
00240     //vector<CornerCache<T>**>
00241 
00242     CornerCache< float > **corners = stream.corners[scale_index];
00243     CornerCache< float > *rect_corners = corners[rect_ind];
00244     CornerCache< float > *cs_corners = corners[cent_surr_ind];
00245   
00246     
00247 
00248     // stream.fns[i] is a vector which counts, at each scale i, the
00249     // total number of additions or subtraction of a pixel by each
00250     // feature. This can be used to remove the DC component of the
00251     // filter, i.e, we want for the filter to have zero output to
00252     // patches that have pixels of constant value.  For instance,
00253     // suppose a feature operates on a 5x5 patch by adding the 25
00254     // pixels of the patch and then substracting the center pixel. If
00255     // presented with a 5x5 patch in which all the pixels have a
00256     // constant intensity x then the output of the filter would be
00257     // (25-1) * x . If we want for the filter to have zero output to
00258     // constant valued patches we would then have to substract (25 -1)
00259     // * mean, where mean is the average value of the 25 pixels in the
00260     // patch.
00261 
00262 
00263     // Below this comment, we store the inverse of the number of
00264     // pixels in the 0th feature ahead of time, which helps us compute
00265     // the mean.  Why the inverse?  So we don't have to issue a divide
00266     // at each step, rather we can issue a mult, which is much faster
00267     // on most CPUs.
00268     
00269     // Javier: Should not we take into consideration the weights of
00270     // the feature componets. In our case, for example, the
00271 
00272     float one_over_rect_area = 1.0 / stream.fns[scale_index][rect_ind];
00273 
00274 
00275 
00276     // Finally, we will have to scale the image to between 0 and 1 at
00277     // the end due to ImageMagick's assumptions about how float images
00278     // are represented.
00279     float minval = 99999, maxval= -99999;
00280     
00281 
00282 
00283 
00284     // A pyramid consists of scales and scales consist of windows. We
00285     // now iterate over the windows within each scale. 
00286 
00287 
00288 
00289     MPIScaledImage< float >::const_iterator window = (*scale).begin(), last_window = (*scale).end();
00290     for( ; window != last_window; ++window, ++numWindows){      
00291       // First, compute sum of entire patch, so we can remove DC component.
00292       // since we know this is just a square, we don't need any for loops
00293       a = window.getPixel0( rect_corners[0].scaledIndex ) * rect_corners[0].value;
00294       b = window.getPixel0( rect_corners[1].scaledIndex ) * rect_corners[1].value;
00295       c = window.getPixel0( rect_corners[2].scaledIndex ) * rect_corners[2].value;
00296       d = window.getPixel0( rect_corners[3].scaledIndex ) * rect_corners[3].value;
00297       mean = a + b + c + d;
00298       mean *= one_over_rect_area;
00299       
00300       // Now, compute the center surround feature. In this case, we
00301       // could compute all 8 corners explicitly as above, but insetad
00302       // lets do it in a more generic way, so that we don't have to
00303       // know anything about the feature a priori. Done this way, we
00304       // can compute an arbitrary feature stored as feature 1 of
00305       // "data".  We could generalize this even more by looping over
00306       // data.numfeatures instead of specifying the center_surround
00307       // feature as we do here.  This way, you could compute N
00308       // features in an identical way.  To see this in action, look at
00309       // MPISearchObjectDetector.classifyWindow
00310       //
00311       // Instead of looping over every corner, we unroll this loop a
00312       // little, which clues the compiler in to the fact that the
00313       // value of each corner (or at least, each group of 4 corners)
00314       // is independent.  This allows it to schedule instructions in
00315       // such a way that achieved a 40% speed improvement on a
00316       // PowerMac G4 using gcc 3.1 due to efficient use of the
00317       // pipeline. This sets the stage for vectorization using AltiVec
00318       // or SSD, if someone ever wants to try this.
00319       s = 0;
00320       for(int corner = 0; corner < numcent_surrcorners; corner+=4) {
00321         a = window.getPixel0( cs_corners[corner].scaledIndex   ) * cs_corners[corner].value;
00322         b = window.getPixel0( cs_corners[corner+1].scaledIndex ) * cs_corners[corner+1].value;
00323         c = window.getPixel0( cs_corners[corner+2].scaledIndex ) * cs_corners[corner+2].value;
00324         d = window.getPixel0( cs_corners[corner+3].scaledIndex ) * cs_corners[corner+3].value;
00325         s += a + b + c + d;
00326       }
00327       // Now, remove the DC component. This is done by counting the
00328       // number of times a pixel was added or subtracted ( stored in
00329       // stream.fns ), multiplying by the mean, then subtracting it
00330       // from the total.
00331       s -= mean*stream.fns[scale_index][cent_surr_ind];
00332 
00333       // Now set every pixel of the corresponding block in our output image to s
00334       for(int i = 0; i < scale_factor; ++i)
00335         for(int j = 0; j < scale_factor; ++j)
00336           window.setPixel(1, j, i, s);
00337 
00338       // Finally, because of the way we are displaying the image, we
00339       // need to get min and max vals.
00340       if(s < minval)
00341         minval = s;
00342       if(s > maxval)
00343         maxval = s;
00344       window.getCoords(x,y);
00345     }
00346     // We're finished, so now display the result.  
00347     // First, make sure all values are between 0 and 1;
00348     float range = maxval - minval; 
00349     float one_over_range = 1.0/range;
00350     unsigned int numpixels = stream.images[1]->width*stream.images[1]->height;
00351     float * p = stream.images[1]->array;
00352     for(unsigned int i = 0; i < numpixels ; ++i)
00353       *p = (*p++ - minval)*one_over_range;
00354     // Now, copy the pixels into an ImageMagick image.
00355     Image img((const unsigned int)stream.images[1]->width,(const unsigned int)stream.images[1]->height,
00356               "I",FloatPixel,stream.images[1]->array);
00357 
00358     // Crop and scale the image for visibility.
00359     const Geometry cropregion(x+scale_factor,y+scale_factor);
00360     img.crop(cropregion);
00361     const Geometry newsize(stream.images[1]->width-1,stream.images[1]->height-1);
00362     img.scale(newsize);
00363     img.type(GrayscaleType);
00364     img.fontPointsize(40);
00365     //Geometry(width, height, xoffset, yoffset);
00366     char tmpstring[20];
00367     sprintf(tmpstring,"Scale %d", (int)scale_factor);
00368     img.annotate(tmpstring, Geometry(50,50, 40, 40) );
00369     img.display();
00370     // img.write("Output2.jpg");
00371   }
00372 }
00373 
00374 int main()
00375 {
00376   // Load the image
00377    Image img("../../Media/ExampleImage.jpg");
00378   //  Image img("../../Media/spock.gif");
00379   // Write the word "Hello" on the image
00380   //   img.fontPointsize(100);
00381    //  Geometry(width, height, xoffset, yoffset)    
00382    //  img.annotate("Hello2", Geometry(400,400, 0, 220) );
00383 
00384   img.display();
00385 
00386 // Construct an RImage, which is expected by MPISearch
00387 // This constructor sets the member variables for width, height, and
00388 // number of pixels and allocates memory for the member variable
00389 // "array", which is a vector of size npixels
00390 
00391   RImage< float > pixels(img.columns(), img.rows()); 
00392 
00393   // Copy the pixels into an RImage, which is expected by MPISearch
00394   img.write(0,0, img.columns(), img.rows(), "I", FloatPixel, pixels.array); 
00395 // The 'I' tells ImageMagick to take the intensity channel.  I think
00396 // there are better ways to do this, but I haven't looked into them.
00397 
00398 
00399   CenterSurround CS;
00400 
00401   // stream is a data member of CenterSurround, of type
00402   // MPISearchStream MPISearchStream holds all the memory and caches
00403   // for the class MPISearchObjectDetector, which Provides an
00404   // architecture for performing operations on every sub-window of an
00405   // image at multiple scales.
00406 
00407   // CS.inistream calls stream.init(width,height,data,WINSHIFT)
00408   // were data is a data member of CenterSurround  of type FeatureData
00409   // stream.init 
00410 
00411 
00412 
00413   CS.initStream(pixels.width, pixels.height, 1.0f);
00414 
00415 // Now run over the image at multiple scales and compute the mean
00416 // pixel at that size
00417 
00418 
00419   CS.search(pixels);
00420   
00421   return 0;
00422 }
The Machine Perception Toolbox

LGN.cpp