The Machine Perception Toolbox

[Introduction]- [News]- [Download]- [Screenshots]- [Manual (pdf)]- [Forums]- [API Reference]- [Repository ]

 

Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

scan.c

Go to the documentation of this file.
00001 /*
00002  * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
00003  *
00004  * This file is part of Jam - see jam.c for Copyright information.
00005  */
00006 
00007 # include "jam.h"
00008 # include "lists.h"
00009 # include "parse.h"
00010 # include "scan.h"
00011 # include "jamgram.h"
00012 # include "jambase.h"
00013 # include "newstr.h"
00014 
00015 /*
00016  * scan.c - the jam yacc scanner
00017  *
00018  * 12/26/93 (seiwald) - bump buf in yylex to 10240 - yuk.
00019  * 09/16/94 (seiwald) - check for overflows, unmatched {}'s, etc.
00020  *                      Also handle tokens abutting EOF by remembering
00021  *                      to return EOF no matter how many times yylex()
00022  *                      reinvokes yyline().
00023  * 02/11/95 (seiwald) - honor only punctuation keywords if SCAN_PUNCT.
00024  * 07/27/95 (seiwald) - Include jamgram.h after scan.h, so that YYSTYPE is
00025  *                      defined before Linux's yacc tries to redefine it.
00026  */
00027 
00028 struct keyword {
00029         char *word;
00030         int type;
00031 } keywords[] = {
00032 # include "jamgramtab.h"
00033         { 0, 0 }
00034 } ;
00035 
/*
 * Scanner state for one input source.  Sources are linked through `next`;
 * the file-scope pointer incp refers to the head of that chain, which is
 * the source currently being read.
 */
struct include {
        struct include  *next;          /* following entry on the chain */
        char            *string;        /* unread rest of the current line */
        char            **strings;      /* yyfparse(): in-memory lines, or 0 */
        FILE            *file;          /* yyfparse(): stream being read, 0 until opened */
        char            *fname;         /* yyfparse(): name of this source */
        int             line;           /* line number used in error messages */
        char            buf[ 512 ];     /* yyfparse(): storage for a line read from file */
} ;
00045 
00046 static struct include *incp = 0; /* current file; head of chain */
00047 
00048 static int scanmode = SCAN_NORMAL;
00049 static int anyerrors = 0;
00050 static char *symdump( YYSTYPE *s );
00051 
00052 # define BIGGEST_TOKEN 10240    /* no single token can be larger */
00053 
00054 /* 
00055  * Set parser mode: normal, string, or keyword
00056  */
00057 
00058 void
00059 yymode( int n )
00060 {
00061         scanmode = n;
00062 }
00063 
00064 void
00065 yyerror( char *s )
00066 {
00067         if( incp )
00068             printf( "%s: line %d: ", incp->fname, incp->line );
00069 
00070         printf( "%s at %s\n", s, symdump( &yylval ) );
00071 
00072         ++anyerrors;
00073 }
00074 
00075 int
00076 yyanyerrors()
00077 {
00078         return anyerrors != 0;
00079 }
00080 
00081 void
00082 yyfparse( char *s )
00083 {
00084         struct include *i = (struct include *)malloc( sizeof( *i ) );
00085 
00086         /* Push this onto the incp chain. */
00087 
00088         i->string = "";
00089         i->strings = 0;
00090         i->file = 0;
00091         i->fname = copystr( s );
00092         i->line = 0;
00093         i->next = incp;
00094         incp = i;
00095 
00096         /* If the filename is "+", it means use the internal jambase. */
00097 
00098         if( !strcmp( s, "+" ) )
00099             i->strings = jambase;
00100 }
00101 
00102 /*
00103  * yyline() - read new line and return first character
00104  *
00105  * Fabricates a continuous stream of characters across include files,
00106  * returning EOF at the bitter end.
00107  */
00108 
00109 int
00110 yyline()
00111 {
00112         struct include *i = incp;
00113 
00114         if( !incp )
00115             return EOF;
00116 
00117         /* Once we start reading from the input stream, we reset the */
00118         /* include insertion point so that the next include file becomes */
00119         /* the head of the list. */
00120 
00121         /* If there is more data in this line, return it. */
00122 
00123         if( *i->string )
00124             return *i->string++;
00125 
00126         /* If we're reading from an internal string list, go to the */
00127         /* next string. */
00128 
00129         if( i->strings )
00130         {
00131             if( !*i->strings )
00132                 goto next;
00133 
00134             i->line++;
00135             i->string = *(i->strings++);
00136             return *i->string++;
00137         }
00138 
00139         /* If necessary, open the file */
00140 
00141         if( !i->file )
00142         {
00143             FILE *f = stdin;
00144 
00145             if( strcmp( i->fname, "-" ) && !( f = fopen( i->fname, "r" ) ) )
00146                 perror( i->fname );
00147 
00148             i->file = f;
00149         }
00150 
00151         /* If there's another line in this file, start it. */
00152 
00153         if( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) )
00154         {
00155             i->line++;
00156             i->string = i->buf;
00157             return *i->string++;
00158         }
00159 
00160     next:
00161         /* This include is done.  */
00162         /* Free it up and return EOF so yyparse() returns to parse_file(). */
00163 
00164         incp = i->next;
00165 
00166         /* Close file, free name */
00167 
00168         if( i->file && i->file != stdin )
00169             fclose( i->file );
00170         freestr( i->fname );
00171         free( (char *)i );
00172 
00173         return EOF;
00174 }
00175 
00176 /*
00177  * yylex() - set yylval to current token; return its type
00178  *
00179  * Macros to move things along:
00180  *
00181  *      yychar() - return and advance character; invalid after EOF
00182  *      yyprev() - back up one character; invalid before yychar()
00183  *
00184  * yychar() returns a continuous stream of characters, until it hits
00185  * the EOF of the current include file.
00186  */
00187 
00188 # define yychar() ( *incp->string ? *incp->string++ : yyline() )
00189 # define yyprev() ( incp->string-- )
00190 
00191 int
00192 yylex()
00193 {
00194         int c;
00195         char buf[BIGGEST_TOKEN];
00196         char *b = buf;
00197 
00198         if( !incp )
00199             goto eof;
00200 
00201         /* Get first character (whitespace or of token) */
00202 
00203         c = yychar();
00204 
00205         if( scanmode == SCAN_STRING )
00206         {
00207             /* If scanning for a string (action's {}'s), look for the */
00208             /* closing brace.  We handle matching braces, if they match! */
00209 
00210             int nest = 1;
00211 
00212             while( c != EOF && b < buf + sizeof( buf ) )
00213             {
00214                     if( c == '{' )
00215                         nest++;
00216 
00217                     if( c == '}' && !--nest )
00218                         break;
00219 
00220                     *b++ = c;
00221 
00222                     c = yychar();
00223 
00224                     /* turn trailing "\r\n" sequences into plain "\n"
00225                      * for Cygwin
00226                      */
00227                     if (c == '\n' && b[-1] == '\r')
00228                         --b;
00229             }
00230 
00231             /* We ate the ending brace -- regurgitate it. */
00232 
00233             if( c != EOF )
00234                 yyprev();
00235 
00236             /* Check obvious errors. */
00237 
00238             if( b == buf + sizeof( buf ) )
00239             {
00240                 yyerror( "action block too big" );
00241                 goto eof;
00242             }
00243 
00244             if( nest )
00245             {
00246                 yyerror( "unmatched {} in action block" );
00247                 goto eof;
00248             }
00249 
00250             *b = 0;
00251             yylval.type = STRING;
00252             yylval.string = newstr( buf );
00253         yylval.file = incp->fname;
00254         yylval.line = incp->line;
00255         
00256         }
00257         else
00258         {
00259             char *b = buf;
00260             struct keyword *k;
00261             int inquote = 0;
00262             int notkeyword;
00263                 
00264             /* Eat white space */
00265 
00266             for( ;; )
00267             {
00268             /* Skip past white space */
00269 
00270             while( c != EOF && isspace( c ) )
00271                 c = yychar();
00272 
00273             /* Not a comment?  Swallow up comment line. */
00274 
00275             if( c != '#' )
00276                 break;
00277             while( ( c = yychar() ) != EOF && c != '\n' )
00278                 ;
00279             }
00280 
00281             /* c now points to the first character of a token. */
00282 
00283             if( c == EOF )
00284                 goto eof;
00285 
00286         yylval.file = incp->fname;
00287         yylval.line = incp->line;
00288         
00289             /* While scanning the word, disqualify it for (expensive) */
00290             /* keyword lookup when we can: $anything, "anything", \anything */
00291 
00292             notkeyword = c == '$';
00293 
00294             /* look for white space to delimit word */
00295             /* "'s get stripped but preserve white space */
00296             /* \ protects next character */
00297 
00298             while( 
00299                 c != EOF &&
00300                 b < buf + sizeof( buf ) &&
00301                 ( inquote || !isspace( c ) ) )
00302             {
00303                 if( c == '"' )
00304                 {
00305                     /* begin or end " */
00306                     inquote = !inquote;
00307                     notkeyword = 1;
00308                 }
00309                 else if( c != '\\' )
00310                 {
00311                     /* normal char */
00312                     *b++ = c;
00313                 }
00314                 else if( ( c = yychar()) != EOF )
00315             {
00316                     /* \c */
00317                     *b++ = c;
00318                     notkeyword = 1;
00319                 }
00320                 else
00321                 {
00322                     /* \EOF */
00323                     break;
00324                 }
00325 
00326                 c = yychar();
00327             }
00328 
00329             /* Check obvious errors. */
00330 
00331             if( b == buf + sizeof( buf ) )
00332             {
00333                 yyerror( "string too big" );
00334                 goto eof;
00335             }
00336 
00337             if( inquote )
00338             {
00339                 yyerror( "unmatched \" in string" );
00340                 goto eof;
00341             }
00342 
00343             /* We looked ahead a character - back up. */
00344 
00345             if( c != EOF )
00346                 yyprev();
00347 
00348             /* scan token table */
00349             /* don't scan if it's obviously not a keyword or if its */
00350             /* an alphabetic when were looking for punctuation */
00351 
00352             *b = 0;
00353             yylval.type = ARG;
00354 
00355             if( !notkeyword && !( isalpha( *buf ) && scanmode == SCAN_PUNCT ) )
00356             {
00357                 for( k = keywords; k->word; k++ )
00358                     if( *buf == *k->word && !strcmp( k->word, buf ) )
00359                 {
00360                     yylval.type = k->type;
00361                     yylval.string = k->word;    /* used by symdump */
00362                     break;
00363                 }
00364             }
00365 
00366             if( yylval.type == ARG )
00367                 yylval.string = newstr( buf );
00368         }
00369 
00370         if( DEBUG_SCAN )
00371                 printf( "scan %s\n", symdump( &yylval ) );
00372 
00373         return yylval.type;
00374 
00375 eof:
00376     yylval.file = "end-of-input"; /* just in case */
00377     yylval.line = 0;
00378         
00379         yylval.type = EOF;
00380         return yylval.type;
00381 }
00382 
00383 static char *
00384 symdump( YYSTYPE *s )
00385 {
00386         static char buf[ BIGGEST_TOKEN + 20 ];
00387 
00388         switch( s->type )
00389         {
00390         case EOF:
00391                 sprintf( buf, "EOF" );
00392                 break;
00393         case 0:
00394                 sprintf( buf, "unknown symbol %s", s->string );
00395                 break;
00396         case ARG:
00397                 sprintf( buf, "argument %s", s->string );
00398                 break;
00399         case STRING:
00400                 sprintf( buf, "string \"%s\"", s->string );
00401                 break;
00402         default:
00403                 sprintf( buf, "keyword %s", s->string );
00404                 break;
00405         }
00406         return buf;
00407 }
00408 
00409 /*  Get information about the current file and line, for those epsilon
00410  *  transitions that produce a parse
00411  */
00412 void yyinput_stream( char** name, int* line )
00413 {
00414     if (incp)
00415     {
00416         *name = incp->fname;
00417         *line = incp->line;
00418     }
00419     else
00420     {
00421         *name = "(builtin)";
00422         *line = -1;
00423     }
00424 }

Generated on Mon Nov 8 17:07:52 2004 for MPT by  doxygen 1.3.9.1