/* * ll(1) descent-recursive parser for the following grammar (taken from the * dragon-book): * * e -> t e' * e' -> + t e' | $ * t -> f t' * t' -> * f t' | $ * f -> (e) | id * * to compile: gcc -o parser parser.c * * you can also pass -DDEBUG to debug parser's moves: * * gcc -DDEBUG -o parser parser.c * */ #include #include #include #include #include /* syntax tests */ //#define buffer "a+b*c" //#define buffer "a*b" //#define buffer "a+b*2" //#define buffer "a+(b+c)*2" //#define buffer "a+(a*b+(a*b))*d" //#define buffer "a+(a*b+(a*b+c))*d+2+(a*b+2)" #define buffer "a+b*c" /* erratic sintaxes tests */ //#define buffer "(a^b)*2" //#define buffer "(a/b)*2" //#define buffer "b>>2" /* * productions. * * what I've done is to keep production values within the byte-boundary as a * simple cross-indexing method (see the is_terminal macro bellow, and the * table itself) */ typedef enum { PROD_ERROR = 256, /* this means error */ PROD_EXPRESSION, /* e -> t e' */ PROD_EXPRESSION_P, /* e' -> + t e' */ PROD_TERM, /* t -> f t' */ PROD_TERM_P, /* t' -> * f t' */ PROD_FACTOR, /* f -> (e) | id */ PROD_EPSILON /* any -> $ */ } productions; /* * whether a value on stack represents a terminal or a non terminal */ #define is_terminal(x) ((x>>8) ? 0 : 1) /* tokens */ typedef enum { TOK_ID = 1, TOK_PLUS, TOK_TIMES, TOK_LPAREN, TOK_RPAREN, TOK_EOF } tokens; /* symbol table for this grammar */ int symtab[] = { /* id + * ( ) $ */ /* e */ 257, 256, 256, 257, 256, 256, /* e' */ 256, 258, 256, 256, 262, 261, /* t */ 259, 256, 256, 259, 256, 256, /* t' */ 256, 261, 260, 256, 262, 261, /* f */ 261, 256, 256, 261, 256, 256, /* $ */ 262, 262, 262, 262, 262, 262 }; /* it's factor (actually rows) */ static int SYMTAB_FACTOR = 6; /* a simple malloc wrapper */ #define pmalloc(var, type) assert((var = malloc(type))); /* stack's structure */ typedef struct _struct_stack_t { int value; struct _struct_stack_t *next; } st_stack_t, *stackPtr; /* function prototypes */ void parse (int action); int get_token (char *bptr); void push (stackPtr * stack, int value); void pop (stackPtr * stack); void debug_stack (stackPtr * stack); void throw_error (char *ptr); /* * global variables */ stackPtr stack; /* parser's stack */ static int current = 0; /* current token's id */ static int table_return = 0; /* returned value from the symbol table */ /****************************************************************************** * PROGRAM STARTS HERE * *****************************************************************************/ int main (int argc, char **argv) { /* let ptr point to the begining of the text being parsed */ static char *ptr = buffer; /* push our end-marker onto the stack, so we can have a reference */ push (&stack, '\0'); /* push the start symbol as well */ push (&stack, PROD_EXPRESSION); /* get the first token's value */ current = get_token (ptr); #ifdef DEBUG /* step no. */ static int step = 1; printf ("\n=> analyzing \"%s\" against the following grammar:\n", buffer); printf ("\n\te -> t e'\n"); printf ("\te'-> + t e' | $ \n"); printf ("\tt -> f t' \n"); printf ("\tt'-> * f t' | $\n"); printf ("\tf -> (e) | id \n"); printf ("\nInput\t\tstep#\tproduction\t\tStack (top is left)\n"); printf ("==============\t=====\t=================\t===================\n"); printf ("%s$\t\t%i\tinit\t\t", buffer, step); debug_stack (&stack); step++; #endif /* launch the parsing process */ while (stack->value != '\0') { if (stack->value == current) { pop (&stack); current = get_token (++ptr); #ifdef DEBUG printf ("%s$\t\t%i\taccept\t\t", ptr, step); #endif } else { int index, rval; /* just helper variables to make this clearer */ #ifdef DEBUG printf ("%s$\t\t%i", ptr, step); #endif if (is_terminal (stack->value)) throw_error (ptr); /* get the index value */ rval = ((stack->value) & 0xf) - 1; index = (((rval * SYMTAB_FACTOR) + current) - 1); /* lookup next production to apply on the symbol table */ if ((table_return = symtab[index]) == PROD_ERROR) throw_error (ptr); /* noe we want to replace the left hand side of the production, with it's right hand side, so we first pop the left hand side off the stack */ pop (&stack); /* and then push it's rhs, whatever the table says it is */ switch (table_return) { case PROD_EXPRESSION: /* e-> t e' */ #ifdef DEBUG printf("\tPROD_EXPRESSION\t"); #endif push (&stack, PROD_EXPRESSION_P); push (&stack, PROD_TERM); break; case PROD_EXPRESSION_P: /* e'-> + t e' | $ */ #ifdef DEBUG printf("\tPROD_EXPRESSION_P"); #endif if (current == TOK_PLUS) { push (&stack, PROD_EXPRESSION_P); push (&stack, PROD_TERM); push (&stack, current); } else push(&stack, PROD_EPSILON); break; case PROD_TERM: /* t-> f t' */ #ifdef DEBUG printf("\tPROD_TERM\t"); #endif push (&stack, PROD_TERM_P); push (&stack, PROD_FACTOR); break; case PROD_TERM_P: /* t'-> * f t' */ #ifdef DEBUG printf("\tPROD_TERM_P\t"); #endif switch (current) { case TOK_ID: push (&stack, current); break; case TOK_TIMES: push (&stack, PROD_TERM_P); push (&stack, PROD_FACTOR); push (&stack, current); break; default: push(&stack, PROD_EPSILON); } break; case PROD_FACTOR: /* f -> (e) | id */ #ifdef DEBUG printf("\tPROD_FACTOR\t"); #endif switch (current) { case TOK_ID: push (&stack, current); break; case TOK_LPAREN: push (&stack, TOK_RPAREN); push (&stack, PROD_EXPRESSION); push (&stack, current); break; default: push(&stack, PROD_EPSILON); } break; case PROD_EPSILON: #ifdef DEBUG printf("\tPROD_EPSILON\t"); #endif /* we do nothing, this is just a pop by itself */ break; } } #ifdef DEBUG /* advance the steps' counter */ step++; /* and debug the stack */ debug_stack (&stack); #endif } #ifdef DEBUG putchar ('\n'); #endif if (!*ptr && !stack->value) printf ("=> No errors found when parsing `%s' against the given grammar\n\n", buffer); else printf ("=> The expression `%s' isn 't valid for this grammar\n", buffer); if(stack) free(stack); return 0; } /* get the next token's id */ int get_token (char *ptr) { switch (*ptr) { case '(': return TOK_LPAREN; case ')': return TOK_RPAREN; case '+': return TOK_PLUS; case '*': return TOK_TIMES; case '\0': return TOK_EOF; } if (isdigit (*ptr) || isalpha (*ptr)) return TOK_ID; return TOK_EOF; } /* pushes a value onto the given stack */ void push (stackPtr *stack, int value) { stackPtr node = NULL; pmalloc (node, (sizeof (st_stack_t))); node->value = value; node->next = *stack; *stack = node; return; } /* pops a value off the given stack */ void pop (stackPtr *stack) { stackPtr node = *stack; *stack = node->next; free (node); return; } /* debug stack's contents */ void debug_stack (stackPtr *stack) { stackPtr node = *stack; int i = 0; int k = 0; printf ("\t["); for (; node != NULL; node = node->next) { k = (node->value >> 8); if (k) { switch (node->value) { case PROD_EXPRESSION: printf ("e, "); break; case PROD_EXPRESSION_P: printf ("e', "); break; case PROD_TERM: printf ("t, "); break; case PROD_TERM_P: printf ("t', "); break; case PROD_FACTOR: printf ("f, "); break; case PROD_EPSILON: printf("eps, "); break; } } else { switch (node->value) { case TOK_ID: printf ("id, "); break; case TOK_PLUS: printf ("+, "); break; case TOK_TIMES: printf ("*, "); break; case TOK_LPAREN: printf ("(, "); break; case TOK_RPAREN: printf ("), "); break; case '\0': printf ("$"); break; default: printf("%c, ", node->value); } } i++; } printf ("]\n"); } /* very simple error reporting function */ void throw_error (char *ptr) { printf ("\nsyntax error: "); if (current == PROD_ERROR) printf ("Mismatched '%c', ", *ptr); switch (stack->value) { case PROD_FACTOR: printf ("expecting factor or sub-expression\n"); break; } /* free stack's contents */ if(stack) { while(stack->value != '\0') pop(&stack); free(stack); } /* and exit */ exit (1); }