/* W A R T * * pre-process a lex-like file into a C program. * * Jeff Damens, Columbia University Center for Computing Activites, 11/84. * (Reorganized by Frank da Cruz into a single source module for ease * of distribution). * Copyright (C) 1984, Trustees of Columbia University. * May be copied and used except for explicitly commercial purposes. * * input format is: * lines to be copied | %state * %% * | CHAR { actions } * ... * %% */ /* modified for CTOS C2.0 by Joel Dunn, UNC-CH, October 1986 */ #include #include /* token types */ #define SEP 1 #define LBRACK 2 #define RBRACK 3 #define WORD 4 #define COMMA 5 /* storage sizes */ #define MAXSTATES 50 /* max number of states */ #define MAXWORD 50 /* max # of chars/word */ #define SBYTES ((MAXSTATES+7)/8) /* # of bytes for state bitmask */ /* name of wart function in generated program */ #ifndef FNAME #define FNAME "wart" #endif /* data structure for state information */ #ifdef PROVX1 typedef unsigned short CHAR; #else typedef unsigned char CHAR; #endif struct trans { CHAR states[SBYTES]; /* included states */ int anyst; /* true if this good from any state */ CHAR inchr; /* input character */ int actno; /* associated action */ struct trans *nxt; }; /* next transition */ typedef struct trans *Trans; /* Variables and tables */ int lines,nstates,nacts; char tokval[MAXWORD]; int tbl[MAXSTATES*128]; char *txt1 = "\n\ #define BEGIN state =\n\ \n\ int state = 0;\n\ \n"; char *fname = FNAME; /* function name goes here */ /* rest of program... */ char *txt2 = "()\n\ {\n\ int c,actno;\n\ extern int tbl[];\n\ while (1) {\n\ c = input();\n\ if ((actno = tbl[c + state*128]) != -1)\n\ switch(actno) {\n"; /* this program's output goes here, followed by final text... */ char *txt3 = "\n }\n }\n\}\n\n"; /* * turn on the bit associated with the given state * */ setstate(state,t) int state; Trans t; { int idx,msk; idx = state/8; /* byte associated with state */ msk = 0x80 >> (state % 8); /* bit mask for state */ t->states[idx] |= msk; } /* * see if the state is involved in the transition * */ teststate(state,t) int state; Trans t; { int idx,msk; idx = state/8; msk = 0x80 >> (state % 8); return(t->states[idx] & msk); } /* * read input from here... * */ Trans rdinput(infp,outfp) FILE *infp,*outfp; { Trans x,rdrules(); lines = 1; /* line counter */ nstates = 0; /* no states */ nacts = 0; /* no actions yet */ fprintf(outfp,"\n\ /* WARNING -- This C source program generated by Wart preprocessor. */\n"); fprintf(outfp,"\ /* Do not edit this file; edit the Wart-format source file instead, */\n"); fprintf(outfp,"\ /* and then run it through Wart to produce a new C source file. */\n\n"); initial(infp,outfp); /* read state names, initial defs */ prolog(outfp); /* write out our initial code */ x = rdrules(infp,outfp); /* read rules */ epilogue(outfp); /* write out epilogue code */ return(x); } /* * initial - read initial definitions and state names. Returns * on EOF or %%. * */ initial(infp,outfp) FILE *infp,*outfp; { int c; char wordbuf[MAXWORD]; while ((c = getc(infp)) != EOF) { if (c == '%') { rdword(infp,wordbuf); if (strcmp(wordbuf,"states") == 0) rdstates(infp,outfp); else if (strcmp(wordbuf,"%") == 0) return; else fprintf(outfp,"%%%s",wordbuf); } else putc(c,outfp); if (c == '\n') lines++; } } /* * boolean function to tell if the given character can be part of * a word. * */ isin(s,c) char *s; int c; { for (; *s != '\0'; s++) if (*s == c) return(1); return(0); } isword(c) int c; { static char special[] = ".%_-$@"; /* these are allowable */ return(isalnum(c) || isin(special,c)); } /* * read the next word into the given buffer. * */ rdword(fp,buf) FILE *fp; char *buf; { int len = 0,c; while (isword(c = getc(fp)) && ++len < MAXWORD) *buf++ = c; *buf++ = '\0'; /* tie off word */ ungetc(c,fp); /* put break char back */ } /* * read state names, up to a newline. * */ rdstates(fp,ofp) FILE *fp,*ofp; { int c; char wordbuf[MAXWORD]; while ((c = getc(fp)) != EOF && c != '\n') { if (isspace(c)) continue; /* skip whitespace */ ungetc(c,fp); /* put char back */ rdword(fp,wordbuf); /* read the whole word */ enter(wordbuf,++nstates); /* put into symbol tbl */ fprintf(ofp,"#define %s %d\n",wordbuf,nstates); } lines++; } /* * allocate a new, empty transition node * */ Trans newtrans() { Trans new; int i; new = (Trans) malloc(sizeof (struct trans)); for (i=0; istates[i] = 0; new->anyst = 0; new->nxt = NULL; return(new); } /* * read all the rules. * */ Trans rdrules(fp,out) FILE *fp,*out; { Trans head,cur,prev; int curtok,i; head = cur = NULL; while ((curtok = gettoken(fp)) != SEP) switch(curtok) { case LBRACK: if (cur == NULL) cur = newtrans(); else fatal("duplicate state list"); statelist(fp,cur);/* set states */ continue; /* prepare to read char */ case WORD: if (strlen(tokval) != 1) fatal("multiple chars in state"); if (cur == NULL) { cur = newtrans(); cur->anyst = 1; } cur->actno = ++nacts; cur->inchr = tokval[0]; if (head == NULL) head = cur; else prev->nxt = cur; prev = cur; cur = NULL; copyact(fp,out,nacts); break; default: fatal("bad input format"); } return(head); } /* * read a list of (comma-separated) states, set them in the * given transition. * */ statelist(fp,t) FILE *fp; Trans t; { int curtok,sval; curtok = COMMA; while (curtok != RBRACK) { if (curtok != COMMA) fatal("missing comma"); if ((curtok = gettoken(fp)) != WORD) fatal("missing state name"); if ((sval = lkup(tokval)) == -1) { fprintf(stderr,"state %s undefined\n",tokval); fatal("undefined state"); } setstate(sval,t); curtok = gettoken(fp); } } /* * copy an action from the input to the output file * */ copyact(inp,outp,actno) FILE *inp,*outp; int actno; { int c,bcnt; fprintf(outp,"case %d:\n",actno); while ((c = getc(inp)) != '\n' && isspace(c)); /* skip whitespace */ if (c == '{') { bcnt = 1; putc(c,outp); while (bcnt > 0 && (c = getc(inp)) != EOF) { if (c == '{') bcnt++; else if (c == '}') bcnt--; else if (c == '\n') lines++; putc(c,outp); } if (bcnt > 0) fatal("action doesn't end"); } else { while (c != '\n' && c != EOF) { putc(c,outp); c = getc(inp); } lines++; } fprintf(outp,"\nbreak;\n"); } /* * find the action associated with a given character and state. * returns -1 if one can't be found. * */ faction(hd,state,chr) Trans hd; int state,chr; { while (hd != NULL) { if (hd->anyst || teststate(state,hd)) if (hd->inchr == '.' || hd->inchr == chr) return(hd->actno); hd = hd->nxt; } return(-1); } /* * empty the table... * */ emptytbl() { int i; for (i=0; i 1) { argv++; if ((infile = fopen(*argv,"r")) == NULL) { fprintf(stderr,"Can't open %s\n",*argv); fatal("unreadable input file"); } } else infile = stdin; if (argc > 2) { argv++; if ((outfile = fopen(*argv,"w")) == NULL) { fprintf(stderr,"Can't write to %s\n",*argv); fatal("bad output file"); } } else outfile = stdout; clrhash(); /* empty hash table */ head = rdinput(infile,outfile); /* read input file */ emptytbl(); /* empty our tables */ for (state = 0; state <= nstates; state++) for (c = 1; c < 128; c++) addaction(faction(head,state,c),state,c); /* find actions, add to tbl */ writetbl(outfile); copyrest(infile,outfile); printf("%d states, %d actions\n",nstates,nacts); #ifdef undef for (state = 1; state <= nstates; state ++) for (c = 1; c < 128; c++) if (tbl[state*128 + c] != -1) printf("state %d, chr %d, act %d\n", state,c,tbl[state*128 + c]); #endif fclose(infile); fclose(outfile); exit(0); } /* * fatal error handler * */ fatal(msg) char *msg; { fprintf(stderr,"error in line %d: %s\n",lines,msg); exit(1); } prolog(outfp) FILE *outfp; { int c; while ((c = *txt1++) != '\0') putc(c,outfp); while ((c = *fname++) != '\0') putc(c,outfp); while ((c = *txt2++) != '\0') putc(c,outfp); } epilogue(outfp) FILE *outfp; { int c; while ((c = *txt3++) != '\0') putc(c,outfp); } copyrest(in,out) FILE *in,*out; { int c; while ((c = getc(in)) != EOF) putc(c,out); } /* * gettoken - returns token type of next token, sets tokval * to the string value of the token if appropriate. * */ gettoken(fp) FILE *fp; { int c; while (1) { /* loop if reading comments... */ do { c = getc(fp); if (c == '\n') lines++; } while (isspace(c)); /* skip whitespace */ switch(c) { case EOF: return(SEP); case '%': if ((c = getc(fp)) == '%') return(SEP); tokval[0] = '%'; tokval[1] = c; rdword(fp,tokval+2); return(WORD); case '<': return(LBRACK); case '>': return(RBRACK); case ',': return(COMMA); case '/': if ((c = getc(fp)) == '*') { rdcmnt(fp); /* skip over the comment */ continue; } /* and keep looping */ else { ungetc(c); /* put this back into input */ c = '/'; } /* put character back, fall thru */ default: if (isword(c)) { ungetc(c,fp); rdword(fp,tokval); return(WORD); } else fatal("Invalid character in input"); } } } /* * skip over a comment * */ rdcmnt(fp) FILE *fp; { int c,star,prcnt; prcnt = star = 0; /* no star seen yet */ while (!((c = getc(fp)) == '/' && star)) { if (c == EOF || (prcnt && c == '%')) fatal("Unterminated comment"); prcnt = (c == '%'); star = (c == '*'); if (c == '\n') lines++; } } /* * symbol table management for wart * * entry points: * clrhash - empty hash table. * enter - enter a name into the symbol table * lkup - find a name's value in the symbol table. * */ #define HASHSIZE 101 /* # of entries in hash table */ struct sym { char *name; /* symbol name */ int val; /* value */ struct sym *hnxt; } /* next on collision chain */ *htab[HASHSIZE]; /* the hash table */ /* * empty the hash table before using it... * */ clrhash() { int i; for (i=0; iname = copy(name); cur->val = svalue; cur->hnxt = htab[h]; htab[h] = cur; } /* * find name in the symbol table, return its value. Returns -1 * if not found. * */ lkup(name) char *name; { struct sym *cur; for (cur = htab[hash(name)]; cur != NULL; cur = cur->hnxt) if (strcmp(cur->name,name) == 0) return(cur->val); return(-1); }