/* * COMPILE: compiles a regular expression for execute() * Adapted from Software Tools, by Kernigham and Plauger * written 1984 by orc (David Parsons) */ #include #include #include "re_token.h" char *strchr(); #define YES 1 #define NO 0 #define SQ_BEG '{' #define SQ_END '}' #define MAXPAT 1024 #define MAXARG 10 char *REargs[2*MAXARG]; /* Arguments */ char *RElastp; /* last character matched */ char REpattern[MAXPAT+1]; /* last compiled pattern */ static char *p; /* code pointer for ^^^ */ #define escaped (badclose+4) /* deviousity for saving a (tiny) bit of space */ static char badclose[] = { LSTART, LEND, CLOSURE,PLUS, ARGBEG, ARGEND, TOKBEG, TOKEND, SQ_BEG, 0 }; static moveright(from,too,len) register char *from, *too; register len; { from += len; too += len; while (len-- > 0) *--too = *--from; } static concatch(c) /* add a character to the pattern */ char c; { if (p < REpattern + (MAXPAT-1)) *p++ = c; } static char esc(s) register char **s; { if (**s == ESCAPE && *(1+*s) ) switch (*++(*s)) { case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'f': return '\f'; case 's': return ' ' ; case 'b': return '\b'; } return **s; } static char * dodash(src) /* parse the innards of a [] */ char *src; { register int k; register char *start = src; char cs[128]; register char c,uc; register cnt; memset(cs, NO, sizeof(cs)); while (*src && *src != CCLEND) { if (*src == DASH && src > start && src[1] != CCLEND) { ++src; /* get to next character */ uc = c; /* save last char */ c = esc(&src); /* what are we matching up to */ for ( k = uc; k <= c; k++) cs[k] = YES; } else { c = esc(&src); cs[c] = YES; } src++; } for (cnt=k=0; k < sizeof(cs); k++) if (cs[k]) { concatch(k); cnt++; } return cnt ? src : (char*)0; } static char * badccl(src) /* a [] was encountered. is it a CCL (match one of the included * characters); or is it a NCCL (match all but the included characters)? */ register char *src; { register char *jstart; if (*src == NEGATE) { concatch(NCCL); ++src; } else concatch(CCL); jstart = p; concatch(0); /* this will be the length of the pattern */ src = dodash(src); *jstart = (p-jstart)-1; return src; } char * compile(arg) char *arg; /* make up the pattern string for find -- ripped from 'Software Tools' */ { register char *cp, *oldcp; register char *start = arg; char *patstart = 2+REpattern; register size; register argno=0, inarg=0; REpattern[0] = REpattern[1] = -1; p = patstart; while (*arg != 0) { oldcp = cp; cp = p; if (*arg == ANY) concatch(ANY); else if (*arg == LSTART && arg == start) concatch(LSTART); else if (*arg == LEND && *(arg+1) == 0) concatch(LEND); else if (*arg == CCL) { if ((arg = badccl(1+arg)) == (char*)0) return "Badly formed closure"; } else if ((*arg == CLOSURE || *arg == PLUS) && (p != REpattern)) { cp = oldcp; if (strchr(badclose, *cp) || p >= REpattern+(MAXPAT-4)) return "Bad character in closure"; moveright(cp, cp+3, (int)(p-cp)); cp[0] = CLOSURE; cp[1] = (*arg == CLOSURE) ? 0 : 1; cp[2] = 0; p += 3; } else if (*arg == ESCAPE && arg[1] && (isdigit(arg[1])||strchr(escaped, arg[1]))) { arg++; if (*arg == ARGBEG) { if (inarg++) return "Nested arguments"; else if (++argno >= 10) return "Too many arguments"; } else if (*arg == ARGEND) { if (--inarg < 0) return "unexpected \\)"; } else if (*arg == SQ_BEG) { register unsigned tmp; cp = oldcp; if (strchr(badclose, *cp) || p >= REpattern+(MAXPAT-4)) return "Unterminated closure"; moveright(cp, cp+3, (int)(p-cp)); p += 3; cp[0] = CLOSURE; for (tmp=0, ++arg; isdigit(*arg); ++arg) tmp = (tmp*10) + (*arg - '0'); if (tmp > 256) return "{ matchcount overflow"; cp[1] = tmp; if (*arg == ',') { for (tmp=0, ++arg; isdigit(*arg); ++arg) tmp = (tmp*10) + (*arg - '0'); if (tmp > 256) return "{ required matchcount overflow"; } cp[2] = tmp; if (*arg == SQ_END) { arg++; continue; } else return "unclosed {"; } else if (*arg != TOKBEG && *arg != TOKEND) { register unsigned tmp = (*arg++) - '0'; if (tmp == 0 || tmp > argno) return "Bad rematch argument"; concatch(AGAIN); concatch(tmp); continue; } concatch(*arg); } else { concatch(LITCHAR); concatch(esc(&arg)); } if (*arg) arg++; } if (inarg) return "Unterminated argument"; else if (p-patstart >= MAXPAT) return "compile-space overflow"; *((int*)REpattern) = (int)(p-patstart); concatch(0); memset(REargs, 0, sizeof REargs); #if DEBUG { register char *i; printf("[%d]", (int)(p-patstart)); for (i=patstart; i= ' ' && *i <= '~') putchar(*i); else printf("{%02x}", *i); putchar('\n'); } #endif return (char*)0; }