00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 #include <sys/types.h>
00038 #include <stdio.h>
00039 #include <string.h>
00040 #include <ctype.h>
00041 #include <limits.h>
00042 #include <stdlib.h>
00043 #include <regex.h>
00044
00045 #include "utils.h"
00046 #include "regex2.h"
00047
00048 #include "cclass.h"
00049 #include "cname.h"
00050
00051
00052
00053
00054
00055 struct parse {
00056 char *next;
00057 char *end;
00058 int error;
00059 sop *strip;
00060 sopno ssize;
00061 sopno slen;
00062 int ncsalloc;
00063 struct re_guts *g;
00064 # define NPAREN 10
00065 sopno pbegin[NPAREN];
00066 sopno pend[NPAREN];
00067 };
00068
00069 static void p_ere(struct parse *, int);
00070 static void p_ere_exp(struct parse *);
00071 static void p_str(struct parse *);
00072 static void p_bre(struct parse *, int, int);
00073 static int p_simp_re(struct parse *, int);
00074 static int p_count(struct parse *);
00075 static void p_bracket(struct parse *);
00076 static void p_b_term(struct parse *, cset *);
00077 static void p_b_cclass(struct parse *, cset *);
00078 static void p_b_eclass(struct parse *, cset *);
00079 static char p_b_symbol(struct parse *);
00080 static char p_b_coll_elem(struct parse *, int);
00081 static char othercase(int);
00082 static void bothcases(struct parse *, int);
00083 static void ordinary(struct parse *, int);
00084 static void nonnewline(struct parse *);
00085 static void repeat(struct parse *, sopno, int, int);
00086 static int seterr(struct parse *, int);
00087 static cset *allocset(struct parse *);
00088 static void freeset(struct parse *, cset *);
00089 static int freezeset(struct parse *, cset *);
00090 static int firstch(struct parse *, cset *);
00091 static int nch(struct parse *, cset *);
00092 static void mcadd(struct parse *, cset *, char *);
00093 static void mcinvert(struct parse *, cset *);
00094 static void mccase(struct parse *, cset *);
00095 static int isinsets(struct re_guts *, int);
00096 static int samesets(struct re_guts *, int, int);
00097 static void categorize(struct parse *, struct re_guts *);
00098 static sopno dupl(struct parse *, sopno, sopno);
00099 static void doemit(struct parse *, sop, size_t);
00100 static void doinsert(struct parse *, sop, size_t, sopno);
00101 static void dofwd(struct parse *, sopno, sop);
00102 static void enlarge(struct parse *, sopno);
00103 static void stripsnug(struct parse *, struct re_guts *);
00104 static void findmust(struct parse *, struct re_guts *);
00105 static sopno pluscount(struct parse *, struct re_guts *);
00106
00107 static char nuls[10];
00108
00109
00110
00111
00112
00113 #define PEEK() (*p->next)
00114 #define PEEK2() (*(p->next+1))
00115 #define MORE() (p->next < p->end)
00116 #define MORE2() (p->next+1 < p->end)
00117 #define SEE(c) (MORE() && PEEK() == (c))
00118 #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
00119 #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
00120 #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
00121 #define NEXT() (p->next++)
00122 #define NEXT2() (p->next += 2)
00123 #define NEXTn(n) (p->next += (n))
00124 #define GETNEXT() (*p->next++)
00125 #define SETERROR(e) seterr(p, (e))
00126 #define REQUIRE(co, e) ((co) || SETERROR(e))
00127 #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
00128 #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
00129 #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
00130 #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
00131 #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
00132 #define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
00133 #define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
00134 #define HERE() (p->slen)
00135 #define THERE() (p->slen - 1)
00136 #define THERETHERE() (p->slen - 2)
00137 #define DROP(n) (p->slen -= (n))
00138
00139 #ifndef NDEBUG
00140 static int never = 0;
00141 #else
00142 #define never 0
00143 #endif
00144
00145
00146
00147
00148 int
00149 regcomp(regex_t *preg, const char *pattern, int cflags)
00150 {
00151 struct parse pa;
00152 struct re_guts *g;
00153 struct parse *p = &pa;
00154 int i;
00155 size_t len;
00156 #ifdef REDEBUG
00157 # define GOODFLAGS(f) (f)
00158 #else
00159 # define GOODFLAGS(f) ((f)&~REG_DUMP)
00160 #endif
00161
00162 cflags = GOODFLAGS(cflags);
00163 if ((cflags®_EXTENDED) && (cflags®_NOSPEC))
00164 return(REG_INVARG);
00165
00166 if (cflags®_PEND) {
00167 if (preg->re_endp < pattern)
00168 return(REG_INVARG);
00169 len = preg->re_endp - pattern;
00170 } else
00171 len = strlen((char *)pattern);
00172
00173
00174 g = (struct re_guts *)malloc(sizeof(struct re_guts) +
00175 (NC-1)*sizeof(cat_t));
00176 if (g == NULL)
00177 return(REG_ESPACE);
00178 p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;
00179 p->strip = (sop *)calloc(p->ssize, sizeof(sop));
00180 p->slen = 0;
00181 if (p->strip == NULL) {
00182 free((char *)g);
00183 return(REG_ESPACE);
00184 }
00185
00186
00187 p->g = g;
00188 p->next = (char *)pattern;
00189 p->end = p->next + len;
00190 p->error = 0;
00191 p->ncsalloc = 0;
00192 for (i = 0; i < NPAREN; i++) {
00193 p->pbegin[i] = 0;
00194 p->pend[i] = 0;
00195 }
00196 g->csetsize = NC;
00197 g->sets = NULL;
00198 g->setbits = NULL;
00199 g->ncsets = 0;
00200 g->cflags = cflags;
00201 g->iflags = 0;
00202 g->nbol = 0;
00203 g->neol = 0;
00204 g->must = NULL;
00205 g->mlen = 0;
00206 g->nsub = 0;
00207 g->ncategories = 1;
00208 g->categories = &g->catspace[-(CHAR_MIN)];
00209 (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
00210 g->backrefs = 0;
00211
00212
00213 EMIT(OEND, 0);
00214 g->firststate = THERE();
00215 if (cflags®_EXTENDED)
00216 p_ere(p, OUT);
00217 else if (cflags®_NOSPEC)
00218 p_str(p);
00219 else
00220 p_bre(p, OUT, OUT);
00221 EMIT(OEND, 0);
00222 g->laststate = THERE();
00223
00224
00225 categorize(p, g);
00226 stripsnug(p, g);
00227 findmust(p, g);
00228 g->nplus = pluscount(p, g);
00229 g->magic = MAGIC2;
00230 preg->re_nsub = g->nsub;
00231 preg->re_g = g;
00232 preg->re_magic = MAGIC1;
00233 #ifndef REDEBUG
00234
00235 if (g->iflags&BAD)
00236 SETERROR(REG_ASSERT);
00237 #endif
00238
00239
00240 if (p->error != 0)
00241 regfree(preg);
00242 return(p->error);
00243 }
00244
00245
00246
00247
00248 static void
00249 p_ere(struct parse *p, int stop)
00250 {
00251 char c;
00252 sopno prevback;
00253 sopno prevfwd;
00254 sopno conc;
00255 int first = 1;
00256
00257 for (;;) {
00258
00259 conc = HERE();
00260 while (MORE() && (c = PEEK()) != '|' && c != stop)
00261 p_ere_exp(p);
00262 REQUIRE(HERE() != conc, REG_EMPTY);
00263
00264 if (!EAT('|'))
00265 break;
00266
00267 if (first) {
00268 INSERT(OCH_, conc);
00269 prevfwd = conc;
00270 prevback = conc;
00271 first = 0;
00272 }
00273 ASTERN(OOR1, prevback);
00274 prevback = THERE();
00275 AHEAD(prevfwd);
00276 prevfwd = HERE();
00277 EMIT(OOR2, 0);
00278 }
00279
00280 if (!first) {
00281 AHEAD(prevfwd);
00282 ASTERN(O_CH, prevback);
00283 }
00284
00285 assert(!MORE() || SEE(stop));
00286 }
00287
00288
00289
00290
00291 static void
00292 p_ere_exp(struct parse *p)
00293 {
00294 char c;
00295 sopno pos;
00296 int count;
00297 int count2;
00298 sopno subno;
00299 int wascaret = 0;
00300
00301 assert(MORE());
00302 c = GETNEXT();
00303
00304 pos = HERE();
00305 switch (c) {
00306 case '(':
00307 REQUIRE(MORE(), REG_EPAREN);
00308 p->g->nsub++;
00309 subno = p->g->nsub;
00310 if (subno < NPAREN)
00311 p->pbegin[subno] = HERE();
00312 EMIT(OLPAREN, subno);
00313 if (!SEE(')'))
00314 p_ere(p, ')');
00315 if (subno < NPAREN) {
00316 p->pend[subno] = HERE();
00317 assert(p->pend[subno] != 0);
00318 }
00319 EMIT(ORPAREN, subno);
00320 MUSTEAT(')', REG_EPAREN);
00321 break;
00322 #ifndef POSIX_MISTAKE
00323 case ')':
00324
00325
00326
00327
00328
00329
00330
00331 SETERROR(REG_EPAREN);
00332 break;
00333 #endif
00334 case '^':
00335 EMIT(OBOL, 0);
00336 p->g->iflags |= USEBOL;
00337 p->g->nbol++;
00338 wascaret = 1;
00339 break;
00340 case '$':
00341 EMIT(OEOL, 0);
00342 p->g->iflags |= USEEOL;
00343 p->g->neol++;
00344 break;
00345 case '|':
00346 SETERROR(REG_EMPTY);
00347 break;
00348 case '*':
00349 case '+':
00350 case '?':
00351 SETERROR(REG_BADRPT);
00352 break;
00353 case '.':
00354 if (p->g->cflags®_NEWLINE)
00355 nonnewline(p);
00356 else
00357 EMIT(OANY, 0);
00358 break;
00359 case '[':
00360 p_bracket(p);
00361 break;
00362 case '\\':
00363 REQUIRE(MORE(), REG_EESCAPE);
00364 c = GETNEXT();
00365 ordinary(p, c);
00366 break;
00367 case '{':
00368 REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
00369
00370 default:
00371 ordinary(p, c);
00372 break;
00373 }
00374
00375 if (!MORE())
00376 return;
00377 c = PEEK();
00378
00379 if (!( c == '*' || c == '+' || c == '?' ||
00380 (c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
00381 return;
00382 NEXT();
00383
00384 REQUIRE(!wascaret, REG_BADRPT);
00385 switch (c) {
00386 case '*':
00387
00388 INSERT(OPLUS_, pos);
00389 ASTERN(O_PLUS, pos);
00390 INSERT(OQUEST_, pos);
00391 ASTERN(O_QUEST, pos);
00392 break;
00393 case '+':
00394 INSERT(OPLUS_, pos);
00395 ASTERN(O_PLUS, pos);
00396 break;
00397 case '?':
00398
00399 INSERT(OCH_, pos);
00400 ASTERN(OOR1, pos);
00401 AHEAD(pos);
00402 EMIT(OOR2, 0);
00403 AHEAD(THERE());
00404 ASTERN(O_CH, THERETHERE());
00405 break;
00406 case '{':
00407 count = p_count(p);
00408 if (EAT(',')) {
00409 if (isdigit((uch)PEEK())) {
00410 count2 = p_count(p);
00411 REQUIRE(count <= count2, REG_BADBR);
00412 } else
00413 count2 = INFINITY;
00414 } else
00415 count2 = count;
00416 repeat(p, pos, count, count2);
00417 if (!EAT('}')) {
00418 while (MORE() && PEEK() != '}')
00419 NEXT();
00420 REQUIRE(MORE(), REG_EBRACE);
00421 SETERROR(REG_BADBR);
00422 }
00423 break;
00424 }
00425
00426 if (!MORE())
00427 return;
00428 c = PEEK();
00429 if (!( c == '*' || c == '+' || c == '?' ||
00430 (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
00431 return;
00432 SETERROR(REG_BADRPT);
00433 }
00434
00435
00436
00437
00438 static void
00439 p_str(struct parse *p)
00440 {
00441 REQUIRE(MORE(), REG_EMPTY);
00442 while (MORE())
00443 ordinary(p, GETNEXT());
00444 }
00445
00446
00447
00448
00449
00450
00451
00452
00453
00454
00455
00456 static void
00457 p_bre(struct parse *p,
00458 int end1,
00459 int end2)
00460 {
00461 sopno start = HERE();
00462 int first = 1;
00463 int wasdollar = 0;
00464
00465 if (EAT('^')) {
00466 EMIT(OBOL, 0);
00467 p->g->iflags |= USEBOL;
00468 p->g->nbol++;
00469 }
00470 while (MORE() && !SEETWO(end1, end2)) {
00471 wasdollar = p_simp_re(p, first);
00472 first = 0;
00473 }
00474 if (wasdollar) {
00475 DROP(1);
00476 EMIT(OEOL, 0);
00477 p->g->iflags |= USEEOL;
00478 p->g->neol++;
00479 }
00480
00481 REQUIRE(HERE() != start, REG_EMPTY);
00482 }
00483
00484
00485
00486
00487 static int
00488 p_simp_re(struct parse *p,
00489 int starordinary)
00490 {
00491 int c;
00492 int count;
00493 int count2;
00494 sopno pos;
00495 int i;
00496 sopno subno;
00497 # define BACKSL (1<<CHAR_BIT)
00498
00499 pos = HERE();
00500
00501 assert(MORE());
00502 c = GETNEXT();
00503 if (c == '\\') {
00504 REQUIRE(MORE(), REG_EESCAPE);
00505 c = BACKSL | GETNEXT();
00506 }
00507 switch (c) {
00508 case '.':
00509 if (p->g->cflags®_NEWLINE)
00510 nonnewline(p);
00511 else
00512 EMIT(OANY, 0);
00513 break;
00514 case '[':
00515 p_bracket(p);
00516 break;
00517 case BACKSL|'{':
00518 SETERROR(REG_BADRPT);
00519 break;
00520 case BACKSL|'(':
00521 p->g->nsub++;
00522 subno = p->g->nsub;
00523 if (subno < NPAREN)
00524 p->pbegin[subno] = HERE();
00525 EMIT(OLPAREN, subno);
00526
00527 if (MORE() && !SEETWO('\\', ')'))
00528 p_bre(p, '\\', ')');
00529 if (subno < NPAREN) {
00530 p->pend[subno] = HERE();
00531 assert(p->pend[subno] != 0);
00532 }
00533 EMIT(ORPAREN, subno);
00534 REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
00535 break;
00536 case BACKSL|')':
00537 case BACKSL|'}':
00538 SETERROR(REG_EPAREN);
00539 break;
00540 case BACKSL|'1':
00541 case BACKSL|'2':
00542 case BACKSL|'3':
00543 case BACKSL|'4':
00544 case BACKSL|'5':
00545 case BACKSL|'6':
00546 case BACKSL|'7':
00547 case BACKSL|'8':
00548 case BACKSL|'9':
00549 i = (c&~BACKSL) - '0';
00550 assert(i < NPAREN);
00551 if (p->pend[i] != 0) {
00552 assert(i <= p->g->nsub);
00553 EMIT(OBACK_, i);
00554 assert(p->pbegin[i] != 0);
00555 assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
00556 assert(OP(p->strip[p->pend[i]]) == ORPAREN);
00557 (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
00558 EMIT(O_BACK, i);
00559 } else
00560 SETERROR(REG_ESUBREG);
00561 p->g->backrefs = 1;
00562 break;
00563 case '*':
00564 REQUIRE(starordinary, REG_BADRPT);
00565
00566 default:
00567 ordinary(p, (char)c);
00568 break;
00569 }
00570
00571 if (EAT('*')) {
00572
00573 INSERT(OPLUS_, pos);
00574 ASTERN(O_PLUS, pos);
00575 INSERT(OQUEST_, pos);
00576 ASTERN(O_QUEST, pos);
00577 } else if (EATTWO('\\', '{')) {
00578 count = p_count(p);
00579 if (EAT(',')) {
00580 if (MORE() && isdigit((uch)PEEK())) {
00581 count2 = p_count(p);
00582 REQUIRE(count <= count2, REG_BADBR);
00583 } else
00584 count2 = INFINITY;
00585 } else
00586 count2 = count;
00587 repeat(p, pos, count, count2);
00588 if (!EATTWO('\\', '}')) {
00589 while (MORE() && !SEETWO('\\', '}'))
00590 NEXT();
00591 REQUIRE(MORE(), REG_EBRACE);
00592 SETERROR(REG_BADBR);
00593 }
00594 } else if (c == '$')
00595 return(1);
00596
00597 return(0);
00598 }
00599
00600
00601
00602
00603 static int
00604 p_count(struct parse *p)
00605 {
00606 int count = 0;
00607 int ndigits = 0;
00608
00609 while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
00610 count = count*10 + (GETNEXT() - '0');
00611 ndigits++;
00612 }
00613
00614 REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
00615 return(count);
00616 }
00617
00618
00619
00620
00621
00622
00623
00624 static void
00625 p_bracket(struct parse *p)
00626 {
00627 cset *cs;
00628 int invert = 0;
00629
00630
00631 if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
00632 EMIT(OBOW, 0);
00633 NEXTn(6);
00634 return;
00635 }
00636 if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
00637 EMIT(OEOW, 0);
00638 NEXTn(6);
00639 return;
00640 }
00641
00642 if ((cs = allocset(p)) == NULL) {
00643
00644 return;
00645 }
00646
00647 if (EAT('^'))
00648 invert++;
00649 if (EAT(']'))
00650 CHadd(cs, ']');
00651 else if (EAT('-'))
00652 CHadd(cs, '-');
00653 while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
00654 p_b_term(p, cs);
00655 if (EAT('-'))
00656 CHadd(cs, '-');
00657 MUSTEAT(']', REG_EBRACK);
00658
00659 if (p->error != 0) {
00660 freeset(p, cs);
00661 return;
00662 }
00663
00664 if (p->g->cflags®_ICASE) {
00665 int i;
00666 int ci;
00667
00668 for (i = p->g->csetsize - 1; i >= 0; i--)
00669 if (CHIN(cs, i) && isalpha(i)) {
00670 ci = othercase(i);
00671 if (ci != i)
00672 CHadd(cs, ci);
00673 }
00674 if (cs->multis != NULL)
00675 mccase(p, cs);
00676 }
00677 if (invert) {
00678 int i;
00679
00680 for (i = p->g->csetsize - 1; i >= 0; i--)
00681 if (CHIN(cs, i))
00682 CHsub(cs, i);
00683 else
00684 CHadd(cs, i);
00685 if (p->g->cflags®_NEWLINE)
00686 CHsub(cs, '\n');
00687 if (cs->multis != NULL)
00688 mcinvert(p, cs);
00689 }
00690
00691 assert(cs->multis == NULL);
00692
00693 if (nch(p, cs) == 1) {
00694 ordinary(p, firstch(p, cs));
00695 freeset(p, cs);
00696 } else
00697 EMIT(OANYOF, freezeset(p, cs));
00698 }
00699
00700
00701
00702
00703 static void
00704 p_b_term(struct parse *p, cset *cs)
00705 {
00706 char c;
00707 char start, finish;
00708 int i;
00709
00710
00711 switch ((MORE()) ? PEEK() : '\0') {
00712 case '[':
00713 c = (MORE2()) ? PEEK2() : '\0';
00714 break;
00715 case '-':
00716 SETERROR(REG_ERANGE);
00717 return;
00718 break;
00719 default:
00720 c = '\0';
00721 break;
00722 }
00723
00724 switch (c) {
00725 case ':':
00726 NEXT2();
00727 REQUIRE(MORE(), REG_EBRACK);
00728 c = PEEK();
00729 REQUIRE(c != '-' && c != ']', REG_ECTYPE);
00730 p_b_cclass(p, cs);
00731 REQUIRE(MORE(), REG_EBRACK);
00732 REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
00733 break;
00734 case '=':
00735 NEXT2();
00736 REQUIRE(MORE(), REG_EBRACK);
00737 c = PEEK();
00738 REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
00739 p_b_eclass(p, cs);
00740 REQUIRE(MORE(), REG_EBRACK);
00741 REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
00742 break;
00743 default:
00744
00745 start = p_b_symbol(p);
00746 if (SEE('-') && MORE2() && PEEK2() != ']') {
00747
00748 NEXT();
00749 if (EAT('-'))
00750 finish = '-';
00751 else
00752 finish = p_b_symbol(p);
00753 } else
00754 finish = start;
00755
00756 REQUIRE(start <= finish, REG_ERANGE);
00757 for (i = start; i <= finish; i++)
00758 CHadd(cs, i);
00759 break;
00760 }
00761 }
00762
00763
00764
00765
00766 static void
00767 p_b_cclass(struct parse *p, cset *cs)
00768 {
00769 char *sp = p->next;
00770 struct cclass *cp;
00771 size_t len;
00772 char *u;
00773 char c;
00774
00775 while (MORE() && isalpha(PEEK()))
00776 NEXT();
00777 len = p->next - sp;
00778 for (cp = cclasses; cp->name != NULL; cp++)
00779 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
00780 break;
00781 if (cp->name == NULL) {
00782
00783 SETERROR(REG_ECTYPE);
00784 return;
00785 }
00786
00787 u = cp->chars;
00788 while ((c = *u++) != '\0')
00789 CHadd(cs, c);
00790 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
00791 MCadd(p, cs, u);
00792 }
00793
00794
00795
00796
00797
00798
00799 static void
00800 p_b_eclass(struct parse *p, cset *cs)
00801 {
00802 char c;
00803
00804 c = p_b_coll_elem(p, '=');
00805 CHadd(cs, c);
00806 }
00807
00808
00809
00810
00811 static char
00812 p_b_symbol(struct parse *p)
00813 {
00814 char value;
00815
00816 REQUIRE(MORE(), REG_EBRACK);
00817 if (!EATTWO('[', '.'))
00818 return(GETNEXT());
00819
00820
00821 value = p_b_coll_elem(p, '.');
00822 REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
00823 return(value);
00824 }
00825
00826
00827
00828
00829 static char
00830 p_b_coll_elem(struct parse *p,
00831 int endc)
00832 {
00833 char *sp = p->next;
00834 struct cname *cp;
00835 int len;
00836
00837 while (MORE() && !SEETWO(endc, ']'))
00838 NEXT();
00839 if (!MORE()) {
00840 SETERROR(REG_EBRACK);
00841 return(0);
00842 }
00843 len = p->next - sp;
00844 for (cp = cnames; cp->name != NULL; cp++)
00845 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
00846 return(cp->code);
00847 if (len == 1)
00848 return(*sp);
00849 SETERROR(REG_ECOLLATE);
00850 return(0);
00851 }
00852
00853
00854
00855
00856 static char
00857 othercase(int ch)
00858 {
00859 ch = (uch)ch;
00860 assert(isalpha(ch));
00861 if (isupper(ch))
00862 return ((uch)tolower(ch));
00863 else if (islower(ch))
00864 return ((uch)toupper(ch));
00865 else
00866 return(ch);
00867 }
00868
00869
00870
00871
00872
00873
00874 static void
00875 bothcases(struct parse *p, int ch)
00876 {
00877 char *oldnext = p->next;
00878 char *oldend = p->end;
00879 char bracket[3];
00880
00881 ch = (uch)ch;
00882 assert(othercase(ch) != ch);
00883 p->next = bracket;
00884 p->end = bracket+2;
00885 bracket[0] = ch;
00886 bracket[1] = ']';
00887 bracket[2] = '\0';
00888 p_bracket(p);
00889 assert(p->next == bracket+2);
00890 p->next = oldnext;
00891 p->end = oldend;
00892 }
00893
00894
00895
00896
00897 static void
00898 ordinary(struct parse *p, int ch)
00899 {
00900 cat_t *cap = p->g->categories;
00901
00902 if ((p->g->cflags®_ICASE) && isalpha((uch)ch) && othercase(ch) != ch)
00903 bothcases(p, ch);
00904 else {
00905 EMIT(OCHAR, (uch)ch);
00906 if (cap[ch] == 0)
00907 cap[ch] = p->g->ncategories++;
00908 }
00909 }
00910
00911
00912
00913
00914
00915
00916 static void
00917 nonnewline(struct parse *p)
00918 {
00919 char *oldnext = p->next;
00920 char *oldend = p->end;
00921 char bracket[4];
00922
00923 p->next = bracket;
00924 p->end = bracket+3;
00925 bracket[0] = '^';
00926 bracket[1] = '\n';
00927 bracket[2] = ']';
00928 bracket[3] = '\0';
00929 p_bracket(p);
00930 assert(p->next == bracket+3);
00931 p->next = oldnext;
00932 p->end = oldend;
00933 }
00934
00935
00936
00937
00938 static void
00939 repeat(struct parse *p,
00940 sopno start,
00941 int from,
00942 int to)
00943 {
00944 sopno finish = HERE();
00945 # define N 2
00946 # define INF 3
00947 # define REP(f, t) ((f)*8 + (t))
00948 # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
00949 sopno copy;
00950
00951 if (p->error != 0)
00952 return;
00953
00954 assert(from <= to);
00955
00956 switch (REP(MAP(from), MAP(to))) {
00957 case REP(0, 0):
00958 DROP(finish-start);
00959 break;
00960 case REP(0, 1):
00961 case REP(0, N):
00962 case REP(0, INF):
00963
00964 INSERT(OCH_, start);
00965 repeat(p, start+1, 1, to);
00966 ASTERN(OOR1, start);
00967 AHEAD(start);
00968 EMIT(OOR2, 0);
00969 AHEAD(THERE());
00970 ASTERN(O_CH, THERETHERE());
00971 break;
00972 case REP(1, 1):
00973
00974 break;
00975 case REP(1, N):
00976
00977 INSERT(OCH_, start);
00978 ASTERN(OOR1, start);
00979 AHEAD(start);
00980 EMIT(OOR2, 0);
00981 AHEAD(THERE());
00982 ASTERN(O_CH, THERETHERE());
00983 copy = dupl(p, start+1, finish+1);
00984 assert(copy == finish+4);
00985 repeat(p, copy, 1, to-1);
00986 break;
00987 case REP(1, INF):
00988 INSERT(OPLUS_, start);
00989 ASTERN(O_PLUS, start);
00990 break;
00991 case REP(N, N):
00992 copy = dupl(p, start, finish);
00993 repeat(p, copy, from-1, to-1);
00994 break;
00995 case REP(N, INF):
00996 copy = dupl(p, start, finish);
00997 repeat(p, copy, from-1, to);
00998 break;
00999 default:
01000 SETERROR(REG_ASSERT);
01001 break;
01002 }
01003 }
01004
01005
01006
01007
01008 static int
01009 seterr(struct parse *p, int e)
01010 {
01011 if (p->error == 0)
01012 p->error = e;
01013 p->next = nuls;
01014 p->end = nuls;
01015 return(0);
01016 }
01017
01018
01019
01020
01021 static cset *
01022 allocset(struct parse *p)
01023 {
01024 int no = p->g->ncsets++;
01025 size_t nc;
01026 size_t nbytes;
01027 cset *cs;
01028 size_t css = (size_t)p->g->csetsize;
01029 int i;
01030
01031 if (no >= p->ncsalloc) {
01032 void *ptr;
01033
01034 p->ncsalloc += CHAR_BIT;
01035 nc = p->ncsalloc;
01036 assert(nc % CHAR_BIT == 0);
01037 nbytes = nc / CHAR_BIT * css;
01038
01039 ptr = (cset *)realloc((char *)p->g->sets, nc * sizeof(cset));
01040 if (ptr == NULL)
01041 goto nomem;
01042 p->g->sets = ptr;
01043
01044 ptr = (uch *)realloc((char *)p->g->setbits, nbytes);
01045 if (ptr == NULL)
01046 goto nomem;
01047 p->g->setbits = ptr;
01048
01049 for (i = 0; i < no; i++)
01050 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
01051
01052 (void) memset((char *)p->g->setbits + (nbytes - css), 0, css);
01053 }
01054
01055 if (p->g->sets == NULL || p->g->setbits == NULL)
01056 goto nomem;
01057
01058 cs = &p->g->sets[no];
01059 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
01060 cs->mask = 1 << ((no) % CHAR_BIT);
01061 cs->hash = 0;
01062 cs->smultis = 0;
01063 cs->multis = NULL;
01064
01065 return(cs);
01066 nomem:
01067 free(p->g->sets);
01068 p->g->sets = NULL;
01069 free(p->g->setbits);
01070 p->g->setbits = NULL;
01071
01072 SETERROR(REG_ESPACE);
01073
01074 return(NULL);
01075 }
01076
01077
01078
01079
01080 static void
01081 freeset(struct parse *p, cset *cs)
01082 {
01083 int i;
01084 cset *top = &p->g->sets[p->g->ncsets];
01085 size_t css = (size_t)p->g->csetsize;
01086
01087 for (i = 0; i < css; i++)
01088 CHsub(cs, i);
01089 if (cs == top-1)
01090 p->g->ncsets--;
01091 }
01092
01093
01094
01095
01096
01097
01098
01099
01100
01101
01102 static int
01103 freezeset(struct parse *p, cset *cs)
01104 {
01105 uch h = cs->hash;
01106 int i;
01107 cset *top = &p->g->sets[p->g->ncsets];
01108 cset *cs2;
01109 size_t css = (size_t)p->g->csetsize;
01110
01111
01112 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
01113 if (cs2->hash == h && cs2 != cs) {
01114
01115 for (i = 0; i < css; i++)
01116 if (!!CHIN(cs2, i) != !!CHIN(cs, i))
01117 break;
01118 if (i == css)
01119 break;
01120 }
01121
01122 if (cs2 < top) {
01123 freeset(p, cs);
01124 cs = cs2;
01125 }
01126
01127 return((int)(cs - p->g->sets));
01128 }
01129
01130
01131
01132
01133 static int
01134 firstch(struct parse *p, cset *cs)
01135 {
01136 int i;
01137 size_t css = (size_t)p->g->csetsize;
01138
01139 for (i = 0; i < css; i++)
01140 if (CHIN(cs, i))
01141 return((char)i);
01142 assert(never);
01143 return(0);
01144 }
01145
01146
01147
01148
01149 static int
01150 nch(struct parse *p, cset *cs)
01151 {
01152 int i;
01153 size_t css = (size_t)p->g->csetsize;
01154 int n = 0;
01155
01156 for (i = 0; i < css; i++)
01157 if (CHIN(cs, i))
01158 n++;
01159 return(n);
01160 }
01161
01162
01163
01164
01165 static void
01166 mcadd( struct parse *p, cset *cs, char *cp)
01167 {
01168 size_t oldend = cs->smultis;
01169 void *np;
01170
01171 cs->smultis += strlen(cp) + 1;
01172 np = realloc(cs->multis, cs->smultis);
01173 if (np == NULL) {
01174 if (cs->multis)
01175 free(cs->multis);
01176 cs->multis = NULL;
01177 SETERROR(REG_ESPACE);
01178 return;
01179 }
01180 cs->multis = np;
01181
01182 strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1);
01183 }
01184
01185
01186
01187
01188
01189
01190
01191
01192 static void
01193 mcinvert(struct parse *p, cset *cs)
01194 {
01195 assert(cs->multis == NULL);
01196 }
01197
01198
01199
01200
01201
01202
01203
01204
01205 static void
01206 mccase(struct parse *p, cset *cs)
01207 {
01208 assert(cs->multis == NULL);
01209 }
01210
01211
01212
01213
01214 static int
01215 isinsets(struct re_guts *g, int c)
01216 {
01217 uch *col;
01218 int i;
01219 int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
01220 unsigned uc = (uch)c;
01221
01222 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
01223 if (col[uc] != 0)
01224 return(1);
01225 return(0);
01226 }
01227
01228
01229
01230
01231 static int
01232 samesets(struct re_guts *g, int c1, int c2)
01233 {
01234 uch *col;
01235 int i;
01236 int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
01237 unsigned uc1 = (uch)c1;
01238 unsigned uc2 = (uch)c2;
01239
01240 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
01241 if (col[uc1] != col[uc2])
01242 return(0);
01243 return(1);
01244 }
01245
01246
01247
01248
01249 static void
01250 categorize(struct parse *p, struct re_guts *g)
01251 {
01252 cat_t *cats = g->categories;
01253 int c;
01254 int c2;
01255 cat_t cat;
01256
01257
01258 if (p->error != 0)
01259 return;
01260
01261 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
01262 if (cats[c] == 0 && isinsets(g, c)) {
01263 cat = g->ncategories++;
01264 cats[c] = cat;
01265 for (c2 = c+1; c2 <= CHAR_MAX; c2++)
01266 if (cats[c2] == 0 && samesets(g, c, c2))
01267 cats[c2] = cat;
01268 }
01269 }
01270
01271
01272
01273
01274 static sopno
01275 dupl(struct parse *p,
01276 sopno start,
01277 sopno finish)
01278 {
01279 sopno ret = HERE();
01280 sopno len = finish - start;
01281
01282 assert(finish >= start);
01283 if (len == 0)
01284 return(ret);
01285 enlarge(p, p->ssize + len);
01286 assert(p->ssize >= p->slen + len);
01287 (void) memcpy((char *)(p->strip + p->slen),
01288 (char *)(p->strip + start), (size_t)len*sizeof(sop));
01289 p->slen += len;
01290 return(ret);
01291 }
01292
01293
01294
01295
01296
01297
01298
01299
01300 static void
01301 doemit(struct parse *p, sop op, size_t opnd)
01302 {
01303
01304 if (p->error != 0)
01305 return;
01306
01307
01308 assert(opnd < 1<<OPSHIFT);
01309
01310
01311 if (p->slen >= p->ssize)
01312 enlarge(p, (p->ssize+1) / 2 * 3);
01313 assert(p->slen < p->ssize);
01314
01315
01316 p->strip[p->slen++] = SOP(op, opnd);
01317 }
01318
01319
01320
01321
01322 static void
01323 doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
01324 {
01325 sopno sn;
01326 sop s;
01327 int i;
01328
01329
01330 if (p->error != 0)
01331 return;
01332
01333 sn = HERE();
01334 EMIT(op, opnd);
01335 assert(HERE() == sn+1);
01336 s = p->strip[sn];
01337
01338
01339 assert(pos > 0);
01340 for (i = 1; i < NPAREN; i++) {
01341 if (p->pbegin[i] >= pos) {
01342 p->pbegin[i]++;
01343 }
01344 if (p->pend[i] >= pos) {
01345 p->pend[i]++;
01346 }
01347 }
01348
01349 memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
01350 (HERE()-pos-1)*sizeof(sop));
01351 p->strip[pos] = s;
01352 }
01353
01354
01355
01356
01357 static void
01358 dofwd(struct parse *p, sopno pos, sop value)
01359 {
01360
01361 if (p->error != 0)
01362 return;
01363
01364 assert(value < 1<<OPSHIFT);
01365 p->strip[pos] = OP(p->strip[pos]) | value;
01366 }
01367
01368
01369
01370
01371 static void
01372 enlarge(struct parse *p, sopno size)
01373 {
01374 sop *sp;
01375
01376 if (p->ssize >= size)
01377 return;
01378
01379 sp = (sop *)realloc(p->strip, size*sizeof(sop));
01380 if (sp == NULL) {
01381 SETERROR(REG_ESPACE);
01382 return;
01383 }
01384 p->strip = sp;
01385 p->ssize = size;
01386 }
01387
01388
01389
01390
01391 static void
01392 stripsnug(struct parse *p, struct re_guts *g)
01393 {
01394 g->nstates = p->slen;
01395 g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
01396 if (g->strip == NULL) {
01397 SETERROR(REG_ESPACE);
01398 g->strip = p->strip;
01399 }
01400 }
01401
01402
01403
01404
01405
01406
01407
01408
01409
01410
01411 static void
01412 findmust(struct parse *p, struct re_guts *g)
01413 {
01414 sop *scan;
01415 sop *start;
01416 sop *newstart;
01417 sopno newlen;
01418 sop s;
01419 char *cp;
01420 sopno i;
01421
01422
01423 if (p->error != 0)
01424 return;
01425
01426
01427 newlen = 0;
01428 scan = g->strip + 1;
01429 do {
01430 s = *scan++;
01431 switch (OP(s)) {
01432 case OCHAR:
01433 if (newlen == 0)
01434 newstart = scan - 1;
01435 newlen++;
01436 break;
01437 case OPLUS_:
01438 case OLPAREN:
01439 case ORPAREN:
01440 break;
01441 case OQUEST_:
01442 case OCH_:
01443 scan--;
01444 do {
01445 scan += OPND(s);
01446 s = *scan;
01447
01448 if (OP(s) != O_QUEST && OP(s) != O_CH &&
01449 OP(s) != OOR2) {
01450 g->iflags |= BAD;
01451 return;
01452 }
01453 } while (OP(s) != O_QUEST && OP(s) != O_CH);
01454
01455 default:
01456 if (newlen > g->mlen) {
01457 start = newstart;
01458 g->mlen = newlen;
01459 }
01460 newlen = 0;
01461 break;
01462 }
01463 } while (OP(s) != OEND);
01464
01465 if (g->mlen == 0)
01466 return;
01467
01468
01469 g->must = malloc((size_t)g->mlen + 1);
01470 if (g->must == NULL) {
01471 g->mlen = 0;
01472 return;
01473 }
01474 cp = g->must;
01475 scan = start;
01476 for (i = g->mlen; i > 0; i--) {
01477 while (OP(s = *scan++) != OCHAR)
01478 continue;
01479 assert(cp < g->must + g->mlen);
01480 *cp++ = (char)OPND(s);
01481 }
01482 assert(cp == g->must + g->mlen);
01483 *cp++ = '\0';
01484 }
01485
01486
01487
01488
01489 static sopno
01490 pluscount(struct parse *p, struct re_guts *g)
01491 {
01492 sop *scan;
01493 sop s;
01494 sopno plusnest = 0;
01495 sopno maxnest = 0;
01496
01497 if (p->error != 0)
01498 return(0);
01499
01500 scan = g->strip + 1;
01501 do {
01502 s = *scan++;
01503 switch (OP(s)) {
01504 case OPLUS_:
01505 plusnest++;
01506 break;
01507 case O_PLUS:
01508 if (plusnest > maxnest)
01509 maxnest = plusnest;
01510 plusnest--;
01511 break;
01512 }
01513 } while (OP(s) != OEND);
01514 if (plusnest != 0)
01515 g->iflags |= BAD;
01516 return(maxnest);
01517 }