The regex library used to have a notion of a "collating element"
that was distinct from a "character", but Henry Spencer never
actually implemented support for multi-character collating elements,
and the Tcl crew ripped out most of the stubs for it years ago.
The only thing left that distinguished the "celt" typedef from the
"chr" typedef was that "celt" was supposed to also be able to hold
the not-a-character "NOCELT" value. I noticed however that the
NOCELT macro is no longer used anywhere, which means we could remove
it as well as the separate typedef, and just deal in chrs. This
simplifies matters and also removes a trap for the unwary, in that
celt is signed while chr is not, so comparisons could mean different
things. There's no bug there today because we restrict CHR_MAX to
be less than INT_MAX, but I think there may have been such bugs before
we did that, and there could be again if anyone ever decides to fool
with the range of chr.
The attached patch removes celt and NOCELT, and also removes unnecessary
casts to "chr" (some of them were already unnecessary, probably as a
result of the earlier celt-support-removal changes).
Comments? Anyone feel like actually reviewing this, or shall I just
push it?
regards, tom lane
diff --git a/src/backend/regex/regc_lex.c b/src/backend/regex/regc_lex.c
index f62ec7d..cd34c8a 100644
*** a/src/backend/regex/regc_lex.c
--- b/src/backend/regex/regc_lex.c
*************** lexescape(struct vars * v)
*** 870,876 ****
if (v->now == save || ((int) c > 0 && (int) c <= v->nsubexp))
{
NOTE(REG_UBACKREF);
! RETV(BACKREF, (chr) c);
}
/* oops, doesn't look like it's a backref after all... */
v->now = save;
--- 870,876 ----
if (v->now == save || ((int) c > 0 && (int) c <= v->nsubexp))
{
NOTE(REG_UBACKREF);
! RETV(BACKREF, c);
}
/* oops, doesn't look like it's a backref after all... */
v->now = save;
*************** lexdigits(struct vars * v,
*** 986,995 ****
*/
static int /* 1 normal, 0 failure */
brenext(struct vars * v,
! chr pc)
{
- chr c = (chr) pc;
-
switch (c)
{
case CHR('*'):
--- 986,993 ----
*/
static int /* 1 normal, 0 failure */
brenext(struct vars * v,
! chr c)
{
switch (c)
{
case CHR('*'):
*************** chrnamed(struct vars * v,
*** 1153,1159 ****
const chr *endp, /* just past end of name */
chr lastresort) /* what to return if name lookup fails */
{
! celt c;
int errsave;
int e;
struct cvec *cv;
--- 1151,1157 ----
const chr *endp, /* just past end of name */
chr lastresort) /* what to return if name lookup fails */
{
! chr c;
int errsave;
int e;
struct cvec *cv;
*************** chrnamed(struct vars * v,
*** 1165,1174 ****
v->err = errsave;
if (e != 0)
! return (chr) lastresort;
cv = range(v, c, c, 0);
if (cv->nchrs == 0)
! return (chr) lastresort;
return cv->chrs[0];
}
--- 1163,1172 ----
v->err = errsave;
if (e != 0)
! return lastresort;
cv = range(v, c, c, 0);
if (cv->nchrs == 0)
! return lastresort;
return cv->chrs[0];
}
diff --git a/src/backend/regex/regc_locale.c b/src/backend/regex/regc_locale.c
index 4fe6292..399de02 100644
*** a/src/backend/regex/regc_locale.c
--- b/src/backend/regex/regc_locale.c
*************** static const struct cname
*** 361,369 ****
/*
! * element - map collating-element name to celt
*/
! static celt
element(struct vars * v, /* context */
const chr *startp, /* points to start of name */
const chr *endp) /* points just past end of name */
--- 361,369 ----
/*
! * element - map collating-element name to chr
*/
! static chr
element(struct vars * v, /* context */
const chr *startp, /* points to start of name */
const chr *endp) /* points just past end of name */
*************** element(struct vars * v, /* context */
*** 401,413 ****
*/
static struct cvec *
range(struct vars * v, /* context */
! celt a, /* range start */
! celt b, /* range end, might equal a */
int cases) /* case-independent? */
{
int nchrs;
struct cvec *cv;
! celt c,
cc;
if (a != b && !before(a, b))
--- 401,413 ----
*/
static struct cvec *
range(struct vars * v, /* context */
! chr a, /* range start */
! chr b, /* range end, might equal a */
int cases) /* case-independent? */
{
int nchrs;
struct cvec *cv;
! chr c,
cc;
if (a != b && !before(a, b))
*************** range(struct vars * v, /* context */
*** 444,450 ****
for (c = a; c <= b; c++)
{
! cc = pg_wc_tolower((chr) c);
if (cc != c &&
(before(cc, a) || before(b, cc)))
{
--- 444,450 ----
for (c = a; c <= b; c++)
{
! cc = pg_wc_tolower(c);
if (cc != c &&
(before(cc, a) || before(b, cc)))
{
*************** range(struct vars * v, /* context */
*** 455,461 ****
}
addchr(cv, cc);
}
! cc = pg_wc_toupper((chr) c);
if (cc != c &&
(before(cc, a) || before(b, cc)))
{
--- 455,461 ----
}
addchr(cv, cc);
}
! cc = pg_wc_toupper(c);
if (cc != c &&
(before(cc, a) || before(b, cc)))
{
*************** range(struct vars * v, /* context */
*** 477,486 ****
}
/*
! * before - is celt x before celt y, for purposes of range legality?
*/
static int /* predicate */
! before(celt x, celt y)
{
if (x < y)
return 1;
--- 477,486 ----
}
/*
! * before - is chr x before chr y, for purposes of range legality?
*/
static int /* predicate */
! before(chr x, chr y)
{
if (x < y)
return 1;
*************** before(celt x, celt y)
*** 493,499 ****
*/
static struct cvec *
eclass(struct vars * v, /* context */
! celt c, /* Collating element representing the
* equivalence class. */
int cases) /* all cases? */
{
--- 493,499 ----
*/
static struct cvec *
eclass(struct vars * v, /* context */
! chr c, /* Collating element representing the
* equivalence class. */
int cases) /* all cases? */
{
*************** eclass(struct vars * v, /* context */
*** 503,514 ****
if ((v->cflags & REG_FAKE) && c == 'x')
{
cv = getcvec(v, 4, 0);
! addchr(cv, (chr) 'x');
! addchr(cv, (chr) 'y');
if (cases)
{
! addchr(cv, (chr) 'X');
! addchr(cv, (chr) 'Y');
}
return cv;
}
--- 503,514 ----
if ((v->cflags & REG_FAKE) && c == 'x')
{
cv = getcvec(v, 4, 0);
! addchr(cv, CHR('x'));
! addchr(cv, CHR('y'));
if (cases)
{
! addchr(cv, CHR('X'));
! addchr(cv, CHR('Y'));
}
return cv;
}
*************** eclass(struct vars * v, /* context */
*** 518,524 ****
return allcases(v, c);
cv = getcvec(v, 1, 0);
assert(cv != NULL);
! addchr(cv, (chr) c);
return cv;
}
--- 518,524 ----
return allcases(v, c);
cv = getcvec(v, 1, 0);
assert(cv != NULL);
! addchr(cv, c);
return cv;
}
*************** cclass(struct vars * v, /* context */
*** 673,687 ****
*/
static struct cvec *
allcases(struct vars * v, /* context */
! chr pc) /* character to get case equivs of */
{
struct cvec *cv;
- chr c = (chr) pc;
chr lc,
uc;
! lc = pg_wc_tolower((chr) c);
! uc = pg_wc_toupper((chr) c);
cv = getcvec(v, 2, 0);
addchr(cv, lc);
--- 673,686 ----
*/
static struct cvec *
allcases(struct vars * v, /* context */
! chr c) /* character to get case equivs of */
{
struct cvec *cv;
chr lc,
uc;
! lc = pg_wc_tolower(c);
! uc = pg_wc_toupper(c);
cv = getcvec(v, 2, 0);
addchr(cv, lc);
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index cc589b0..48d63da 100644
*** a/src/backend/regex/regcomp.c
--- b/src/backend/regex/regcomp.c
*************** static pg_wchar pg_wc_toupper(pg_wchar c
*** 210,219 ****
static pg_wchar pg_wc_tolower(pg_wchar c);
/* === regc_locale.c === */
! static celt element(struct vars *, const chr *, const chr *);
! static struct cvec *range(struct vars *, celt, celt, int);
! static int before(celt, celt);
! static struct cvec *eclass(struct vars *, celt, int);
static struct cvec *cclass(struct vars *, const chr *, const chr *, int);
static struct cvec *allcases(struct vars *, chr);
static int cmp(const chr *, const chr *, size_t);
--- 210,219 ----
static pg_wchar pg_wc_tolower(pg_wchar c);
/* === regc_locale.c === */
! static chr element(struct vars *, const chr *, const chr *);
! static struct cvec *range(struct vars *, chr, chr, int);
! static int before(chr, chr);
! static struct cvec *eclass(struct vars *, chr, int);
static struct cvec *cclass(struct vars *, const chr *, const chr *, int);
static struct cvec *allcases(struct vars *, chr);
static int cmp(const chr *, const chr *, size_t);
*************** brackpart(struct vars * v,
*** 1424,1431 ****
struct state * lp,
struct state * rp)
{
! celt startc;
! celt endc;
struct cvec *cv;
const chr *startp;
const chr *endp;
--- 1424,1431 ----
struct state * lp,
struct state * rp)
{
! chr startc;
! chr endc;
struct cvec *cv;
const chr *startp;
const chr *endp;
diff --git a/src/include/regex/regcustom.h b/src/include/regex/regcustom.h
index 60034da..459851a 100644
*** a/src/include/regex/regcustom.h
--- b/src/include/regex/regcustom.h
***************
*** 58,72 ****
/* internal character type and related */
typedef pg_wchar chr; /* the type itself */
typedef unsigned uchr; /* unsigned type that will hold a chr */
- typedef int celt; /* type to hold chr, or NOCELT */
- #define NOCELT (-1) /* celt value which is not valid chr */
#define CHR(c) ((unsigned char) (c)) /* turn char literal into chr literal */
#define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */
#define CHRBITS 32 /* bits in a chr; must not use sizeof */
#define CHR_MIN 0x00000000 /* smallest and largest chr; the value */
#define CHR_MAX 0x7ffffffe /* CHR_MAX-CHR_MIN+1 must fit in an int, and
! * CHR_MAX+1 must fit in both chr and celt */
/*
* Check if a chr value is in range. Ideally we'd just write this as
--- 58,70 ----
/* internal character type and related */
typedef pg_wchar chr; /* the type itself */
typedef unsigned uchr; /* unsigned type that will hold a chr */
#define CHR(c) ((unsigned char) (c)) /* turn char literal into chr literal */
#define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */
#define CHRBITS 32 /* bits in a chr; must not use sizeof */
#define CHR_MIN 0x00000000 /* smallest and largest chr; the value */
#define CHR_MAX 0x7ffffffe /* CHR_MAX-CHR_MIN+1 must fit in an int, and
! * CHR_MAX+1 must fit in a chr variable */
/*
* Check if a chr value is in range. Ideally we'd just write this as