Hello.
This is based on a bug report submitted for gawk, but I have reproduced
it with the dfa.c in grep 2.14.
Attached are the following files:
testdfa.c: A harness that pulls in all the gawk stuff needed
to call dfa the same way gawk does.
dfa_test_re1: A regexp that causes dfa to assert.
dfa_test_re2: A similar regexp that doesn't. The difference is the
[..] which are not present.
dfa_test.in: Input data
Compile testdfa.c with dfa.o and run as follows:
LC_ALL=en_US.UTF-8 testdfa "$(cat dfa_test_re1)" < dfa_test.in
LC_ALL=en_US.UTF-8 testdfa "$(cat dfa_test_re2)" < dfa_test.in
The first will assert. The second will not.
There is no problem if LC_ALL=C.
Thanks!
Arnold
--
Aharon (Arnold) Robbins arnold AT skeeve DOT com
P.O. Box 354 Home Phone: +972 8 979-0381
Nof Ayalon
D.N. Shimshon 9978500 ISRAEL
/*
* testdfa.c --- abstracted from gawk.
*/
/*
* Copyright (C) 1986, 1988, 1989, 1991-2013 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* GAWK is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include <stdio.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <locale.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>
#include <unistd.h>
#include <wchar.h>
#include <sys/types.h>
#include <sys/stat.h>
#define _Noreturn
#include "dfa.h"
const char *regexflags2str(int flags);
char *databuf(int fd);
const char * reflags2str(int flagval);
int parse_escape(const char **string_ptr);
char *setup_pattern(const char *pattern, size_t len);
char casetable[];
reg_syntax_t syn;
struct flagtab {
int val;
const char *name;
};
/* usage --- print an error message and die */
void usage(const char *myname)
{
fprintf(stderr, "usage: %s [-ipt] 'awk-regex' < file\n", myname);
exit(EXIT_FAILURE);
}
/* main --- parse options, compile and run the regex */
int main(int argc, char **argv)
{
int c, ret, try_backref;
struct re_pattern_buffer pat;
struct re_registers regs;
struct dfa *dfareg;
size_t len;
const char *pattern;
const char *rerr;
int infd;
char *data;
reg_syntax_t dfa_syn;
bool ignorecase = false;
char save;
size_t count = 0;
char *place;
if (argc < 2)
usage(argv[0]);
memset(& pat, 0, sizeof(pat));
memset(& regs, 0, sizeof(regs));
/* default syntax */
syn = RE_SYNTAX_GNU_AWK;
/* parse options, update syntax, ignorecase */
while ((c = getopt(argc, argv, "pit")) != -1) {
switch (c) {
case 'i':
ignorecase = true;
break;
case 'p':
syn = RE_SYNTAX_POSIX_AWK;
break;
case 't':
syn = RE_SYNTAX_AWK;
break;
case '?':
default:
usage(argv[0]);
break;
}
}
if (optind == argc)
usage(argv[0]);
pattern = argv[optind];
len = strlen(pattern);
setlocale(LC_CTYPE, "");
setlocale(LC_COLLATE, "");
setlocale(LC_MESSAGES, "");
setlocale(LC_NUMERIC, "");
setlocale(LC_TIME, "");
printf("Ignorecase: %s\nSyntax: %s\n",
(ignorecase ? "true" : "false"),
reflags2str(syn));
printf("Pattern: /%s/\n", pattern);
pattern = setup_pattern(pattern, len);
len = strlen(pattern);
pat.fastmap = (char *) malloc(256);
if (pat.fastmap == NULL) {
perror("malloc");
exit(EXIT_FAILURE);
}
printf("MB_CUR_MAX = %d\n", (int) MB_CUR_MAX);
if (ignorecase) {
if (MB_CUR_MAX > 1) {
syn |= RE_ICASE;
pat.translate = NULL;
} else {
syn &= ~RE_ICASE;
pat.translate = (RE_TRANSLATE_TYPE) casetable;
}
} else {
pat.translate = NULL;
syn &= ~RE_ICASE;
}
dfa_syn = syn;
if (ignorecase)
dfa_syn |= RE_ICASE;
dfasyntax(dfa_syn, ignorecase, '\n');
re_set_syntax(syn);
if ((rerr = re_compile_pattern(pattern, len, & pat)) != NULL) {
fprintf(stderr, "%s: %s: cannot compile pattern '%s'\n",
argv[0], rerr, pattern);
exit(EXIT_FAILURE);
}
/* gack. this must be done *after* re_compile_pattern */
pat.newline_anchor = false; /* don't get \n in middle of string */
dfareg = dfaalloc();
printf("Calling dfacomp(%s, %d, %p, true)\n",
pattern, (int) len, dfareg);
dfacomp(pattern, len, dfareg, true);
data = databuf(STDIN_FILENO);
/* run the regex matcher */
ret = re_search(& pat, data, len, 0, len, NULL);
printf("re_search returned %d (%s)\n", ret, (ret != 0) ? "true" : "false");
/* run the dfa matcher */
/*
* dfa likes to stick a '\n' right after the matched
* text. So we just save and restore the character.
*/
save = data[len];
place = dfaexec(dfareg, data, data+len, true,
&count, &try_backref);
data[len] = save;
printf("dfaexec returned %p (%.3s)\n", place, place);
/* release storage */
regfree(& pat);
if (regs.start)
free(regs.start);
if (regs.end)
free(regs.end);
dfafree(dfareg);
free(dfareg);
}
/* genflags2str --- general routine to convert a flag value to a string */
const char *
genflags2str(int flagval, const struct flagtab *tab)
{
static char buffer[BUFSIZ];
char *sp;
int i, space_left, space_needed;
sp = buffer;
space_left = BUFSIZ;
for (i = 0; tab[i].name != NULL; i++) {
if ((flagval & tab[i].val) != 0) {
/*
* note the trick, we want 1 or 0 for whether we need
* the '|' character.
*/
space_needed = (strlen(tab[i].name) + (sp != buffer));
if (space_left <= space_needed) {
fprintf(stderr, "buffer overflow in genflags2str");
exit(EXIT_FAILURE);
}
if (sp != buffer) {
*sp++ = '|';
space_left--;
}
strcpy(sp, tab[i].name);
/* note ordering! */
space_left -= strlen(sp);
sp += strlen(sp);
}
}
*sp = '\0';
return buffer;
}
/* reflags2str --- make a regex flags value readable */
const char *
reflags2str(int flagval)
{
static const struct flagtab values[] = {
{ RE_BACKSLASH_ESCAPE_IN_LISTS, "RE_BACKSLASH_ESCAPE_IN_LISTS" },
{ RE_BK_PLUS_QM, "RE_BK_PLUS_QM" },
{ RE_CHAR_CLASSES, "RE_CHAR_CLASSES" },
{ RE_CONTEXT_INDEP_ANCHORS, "RE_CONTEXT_INDEP_ANCHORS" },
{ RE_CONTEXT_INDEP_OPS, "RE_CONTEXT_INDEP_OPS" },
{ RE_CONTEXT_INVALID_OPS, "RE_CONTEXT_INVALID_OPS" },
{ RE_DOT_NEWLINE, "RE_DOT_NEWLINE" },
{ RE_DOT_NOT_NULL, "RE_DOT_NOT_NULL" },
{ RE_HAT_LISTS_NOT_NEWLINE, "RE_HAT_LISTS_NOT_NEWLINE" },
{ RE_INTERVALS, "RE_INTERVALS" },
{ RE_LIMITED_OPS, "RE_LIMITED_OPS" },
{ RE_NEWLINE_ALT, "RE_NEWLINE_ALT" },
{ RE_NO_BK_BRACES, "RE_NO_BK_BRACES" },
{ RE_NO_BK_PARENS, "RE_NO_BK_PARENS" },
{ RE_NO_BK_REFS, "RE_NO_BK_REFS" },
{ RE_NO_BK_VBAR, "RE_NO_BK_VBAR" },
{ RE_NO_EMPTY_RANGES, "RE_NO_EMPTY_RANGES" },
{ RE_UNMATCHED_RIGHT_PAREN_ORD, "RE_UNMATCHED_RIGHT_PAREN_ORD" },
{ RE_NO_POSIX_BACKTRACKING, "RE_NO_POSIX_BACKTRACKING" },
{ RE_NO_GNU_OPS, "RE_NO_GNU_OPS" },
{ RE_INVALID_INTERVAL_ORD, "RE_INVALID_INTERVAL_ORD" },
{ RE_ICASE, "RE_ICASE" },
{ RE_CARET_ANCHORS_HERE, "RE_CARET_ANCHORS_HERE" },
{ RE_CONTEXT_INVALID_DUP, "RE_CONTEXT_INVALID_DUP" },
{ RE_NO_SUB, "RE_NO_SUB" },
{ 0, NULL },
};
if (flagval == RE_SYNTAX_EMACS) /* == 0 */
return "RE_SYNTAX_EMACS";
return genflags2str(flagval, values);
}
/*
* dfawarn() is called by the dfa routines whenever a regex is compiled
* must supply a dfawarn.
*/
void
dfawarn(const char *dfa_warning)
{
fprintf(stderr, "dfa warning: %s\n", dfa_warning);
}
/* dfaerror --- print an error message for the dfa routines */
void
dfaerror(const char *s)
{
fprintf(stderr, "dfa-error: %s\n", s);
exit(EXIT_FAILURE);
}
/* databuf --- read the input file */
char *
databuf(int fd)
{
char *buf;
struct stat sbuf;
ssize_t count;
if (fstat(fd, & sbuf) < 0)
return NULL;
buf = (char *) malloc(sbuf.st_size + 3);
if (buf == NULL)
return NULL;
if ((count = read(fd, buf, sbuf.st_size)) != sbuf.st_size) {
perror("read");
return NULL;
}
buf[sbuf.st_size] = '\0';
(void) close(fd);
return buf;
}
/* xmalloc --- for dfa.c */
void *
xmalloc(size_t bytes)
{
void *p = malloc(bytes);
if (p == NULL) {
fprintf(stderr, "xmalloc: malloc failed: %s\n", strerror(errno));
exit(EXIT_FAILURE);
}
return p;
}
/* r_fatal --- print a fatal error message. also for dfa.c */
void
r_fatal(const char *mesg, ...)
{
va_list args;
va_start(args, mesg);
fprintf(stderr, "fatal: ");
vfprintf(stderr, mesg, args);
va_end(args);
exit(EXIT_FAILURE);
}
/* setup_pattern --- do what gawk does with the pattern string */
char *
setup_pattern(const char *pattern, size_t len)
{
size_t is_multibyte = 0;
int c, c2;
size_t buflen;
mbstate_t mbs;
bool has_anchor = false;
char *buf, *dest;
const char *src, *end;
src = pattern;
end = pattern + len;
/* Handle escaped characters first. */
/*
* Build a copy of the string (in buf) with the
* escaped characters translated, and generate the regex
* from that.
*/
if (buf == NULL) {
buf = (char *) malloc(len + 2);
if (buf == NULL) {
fprintf(stderr, "%s: malloc failed\n", __func__);
exit(EXIT_FAILURE);
}
buflen = len;
} else if (len > buflen) {
buf = (char *) realloc(buf, len + 2);
if (buf == NULL) {
fprintf(stderr, "%s: realloc failed\n", __func__);
exit(EXIT_FAILURE);
}
buflen = len;
}
dest = buf;
while (src < end) {
if (MB_CUR_MAX > 1 && ! is_multibyte) {
/* The previous byte is a singlebyte character, or last byte
of a multibyte character. We check the next character. */
is_multibyte = mbrlen(src, end - src, &mbs);
if ( is_multibyte == 1
|| is_multibyte == (size_t) -1
|| is_multibyte == (size_t) -2
|| is_multibyte == 0) {
/* We treat it as a single-byte character. */
is_multibyte = 0;
}
}
/* We skip multibyte character, since it must not be a special
character. */
if ((MB_CUR_MAX == 1 || ! is_multibyte) &&
(*src == '\\')) {
c = *++src;
switch (c) {
case 'a':
case 'b':
case 'f':
case 'n':
case 'r':
case 't':
case 'v':
case 'x':
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
c2 = parse_escape(&src);
if (c2 < 0) {
fprintf(stderr, "%s: parse_escape failed\n", __func__);
exit(EXIT_FAILURE);
}
/*
* Unix awk treats octal (and hex?) chars
* literally in re's, so escape regexp
* metacharacters.
*/
if (syn == RE_SYNTAX_AWK
&& (isdigit(c) || c == 'x')
&& strchr("()|*+?.^$\\[]", c2) != NULL)
*dest++ = '\\';
*dest++ = (char) c2;
break;
case '8':
case '9': /* a\9b not valid */
*dest++ = c;
src++;
break;
case 'y': /* normally \b */
/* gnu regex op */
if (syn == RE_SYNTAX_GNU_AWK) {
*dest++ = '\\';
*dest++ = 'b';
src++;
break;
}
/* else, fall through */
default:
*dest++ = '\\';
*dest++ = (char) c;
src++;
break;
} /* switch */
} else {
c = *src;
if (c == '^' || c == '$')
has_anchor = true;
*dest++ = *src++; /* not '\\' */
}
if (MB_CUR_MAX > 1 && is_multibyte)
is_multibyte--;
} /* while */
*dest = '\0';
len = dest - buf;
return buf;
}
/*
* parse_escape:
*
* Parse a C escape sequence. STRING_PTR points to a variable containing a
* pointer to the string to parse. That pointer is updated past the
* characters we use. The value of the escape sequence is returned.
*
* A negative value means the sequence \ newline was seen, which is supposed to
* be equivalent to nothing at all.
*
* If \ is followed by a null character, we return a negative value and leave
* the string pointer pointing at the null character.
*
* If \ is followed by 000, we return 0 and leave the string pointer after the
* zeros. A value of 0 does not mean end of string.
*
* POSIX doesn't allow \x.
*/
int
parse_escape(const char **string_ptr)
{
int c = *(*string_ptr)++;
int i;
int count;
int j;
const char *start;
switch (c) {
case 'a':
return '\a';
case 'b':
return '\b';
case 'f':
return '\f';
case 'n':
return '\n';
case 'r':
return '\r';
case 't':
return '\t';
case 'v':
return '\v';
case '\n':
return -2;
case 0:
(*string_ptr)--;
return -1;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
i = c - '0';
count = 0;
while (++count < 3) {
if ((c = *(*string_ptr)++) >= '0' && c <= '7') {
i *= 8;
i += c - '0';
} else {
(*string_ptr)--;
break;
}
}
return i;
case 'x':
if (! isxdigit((unsigned char) (*string_ptr)[0])) {
return ('x');
}
i = j = 0;
start = *string_ptr;
for (;; j++) {
/* do outside test to avoid multiple side effects */
c = *(*string_ptr)++;
if (isxdigit(c)) {
i *= 16;
if (isdigit(c))
i += c - '0';
else if (isupper(c))
i += c - 'A' + 10;
else
i += c - 'a' + 10;
} else {
(*string_ptr)--;
break;
}
}
return i;
case '\\':
case '"':
return c;
default:
return c;
}
}
/* This rather ugly macro is for VMS C */
#ifdef C
#undef C
#endif
#define C(c) ((char)c)
/*
* This table is used by the regexp routines to do case independent
* matching. Basically, every ascii character maps to itself, except
* uppercase letters map to lower case ones. This table has 256
* entries, for ISO 8859-1. Note also that if the system this
* is compiled on doesn't use 7-bit ascii, casetable[] should not be
* defined to the linker, so gawk should not load.
*
* Do NOT make this array static, it is used in several spots, not
* just in this file.
*
* 6/2004:
* This table is also used for IGNORECASE for == and !=, and index().
* Although with GLIBC, we could use tolower() everywhere and RE_ICASE
* for the regex matcher, precomputing this table once gives us a
* performance improvement. I also think it's better for portability
* to non-GLIBC systems. All the world is not (yet :-) GNU/Linux.
*/
#if 'a' == 97 /* it's ascii */
char casetable[] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
/* ' ' '!' '"' '#' '$' '%' '&' ''' */
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
/* '(' ')' '*' '+' ',' '-' '.' '/' */
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
/* '0' '1' '2' '3' '4' '5' '6' '7' */
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
/* '8' '9' ':' ';' '<' '=' '>' '?' */
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
/* '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' */
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
/* 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' */
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
/* 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' */
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
/* 'X' 'Y' 'Z' '[' '\' ']' '^' '_' */
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
/* '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' */
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
/* 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' */
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
/* 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' */
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
/* 'x' 'y' 'z' '{' '|' '}' '~' */
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
/* Latin 1: */
C('\200'), C('\201'), C('\202'), C('\203'), C('\204'), C('\205'), C('\206'), C('\207'),
C('\210'), C('\211'), C('\212'), C('\213'), C('\214'), C('\215'), C('\216'), C('\217'),
C('\220'), C('\221'), C('\222'), C('\223'), C('\224'), C('\225'), C('\226'), C('\227'),
C('\230'), C('\231'), C('\232'), C('\233'), C('\234'), C('\235'), C('\236'), C('\237'),
C('\240'), C('\241'), C('\242'), C('\243'), C('\244'), C('\245'), C('\246'), C('\247'),
C('\250'), C('\251'), C('\252'), C('\253'), C('\254'), C('\255'), C('\256'), C('\257'),
C('\260'), C('\261'), C('\262'), C('\263'), C('\264'), C('\265'), C('\266'), C('\267'),
C('\270'), C('\271'), C('\272'), C('\273'), C('\274'), C('\275'), C('\276'), C('\277'),
C('\340'), C('\341'), C('\342'), C('\343'), C('\344'), C('\345'), C('\346'), C('\347'),
C('\350'), C('\351'), C('\352'), C('\353'), C('\354'), C('\355'), C('\356'), C('\357'),
C('\360'), C('\361'), C('\362'), C('\363'), C('\364'), C('\365'), C('\366'), C('\327'),
C('\370'), C('\371'), C('\372'), C('\373'), C('\374'), C('\375'), C('\376'), C('\337'),
C('\340'), C('\341'), C('\342'), C('\343'), C('\344'), C('\345'), C('\346'), C('\347'),
C('\350'), C('\351'), C('\352'), C('\353'), C('\354'), C('\355'), C('\356'), C('\357'),
C('\360'), C('\361'), C('\362'), C('\363'), C('\364'), C('\365'), C('\366'), C('\367'),
C('\370'), C('\371'), C('\372'), C('\373'), C('\374'), C('\375'), C('\376'), C('\377'),
};
#elif 'a' == 0x81 /* it's EBCDIC */
char casetable[] = {
/*00 NU SH SX EX PF HT LC DL */
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
/*08 SM VT FF CR SO SI */
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
/*10 DE D1 D2 TM RS NL BS IL */
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
/*18 CN EM CC C1 FS GS RS US */
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
/*20 DS SS FS BP LF EB EC */
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
/*28 SM C2 EQ AK BL */
0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
/*30 SY PN RS UC ET */
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
/*38 C3 D4 NK SU */
0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
/*40 SP */
0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
/*48 CENT . < ( + | */
0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
/*50 & */
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
/*58 ! $ * ) ; ^ */
0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
/*60 - / */
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
/*68 | , % _ > ? */
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/*70 */
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
/*78 ` : # @ ' = " */
0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
/*80 a b c d e f g */
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
/*88 h i { */
0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
/*90 j k l m n o p */
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
/*98 q r } */
0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
/*A0 ~ s t u v w x */
0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
/*A8 y z [ */
0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
/*B0 */
0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
/*B8 ] */
0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
/*C0 { A B C D E F G */
0xC0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
/*C8 H I */
0x88, 0x89, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
/*D0 } J K L M N O P */
0xD0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
/*D8 Q R */
0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
/*E0 \ S T U V W X */
0xE0, 0xE1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
/*E8 Y Z */
0xA8, 0xA9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
/*F0 0 1 2 3 4 5 6 7 */
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
/*F8 8 9 */
0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
};
#else
#include "You lose. You will need a translation table for your character set."
#endif
#undef C
/* xalloc.h -- malloc with out-of-memory checking
Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
2000, 2003, 2004, 2006, 2007, 2008, 2009, 2010 Free Software Foundation,
Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#ifndef XALLOC_H_
# define XALLOC_H_
# include <stddef.h>
# ifdef __cplusplus
extern "C" {
# endif
# ifndef __attribute__
# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 8)
# define __attribute__(x)
# endif
# endif
# ifndef ATTRIBUTE_NORETURN
# define ATTRIBUTE_NORETURN __attribute__ ((__noreturn__))
# endif
# ifndef ATTRIBUTE_MALLOC
# if __GNUC__ >= 3
# define ATTRIBUTE_MALLOC __attribute__ ((__malloc__))
# else
# define ATTRIBUTE_MALLOC
# endif
# endif
/* This function is always triggered when memory is exhausted.
It must be defined by the application, either explicitly
or by using gnulib's xalloc-die module. This is the
function to call when one wants the program to die because of a
memory allocation failure. */
extern void xalloc_die (void);
void *xmalloc (size_t s) ATTRIBUTE_MALLOC;
void *xzalloc (size_t s) ATTRIBUTE_MALLOC;
void *xcalloc (size_t n, size_t s) ATTRIBUTE_MALLOC;
void *xrealloc (void *p, size_t s);
void *x2realloc (void *p, size_t *pn);
void *xmemdup (void const *p, size_t s) ATTRIBUTE_MALLOC;
char *xstrdup (char const *str) ATTRIBUTE_MALLOC;
/* Return 1 if an array of N objects, each of size S, cannot exist due
to size arithmetic overflow. S must be positive and N must be
nonnegative. This is a macro, not an inline function, so that it
works correctly even when SIZE_MAX < N.
By gnulib convention, SIZE_MAX represents overflow in size
calculations, so the conservative dividend to use here is
SIZE_MAX - 1, since SIZE_MAX might represent an overflowed value.
However, malloc (SIZE_MAX) fails on all known hosts where
sizeof (ptrdiff_t) <= sizeof (size_t), so do not bother to test for
exactly-SIZE_MAX allocations on such hosts; this avoids a test and
branch when S is known to be 1. */
# define xalloc_oversized(n, s) \
((size_t) (sizeof (ptrdiff_t) <= sizeof (size_t) ? -1 : -2) / (s) < (n))
/* In the following macros, T must be an elementary or structure/union or
typedef'ed type, or a pointer to such a type. To apply one of the
following macros to a function pointer or array type, you need to typedef
it first and use the typedef name. */
/* Allocate an object of type T dynamically, with error checking. */
/* extern t *XMALLOC (typename t); */
# define XMALLOC(t) ((t *) xmalloc (sizeof (t)))
/* Allocate memory for N elements of type T, with error checking. */
/* extern t *XNMALLOC (size_t n, typename t); */
# define XNMALLOC(n, t) \
((t *) (sizeof (t) == 1 ? xmalloc (n) : xnmalloc (n, sizeof (t))))
/* Allocate an object of type T dynamically, with error checking,
and zero it. */
/* extern t *XZALLOC (typename t); */
# define XZALLOC(t) ((t *) xzalloc (sizeof (t)))
/* Allocate memory for N elements of type T, with error checking,
and zero it. */
/* extern t *XCALLOC (size_t n, typename t); */
# define XCALLOC(n, t) \
((t *) (sizeof (t) == 1 ? xzalloc (n) : xcalloc (n, sizeof (t))))
/*
* Gawk uses this file only to keep dfa.c happy.
* We're therefore safe in manually defining HAVE_INLINE to
* make the !@#$%^&*() thing just work.
*/
#ifdef GAWK
#define HAVE_INLINE 1 /* so there. nyah, nyah, nyah. */
#endif
# if HAVE_INLINE
# define static_inline static inline
# else
void *xnmalloc (size_t n, size_t s) ATTRIBUTE_MALLOC;
void *xnrealloc (void *p, size_t n, size_t s);
void *x2nrealloc (void *p, size_t *pn, size_t s);
char *xcharalloc (size_t n) ATTRIBUTE_MALLOC;
# endif
/* Allocate an array of N objects, each with S bytes of memory,
dynamically, with error checking. S must be nonzero. */
void *
xnmalloc (size_t n, size_t s)
{
if (xalloc_oversized (n, s))
xalloc_die ();
return xmalloc (n * s);
}
/* Allocate an array of N objects, each with S bytes of memory,
dynamically, with error checking. S must be nonzero.
Clear the contents afterwards. */
void *
xcalloc(size_t nmemb, size_t size)
{
void *p = xmalloc (nmemb * size);
memset(p, '\0', nmemb * size);
return p;
}
/* Reallocate a pointer to a new size, with error checking. */
void *
xrealloc(void *p, size_t size)
{
void *new_p = realloc(p, size);
if (new_p == 0)
xalloc_die ();
return new_p;
}
/* xalloc_die --- fatal error message when malloc fails, needed by dfa.c */
void
xalloc_die (void)
{
r_fatal("xalloc: malloc failed: %s"), strerror(errno);
}
/* Clone an object P of size S, with error checking. There's no need
for xnmemdup (P, N, S), since xmemdup (P, N * S) works without any
need for an arithmetic overflow check. */
void *
xmemdup (void const *p, size_t s)
{
return memcpy (xmalloc (s), p, s);
}
/* Change the size of an allocated block of memory P to an array of N
objects each of S bytes, with error checking. S must be nonzero. */
void *
xnrealloc (void *p, size_t n, size_t s)
{
if (xalloc_oversized (n, s))
xalloc_die ();
return xrealloc (p, n * s);
}
/* If P is null, allocate a block of at least *PN such objects;
otherwise, reallocate P so that it contains more than *PN objects
each of S bytes. *PN must be nonzero unless P is null, and S must
be nonzero. Set *PN to the new number of objects, and return the
pointer to the new block. *PN is never set to zero, and the
returned pointer is never null.
Repeated reallocations are guaranteed to make progress, either by
allocating an initial block with a nonzero size, or by allocating a
larger block.
In the following implementation, nonzero sizes are increased by a
factor of approximately 1.5 so that repeated reallocations have
O(N) overall cost rather than O(N**2) cost, but the
specification for this function does not guarantee that rate.
Here is an example of use:
int *p = NULL;
size_t used = 0;
size_t allocated = 0;
void
append_int (int value)
{
if (used == allocated)
p = x2nrealloc (p, &allocated, sizeof *p);
p[used++] = value;
}
This causes x2nrealloc to allocate a block of some nonzero size the
first time it is called.
To have finer-grained control over the initial size, set *PN to a
nonzero value before calling this function with P == NULL. For
example:
int *p = NULL;
size_t used = 0;
size_t allocated = 0;
size_t allocated1 = 1000;
void
append_int (int value)
{
if (used == allocated)
{
p = x2nrealloc (p, &allocated1, sizeof *p);
allocated = allocated1;
}
p[used++] = value;
}
*/
void *
x2nrealloc (void *p, size_t *pn, size_t s)
{
size_t n = *pn;
if (! p)
{
if (! n)
{
/* The approximate size to use for initial small allocation
requests, when the invoking code specifies an old size of
zero. 64 bytes is the largest "small" request for the
GNU C library malloc. */
enum { DEFAULT_MXFAST = 64 };
n = DEFAULT_MXFAST / s;
n += !n;
}
}
else
{
/* Set N = ceil (1.5 * N) so that progress is made if N == 1.
Check for overflow, so that N * S stays in size_t range.
The check is slightly conservative, but an exact check isn't
worth the trouble. */
if ((size_t) -1 / 3 * 2 / s <= n)
xalloc_die ();
n += (n + 1) / 2;
}
*pn = n;
return xrealloc (p, n * s);
}
/* Return a pointer to a new buffer of N bytes. This is like xmalloc,
except it returns char *. */
char *
xcharalloc (size_t n)
{
return XNMALLOC (n, char);
}
/* Allocate S bytes of zeroed memory dynamically, with error checking.
There's no need for xnzalloc (N, S), since it would be equivalent
to xcalloc (N, S). */
void *
xzalloc (size_t s)
{
return memset (xmalloc (s), 0, s);
}
# endif
# ifdef __cplusplus
}
/* C++ does not allow conversions from void * to other pointer types
without a cast. Use templates to work around the problem when
possible. */
template <typename T> inline T *
xrealloc (T *p, size_t s)
{
return (T *) xrealloc ((void *) p, s);
}
template <typename T> inline T *
xnrealloc (T *p, size_t n, size_t s)
{
return (T *) xnrealloc ((void *) p, n, s);
}
template <typename T> inline T *
x2realloc (T *p, size_t *pn)
{
return (T *) x2realloc ((void *) p, pn);
}
template <typename T> inline T *
x2nrealloc (T *p, size_t *pn, size_t s)
{
return (T *) x2nrealloc ((void *) p, pn, s);
}
template <typename T> inline T *
xmemdup (T const *p, size_t s)
{
return (T *) xmemdup ((void const *) p, s);
}
#endif /* !XALLOC_H_ */
(([^ \?.]*\?pos=ad |([^ \?.]*\?pos=(jj|va) )[地]\?pos=dev ){0,2})
(([^ \?.]*\?pos=ad |([^ \?.]*\?pos=(jj|va) )地\?pos=dev ){0,2})
很?pos=ad 宽广?pos=va , 更?pos=ad 是?pos=vc1