http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/regexp.c ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/jam-files/engine/regexp.c b/ext/kenlm/jam-files/engine/regexp.c deleted file mode 100644 index c64201b..0000000 --- a/ext/kenlm/jam-files/engine/regexp.c +++ /dev/null @@ -1,1329 +0,0 @@ -/* - * regcomp and regexec -- regsub and regerror are elsewhere - * - * Copyright (c) 1986 by University of Toronto. - * Written by Henry Spencer. Not derived from licensed software. - * - * Permission is granted to anyone to use this software for any - * purpose on any computer system, and to redistribute it freely, - * subject to the following restrictions: - * - * 1. The author is not responsible for the consequences of use of - * this software, no matter how awful, even if they arise - * from defects in it. - * - * 2. The origin of this software must not be misrepresented, either - * by explicit claim or by omission. - * - * 3. Altered versions must be plainly marked as such, and must not - * be misrepresented as being the original software. - *** THIS IS AN ALTERED VERSION. It was altered by John Gilmore, - *** hoptoad!gnu, on 27 Dec 1986, to add \n as an alternative to | - *** to assist in implementing egrep. - *** THIS IS AN ALTERED VERSION. It was altered by John Gilmore, - *** hoptoad!gnu, on 27 Dec 1986, to add \< and \> for word-matching - *** as in BSD grep and ex. - *** THIS IS AN ALTERED VERSION. It was altered by John Gilmore, - *** hoptoad!gnu, on 28 Dec 1986, to optimize characters quoted with \. - *** THIS IS AN ALTERED VERSION. It was altered by James A. Woods, - *** ames!jaw, on 19 June 1987, to quash a regcomp() redundancy. - *** THIS IS AN ALTERED VERSION. 
It was altered by Christopher Seiwald - *** [email protected], on 28 August 1993, for use in jam. Regmagic.h - *** was moved into regexp.h, and the include of regexp.h now uses "'s - *** to avoid conflicting with the system regexp.h. Const, bless its - *** soul, was removed so it can compile everywhere. The declaration - *** of strchr() was in conflict on AIX, so it was removed (as it is - *** happily defined in string.h). - *** THIS IS AN ALTERED VERSION. It was altered by Christopher Seiwald - *** [email protected], on 20 January 2000, to use function prototypes. - * - * Beware that some of this code is subtly aware of the way operator precedence - * is structured in regular expressions. Serious changes in regular-expression - * syntax might require a total rethink. - */ - - -#include "jam.h" -#include "regexp.h" - -#include <stdio.h> -#include <ctype.h> -#ifndef ultrix -# include <stdlib.h> -#endif -#include <string.h> - - -/* - * The "internal use only" fields in regexp.h are present to pass info from - * compile to execute that permits the execute phase to run lots faster on - * simple cases. They are: - : - * regstart char that must begin a match; '\0' if none obvious. - * reganch is the match anchored (at beginning-of-line only)? - * regmust string (pointer into program) that match must include, or NULL. - * regmlen length of regmust string. - * - * Regstart and reganch permit very fast decisions on suitable starting points - * for a match, cutting down the work a lot. Regmust permits fast rejection of - * lines that cannot possibly match. The regmust tests are costly enough that - * regcomp() supplies a regmust only if the r.e. contains something potentially - * expensive (at present, the only such thing detected is * or + at the start of - * the r.e., which can involve a lot of backup). Regmlen is supplied because the - * test in regexec() needs it and regcomp() is computing it anyway. - */ - -/* - * Structure for regexp "program". 
This is essentially a linear encoding of a - * nondeterministic finite-state machine (aka syntax charts or "railroad normal - * form" in parsing technology). Each node is an opcode plus a "next" pointer, - * possibly plus an operand. "Next" pointers of all nodes except BRANCH - * implement concatenation; a "next" pointer with a BRANCH on both ends of it is - * connecting two alternatives. [Here we have one of the subtle syntax - * dependencies: an individual BRANCH, as opposed to a collection of them, is - * never concatenated with anything because of operator precedence.] The operand - * of some types of node is a literal string; for others, it is a node leading - * into a sub-FSM. In particular, the operand of a BRANCH node is the first node - * of the branch. [NB this is *not* a tree structure: the tail of the branch - * connects to the thing following the set of BRANCHes.] The opcodes are: - */ - -/* definition number opnd? meaning */ -#define END 0 /* no End of program. */ -#define BOL 1 /* no Match "" at beginning of line. */ -#define EOL 2 /* no Match "" at end of line. */ -#define ANY 3 /* no Match any one character. */ -#define ANYOF 4 /* str Match any character in this string. */ -#define ANYBUT 5 /* str Match any character not in this string. */ -#define BRANCH 6 /* node Match this alternative, or the next... */ -#define BACK 7 /* no Match "", "next" ptr points backward. */ -#define EXACTLY 8 /* str Match this string. */ -#define NOTHING 9 /* no Match empty string. */ -#define STAR 10 /* node Match this (simple) thing 0 or more times. */ -#define PLUS 11 /* node Match this (simple) thing 1 or more times. */ -#define WORDA 12 /* no Match "" at wordchar, where prev is nonword */ -#define WORDZ 13 /* no Match "" at nonwordchar, where prev is word */ -#define OPEN 20 /* no Mark this point in input as start of #n. */ - /* OPEN+1 is number 1, etc. */ -#define CLOSE 30 /* no Analogous to OPEN. 
*/ - - -/* - * Opcode notes: - * - * BRANCH The set of branches constituting a single choice are hooked - * together with their "next" pointers, since precedence prevents - * anything being concatenated to any individual branch. The - * "next" pointer of the last BRANCH in a choice points to the - * thing following the whole choice. This is also where the - * final "next" pointer of each individual branch points; each - * branch starts with the operand node of a BRANCH node. - * - * BACK Normal "next" pointers all implicitly point forward; BACK - * exists to make loop structures possible. - * - * STAR,PLUS '?', and complex '*' and '+', are implemented as circular - * BRANCH structures using BACK. Simple cases (one character - * per match) are implemented with STAR and PLUS for speed - * and to minimize recursive plunges. - * - * OPEN,CLOSE ...are numbered at compile time. - */ - -/* - * A node is one char of opcode followed by two chars of "next" pointer. - * "Next" pointers are stored as two 8-bit pieces, high order first. The - * value is a positive offset from the opcode of the node containing it. - * An operand, if any, simply follows the node. (Note that much of the - * code generation knows about this implicit relationship.) - * - * Using two bytes for the "next" pointer is vast overkill for most things, - * but allows patterns to get big without disasters. - */ -#define OP(p) (*(p)) -#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377)) -#define OPERAND(p) ((p) + 3) - -/* - * See regmagic.h for one further detail of program structure. - */ - - -/* - * Utility definitions. - */ -#ifndef CHARBITS -#define UCHARAT(p) ((int)*(const unsigned char *)(p)) -#else -#define UCHARAT(p) ((int)*(p)&CHARBITS) -#endif - -#define FAIL(m) { regerror(m); return(NULL); } -#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?') - -/* - * Flags to be passed up and down. - */ -#define HASWIDTH 01 /* Known never to match null string. 
*/ -#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */ -#define SPSTART 04 /* Starts with * or +. */ -#define WORST 0 /* Worst case. */ - -/* - * Global work variables for regcomp(). - */ -static char *regparse; /* Input-scan pointer. */ -static int regnpar; /* () count. */ -static char regdummy; -static char *regcode; /* Code-emit pointer; &regdummy = don't. */ -static long regsize; /* Code size. */ - -/* - * Forward declarations for regcomp()'s friends. - */ -#ifndef STATIC -#define STATIC static -#endif -STATIC char *reg( int paren, int *flagp ); -STATIC char *regbranch( int *flagp ); -STATIC char *regpiece( int *flagp ); -STATIC char *regatom( int *flagp ); -STATIC char *regnode( int op ); -STATIC char *regnext( register char *p ); -STATIC void regc( int b ); -STATIC void reginsert( char op, char *opnd ); -STATIC void regtail( char *p, char *val ); -STATIC void regoptail( char *p, char *val ); -#ifdef STRCSPN -STATIC int strcspn(); -#endif - -/* - - regcomp - compile a regular expression into internal code - * - * We can't allocate space until we know how big the compiled form will be, - * but we can't compile it (and thus know how big it is) until we've got a - * place to put the code. So we cheat: we compile it twice, once with code - * generation turned off and size counting turned on, and once "for real". - * This also means that we don't allocate space until we are sure that the - * thing really will compile successfully, and we never have to move the - * code and thus invalidate pointers into it. (Note that it has to be in - * one piece because free() must be able to free it all.) - * - * Beware that the optimization-preparation code in here knows about some - * of the structure of the compiled regexp. - */ -regexp * -regcomp( const char *exp ) -{ - register regexp *r; - register char *scan; - register char *longest; - register unsigned len; - int flags; - - if (exp == NULL) - FAIL("NULL argument"); - - /* First pass: determine size, legality.
*/ -#ifdef notdef - if (exp[0] == '.' && exp[1] == '*') exp += 2; /* aid grep */ -#endif - regparse = (char *)exp; - regnpar = 1; - regsize = 0L; - regcode = &regdummy; - regc(MAGIC); - if (reg(0, &flags) == NULL) - return(NULL); - - /* Small enough for pointer-storage convention? */ - if (regsize >= 32767L) /* Probably could be 65535L. */ - FAIL("regexp too big"); - - /* Allocate space. */ - r = (regexp *)BJAM_MALLOC(sizeof(regexp) + (unsigned)regsize); - if (r == NULL) - FAIL("out of space"); - - /* Second pass: emit code. */ - regparse = (char *)exp; - regnpar = 1; - regcode = r->program; - regc(MAGIC); - if (reg(0, &flags) == NULL) - return(NULL); - - /* Dig out information for optimizations. */ - r->regstart = '\0'; /* Worst-case defaults. */ - r->reganch = 0; - r->regmust = NULL; - r->regmlen = 0; - scan = r->program+1; /* First BRANCH. */ - if (OP(regnext(scan)) == END) { /* Only one top-level choice. */ - scan = OPERAND(scan); - - /* Starting-point info. */ - if (OP(scan) == EXACTLY) - r->regstart = *OPERAND(scan); - else if (OP(scan) == BOL) - r->reganch++; - - /* - * If there's something expensive in the r.e., find the - * longest literal string that must appear and make it the - * regmust. Resolve ties in favor of later strings, since - * the regstart check works with the beginning of the r.e. - * and avoiding duplication strengthens checking. Not a - * strong reason, but sufficient in the absence of others. - */ - if (flags&SPSTART) { - longest = NULL; - len = 0; - for (; scan != NULL; scan = regnext(scan)) - if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) { - longest = OPERAND(scan); - len = strlen(OPERAND(scan)); - } - r->regmust = longest; - r->regmlen = len; - } - } - - return(r); -} - -/* - - reg - regular expression, i.e. main body or parenthesized thing - * - * Caller must absorb opening parenthesis.
- * - * Combining parenthesis handling with the base level of regular expression - * is a trifle forced, but the need to tie the tails of the branches to what - * follows makes it hard to avoid. - */ -static char * -reg( - int paren, /* Parenthesized? */ - int *flagp ) -{ - register char *ret; - register char *br; - register char *ender; - register int parno = 0; - int flags; - - *flagp = HASWIDTH; /* Tentatively. */ - - /* Make an OPEN node, if parenthesized. */ - if (paren) { - if (regnpar >= NSUBEXP) - FAIL("too many ()"); - parno = regnpar; - regnpar++; - ret = regnode(OPEN+parno); - } else - ret = NULL; - - /* Pick up the branches, linking them together. */ - br = regbranch(&flags); - if (br == NULL) - return(NULL); - if (ret != NULL) - regtail(ret, br); /* OPEN -> first. */ - else - ret = br; - if (!(flags&HASWIDTH)) - *flagp &= ~HASWIDTH; - *flagp |= flags&SPSTART; - while (*regparse == '|' || *regparse == '\n') { - regparse++; - br = regbranch(&flags); - if (br == NULL) - return(NULL); - regtail(ret, br); /* BRANCH -> BRANCH. */ - if (!(flags&HASWIDTH)) - *flagp &= ~HASWIDTH; - *flagp |= flags&SPSTART; - } - - /* Make a closing node, and hook it on the end. */ - ender = regnode((paren) ? CLOSE+parno : END); - regtail(ret, ender); - - /* Hook the tails of the branches to the closing node. */ - for (br = ret; br != NULL; br = regnext(br)) - regoptail(br, ender); - - /* Check for proper termination. */ - if (paren && *regparse++ != ')') { - FAIL("unmatched ()"); - } else if (!paren && *regparse != '\0') { - if (*regparse == ')') { - FAIL("unmatched ()"); - } else - FAIL("junk on end"); /* "Can't happen". */ - /* NOTREACHED */ - } - - return(ret); -} - -/* - - regbranch - one alternative of an | operator - * - * Implements the concatenation operator. - */ -static char * -regbranch( int *flagp ) -{ - register char *ret; - register char *chain; - register char *latest; - int flags; - - *flagp = WORST; /* Tentatively. 
*/ - - ret = regnode(BRANCH); - chain = NULL; - while (*regparse != '\0' && *regparse != ')' && - *regparse != '\n' && *regparse != '|') { - latest = regpiece(&flags); - if (latest == NULL) - return(NULL); - *flagp |= flags&HASWIDTH; - if (chain == NULL) /* First piece. */ - *flagp |= flags&SPSTART; - else - regtail(chain, latest); - chain = latest; - } - if (chain == NULL) /* Loop ran zero times. */ - (void) regnode(NOTHING); - - return(ret); -} - -/* - - regpiece - something followed by possible [*+?] - * - * Note that the branching code sequences used for ? and the general cases - * of * and + are somewhat optimized: they use the same NOTHING node as - * both the endmarker for their branch list and the body of the last branch. - * It might seem that this node could be dispensed with entirely, but the - * endmarker role is not redundant. - */ -static char * -regpiece( int *flagp ) -{ - register char *ret; - register char op; - register char *next; - int flags; - - ret = regatom(&flags); - if (ret == NULL) - return(NULL); - - op = *regparse; - if (!ISMULT(op)) { - *flagp = flags; - return(ret); - } - - if (!(flags&HASWIDTH) && op != '?') - FAIL("*+ operand could be empty"); - *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH); - - if (op == '*' && (flags&SIMPLE)) - reginsert(STAR, ret); - else if (op == '*') { - /* Emit x* as (x&|), where & means "self". */ - reginsert(BRANCH, ret); /* Either x */ - regoptail(ret, regnode(BACK)); /* and loop */ - regoptail(ret, ret); /* back */ - regtail(ret, regnode(BRANCH)); /* or */ - regtail(ret, regnode(NOTHING)); /* null. */ - } else if (op == '+' && (flags&SIMPLE)) - reginsert(PLUS, ret); - else if (op == '+') { - /* Emit x+ as x(&|), where & means "self". */ - next = regnode(BRANCH); /* Either */ - regtail(ret, next); - regtail(regnode(BACK), ret); /* loop back */ - regtail(next, regnode(BRANCH)); /* or */ - regtail(ret, regnode(NOTHING)); /* null. */ - } else if (op == '?') { - /* Emit x? 
as (x|) */ - reginsert(BRANCH, ret); /* Either x */ - regtail(ret, regnode(BRANCH)); /* or */ - next = regnode(NOTHING); /* null. */ - regtail(ret, next); - regoptail(ret, next); - } - regparse++; - if (ISMULT(*regparse)) - FAIL("nested *?+"); - - return(ret); -} - -/* - - regatom - the lowest level - * - * Optimization: gobbles an entire sequence of ordinary characters so that - * it can turn them into a single node, which is smaller to store and - * faster to run. Backslashed characters are exceptions, each becoming a - * separate node; the code is simpler that way and it's not worth fixing. - */ -static char * -regatom( int *flagp ) -{ - register char *ret; - int flags; - - *flagp = WORST; /* Tentatively. */ - - switch (*regparse++) { - /* FIXME: these chars only have meaning at beg/end of pat? */ - case '^': - ret = regnode(BOL); - break; - case '$': - ret = regnode(EOL); - break; - case '.': - ret = regnode(ANY); - *flagp |= HASWIDTH|SIMPLE; - break; - case '[': { - register int classr; - register int classend; - - if (*regparse == '^') { /* Complement of range. */ - ret = regnode(ANYBUT); - regparse++; - } else - ret = regnode(ANYOF); - if (*regparse == ']' || *regparse == '-') - regc(*regparse++); - while (*regparse != '\0' && *regparse != ']') { - if (*regparse == '-') { - regparse++; - if (*regparse == ']' || *regparse == '\0') - regc('-'); - else { - classr = UCHARAT(regparse-2)+1; - classend = UCHARAT(regparse); - if (classr > classend+1) - FAIL("invalid [] range"); - for (; classr <= classend; classr++) - regc(classr); - regparse++; - } - } else - regc(*regparse++); - } - regc('\0'); - if (*regparse != ']') - FAIL("unmatched []"); - regparse++; - *flagp |= HASWIDTH|SIMPLE; - } - break; - case '(': - ret = reg(1, &flags); - if (ret == NULL) - return(NULL); - *flagp |= flags&(HASWIDTH|SPSTART); - break; - case '\0': - case '|': - case '\n': - case ')': - FAIL("internal urp"); /* Supposed to be caught earlier. 
*/ - break; - case '?': - case '+': - case '*': - FAIL("?+* follows nothing"); - break; - case '\\': - switch (*regparse++) { - case '\0': - FAIL("trailing \\"); - break; - case '<': - ret = regnode(WORDA); - break; - case '>': - ret = regnode(WORDZ); - break; - /* FIXME: Someday handle \1, \2, ... */ - default: - /* Handle general quoted chars in exact-match routine */ - goto de_fault; - } - break; - de_fault: - default: - /* - * Encode a string of characters to be matched exactly. - * - * This is a bit tricky due to quoted chars and due to - * '*', '+', and '?' taking the SINGLE char previous - * as their operand. - * - * On entry, the char at regparse[-1] is going to go - * into the string, no matter what it is. (It could be - * following a \ if we are entered from the '\' case.) - * - * Basic idea is to pick up a good char in ch and - * examine the next char. If it's *+? then we twiddle. - * If it's \ then we frozzle. If it's other magic char - * we push ch and terminate the string. If none of the - * above, we push ch on the string and go around again. - * - * regprev is used to remember where "the current char" - * starts in the string, if due to a *+? we need to back - * up and put the current char in a separate, 1-char, string. - * When regprev is NULL, ch is the only char in the - * string; this is used in *+? handling, and in setting - * flags |= SIMPLE at the end. 
- */ - { - char *regprev; - register char ch; - - regparse--; /* Look at cur char */ - ret = regnode(EXACTLY); - for ( regprev = 0 ; ; ) { - ch = *regparse++; /* Get current char */ - switch (*regparse) { /* look at next one */ - - default: - regc(ch); /* Add cur to string */ - break; - - case '.': case '[': case '(': - case ')': case '|': case '\n': - case '$': case '^': - case '\0': - /* FIXME, $ and ^ should not always be magic */ - magic: - regc(ch); /* dump cur char */ - goto done; /* and we are done */ - - case '?': case '+': case '*': - if (!regprev) /* If just ch in str, */ - goto magic; /* use it */ - /* End mult-char string one early */ - regparse = regprev; /* Back up parse */ - goto done; - - case '\\': - regc(ch); /* Cur char OK */ - switch (regparse[1]){ /* Look after \ */ - case '\0': - case '<': - case '>': - /* FIXME: Someday handle \1, \2, ... */ - goto done; /* Not quoted */ - default: - /* Backup point is \, scan * point is after it. */ - regprev = regparse; - regparse++; - continue; /* NOT break; */ - } - } - regprev = regparse; /* Set backup point */ - } - done: - regc('\0'); - *flagp |= HASWIDTH; - if (!regprev) /* One char? */ - *flagp |= SIMPLE; - } - break; - } - - return(ret); -} - -/* - - regnode - emit a node - */ -static char * /* Location. */ -regnode( int op ) -{ - register char *ret; - register char *ptr; - - ret = regcode; - if (ret == &regdummy) { - regsize += 3; - return(ret); - } - - ptr = ret; - *ptr++ = op; - *ptr++ = '\0'; /* Null "next" pointer. */ - *ptr++ = '\0'; - regcode = ptr; - - return(ret); -} - -/* - - regc - emit (if appropriate) a byte of code - */ -static void -regc( int b ) -{ - if (regcode != &regdummy) - *regcode++ = b; - else - regsize++; -} - -/* - - reginsert - insert an operator in front of already-emitted operand - * - * Means relocating the operand.
- */ -static void -reginsert( - char op, - char *opnd ) -{ - register char *src; - register char *dst; - register char *place; - - if (regcode == &regdummy) { - regsize += 3; - return; - } - - src = regcode; - regcode += 3; - dst = regcode; - while (src > opnd) - *--dst = *--src; - - place = opnd; /* Op node, where operand used to be. */ - *place++ = op; - *place++ = '\0'; - *place++ = '\0'; -} - -/* - - regtail - set the next-pointer at the end of a node chain - */ -static void -regtail( - char *p, - char *val ) -{ - register char *scan; - register char *temp; - register int offset; - - if (p == &regdummy) - return; - - /* Find last node. */ - scan = p; - for (;;) { - temp = regnext(scan); - if (temp == NULL) - break; - scan = temp; - } - - if (OP(scan) == BACK) - offset = scan - val; - else - offset = val - scan; - *(scan+1) = (offset>>8)&0377; - *(scan+2) = offset&0377; -} - -/* - - regoptail - regtail on operand of first argument; nop if operandless - */ - -static void -regoptail( - char *p, - char *val ) -{ - /* "Operandless" and "op != BRANCH" are synonymous in practice. */ - if (p == NULL || p == &regdummy || OP(p) != BRANCH) - return; - regtail(OPERAND(p), val); -} - -/* - * regexec and friends - */ - -/* - * Global work variables for regexec(). - */ -static const char *reginput; /* String-input pointer. */ -static const char *regbol; /* Beginning of input, for ^ check. */ -static const char **regstartp; /* Pointer to startp array. */ -static const char **regendp; /* Ditto for endp. */ - -/* - * Forwards. - */ -STATIC int regtry( regexp *prog, const char *string ); -STATIC int regmatch( char *prog ); -STATIC int regrepeat( char *p ); - -#ifdef DEBUG -int regnarrate = 0; -void regdump(); -STATIC char *regprop(); -#endif - -/* - - regexec - match a regexp against a string - */ -int -regexec( - register regexp *prog, - register const char *string ) -{ - register char *s; - - /* Be paranoid...
*/ - if (prog == NULL || string == NULL) { - regerror("NULL parameter"); - return(0); - } - - /* Check validity of program. */ - if (UCHARAT(prog->program) != MAGIC) { - regerror("corrupted program"); - return(0); - } - - /* If there is a "must appear" string, look for it. */ - if ( prog->regmust != NULL ) - { - s = (char *)string; - while ( ( s = strchr( s, prog->regmust[ 0 ] ) ) != NULL ) - { - if ( !strncmp( s, prog->regmust, prog->regmlen ) ) - break; /* Found it. */ - ++s; - } - if ( s == NULL ) /* Not present. */ - return 0; - } - - /* Mark beginning of line for ^ . */ - regbol = (char *)string; - - /* Simplest case: anchored match need be tried only once. */ - if ( prog->reganch ) - return regtry( prog, string ); - - /* Messy cases: unanchored match. */ - s = (char *)string; - if (prog->regstart != '\0') - /* We know what char it must start with. */ - while ((s = strchr(s, prog->regstart)) != NULL) { - if (regtry(prog, s)) - return(1); - s++; - } - else - /* We do not -- general case. */ - do { - if ( regtry( prog, s ) ) - return( 1 ); - } while ( *s++ != '\0' ); - - /* Failure. */ - return 0; -} - - -/* - * regtry() - try match at specific point. - */ - -static int /* 0 failure, 1 success */ -regtry( - regexp *prog, - const char *string ) -{ - register int i; - register const char * * sp; - register const char * * ep; - - reginput = string; - regstartp = prog->startp; - regendp = prog->endp; - - sp = prog->startp; - ep = prog->endp; - for ( i = NSUBEXP; i > 0; --i ) - { - *sp++ = NULL; - *ep++ = NULL; - } - if ( regmatch( prog->program + 1 ) ) - { - prog->startp[ 0 ] = string; - prog->endp[ 0 ] = reginput; - return 1; - } - else - return 0; -} - - -/* - * regmatch() - main matching routine. - * - * Conceptually the strategy is simple: check to see whether the current node - * matches, call self recursively to see whether the rest matches, and then act - * accordingly. 
In practice we make some effort to avoid recursion, in - * particular by going through "ordinary" nodes (that do not need to know - * whether the rest of the match failed) by a loop instead of by recursion. - */ - -static int /* 0 failure, 1 success */ -regmatch( char * prog ) -{ - char * scan; /* Current node. */ - char * next; /* Next node. */ - - scan = prog; -#ifdef DEBUG - if (scan != NULL && regnarrate) - fprintf(stderr, "%s(\n", regprop(scan)); -#endif - while (scan != NULL) { -#ifdef DEBUG - if (regnarrate) - fprintf(stderr, "%s...\n", regprop(scan)); -#endif - next = regnext(scan); - - switch (OP(scan)) { - case BOL: - if (reginput != regbol) - return(0); - break; - case EOL: - if (*reginput != '\0') - return(0); - break; - case WORDA: - /* Must be looking at a letter, digit, or _ */ - if ((!isalnum(*reginput)) && *reginput != '_') - return(0); - /* Prev must be BOL or nonword */ - if (reginput > regbol && - (isalnum(reginput[-1]) || reginput[-1] == '_')) - return(0); - break; - case WORDZ: - /* Must be looking at non letter, digit, or _ */ - if (isalnum(*reginput) || *reginput == '_') - return(0); - /* We don't care what the previous char was */ - break; - case ANY: - if (*reginput == '\0') - return(0); - reginput++; - break; - case EXACTLY: { - register int len; - register char *opnd; - - opnd = OPERAND(scan); - /* Inline the first character, for speed. 
*/ - if (*opnd != *reginput) - return(0); - len = strlen(opnd); - if (len > 1 && strncmp(opnd, reginput, len) != 0) - return(0); - reginput += len; - } - break; - case ANYOF: - if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL) - return(0); - reginput++; - break; - case ANYBUT: - if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL) - return(0); - reginput++; - break; - case NOTHING: - break; - case BACK: - break; - case OPEN+1: - case OPEN+2: - case OPEN+3: - case OPEN+4: - case OPEN+5: - case OPEN+6: - case OPEN+7: - case OPEN+8: - case OPEN+9: { - register int no; - register const char *save; - - no = OP(scan) - OPEN; - save = reginput; - - if (regmatch(next)) { - /* - * Don't set startp if some later - * invocation of the same parentheses - * already has. - */ - if (regstartp[no] == NULL) - regstartp[no] = save; - return(1); - } else - return(0); - } - break; - case CLOSE+1: - case CLOSE+2: - case CLOSE+3: - case CLOSE+4: - case CLOSE+5: - case CLOSE+6: - case CLOSE+7: - case CLOSE+8: - case CLOSE+9: { - register int no; - register const char *save; - - no = OP(scan) - CLOSE; - save = reginput; - - if (regmatch(next)) { - /* - * Don't set endp if some later - * invocation of the same parentheses - * already has. - */ - if (regendp[no] == NULL) - regendp[no] = save; - return(1); - } else - return(0); - } - break; - case BRANCH: { - register const char *save; - - if (OP(next) != BRANCH) /* No choice. */ - next = OPERAND(scan); /* Avoid recursion. */ - else { - do { - save = reginput; - if (regmatch(OPERAND(scan))) - return(1); - reginput = save; - scan = regnext(scan); - } while (scan != NULL && OP(scan) == BRANCH); - return(0); - /* NOTREACHED */ - } - } - break; - case STAR: - case PLUS: { - register char nextch; - register int no; - register const char *save; - register int min; - - /* - * Lookahead to avoid useless match attempts - * when we know what character comes next. 
- */ - nextch = '\0'; - if (OP(next) == EXACTLY) - nextch = *OPERAND(next); - min = (OP(scan) == STAR) ? 0 : 1; - save = reginput; - no = regrepeat(OPERAND(scan)); - while (no >= min) { - /* If it could work, try it. */ - if (nextch == '\0' || *reginput == nextch) - if (regmatch(next)) - return(1); - /* Couldn't or didn't -- back up. */ - no--; - reginput = save + no; - } - return(0); - } - break; - case END: - return(1); /* Success! */ - break; - default: - regerror("memory corruption"); - return(0); - break; - } - - scan = next; - } - - /* - * We get here only if there's trouble -- normally "case END" is - * the terminating point. - */ - regerror("corrupted pointers"); - return(0); -} - -/* - - regrepeat - repeatedly match something simple, report how many - */ -static int -regrepeat( char *p ) -{ - register int count = 0; - register const char *scan; - register char *opnd; - - scan = reginput; - opnd = OPERAND(p); - switch (OP(p)) { - case ANY: - count = strlen(scan); - scan += count; - break; - case EXACTLY: - while (*opnd == *scan) { - count++; - scan++; - } - break; - case ANYOF: - while (*scan != '\0' && strchr(opnd, *scan) != NULL) { - count++; - scan++; - } - break; - case ANYBUT: - while (*scan != '\0' && strchr(opnd, *scan) == NULL) { - count++; - scan++; - } - break; - default: /* Oh dear. Called inappropriately. */ - regerror("internal foulup"); - count = 0; /* Best compromise. 
*/ - break; - } - reginput = scan; - - return(count); -} - -/* - - regnext - dig the "next" pointer out of a node - */ -static char * -regnext( register char *p ) -{ - register int offset; - - if (p == &regdummy) - return(NULL); - - offset = NEXT(p); - if (offset == 0) - return(NULL); - - if (OP(p) == BACK) - return(p-offset); - else - return(p+offset); -} - -#ifdef DEBUG - -STATIC char *regprop(); - -/* - - regdump - dump a regexp onto stdout in vaguely comprehensible form - */ -void -regdump( regexp *r ) -{ - register char *s; - register char op = EXACTLY; /* Arbitrary non-END op. */ - register char *next; - - - s = r->program + 1; - while (op != END) { /* While that wasn't END last time... */ - op = OP(s); - printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */ - next = regnext(s); - if (next == NULL) /* Next ptr. */ - printf("(0)"); - else - printf("(%d)", (s-r->program)+(next-s)); - s += 3; - if (op == ANYOF || op == ANYBUT || op == EXACTLY) { - /* Literal string, where present. */ - while (*s != '\0') { - putchar(*s); - s++; - } - s++; - } - putchar('\n'); - } - - /* Header fields of interest.
*/ - if (r->regstart != '\0') - printf("start `%c' ", r->regstart); - if (r->reganch) - printf("anchored "); - if (r->regmust != NULL) - printf("must have \"%s\"", r->regmust); - printf("\n"); -} - -/* - - regprop - printable representation of opcode - */ -static char * -regprop( char *op ) -{ - register char *p; - static char buf[50]; - - (void) strcpy(buf, ":"); - - switch (OP(op)) { - case BOL: - p = "BOL"; - break; - case EOL: - p = "EOL"; - break; - case ANY: - p = "ANY"; - break; - case ANYOF: - p = "ANYOF"; - break; - case ANYBUT: - p = "ANYBUT"; - break; - case BRANCH: - p = "BRANCH"; - break; - case EXACTLY: - p = "EXACTLY"; - break; - case NOTHING: - p = "NOTHING"; - break; - case BACK: - p = "BACK"; - break; - case END: - p = "END"; - break; - case OPEN+1: - case OPEN+2: - case OPEN+3: - case OPEN+4: - case OPEN+5: - case OPEN+6: - case OPEN+7: - case OPEN+8: - case OPEN+9: - sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN); - p = NULL; - break; - case CLOSE+1: - case CLOSE+2: - case CLOSE+3: - case CLOSE+4: - case CLOSE+5: - case CLOSE+6: - case CLOSE+7: - case CLOSE+8: - case CLOSE+9: - sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE); - p = NULL; - break; - case STAR: - p = "STAR"; - break; - case PLUS: - p = "PLUS"; - break; - case WORDA: - p = "WORDA"; - break; - case WORDZ: - p = "WORDZ"; - break; - default: - regerror("corrupted opcode"); - break; - } - if (p != NULL) - (void) strcat(buf, p); - return(buf); -} -#endif - -/* - * The following is provided for those people who do not have strcspn() in - * their C libraries. They should get off their butts and do something - * about it; at least one public-domain implementation of those (highly - * useful) string routines has been published on Usenet. 
- */ -#ifdef STRCSPN -/* - * strcspn - find length of initial segment of s1 consisting entirely - * of characters not from s2 - */ - -static int -strcspn( - char *s1, - char *s2 ) -{ - register char *scan1; - register char *scan2; - register int count; - - count = 0; - for (scan1 = s1; *scan1 != '\0'; scan1++) { - for (scan2 = s2; *scan2 != '\0';) /* ++ moved down. */ - if (*scan1 == *scan2++) - return(count); - count++; - } - return(count); -} -#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/regexp.h ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/jam-files/engine/regexp.h b/ext/kenlm/jam-files/engine/regexp.h deleted file mode 100644 index 6898ccd..0000000 --- a/ext/kenlm/jam-files/engine/regexp.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Definitions etc. for regexp(3) routines. - * - * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof], - * not the System V one. - */ -#ifndef REGEXP_DWA20011023_H -#define REGEXP_DWA20011023_H - -#define NSUBEXP 10 -typedef struct regexp { - char const * startp[ NSUBEXP ]; - char const * endp[ NSUBEXP ]; - char regstart; /* Internal use only. */ - char reganch; /* Internal use only. */ - char * regmust; /* Internal use only. */ - int regmlen; /* Internal use only. */ - char program[ 1 ]; /* Unwarranted chumminess with compiler. */ -} regexp; - - -regexp * regcomp( char const * exp ); -int regexec( regexp * prog, char const * string ); -void regerror( char const * s ); - - -/* - * The first byte of the regexp internal "program" is actually this magic - * number; the start node begins in the second byte. 
- */ -#define MAGIC 0234 - -#endif - http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/rules.c ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/jam-files/engine/rules.c b/ext/kenlm/jam-files/engine/rules.c deleted file mode 100644 index 7947c55..0000000 --- a/ext/kenlm/jam-files/engine/rules.c +++ /dev/null @@ -1,740 +0,0 @@ -/* - * Copyright 1993, 1995 Christopher Seiwald. - * - * This file is part of Jam - see jam.c for Copyright information. - */ - -/* This file is ALSO: - * Copyright 2001-2004 David Abrahams. - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - */ - -/* - * rules.c - access to RULEs, TARGETs, and ACTIONs - * - * External routines: - * bindrule() - return pointer to RULE, creating it if necessary. - * bindtarget() - return pointer to TARGET, creating it if necessary. - * touch_target() - mark a target to simulate being new. - * targetlist() - turn list of target names into a TARGET chain. - * targetentry() - add a TARGET to a chain of TARGETS. - * actionlist() - append to an ACTION chain. - * addsettings() - add a deferred "set" command to a target. - * pushsettings() - set all target specific variables. - * popsettings() - reset target specific variables to their pre-push values. - * freesettings() - delete a settings list. - * rules_done() - free RULE and TARGET tables. 
- */ - -#include "jam.h" -#include "rules.h" - -#include "hash.h" -#include "lists.h" -#include "object.h" -#include "parse.h" -#include "pathsys.h" -#include "search.h" -#include "variable.h" - - -static void set_rule_actions( RULE *, rule_actions * ); -static void set_rule_body ( RULE *, FUNCTION * ); - -static struct hash * targethash = 0; - - -/* - * get_target_includes() - lazy creates a target's internal includes node - * - * The newly created node is not entered into the hash table as there should - * never be a need to bind them directly from a target names. If you want to - * access an internal includes node by name, first access the actual target and - * then read the internal includes node from there. - */ - -static TARGET * get_target_includes( TARGET * const t ) -{ - if ( !t->includes ) - { - TARGET * const i = (TARGET *)BJAM_MALLOC( sizeof( *t ) ); - memset( (char *)i, '\0', sizeof( *i ) ); - i->name = object_copy( t->name ); - i->boundname = object_copy( i->name ); - i->flags |= T_FLAG_NOTFILE | T_FLAG_INTERNAL; - i->original_target = t; - t->includes = i; - } - return t->includes; -} - - -/* - * target_include() - adds a target to the given targe's 'included' list - * target_include_many() - adds targets to the given target's 'included' list - * - * Included targets are modeled as dependencies of the including target's - * internal include node. - */ - -void target_include( TARGET * const including, TARGET * const included ) -{ - TARGET * const internal = get_target_includes( including ); - internal->depends = targetentry( internal->depends, included ); -} - -void target_include_many( TARGET * const including, LIST * const included_names - ) -{ - TARGET * const internal = get_target_includes( including ); - internal->depends = targetlist( internal->depends, included_names ); -} - - -/* - * enter_rule() - return pointer to RULE, creating it if necessary in - * target_module. 
- */ - -static RULE * enter_rule( OBJECT * rulename, module_t * target_module ) -{ - int found; - RULE * const r = (RULE *)hash_insert( demand_rules( target_module ), - rulename, &found ); - if ( !found ) - { - r->name = object_copy( rulename ); - r->procedure = 0; - r->module = 0; - r->actions = 0; - r->exported = 0; - r->module = target_module; - } - return r; -} - - -/* - * define_rule() - return pointer to RULE, creating it if necessary in - * target_module. Prepare it to accept a body or action originating in - * src_module. - */ - -static RULE * define_rule( module_t * src_module, OBJECT * rulename, - module_t * target_module ) -{ - RULE * const r = enter_rule( rulename, target_module ); - if ( r->module != src_module ) - { - /* If the rule was imported from elsewhere, clear it now. */ - set_rule_body( r, 0 ); - set_rule_actions( r, 0 ); - /* r will be executed in the source module. */ - r->module = src_module; - } - return r; -} - - -void rule_free( RULE * r ) -{ - object_free( r->name ); - r->name = 0; - if ( r->procedure ) - function_free( r->procedure ); - r->procedure = 0; - if ( r->actions ) - actions_free( r->actions ); - r->actions = 0; -} - - -/* - * bindtarget() - return pointer to TARGET, creating it if necessary. - */ - -TARGET * bindtarget( OBJECT * const target_name ) -{ - int found; - TARGET * t; - - if ( !targethash ) - targethash = hashinit( sizeof( TARGET ), "targets" ); - - t = (TARGET *)hash_insert( targethash, target_name, &found ); - if ( !found ) - { - memset( (char *)t, '\0', sizeof( *t ) ); - t->name = object_copy( target_name ); - t->boundname = object_copy( t->name ); /* default for T_FLAG_NOTFILE */ - } - - return t; -} - - -static void bind_explicitly_located_target( void * xtarget, void * data ) -{ - TARGET * t = (TARGET *)xtarget; - if ( !( t->flags & T_FLAG_NOTFILE ) ) - { - /* Check if there is a setting for LOCATE. 
*/ - SETTINGS * s = t->settings; - for ( ; s ; s = s->next ) - { - if ( object_equal( s->symbol, constant_LOCATE ) && ! list_empty( s->value ) ) - { - set_explicit_binding( t->name, list_front( s->value ) ); - break; - } - } - } -} - - -void bind_explicitly_located_targets() -{ - if ( targethash ) - hashenumerate( targethash, bind_explicitly_located_target, (void *)0 ); -} - - -/* - * touch_target() - mark a target to simulate being new. - */ - -void touch_target( OBJECT * const t ) -{ - bindtarget( t )->flags |= T_FLAG_TOUCHED; -} - - -/* - * target_scc() - returns the root of a strongly connected component that this - * target is a part of. - */ - -TARGET * target_scc( TARGET * t ) -{ - TARGET * result = t; - while ( result->scc_root ) - result = result->scc_root; - while ( t->scc_root ) - { - TARGET * const tmp = t->scc_root; - t->scc_root = result; - t = tmp; - } - return result; -} - - -/* - * targetlist() - turn list of target names into a TARGET chain. - * - * Inputs: - * chain existing TARGETS to append to - * targets list of target names - */ - -TARGETS * targetlist( TARGETS * chain, LIST * target_names ) -{ - LISTITER iter = list_begin( target_names ); - LISTITER const end = list_end( target_names ); - for ( ; iter != end; iter = list_next( iter ) ) - chain = targetentry( chain, bindtarget( list_item( iter ) ) ); - return chain; -} - - -/* - * targetentry() - add a TARGET to a chain of TARGETS. - * - * Inputs: - * chain existing TARGETS to append to - * target new target to append - */ - -TARGETS * targetentry( TARGETS * chain, TARGET * target ) -{ - TARGETS * const c = (TARGETS *)BJAM_MALLOC( sizeof( TARGETS ) ); - c->target = target; - - if ( !chain ) chain = c; - else chain->tail->next = c; - chain->tail = c; - c->next = 0; - - return chain; -} - - -/* - * targetchain() - append two TARGET chains. 
- * - * Inputs: - * chain existing TARGETS to append to - * target new target to append - */ - -TARGETS * targetchain( TARGETS * chain, TARGETS * targets ) -{ - if ( !targets ) return chain; - if ( !chain ) return targets; - - chain->tail->next = targets; - chain->tail = targets->tail; - return chain; -} - -/* - * action_free - decrement the ACTIONs refrence count and (maybe) free it. - */ - -void action_free( ACTION * action ) -{ - if ( --action->refs == 0 ) - { - freetargets( action->targets ); - freetargets( action->sources ); - BJAM_FREE( action ); - } -} - - -/* - * actionlist() - append to an ACTION chain. - */ - -ACTIONS * actionlist( ACTIONS * chain, ACTION * action ) -{ - ACTIONS * const actions = (ACTIONS *)BJAM_MALLOC( sizeof( ACTIONS ) ); - actions->action = action; - ++action->refs; - if ( !chain ) chain = actions; - else chain->tail->next = actions; - chain->tail = actions; - actions->next = 0; - return chain; -} - -static SETTINGS * settings_freelist; - - -/* - * addsettings() - add a deferred "set" command to a target. - * - * Adds a variable setting (varname=list) onto a chain of settings for a - * particular target. 'flag' controls the relationship between new and old - * values in the same way as in var_set() function (see variable.c). Returns the - * head of the settings chain. - */ - -SETTINGS * addsettings( SETTINGS * head, int flag, OBJECT * symbol, - LIST * value ) -{ - SETTINGS * v; - - /* Look for previous settings. */ - for ( v = head; v; v = v->next ) - if ( object_equal( v->symbol, symbol ) ) - break; - - /* If not previously set, alloc a new. */ - /* If appending, do so. */ - /* Else free old and set new. 
*/ - if ( !v ) - { - v = settings_freelist; - if ( v ) - settings_freelist = v->next; - else - v = (SETTINGS *)BJAM_MALLOC( sizeof( *v ) ); - - v->symbol = object_copy( symbol ); - v->value = value; - v->next = head; - head = v; - } - else if ( flag == VAR_APPEND ) - { - v->value = list_append( v->value, value ); - } - else if ( flag != VAR_DEFAULT ) - { - list_free( v->value ); - v->value = value; - } - else - list_free( value ); - - /* Return (new) head of list. */ - return head; -} - - -/* - * pushsettings() - set all target specific variables. - */ - -void pushsettings( struct module_t * module, SETTINGS * v ) -{ - for ( ; v; v = v->next ) - v->value = var_swap( module, v->symbol, v->value ); -} - - -/* - * popsettings() - reset target specific variables to their pre-push values. - */ - -void popsettings( struct module_t * module, SETTINGS * v ) -{ - pushsettings( module, v ); /* just swap again */ -} - - -/* - * copysettings() - duplicate a settings list, returning the new copy. - */ - -SETTINGS * copysettings( SETTINGS * head ) -{ - SETTINGS * copy = 0; - SETTINGS * v; - for ( v = head; v; v = v->next ) - copy = addsettings( copy, VAR_SET, v->symbol, list_copy( v->value ) ); - return copy; -} - - -/* - * freetargets() - delete a targets list. - */ - -void freetargets( TARGETS * chain ) -{ - while ( chain ) - { - TARGETS * const n = chain->next; - BJAM_FREE( chain ); - chain = n; - } -} - - -/* - * freeactions() - delete an action list. - */ - -void freeactions( ACTIONS * chain ) -{ - while ( chain ) - { - ACTIONS * const n = chain->next; - action_free( chain->action ); - BJAM_FREE( chain ); - chain = n; - } -} - - -/* - * freesettings() - delete a settings list. 
- */ - -void freesettings( SETTINGS * v ) -{ - while ( v ) - { - SETTINGS * const n = v->next; - object_free( v->symbol ); - list_free( v->value ); - v->next = settings_freelist; - settings_freelist = v; - v = n; - } -} - - -static void freetarget( void * xt, void * data ) -{ - TARGET * const t = (TARGET *)xt; - if ( t->name ) object_free ( t->name ); - if ( t->boundname ) object_free ( t->boundname ); - if ( t->settings ) freesettings( t->settings ); - if ( t->depends ) freetargets ( t->depends ); - if ( t->dependants ) freetargets ( t->dependants ); - if ( t->parents ) freetargets ( t->parents ); - if ( t->actions ) freeactions ( t->actions ); - if ( t->includes ) - { - freetarget( t->includes, (void *)0 ); - BJAM_FREE( t->includes ); - } -} - - -/* - * rules_done() - free RULE and TARGET tables. - */ - -void rules_done() -{ - if ( targethash ) - { - hashenumerate( targethash, freetarget, 0 ); - hashdone( targethash ); - } - while ( settings_freelist ) - { - SETTINGS * const n = settings_freelist->next; - BJAM_FREE( settings_freelist ); - settings_freelist = n; - } -} - - -/* - * actions_refer() - add a new reference to the given actions. - */ - -void actions_refer( rule_actions * a ) -{ - ++a->reference_count; -} - - -/* - * actions_free() - release a reference to given actions. - */ - -void actions_free( rule_actions * a ) -{ - if ( --a->reference_count <= 0 ) - { - function_free( a->command ); - list_free( a->bindlist ); - BJAM_FREE( a ); - } -} - - -/* - * set_rule_body() - set the argument list and procedure of the given rule. - */ - -static void set_rule_body( RULE * rule, FUNCTION * procedure ) -{ - if ( procedure ) - function_refer( procedure ); - if ( rule->procedure ) - function_free( rule->procedure ); - rule->procedure = procedure; -} - - -/* - * global_name() - given a rule, return the name for a corresponding rule in the - * global module. 
- */ - -static OBJECT * global_rule_name( RULE * r ) -{ - if ( r->module == root_module() ) - return object_copy( r->name ); - - { - char name[ 4096 ] = ""; - if ( r->module->name ) - { - strncat( name, object_str( r->module->name ), sizeof( name ) - 1 ); - strncat( name, ".", sizeof( name ) - 1 ); - } - strncat( name, object_str( r->name ), sizeof( name ) - 1 ); - return object_new( name ); - } -} - - -/* - * global_rule() - given a rule, produce a corresponding entry in the global - * module. - */ - -static RULE * global_rule( RULE * r ) -{ - if ( r->module == root_module() ) - return r; - - { - OBJECT * const name = global_rule_name( r ); - RULE * const result = define_rule( r->module, name, root_module() ); - object_free( name ); - return result; - } -} - - -/* - * new_rule_body() - make a new rule named rulename in the given module, with - * the given argument list and procedure. If exported is true, the rule is - * exported to the global module as modulename.rulename. - */ - -RULE * new_rule_body( module_t * m, OBJECT * rulename, FUNCTION * procedure, - int exported ) -{ - RULE * const local = define_rule( m, rulename, m ); - local->exported = exported; - set_rule_body( local, procedure ); - - /* Mark the procedure with the global rule name, regardless of whether the - * rule is exported. That gives us something reasonably identifiable that we - * can use, e.g. in profiling output. Only do this once, since this could be - * called multiple times with the same procedure. 
- */ - if ( !function_rulename( procedure ) ) - function_set_rulename( procedure, global_rule_name( local ) ); - - return local; -} - - -static void set_rule_actions( RULE * rule, rule_actions * actions ) -{ - if ( actions ) - actions_refer( actions ); - if ( rule->actions ) - actions_free( rule->actions ); - rule->actions = actions; -} - - -static rule_actions * actions_new( FUNCTION * command, LIST * bindlist, - int flags ) -{ - rule_actions * const result = (rule_actions *)BJAM_MALLOC( sizeof( - rule_actions ) ); - function_refer( command ); - result->command = command; - result->bindlist = bindlist; - result->flags = flags; - result->reference_count = 0; - return result; -} - - -RULE * new_rule_actions( module_t * m, OBJECT * rulename, FUNCTION * command, - LIST * bindlist, int flags ) -{ - RULE * const local = define_rule( m, rulename, m ); - RULE * const global = global_rule( local ); - set_rule_actions( local, actions_new( command, bindlist, flags ) ); - set_rule_actions( global, local->actions ); - return local; -} - - -/* - * Looks for a rule in the specified module, and returns it, if found. First - * checks if the rule is present in the module's rule table. Second, if the - * rule's name is in the form name1.name2 and name1 is in the list of imported - * modules, look in module 'name1' for rule 'name2'. - */ - -RULE * lookup_rule( OBJECT * rulename, module_t * m, int local_only ) -{ - RULE * r; - RULE * result = 0; - module_t * original_module = m; - - if ( m->class_module ) - m = m->class_module; - - if ( m->rules && ( r = (RULE *)hash_find( m->rules, rulename ) ) ) - result = r; - else if ( !local_only && m->imported_modules ) - { - /* Try splitting the name into module and rule. */ - char * p = strchr( object_str( rulename ), '.' ) ; - if ( p ) - { - /* Now, r->name keeps the module name, and p + 1 keeps the rule - * name. 
- */ - OBJECT * rule_part = object_new( p + 1 ); - OBJECT * module_part; - { - string buf[ 1 ]; - string_new( buf ); - string_append_range( buf, object_str( rulename ), p ); - module_part = object_new( buf->value ); - string_free( buf ); - } - if ( hash_find( m->imported_modules, module_part ) ) - result = lookup_rule( rule_part, bindmodule( module_part ), 1 ); - object_free( module_part ); - object_free( rule_part ); - } - } - - if ( result ) - { - if ( local_only && !result->exported ) - result = 0; - else if ( original_module != m ) - { - /* Lookup started in class module. We have found a rule in class - * module, which is marked for execution in that module, or in some - * instance. Mark it for execution in the instance where we started - * the lookup. - */ - int const execute_in_class = result->module == m; - int const execute_in_some_instance = - result->module->class_module == m; - if ( execute_in_class || execute_in_some_instance ) - result->module = original_module; - } - } - - return result; -} - - -RULE * bindrule( OBJECT * rulename, module_t * m ) -{ - RULE * result = lookup_rule( rulename, m, 0 ); - if ( !result ) - result = lookup_rule( rulename, root_module(), 0 ); - /* We have only one caller, 'evaluate_rule', which will complain about - * calling an undefined rule. We could issue the error here, but we do not - * have the necessary information, such as frame. 
- */ - if ( !result ) - result = enter_rule( rulename, m ); - return result; -} - - -RULE * import_rule( RULE * source, module_t * m, OBJECT * name ) -{ - RULE * const dest = define_rule( source->module, name, m ); - set_rule_body( dest, source->procedure ); - set_rule_actions( dest, source->actions ); - return dest; -} - - -void rule_localize( RULE * rule, module_t * m ) -{ - rule->module = m; - if ( rule->procedure ) - { - FUNCTION * procedure = function_unbind_variables( rule->procedure ); - function_refer( procedure ); - function_free( rule->procedure ); - rule->procedure = procedure; - } -} http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/rules.h ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/jam-files/engine/rules.h b/ext/kenlm/jam-files/engine/rules.h deleted file mode 100644 index fe2792f..0000000 --- a/ext/kenlm/jam-files/engine/rules.h +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Copyright 1993, 1995 Christopher Seiwald. - * - * This file is part of Jam - see jam.c for Copyright information. - */ - -/* This file is ALSO: - * Copyright 2001-2004 David Abrahams. - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - */ - -/* - * rules.h - targets, rules, and related information - * - * This file describes the structures holding the targets, rules, and related - * information accumulated by interpreting the statements of the jam files. - * - * The following are defined: - * - * RULE - a generic jam rule, the product of RULE and ACTIONS. - * ACTIONS - a chain of ACTIONs. - * ACTION - a RULE instance with targets and sources. - * SETTINGS - variables to set when executing a TARGET's ACTIONS. 
- * TARGETS - a chain of TARGETs. - * TARGET - an entity (e.g. a file) that can be built. - */ - -#ifndef RULES_DWA_20011020_H -#define RULES_DWA_20011020_H - -#include "function.h" -#include "modules.h" -#include "timestamp.h" - - -typedef struct _rule RULE; -typedef struct _target TARGET; -typedef struct _targets TARGETS; -typedef struct _action ACTION; -typedef struct _actions ACTIONS; -typedef struct _settings SETTINGS ; - -/* RULE - a generic jam rule, the product of RULE and ACTIONS. */ - -/* Build actions corresponding to a rule. */ -struct rule_actions -{ - int reference_count; - FUNCTION * command; /* command string from ACTIONS */ - LIST * bindlist; - int flags; /* modifiers on ACTIONS */ - -#define RULE_NEWSRCS 0x01 /* $(>) is updated sources only */ -#define RULE_TOGETHER 0x02 /* combine actions on single target */ -#define RULE_IGNORE 0x04 /* ignore return status of executes */ -#define RULE_QUIETLY 0x08 /* do not mention it unless verbose */ -#define RULE_PIECEMEAL 0x10 /* split exec so each $(>) is small */ -#define RULE_EXISTING 0x20 /* $(>) is pre-exisitng sources only */ -}; - -typedef struct rule_actions rule_actions; -typedef struct argument_list argument_list; - -struct _rule -{ - OBJECT * name; - FUNCTION * procedure; - rule_actions * actions; /* build actions, or NULL for no actions */ - module_t * module; /* module in which this rule is executed */ - int exported; /* nonzero if this rule is supposed to appear in - * the global module and be automatically - * imported into other modules - */ -}; - -/* ACTIONS - a chain of ACTIONs. */ -struct _actions -{ - ACTIONS * next; - ACTIONS * tail; /* valid only for head */ - ACTION * action; -}; - -/* ACTION - a RULE instance with targets and sources. 
*/ -struct _action -{ - RULE * rule; - TARGETS * targets; - TARGETS * sources; /* aka $(>) */ - char running; /* has been started */ -#define A_INIT 0 -#define A_RUNNING_NOEXEC 1 -#define A_RUNNING 2 - char status; /* see TARGET status */ - int refs; -}; - -/* SETTINGS - variables to set when executing a TARGET's ACTIONS. */ -struct _settings -{ - SETTINGS * next; - OBJECT * symbol; /* symbol name for var_set() */ - LIST * value; /* symbol value for var_set() */ -}; - -/* TARGETS - a chain of TARGETs. */ -struct _targets -{ - TARGETS * next; - TARGETS * tail; /* valid only for head */ - TARGET * target; -}; - -/* TARGET - an entity (e.g. a file) that can be built. */ -struct _target -{ - OBJECT * name; - OBJECT * boundname; /* if search() relocates target */ - ACTIONS * actions; /* rules to execute, if any */ - SETTINGS * settings; /* variables to define */ - - short flags; /* status info */ - -#define T_FLAG_TEMP 0x0001 /* TEMPORARY applied */ -#define T_FLAG_NOCARE 0x0002 /* NOCARE applied */ -#define T_FLAG_NOTFILE 0x0004 /* NOTFILE applied */ -#define T_FLAG_TOUCHED 0x0008 /* ALWAYS applied or -t target */ -#define T_FLAG_LEAVES 0x0010 /* LEAVES applied */ -#define T_FLAG_NOUPDATE 0x0020 /* NOUPDATE applied */ -#define T_FLAG_VISITED 0x0040 /* CWM: Used in debugging */ - -/* This flag has been added to support a new built-in rule named "RMBAD". It is - * used to force removal of outdated targets whose dependencies fail to build. - */ -#define T_FLAG_RMOLD 0x0080 /* RMBAD applied */ - -/* This flag was added to support a new built-in rule named "FAIL_EXPECTED" used - * to indicate that the result of running a given action should be inverted, - * i.e. ok <=> fail. Useful for launching certain test runs from a Jamfile. - */ -#define T_FLAG_FAIL_EXPECTED 0x0100 /* FAIL_EXPECTED applied */ - -#define T_FLAG_INTERNAL 0x0200 /* internal INCLUDES node */ - -/* Indicates that the target must be a file. 
Prevents matching non-files, like - * directories, when a target is searched. - */ -#define T_FLAG_ISFILE 0x0400 - -#define T_FLAG_PRECIOUS 0x0800 - - char binding; /* how target relates to a real file or - * folder - */ - -#define T_BIND_UNBOUND 0 /* a disembodied name */ -#define T_BIND_MISSING 1 /* could not find real file */ -#define T_BIND_PARENTS 2 /* using parent's timestamp */ -#define T_BIND_EXISTS 3 /* real file, timestamp valid */ - - TARGETS * depends; /* dependencies */ - TARGETS * dependants; /* the inverse of dependencies */ - TARGETS * rebuilds; /* targets that should be force-rebuilt - * whenever this one is - */ - TARGET * includes; /* internal includes node */ - TARGET * original_target; /* original_target->includes = this */ - char rescanned; - - timestamp time; /* update time */ - timestamp leaf; /* update time of leaf sources */ - - char fate; /* make0()'s diagnosis */ - -#define T_FATE_INIT 0 /* nothing done to target */ -#define T_FATE_MAKING 1 /* make0(target) on stack */ - -#define T_FATE_STABLE 2 /* target did not need updating */ -#define T_FATE_NEWER 3 /* target newer than parent */ - -#define T_FATE_SPOIL 4 /* >= SPOIL rebuilds parents */ -#define T_FATE_ISTMP 4 /* unneeded temp target oddly present */ - -#define T_FATE_BUILD 5 /* >= BUILD rebuilds target */ -#define T_FATE_TOUCHED 5 /* manually touched with -t */ -#define T_FATE_REBUILD 6 -#define T_FATE_MISSING 7 /* is missing, needs updating */ -#define T_FATE_NEEDTMP 8 /* missing temp that must be rebuild */ -#define T_FATE_OUTDATED 9 /* is out of date, needs updating */ -#define T_FATE_UPDATE 10 /* deps updated, needs updating */ - -#define T_FATE_BROKEN 11 /* >= BROKEN ruins parents */ -#define T_FATE_CANTFIND 11 /* no rules to make missing target */ -#define T_FATE_CANTMAKE 12 /* can not find dependencies */ - - char progress; /* tracks make1() progress */ - -#define T_MAKE_INIT 0 /* make1(target) not yet called */ -#define T_MAKE_ONSTACK 1 /* make1(target) on stack */ -#define 
T_MAKE_ACTIVE 2 /* make1(target) in make1b() */ -#define T_MAKE_RUNNING 3 /* make1(target) running commands */ -#define T_MAKE_DONE 4 /* make1(target) done */ -#define T_MAKE_NOEXEC_DONE 5 /* make1(target) done with -n in effect */ - -#ifdef OPT_SEMAPHORE - #define T_MAKE_SEMAPHORE 5 /* Special target type for semaphores */ -#endif - -#ifdef OPT_SEMAPHORE - TARGET * semaphore; /* used in serialization */ -#endif - - char status; /* exec_cmd() result */ - - int asynccnt; /* child deps outstanding */ - TARGETS * parents; /* used by make1() for completion */ - TARGET * scc_root; /* used by make to resolve cyclic includes - */ - TARGET * rescanning; /* used by make0 to mark visited targets - * when rescanning - */ - int depth; /* The depth of the target in the make0 - * stack. - */ - char * cmds; /* type-punned command list */ - - char const * failed; -}; - - -/* Action related functions. */ -void action_free ( ACTION * ); -ACTIONS * actionlist ( ACTIONS *, ACTION * ); -void freeactions ( ACTIONS * ); -SETTINGS * addsettings ( SETTINGS *, int flag, OBJECT * symbol, LIST * value ); -void pushsettings ( module_t *, SETTINGS * ); -void popsettings ( module_t *, SETTINGS * ); -SETTINGS * copysettings ( SETTINGS * ); -void freesettings ( SETTINGS * ); -void actions_refer( rule_actions * ); -void actions_free ( rule_actions * ); - -/* Rule related functions. */ -RULE * bindrule ( OBJECT * rulename, module_t * ); -RULE * import_rule ( RULE * source, module_t *, OBJECT * name ); -void rule_localize ( RULE * rule, module_t * module ); -RULE * new_rule_body ( module_t *, OBJECT * rulename, FUNCTION * func, int exprt ); -RULE * new_rule_actions( module_t *, OBJECT * rulename, FUNCTION * command, LIST * bindlist, int flags ); -void rule_free ( RULE * ); - -/* Target related functions. 
*/ -void bind_explicitly_located_targets(); -TARGET * bindtarget ( OBJECT * const ); -void freetargets ( TARGETS * ); -TARGETS * targetchain ( TARGETS *, TARGETS * ); -TARGETS * targetentry ( TARGETS *, TARGET * ); -void target_include ( TARGET * const including, - TARGET * const included ); -void target_include_many ( TARGET * const including, - LIST * const included_names ); -TARGETS * targetlist ( TARGETS *, LIST * target_names ); -void touch_target ( OBJECT * const ); -void clear_includes ( TARGET * ); -TARGET * target_scc ( TARGET * ); - -/* Final module cleanup. */ -void rules_done(); - -#endif http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/scan.c ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/jam-files/engine/scan.c b/ext/kenlm/jam-files/engine/scan.c deleted file mode 100644 index d92fdca..0000000 --- a/ext/kenlm/jam-files/engine/scan.c +++ /dev/null @@ -1,404 +0,0 @@ -/* - * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc. - * - * This file is part of Jam - see jam.c for Copyright information. 
- */ - -/* - * scan.c - the jam yacc scanner - * - */ - -#include "jam.h" -#include "scan.h" - -#include "constants.h" -#include "jambase.h" -#include "jamgram.h" - - -struct keyword -{ - char * word; - int type; -} keywords[] = -{ -#include "jamgramtab.h" - { 0, 0 } -}; - -typedef struct include include; -struct include -{ - include * next; /* next serial include file */ - char * string; /* pointer into current line */ - char * * strings; /* for yyfparse() -- text to parse */ - FILE * file; /* for yyfparse() -- file being read */ - OBJECT * fname; /* for yyfparse() -- file name */ - int line; /* line counter for error messages */ - char buf[ 512 ]; /* for yyfparse() -- line buffer */ -}; - -static include * incp = 0; /* current file; head of chain */ - -static int scanmode = SCAN_NORMAL; -static int anyerrors = 0; - - -static char * symdump( YYSTYPE * ); - -#define BIGGEST_TOKEN 10240 /* no single token can be larger */ - - -/* - * Set parser mode: normal, string, or keyword. - */ - -void yymode( int n ) -{ - scanmode = n; -} - - -void yyerror( char const * s ) -{ - /* We use yylval instead of incp to access the error location information as - * the incp pointer will already be reset to 0 in case the error occurred at - * EOF. - * - * The two may differ only if ran into an unexpected EOF or we get an error - * while reading a lexical token spanning multiple lines, e.g. a multi-line - * string literal or action body, in which case yylval location information - * will hold the information about where the token started while incp will - * hold the information about where reading it broke. - */ - printf( "%s:%d: %s at %s\n", object_str( yylval.file ), yylval.line, s, - symdump( &yylval ) ); - ++anyerrors; -} - - -int yyanyerrors() -{ - return anyerrors != 0; -} - - -void yyfparse( OBJECT * s ) -{ - include * i = (include *)BJAM_MALLOC( sizeof( *i ) ); - - /* Push this onto the incp chain. 
*/ - i->string = ""; - i->strings = 0; - i->file = 0; - i->fname = object_copy( s ); - i->line = 0; - i->next = incp; - incp = i; - - /* If the filename is "+", it means use the internal jambase. */ - if ( !strcmp( object_str( s ), "+" ) ) - i->strings = jambase; -} - - -/* - * yyline() - read new line and return first character. - * - * Fabricates a continuous stream of characters across include files, returning - * EOF at the bitter end. - */ - -int yyline() -{ - include * const i = incp; - - if ( !incp ) - return EOF; - - /* Once we start reading from the input stream, we reset the include - * insertion point so that the next include file becomes the head of the - * list. - */ - - /* If there is more data in this line, return it. */ - if ( *i->string ) - return *i->string++; - - /* If we are reading from an internal string list, go to the next string. */ - if ( i->strings ) - { - if ( *i->strings ) - { - ++i->line; - i->string = *(i->strings++); - return *i->string++; - } - } - else - { - /* If necessary, open the file. */ - if ( !i->file ) - { - FILE * f = stdin; - if ( strcmp( object_str( i->fname ), "-" ) && !( f = fopen( object_str( i->fname ), "r" ) ) ) - perror( object_str( i->fname ) ); - i->file = f; - } - - /* If there is another line in this file, start it. */ - if ( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) ) - { - ++i->line; - i->string = i->buf; - return *i->string++; - } - } - - /* This include is done. Free it up and return EOF so yyparse() returns to - * parse_file(). - */ - - incp = i->next; - - /* Close file, free name. */ - if ( i->file && ( i->file != stdin ) ) - fclose( i->file ); - object_free( i->fname ); - BJAM_FREE( (char *)i ); - - return EOF; -} - - -/* - * yylex() - set yylval to current token; return its type. - * - * Macros to move things along: - * - * yychar() - return and advance character; invalid after EOF. - * yyprev() - back up one character; invalid before yychar(). 
- * - * yychar() returns a continuous stream of characters, until it hits the EOF of - * the current include file. - */ - -#define yychar() ( *incp->string ? *incp->string++ : yyline() ) -#define yyprev() ( incp->string-- ) - -int yylex() -{ - int c; - char buf[ BIGGEST_TOKEN ]; - char * b = buf; - - if ( !incp ) - goto eof; - - /* Get first character (whitespace or of token). */ - c = yychar(); - - if ( scanmode == SCAN_STRING ) - { - /* If scanning for a string (action's {}'s), look for the closing brace. - * We handle matching braces, if they match. - */ - - int nest = 1; - - while ( ( c != EOF ) && ( b < buf + sizeof( buf ) ) ) - { - if ( c == '{' ) - ++nest; - - if ( ( c == '}' ) && !--nest ) - break; - - *b++ = c; - - c = yychar(); - - /* Turn trailing "\r\n" sequences into plain "\n" for Cygwin. */ - if ( ( c == '\n' ) && ( b[ -1 ] == '\r' ) ) - --b; - } - - /* We ate the ending brace -- regurgitate it. */ - if ( c != EOF ) - yyprev(); - - /* Check for obvious errors. */ - if ( b == buf + sizeof( buf ) ) - { - yyerror( "action block too big" ); - goto eof; - } - - if ( nest ) - { - yyerror( "unmatched {} in action block" ); - goto eof; - } - - *b = 0; - yylval.type = STRING; - yylval.string = object_new( buf ); - yylval.file = incp->fname; - yylval.line = incp->line; - } - else - { - char * b = buf; - struct keyword * k; - int inquote = 0; - int notkeyword; - - /* Eat white space. */ - for ( ; ; ) - { - /* Skip past white space. */ - while ( ( c != EOF ) && isspace( c ) ) - c = yychar(); - - /* Not a comment? */ - if ( c != '#' ) - break; - - /* Swallow up comment line. */ - while ( ( ( c = yychar() ) != EOF ) && ( c != '\n' ) ) ; - } - - /* c now points to the first character of a token. 
*/ - if ( c == EOF ) - goto eof; - - yylval.file = incp->fname; - yylval.line = incp->line; - - /* While scanning the word, disqualify it for (expensive) keyword lookup - * when we can: $anything, "anything", \anything - */ - notkeyword = c == '$'; - - /* Look for white space to delimit word. "'s get stripped but preserve - * white space. \ protects next character. - */ - while - ( - ( c != EOF ) && - ( b < buf + sizeof( buf ) ) && - ( inquote || !isspace( c ) ) - ) - { - if ( c == '"' ) - { - /* begin or end " */ - inquote = !inquote; - notkeyword = 1; - } - else if ( c != '\\' ) - { - /* normal char */ - *b++ = c; - } - else if ( ( c = yychar() ) != EOF ) - { - /* \c */ - if (c == 'n') - c = '\n'; - else if (c == 'r') - c = '\r'; - else if (c == 't') - c = '\t'; - *b++ = c; - notkeyword = 1; - } - else - { - /* \EOF */ - break; - } - - c = yychar(); - } - - /* Check obvious errors. */ - if ( b == buf + sizeof( buf ) ) - { - yyerror( "string too big" ); - goto eof; - } - - if ( inquote ) - { - yyerror( "unmatched \" in string" ); - goto eof; - } - - /* We looked ahead a character - back up. */ - if ( c != EOF ) - yyprev(); - - /* Scan token table. Do not scan if it is obviously not a keyword or if - * it is an alphabetic when were looking for punctuation. - */ - - *b = 0; - yylval.type = ARG; - - if ( !notkeyword && !( isalpha( *buf ) && ( scanmode == SCAN_PUNCT ) ) ) - for ( k = keywords; k->word; ++k ) - if ( ( *buf == *k->word ) && !strcmp( k->word, buf ) ) - { - yylval.type = k->type; - yylval.keyword = k->word; /* used by symdump */ - break; - } - - if ( yylval.type == ARG ) - yylval.string = object_new( buf ); - } - - if ( DEBUG_SCAN ) - printf( "scan %s\n", symdump( &yylval ) ); - - return yylval.type; - -eof: - /* We do not reset yylval.file & yylval.line here so unexpected EOF error - * messages would include correct error location information. 
- */ - yylval.type = EOF; - return yylval.type; -} - - -static char * symdump( YYSTYPE * s ) -{ - static char buf[ BIGGEST_TOKEN + 20 ]; - switch ( s->type ) - { - case EOF : sprintf( buf, "EOF" ); break; - case 0 : sprintf( buf, "unknown symbol %s", object_str( s->string ) ); break; - case ARG : sprintf( buf, "argument %s" , object_str( s->string ) ); break; - case STRING: sprintf( buf, "string \"%s\"" , object_str( s->string ) ); break; - default : sprintf( buf, "keyword %s" , s->keyword ); break; - } - return buf; -} - - -/* - * Get information about the current file and line, for those epsilon - * transitions that produce a parse. - */ - -void yyinput_last_read_token( OBJECT * * name, int * line ) -{ - /* TODO: Consider whether and when we might want to report where the last - * read token ended, e.g. EOF errors inside string literals. - */ - *name = yylval.file; - *line = yylval.line; -} http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/scan.h ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/jam-files/engine/scan.h b/ext/kenlm/jam-files/engine/scan.h deleted file mode 100644 index 745477f..0000000 --- a/ext/kenlm/jam-files/engine/scan.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright 1993, 1995 Christopher Seiwald. - * - * This file is part of Jam - see jam.c for Copyright information. - */ - -/* - * scan.h - the jam yacc scanner - * - * External functions: - * yyerror( char *s ) - print a parsing error message. - * yyfparse( char *s ) - scan include file s. - * yylex() - parse the next token, returning its type. - * yymode() - adjust lexicon of scanner. - * yyparse() - declaration for yacc parser. - * yyanyerrors() - indicate if any parsing errors occured. 
- * - * The yymode() function is for the parser to adjust the lexicon of the scanner. - * Aside from normal keyword scanning, there is a mode to handle action strings - * (look only for the closing }) and a mode to ignore most keywords when looking - * for a punctuation keyword. This allows non-punctuation keywords to be used in - * lists without quoting. - */ - -#include "lists.h" -#include "object.h" -#include "parse.h" - - -/* - * YYSTYPE - value of a lexical token - */ - -#define YYSTYPE YYSYMBOL - -typedef struct _YYSTYPE -{ - int type; - OBJECT * string; - PARSE * parse; - LIST * list; - int number; - OBJECT * file; - int line; - char const * keyword; -} YYSTYPE; - -extern YYSTYPE yylval; - -void yymode( int n ); -void yyerror( char const * s ); -int yyanyerrors(); -void yyfparse( OBJECT * s ); -int yyline(); -int yylex(); -int yyparse(); -void yyinput_last_read_token( OBJECT * * name, int * line ); - -#define SCAN_NORMAL 0 /* normal parsing */ -#define SCAN_STRING 1 /* look only for matching } */ -#define SCAN_PUNCT 2 /* only punctuation keywords */
