http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/search.c ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/jam-files/engine/search.c b/ext/kenlm/jam-files/engine/search.c deleted file mode 100644 index b2beada..0000000 --- a/ext/kenlm/jam-files/engine/search.c +++ /dev/null @@ -1,274 +0,0 @@ -/* - * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc. - * - * This file is part of Jam - see jam.c for Copyright information. - */ - -/* This file is ALSO: - * Copyright 2001-2004 David Abrahams. - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying file LICENSE_1_0.txt or copy at - * http://www.boost.org/LICENSE_1_0.txt) - */ - -#include "jam.h" -#include "search.h" - -#include "compile.h" -#include "filesys.h" -#include "hash.h" -#include "lists.h" -#include "object.h" -#include "pathsys.h" -#include "strings.h" -#include "timestamp.h" -#include "variable.h" - -#include <string.h> - - -typedef struct _binding -{ - OBJECT * binding; - OBJECT * target; -} BINDING; - -static struct hash * explicit_bindings = 0; - - -void call_bind_rule( OBJECT * target_, OBJECT * boundname_ ) -{ - LIST * const bind_rule = var_get( root_module(), constant_BINDRULE ); - if ( !list_empty( bind_rule ) ) - { - OBJECT * target = object_copy( target_ ); - OBJECT * boundname = object_copy( boundname_ ); - if ( boundname && target ) - { - /* Prepare the argument list. */ - FRAME frame[ 1 ]; - frame_init( frame ); - - /* First argument is the target name. */ - lol_add( frame->args, list_new( target ) ); - - lol_add( frame->args, list_new( boundname ) ); - if ( lol_get( frame->args, 1 ) ) - { - OBJECT * rulename = list_front( bind_rule ); - list_free( evaluate_rule( bindrule( rulename, root_module() ), rulename, frame ) ); - } - - /* Clean up */ - frame_free( frame ); - } - else - { - if ( boundname ) - object_free( boundname ); - if ( target ) - object_free( target ); - } - } -} - -/* Records the binding of a target with an explicit LOCATE. */ -void set_explicit_binding( OBJECT * target, OBJECT * locate ) -{ - OBJECT * boundname; - OBJECT * key; - PATHNAME f[ 1 ]; - string buf[ 1 ]; - int found; - BINDING * ba; - - if ( !explicit_bindings ) - explicit_bindings = hashinit( sizeof( BINDING ), "explicitly specified " - "locations" ); - - string_new( buf ); - - /* Parse the filename. */ - path_parse( object_str( target ), f ); - - /* Ignore the grist. */ - f->f_grist.ptr = 0; - f->f_grist.len = 0; - - /* Root the target path at the given location. */ - f->f_root.ptr = object_str( locate ); - f->f_root.len = strlen( object_str( locate ) ); - - path_build( f, buf ); - boundname = object_new( buf->value ); - if ( DEBUG_SEARCH ) - printf( "explicit locate %s: %s\n", object_str( target ), buf->value ); - string_free( buf ); - key = path_as_key( boundname ); - object_free( boundname ); - - ba = (BINDING *)hash_insert( explicit_bindings, key, &found ); - if ( !found ) - { - ba->binding = key; - ba->target = target; - } - else - object_free( key ); -} - -/* - * search.c - find a target along $(SEARCH) or $(LOCATE). - * - * First, check if LOCATE is set. If so, use it to determine the location of - * target and return, regardless of whether anything exists at that location. - * - * Second, examine all directories in SEARCH. If the file exists there or there - * is another target with the same name already placed at this location via the - * LOCATE setting, stop and return the location. In case of a previous target, - * return its name via the 'another_target' argument. - * - * This behaviour allows handling dependencies on generated files. - * - * If caller does not expect that the target is generated, 0 can be passed as - * 'another_target'. - */ - -OBJECT * search( OBJECT * target, timestamp * const time, - OBJECT * * another_target, int const file ) -{ - PATHNAME f[ 1 ]; - LIST * varlist; - string buf[ 1 ]; - int found = 0; - OBJECT * boundname = 0; - - if ( another_target ) - *another_target = 0; - - if ( !explicit_bindings ) - explicit_bindings = hashinit( sizeof( BINDING ), "explicitly specified " - "locations" ); - - string_new( buf ); - - /* Parse the filename. */ - path_parse( object_str( target ), f ); - - f->f_grist.ptr = 0; - f->f_grist.len = 0; - - varlist = var_get( root_module(), constant_LOCATE ); - if ( !list_empty( varlist ) ) - { - OBJECT * key; - f->f_root.ptr = object_str( list_front( varlist ) ); - f->f_root.len = strlen( object_str( list_front( varlist ) ) ); - - path_build( f, buf ); - - if ( DEBUG_SEARCH ) - printf( "locate %s: %s\n", object_str( target ), buf->value ); - - key = object_new( buf->value ); - timestamp_from_path( time, key ); - object_free( key ); - found = 1; - } - else if ( varlist = var_get( root_module(), constant_SEARCH ), - !list_empty( varlist ) ) - { - LISTITER iter = list_begin( varlist ); - LISTITER const end = list_end( varlist ); - for ( ; iter != end; iter = list_next( iter ) ) - { - BINDING * ba; - file_info_t * ff; - OBJECT * key; - OBJECT * test_path; - - f->f_root.ptr = object_str( list_item( iter ) ); - f->f_root.len = strlen( object_str( list_item( iter ) ) ); - - string_truncate( buf, 0 ); - path_build( f, buf ); - - if ( DEBUG_SEARCH ) - printf( "search %s: %s\n", object_str( target ), buf->value ); - - test_path = object_new( buf->value ); - key = path_as_key( test_path ); - object_free( test_path ); - ff = file_query( key ); - timestamp_from_path( time, key ); - - if ( ( ba = (BINDING *)hash_find( explicit_bindings, key ) ) ) - { - if ( DEBUG_SEARCH ) - printf(" search %s: found explicitly located target %s\n", - object_str( target ), object_str( ba->target ) ); - if ( another_target ) - *another_target = ba->target; - found = 1; - object_free( key ); - break; - } - else if ( ff ) - { - if ( !file || ff->is_file ) - { - found = 1; - object_free( key ); - break; - } - } - object_free( key ); - } - } - - if ( !found ) - { - /* Look for the obvious. */ - /* This is a questionable move. Should we look in the obvious place if - * SEARCH is set? - */ - OBJECT * key; - - f->f_root.ptr = 0; - f->f_root.len = 0; - - string_truncate( buf, 0 ); - path_build( f, buf ); - - if ( DEBUG_SEARCH ) - printf( "search %s: %s\n", object_str( target ), buf->value ); - - key = object_new( buf->value ); - timestamp_from_path( time, key ); - object_free( key ); - } - - boundname = object_new( buf->value ); - string_free( buf ); - - /* Prepare a call to BINDRULE if the variable is set. */ - call_bind_rule( target, boundname ); - - return boundname; -} - - -static void free_binding( void * xbinding, void * data ) -{ - object_free( ( (BINDING *)xbinding )->binding ); -} - - -void search_done( void ) -{ - if ( explicit_bindings ) - { - hashenumerate( explicit_bindings, free_binding, 0 ); - hashdone( explicit_bindings ); - } -}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/search.h ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/jam-files/engine/search.h b/ext/kenlm/jam-files/engine/search.h deleted file mode 100644 index 7e74f79..0000000 --- a/ext/kenlm/jam-files/engine/search.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 1993, 1995 Christopher Seiwald. - * - * This file is part of Jam - see jam.c for Copyright information. - */ - -/* - * search.h - find a target along $(SEARCH) or $(LOCATE) - */ - -#ifndef SEARCH_SW20111118_H -#define SEARCH_SW20111118_H - -#include "object.h" -#include "timestamp.h" - -void set_explicit_binding( OBJECT * target, OBJECT * locate ); -OBJECT * search( OBJECT * target, timestamp * const time, - OBJECT * * another_target, int const file ); -void search_done( void ); - -#endif http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/strings.c ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/jam-files/engine/strings.c b/ext/kenlm/jam-files/engine/strings.c deleted file mode 100644 index 3d3e19b..0000000 --- a/ext/kenlm/jam-files/engine/strings.c +++ /dev/null @@ -1,223 +0,0 @@ -/* Copyright David Abrahams 2004. Distributed under the Boost */ -/* Software License, Version 1.0. (See accompanying */ -/* file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) */ - -#include "jam.h" -#include "strings.h" - -#include <assert.h> -#include <stdlib.h> -#include <string.h> - - -#ifndef NDEBUG -# define JAM_STRING_MAGIC ((char)0xcf) -# define JAM_STRING_MAGIC_SIZE 4 -static void assert_invariants( string * self ) -{ - int i; - - if ( self->value == 0 ) - { - assert( self->size == 0 ); - assert( self->capacity == 0 ); - assert( self->opt[ 0 ] == 0 ); - return; - } - - assert( self->size < self->capacity ); - assert( ( self->capacity <= sizeof( self->opt ) ) == ( self->value == self->opt ) ); - assert( self->value[ self->size ] == 0 ); - /* String objects modified manually after construction to contain embedded - * '\0' characters are considered structurally valid. - */ - assert( strlen( self->value ) <= self->size ); - - for ( i = 0; i < 4; ++i ) - { - assert( self->magic[ i ] == JAM_STRING_MAGIC ); - assert( self->value[ self->capacity + i ] == JAM_STRING_MAGIC ); - } -} -#else -# define JAM_STRING_MAGIC_SIZE 0 -# define assert_invariants(x) do {} while (0) -#endif - - -void string_new( string * s ) -{ - s->value = s->opt; - s->size = 0; - s->capacity = sizeof( s->opt ); - s->opt[ 0 ] = 0; -#ifndef NDEBUG - memset( s->magic, JAM_STRING_MAGIC, sizeof( s->magic ) ); -#endif - assert_invariants( s ); -} - - -void string_free( string * s ) -{ - assert_invariants( s ); - if ( s->value != s->opt ) - BJAM_FREE( s->value ); - string_new( s ); -} - - -static void string_reserve_internal( string * self, size_t capacity ) -{ - if ( self->value == self->opt ) - { - self->value = (char *)BJAM_MALLOC_ATOMIC( capacity + - JAM_STRING_MAGIC_SIZE ); - self->value[ 0 ] = 0; - strncat( self->value, self->opt, sizeof(self->opt) ); - assert( strlen( self->value ) <= self->capacity && "Regression test" ); - } - else - { - self->value = (char *)BJAM_REALLOC( self->value, capacity + - JAM_STRING_MAGIC_SIZE ); - } -#ifndef NDEBUG - memcpy( self->value + capacity, self->magic, JAM_STRING_MAGIC_SIZE ); -#endif - self->capacity = capacity; -} - - -void string_reserve( string * self, size_t capacity ) -{ - assert_invariants( self ); - if ( capacity <= self->capacity ) - return; - string_reserve_internal( self, capacity ); - assert_invariants( self ); -} - - -static void extend_full( string * self, char const * start, char const * finish ) -{ - size_t new_size = self->capacity + ( finish - start ); - size_t new_capacity = self->capacity; - size_t old_size = self->capacity; - while ( new_capacity < new_size + 1) - new_capacity <<= 1; - string_reserve_internal( self, new_capacity ); - memcpy( self->value + old_size, start, new_size - old_size ); - self->value[ new_size ] = 0; - self->size = new_size; -} - -static void maybe_reserve( string * self, size_t new_size ) -{ - size_t capacity = self->capacity; - if ( capacity <= new_size ) - { - size_t new_capacity = capacity; - while ( new_capacity <= new_size ) - new_capacity <<= 1; - string_reserve_internal( self, new_capacity ); - } -} - - -void string_append( string * self, char const * rhs ) -{ - size_t rhs_size = strlen( rhs ); - size_t new_size = self->size + rhs_size; - assert_invariants( self ); - - maybe_reserve( self, new_size ); - - memcpy( self->value + self->size, rhs, rhs_size + 1 ); - self->size = new_size; - - assert_invariants( self ); -} - - -void string_append_range( string * self, char const * start, char const * finish ) -{ - size_t rhs_size = finish - start; - size_t new_size = self->size + rhs_size; - assert_invariants( self ); - - maybe_reserve( self, new_size ); - - memcpy( self->value + self->size, start, rhs_size ); - self->size = new_size; - self->value[ new_size ] = 0; - - assert_invariants( self ); -} - - -void string_copy( string * s, char const * rhs ) -{ - string_new( s ); - string_append( s, rhs ); -} - -void string_truncate( string * self, size_t n ) -{ - assert_invariants( self ); - assert( n <= self->capacity ); - self->value[ self->size = n ] = 0; - assert_invariants( self ); -} - - -void string_pop_back( string * self ) -{ - string_truncate( self, self->size - 1 ); -} - - -void string_push_back( string * self, char x ) -{ - string_append_range( self, &x, &x + 1 ); -} - - -char string_back( string * self ) -{ - assert_invariants( self ); - return self->value[ self->size - 1 ]; -} - - -#ifndef NDEBUG -void string_unit_test() -{ - { - string s[ 1 ]; - int i; - int const limit = sizeof( s->opt ) * 2 + 2; - string_new( s ); - assert( s->value == s->opt ); - for ( i = 0; i < limit; ++i ) - { - string_push_back( s, (char)( i + 1 ) ); - assert( s->size == i + 1 ); - } - assert( s->size == limit ); - assert( s->value != s->opt ); - for ( i = 0; i < limit; ++i ) - assert( s->value[ i ] == (char)( i + 1 ) ); - string_free( s ); - } - - { - char * const original = " \n\t\v Foo \r\n\v \tBar\n\n\r\r\t\n\v\t \t"; - string copy[ 1 ]; - string_copy( copy, original ); - assert( !strcmp( copy->value, original ) ); - assert( copy->size == strlen( original ) ); - string_free( copy ); - } -} -#endif http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/strings.h ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/jam-files/engine/strings.h b/ext/kenlm/jam-files/engine/strings.h deleted file mode 100644 index 749f287..0000000 --- a/ext/kenlm/jam-files/engine/strings.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2004. David Abrahams - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying file LICENSE_1_0.txt or copy at - * http://www.boost.org/LICENSE_1_0.txt) - */ - -#ifndef STRINGS_DWA20011024_H -#define STRINGS_DWA20011024_H - -#include <stddef.h> - -typedef struct string -{ - char * value; - unsigned long size; - unsigned long capacity; - char opt[ 32 ]; -#ifndef NDEBUG - char magic[ 4 ]; -#endif -} string; - -void string_new( string * ); -void string_copy( string *, char const * ); -void string_free( string * ); -void string_append( string *, char const * ); -void string_append_range( string *, char const *, char const * ); -void string_push_back( string * s, char x ); -void string_reserve( string *, size_t ); -void string_truncate( string *, size_t ); -void string_pop_back( string * ); -char string_back( string * ); -void string_unit_test(); - -#endif http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/subst.c ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/jam-files/engine/subst.c b/ext/kenlm/jam-files/engine/subst.c deleted file mode 100644 index a5fcee0..0000000 --- a/ext/kenlm/jam-files/engine/subst.c +++ /dev/null @@ -1,116 +0,0 @@ -#include "jam.h" -#include "subst.h" - -#include "builtins.h" -#include "frames.h" -#include "hash.h" -#include "lists.h" - -#include <stddef.h> - - -typedef struct regex_entry -{ - OBJECT * pattern; - regexp * regex; -} regex_entry; - -static struct hash * regex_hash; - - -regexp * regex_compile( OBJECT * pattern ) -{ - int found; - regex_entry * e ; - - if ( !regex_hash ) - regex_hash = hashinit( sizeof( regex_entry ), "regex" ); - - e = (regex_entry *)hash_insert( regex_hash, pattern, &found ); - if ( !found ) - { - e->pattern = object_copy( pattern ); - e->regex = regcomp( (char *)pattern ); - } - - return e->regex; -} - - -LIST * builtin_subst( FRAME * frame, int flags ) -{ - LIST * result = L0; - LIST * const arg1 = lol_get( frame->args, 0 ); - LISTITER iter = list_begin( arg1 ); - LISTITER const end = list_end( arg1 ); - - if ( iter != end && list_next( iter ) != end && list_next( list_next( iter ) - ) != end ) - { - char const * const source = object_str( list_item( iter ) ); - OBJECT * const pattern = list_item( list_next( iter ) ); - regexp * const repat = regex_compile( pattern ); - - if ( regexec( repat, (char *)source) ) - { - LISTITER subst = list_next( iter ); - - while ( ( subst = list_next( subst ) ) != end ) - { -#define BUFLEN 4096 - char buf[ BUFLEN + 1 ]; - char const * in = object_str( list_item( subst ) ); - char * out = buf; - - for ( ; *in && out < buf + BUFLEN; ++in ) - { - if ( *in == '\\' || *in == '$' ) - { - ++in; - if ( *in == 0 ) - break; - if ( *in >= '0' && *in <= '9' ) - { - unsigned int const n = *in - '0'; - size_t const srclen = repat->endp[ n ] - - repat->startp[ n ]; - size_t const remaining = buf + BUFLEN - out; - size_t const len = srclen < remaining - ? srclen - : remaining; - memcpy( out, repat->startp[ n ], len ); - out += len; - continue; - } - /* fall through and copy the next character */ - } - *out++ = *in; - } - *out = 0; - - result = list_push_back( result, object_new( buf ) ); -#undef BUFLEN - } - } - } - - return result; -} - - -static void free_regex( void * xregex, void * data ) -{ - regex_entry * const regex = (regex_entry *)xregex; - object_free( regex->pattern ); - BJAM_FREE( regex->regex ); -} - - -void regex_done() -{ - if ( regex_hash ) - { - hashenumerate( regex_hash, free_regex, (void *)0 ); - hashdone( regex_hash ); - } -} http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/subst.h ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/jam-files/engine/subst.h b/ext/kenlm/jam-files/engine/subst.h deleted file mode 100644 index 7dc09a6..0000000 --- a/ext/kenlm/jam-files/engine/subst.h +++ /dev/null @@ -1,14 +0,0 @@ -/* Copyright 2001-2004 David Abrahams. - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - */ - -#ifndef SUBST_JG20120722_H -#define SUBST_JG20120722_H - -#include "object.h" -#include "regexp.h" - -regexp * regex_compile( OBJECT * pattern ); - -#endif http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/timestamp.c ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/jam-files/engine/timestamp.c b/ext/kenlm/jam-files/engine/timestamp.c deleted file mode 100644 index 0d01698..0000000 --- a/ext/kenlm/jam-files/engine/timestamp.c +++ /dev/null @@ -1,262 +0,0 @@ -/* - * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc. - * - * This file is part of Jam - see jam.c for Copyright information. - */ - -/* This file is ALSO: - * Copyright 2001-2004 David Abrahams. - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying file LICENSE_1_0.txt or - * http://www.boost.org/LICENSE_1_0.txt) - */ - -/* - * timestamp.c - get the timestamp of a file or archive member - * - * External routines: - * timestamp_from_path() - return timestamp for a path, if present - * timestamp_done() - free timestamp tables - * - * Internal routines: - * time_enter() - internal worker callback for scanning archives & - * directories - * free_timestamps() - worker function for freeing timestamp table contents - */ - -#include "jam.h" -#include "timestamp.h" - -#include "filesys.h" -#include "hash.h" -#include "object.h" -#include "pathsys.h" -#include "strings.h" - - -/* - * BINDING - all known files - */ - -typedef struct _binding -{ - OBJECT * name; - short flags; - -#define BIND_SCANNED 0x01 /* if directory or arch, has been scanned */ - - short progress; - -#define BIND_INIT 0 /* never seen */ -#define BIND_NOENTRY 1 /* timestamp requested but file never found */ -#define BIND_SPOTTED 2 /* file found but not timed yet */ -#define BIND_MISSING 3 /* file found but can not get timestamp */ -#define BIND_FOUND 4 /* file found and time stamped */ - - /* update time - cleared if the there is nothing to bind */ - timestamp time; -} BINDING; - -static struct hash * bindhash = 0; - -static void time_enter( void *, OBJECT *, int const found, - timestamp const * const ); - -static char * time_progress[] = -{ - "INIT", - "NOENTRY", - "SPOTTED", - "MISSING", - "FOUND" -}; - - -#ifdef OS_NT -/* - * timestamp_from_filetime() - Windows FILETIME --> timestamp conversion - * - * Lifted shamelessly from the CPython implementation. - */ - -void timestamp_from_filetime( timestamp * const t, FILETIME const * const ft ) -{ - /* Seconds between 1.1.1601 and 1.1.1970 */ - static __int64 const secs_between_epochs = 11644473600; - - /* We can not simply cast and dereference a FILETIME, since it might not be - * aligned properly. __int64 type variables are expected to be aligned to an - * 8 byte boundary while FILETIME structures may be aligned to any 4 byte - * boundary. Using an incorrectly aligned __int64 variable may cause a - * performance penalty on some platforms or even exceptions on others - * (documented on MSDN). - */ - __int64 in; - memcpy( &in, ft, sizeof( in ) ); - - /* FILETIME resolution: 100ns. */ - timestamp_init( t, (time_t)( ( in / 10000000 ) - secs_between_epochs ), - (int)( in % 10000000 ) * 100 ); -} -#endif /* OS_NT */ - - -void timestamp_clear( timestamp * const time ) -{ - time->secs = time->nsecs = 0; -} - - -int timestamp_cmp( timestamp const * const lhs, timestamp const * const rhs ) -{ - return lhs->secs == rhs->secs - ? lhs->nsecs - rhs->nsecs - : lhs->secs - rhs->secs; -} - - -void timestamp_copy( timestamp * const target, timestamp const * const source ) -{ - target->secs = source->secs; - target->nsecs = source->nsecs; -} - - -void timestamp_current( timestamp * const t ) -{ -#ifdef OS_NT - /* GetSystemTimeAsFileTime()'s resolution seems to be about 15 ms on Windows - * XP and under a millisecond on Windows 7. - */ - FILETIME ft; - GetSystemTimeAsFileTime( &ft ); - timestamp_from_filetime( t, &ft ); -#else /* OS_NT */ - timestamp_init( t, time( 0 ), 0 ); -#endif /* OS_NT */ -} - - -int timestamp_empty( timestamp const * const time ) -{ - return !time->secs && !time->nsecs; -} - - -/* - * timestamp_from_path() - return timestamp for a path, if present - */ - -void timestamp_from_path( timestamp * const time, OBJECT * const path ) -{ - PROFILE_ENTER( timestamp ); - - PATHNAME f1; - PATHNAME f2; - int found; - BINDING * b; - string buf[ 1 ]; - - - if ( file_time( path, time ) < 0 ) - timestamp_clear( time ); - - PROFILE_EXIT( timestamp ); -} - - -void timestamp_init( timestamp * const time, time_t const secs, int const nsecs - ) -{ - time->secs = secs; - time->nsecs = nsecs; -} - - -void timestamp_max( timestamp * const max, timestamp const * const lhs, - timestamp const * const rhs ) -{ - if ( timestamp_cmp( lhs, rhs ) > 0 ) - timestamp_copy( max, lhs ); - else - timestamp_copy( max, rhs ); -} - - -static char const * timestamp_formatstr( timestamp const * const time, - char const * const format ) -{ - static char result1[ 500 ]; - static char result2[ 500 ]; - strftime( result1, sizeof( result1 ) / sizeof( *result1 ), format, gmtime( - &time->secs ) ); - sprintf( result2, result1, time->nsecs ); - return result2; -} - - -char const * timestamp_str( timestamp const * const time ) -{ - return timestamp_formatstr( time, "%Y-%m-%d %H:%M:%S.%%09d +0000" ); -} - - -char const * timestamp_timestr( timestamp const * const time ) -{ - return timestamp_formatstr( time, "%H:%M:%S.%%09d" ); -} - - -/* - * time_enter() - internal worker callback for scanning archives & directories - */ - -static void time_enter( void * closure, OBJECT * target, int const found, - timestamp const * const time ) -{ - int item_found; - BINDING * b; - struct hash * const bindhash = (struct hash *)closure; - - target = path_as_key( target ); - - b = (BINDING *)hash_insert( bindhash, target, &item_found ); - if ( !item_found ) - { - b->name = object_copy( target ); - b->flags = 0; - } - - timestamp_copy( &b->time, time ); - b->progress = found ? BIND_FOUND : BIND_SPOTTED; - - if ( DEBUG_BINDSCAN ) - printf( "time ( %s ) : %s\n", object_str( target ), time_progress[ - b->progress ] ); - - object_free( target ); -} - - -/* - * free_timestamps() - worker function for freeing timestamp table contents - */ - -static void free_timestamps( void * xbinding, void * data ) -{ - object_free( ( (BINDING *)xbinding )->name ); -} - - -/* - * timestamp_done() - free timestamp tables - */ - -void timestamp_done() -{ - if ( bindhash ) - { - hashenumerate( bindhash, free_timestamps, 0 ); - hashdone( bindhash ); - } -} http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/timestamp.h ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/jam-files/engine/timestamp.h b/ext/kenlm/jam-files/engine/timestamp.h deleted file mode 100644 index aaf1310..0000000 --- a/ext/kenlm/jam-files/engine/timestamp.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright 1993, 1995 Christopher Seiwald. - * - * This file is part of Jam - see jam.c for Copyright information. - */ - -/* - * timestamp.h - get the timestamp of a file or archive member - */ - -#ifndef TIMESTAMP_H_SW_2011_11_18 -#define TIMESTAMP_H_SW_2011_11_18 - -#include "object.h" - -#ifdef OS_NT -# define WIN32_LEAN_AND_MEAN -# include <windows.h> -#endif - -#include <time.h> - -typedef struct timestamp -{ - time_t secs; - int nsecs; -} timestamp; - -void timestamp_clear( timestamp * const ); -int timestamp_cmp( timestamp const * const lhs, timestamp const * const rhs ); -void timestamp_copy( timestamp * const target, timestamp const * const source ); -void timestamp_current( timestamp * const ); -int timestamp_empty( timestamp const * const ); -void timestamp_from_path( timestamp * const, OBJECT * const path ); -void timestamp_init( timestamp * const, time_t const secs, int const nsecs ); -void timestamp_max( timestamp * const max, timestamp const * const lhs, - timestamp const * const rhs ); -char const * timestamp_str( timestamp const * const ); -char const * timestamp_timestr( timestamp const * const ); - -#ifdef OS_NT -void timestamp_from_filetime( timestamp * const, FILETIME const * const ); -#endif - -void timestamp_done(); - -#endif http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/variable.c ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/jam-files/engine/variable.c b/ext/kenlm/jam-files/engine/variable.c deleted file mode 100644 index 2c292fb..0000000 --- a/ext/kenlm/jam-files/engine/variable.c +++ /dev/null @@ -1,345 +0,0 @@ -/* - * Copyright 1993, 2000 Christopher Seiwald. - * - * This file is part of Jam - see jam.c for Copyright information. - */ - -/* This file is ALSO: - * Copyright 2001-2004 David Abrahams. - * Copyright 2005 Reece H. Dunn. - * Copyright 2005 Rene Rivera. - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying file LICENSE_1_0.txt or copy at - * http://www.boost.org/LICENSE_1_0.txt) - */ - -/* - * variable.c - handle Jam multi-element variables. - * - * External routines: - * - * var_defines() - load a bunch of variable=value settings - * var_get() - get value of a user defined symbol - * var_set() - set a variable in jam's user defined symbol table. - * var_swap() - swap a variable's value with the given one - * var_done() - free variable tables - * - * Internal routines: - * - * var_enter() - make new var symbol table entry, returning var ptr - * var_dump() - dump a variable to stdout - */ - -#include "jam.h" -#include "variable.h" - -#include "filesys.h" -#include "hash.h" -#include "modules.h" -#include "parse.h" -#include "pathsys.h" -#include "strings.h" - -#include <stdio.h> -#include <stdlib.h> - - -/* - * VARIABLE - a user defined multi-value variable - */ - -typedef struct _variable VARIABLE ; - -struct _variable -{ - OBJECT * symbol; - LIST * value; -}; - -static LIST * * var_enter( struct module_t *, OBJECT * symbol ); -static void var_dump( OBJECT * symbol, LIST * value, char * what ); - - -/* - * var_defines() - load a bunch of variable=value settings - * - * If preprocess is false, take the value verbatim. - * - * Otherwise, if the variable value is enclosed in quotes, strip the quotes. - * Otherwise, if variable name ends in PATH, split value at :'s. - * Otherwise, split the value at blanks. - */ - -void var_defines( struct module_t * module, char * const * e, int preprocess ) -{ - string buf[ 1 ]; - - string_new( buf ); - - for ( ; *e; ++e ) - { - char * val; - - if ( ( val = strchr( *e, '=' ) ) -#if defined( OS_MAC ) - /* On the mac (MPW), the var=val is actually var\0val */ - /* Think different. */ - || ( val = *e + strlen( *e ) ) -#endif - ) - { - LIST * l = L0; - size_t const len = strlen( val + 1 ); - int const quoted = ( val[ 1 ] == '"' ) && ( val[ len ] == '"' ) && - ( len > 1 ); - - if ( quoted && preprocess ) - { - string_append_range( buf, val + 2, val + len ); - l = list_push_back( l, object_new( buf->value ) ); - string_truncate( buf, 0 ); - } - else - { - char * p; - char * pp; - char split = -#if defined( OPT_NO_EXTERNAL_VARIABLE_SPLIT ) - '\0' -#elif defined( OS_MAC ) - ',' -#else - ' ' -#endif - ; - - /* Split *PATH at :'s, not spaces. */ - if ( val - 4 >= *e ) - { - if ( !strncmp( val - 4, "PATH", 4 ) || - !strncmp( val - 4, "Path", 4 ) || - !strncmp( val - 4, "path", 4 ) ) - split = SPLITPATH; - } - - /* Do the split. */ - for - ( - pp = val + 1; - preprocess && ( ( p = strchr( pp, split ) ) != 0 ); - pp = p + 1 - ) - { - string_append_range( buf, pp, p ); - l = list_push_back( l, object_new( buf->value ) ); - string_truncate( buf, 0 ); - } - - l = list_push_back( l, object_new( pp ) ); - } - - /* Get name. */ - string_append_range( buf, *e, val ); - { - OBJECT * const varname = object_new( buf->value ); - var_set( module, varname, l, VAR_SET ); - object_free( varname ); - } - string_truncate( buf, 0 ); - } - } - string_free( buf ); -} - - -/* Last returned variable value saved so we may clear it in var_done(). */ -static LIST * saved_var = L0; - - -/* - * var_get() - get value of a user defined symbol - * - * Returns NULL if symbol unset. - */ - -LIST * var_get( struct module_t * module, OBJECT * symbol ) -{ - LIST * result = L0; -#ifdef OPT_AT_FILES - /* Some "fixed" variables... */ - if ( object_equal( symbol, constant_TMPDIR ) ) - { - list_free( saved_var ); - result = saved_var = list_new( object_new( path_tmpdir()->value ) ); - } - else if ( object_equal( symbol, constant_TMPNAME ) ) - { - list_free( saved_var ); - result = saved_var = list_new( path_tmpnam() ); - } - else if ( object_equal( symbol, constant_TMPFILE ) ) - { - list_free( saved_var ); - result = saved_var = list_new( path_tmpfile() ); - } - else if ( object_equal( symbol, constant_STDOUT ) ) - { - list_free( saved_var ); - result = saved_var = list_new( object_copy( constant_STDOUT ) ); - } - else if ( object_equal( symbol, constant_STDERR ) ) - { - list_free( saved_var ); - result = saved_var = list_new( object_copy( constant_STDERR ) ); - } - else -#endif - { - VARIABLE * v; - int n; - - if ( ( n = module_get_fixed_var( module, symbol ) ) != -1 ) - { - if ( DEBUG_VARGET ) - var_dump( symbol, module->fixed_variables[ n ], "get" ); - result = module->fixed_variables[ n ]; - } - else if ( module->variables && ( v = (VARIABLE *)hash_find( - module->variables, symbol ) ) ) - { - if ( DEBUG_VARGET ) - var_dump( v->symbol, v->value, "get" ); - result = v->value; - } - } - return result; -} - - -LIST * var_get_and_clear_raw( module_t * module, OBJECT * symbol ) -{ - LIST * result = L0; - VARIABLE * v; - - if ( module->variables && ( v = (VARIABLE *)hash_find( module->variables, - symbol ) ) ) - { - result = v->value; - v->value = L0; - } - - return result; -} - - -/* - * var_set() - set a variable in Jam's user defined symbol table - * - * 'flag' controls the relationship between new and old values of the variable: - * SET replaces the old with the new; APPEND appends the new to the old; DEFAULT - * only uses the new if the variable was previously unset. - * - * Copies symbol. Takes ownership of value. - */ - -void var_set( struct module_t * module, OBJECT * symbol, LIST * value, int flag - ) -{ - LIST * * v = var_enter( module, symbol ); - - if ( DEBUG_VARSET ) - var_dump( symbol, value, "set" ); - - switch ( flag ) - { - case VAR_SET: /* Replace value */ - list_free( *v ); - *v = value; - break; - - case VAR_APPEND: /* Append value */ - *v = list_append( *v, value ); - break; - - case VAR_DEFAULT: /* Set only if unset */ - if ( list_empty( *v ) ) - *v = value; - else - list_free( value ); - break; - } -} - - -/* - * var_swap() - swap a variable's value with the given one - */ - -LIST * var_swap( struct module_t * module, OBJECT * symbol, LIST * value ) -{ - LIST * * v = var_enter( module, symbol ); - LIST * oldvalue = *v; - if ( DEBUG_VARSET ) - var_dump( symbol, value, "set" ); - *v = value; - return oldvalue; -} - - -/* - * var_enter() - make new var symbol table entry, returning var ptr - */ - -static LIST * * var_enter( struct module_t * module, OBJECT * symbol ) -{ - int found; - VARIABLE * v; - int n; - - if ( ( n = module_get_fixed_var( module, symbol ) ) != -1 ) - return &module->fixed_variables[ n ]; - - if ( !module->variables ) - module->variables = hashinit( sizeof( VARIABLE ), "variables" ); - - v = (VARIABLE *)hash_insert( module->variables, symbol, &found ); - if ( !found ) - { - v->symbol = object_copy( symbol ); - v->value = L0; - } - - return &v->value; -} - - -/* - * var_dump() - dump a variable to stdout - */ - -static void var_dump( OBJECT * symbol, LIST * value, char * what ) -{ - printf( "%s %s = ", what, object_str( symbol ) ); - list_print( value ); - printf( "\n" ); -} - - -/* - * var_done() - free variable tables - */ - -static void delete_var_( void * xvar, void * data ) -{ - VARIABLE * const v = (VARIABLE *)xvar; - object_free( v->symbol ); - list_free( v->value ); -} - -void var_done( struct module_t * module ) -{ - list_free( saved_var ); - saved_var = L0; - hashenumerate( module->variables, delete_var_, 0 ); - hash_free( module->variables ); -} http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/variable.h ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/jam-files/engine/variable.h b/ext/kenlm/jam-files/engine/variable.h deleted file mode 100644 index ddb452b..0000000 --- a/ext/kenlm/jam-files/engine/variable.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright 1993, 2000 Christopher Seiwald. - * - * This file is part of Jam - see jam.c for Copyright information. - */ - -/* - * variable.h - handle jam multi-element variables - */ - -#ifndef VARIABLE_SW20111119_H -#define VARIABLE_SW20111119_H - -#include "lists.h" -#include "object.h" - - -struct module_t; - -void var_defines( struct module_t *, char * const * e, int preprocess ); -LIST * var_get( struct module_t *, OBJECT * symbol ); -void var_set( struct module_t *, OBJECT * symbol, LIST * value, int flag ); -LIST * var_swap( struct module_t *, OBJECT * symbol, LIST * value ); -void var_done( struct module_t * ); - -/* - * Defines for var_set(). - */ - -#define VAR_SET 0 /* override previous value */ -#define VAR_APPEND 1 /* append to previous value */ -#define VAR_DEFAULT 2 /* set only if no previous value */ - -#endif http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/w32_getreg.c ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/jam-files/engine/w32_getreg.c b/ext/kenlm/jam-files/engine/w32_getreg.c deleted file mode 100644 index dd2d0fc..0000000 --- a/ext/kenlm/jam-files/engine/w32_getreg.c +++ /dev/null @@ -1,201 +0,0 @@ -/* -Copyright Paul Lin 2003. Copyright 2006 Bojan Resnik. -Distributed under the Boost Software License, Version 1.0. (See accompanying -file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -*/ - -# include "jam.h" - -# if defined( OS_NT ) || defined( OS_CYGWIN ) - -# include "lists.h" -# include "object.h" -# include "parse.h" -# include "frames.h" -# include "strings.h" - -# define WIN32_LEAN_AND_MEAN -# include <windows.h> - -# define MAX_REGISTRY_DATA_LENGTH 4096 -# define MAX_REGISTRY_KEYNAME_LENGTH 256 -# define MAX_REGISTRY_VALUENAME_LENGTH 16384 - -typedef struct -{ - LPCSTR name; - HKEY value; -} KeyMap; - -static const KeyMap dlRootKeys[] = { - { "HKLM", HKEY_LOCAL_MACHINE }, - { "HKCU", HKEY_CURRENT_USER }, - { "HKCR", HKEY_CLASSES_ROOT }, - { "HKEY_LOCAL_MACHINE", HKEY_LOCAL_MACHINE }, - { "HKEY_CURRENT_USER", HKEY_CURRENT_USER }, - { "HKEY_CLASSES_ROOT", HKEY_CLASSES_ROOT }, - { 0, 0 } -}; - -static HKEY get_key(char const** path) -{ - const KeyMap *p; - - for (p = dlRootKeys; p->name; ++p) - { - int n = strlen(p->name); - if (!strncmp(*path,p->name,n)) - { - if ((*path)[n] == '\\' || (*path)[n] == 0) - { - *path += n + 1; - break; - } - } - } - - return p->value; -} - -LIST * builtin_system_registry( FRAME * frame, int flags ) -{ - char const* path = object_str( list_front( lol_get(frame->args, 0) ) ); - LIST* result = L0; - HKEY key = get_key(&path); - - if ( - key != 0 - && ERROR_SUCCESS == RegOpenKeyEx(key, path, 0, KEY_QUERY_VALUE, &key) - ) - { - DWORD type; - BYTE data[MAX_REGISTRY_DATA_LENGTH]; - DWORD len = sizeof(data); - LIST * const field = lol_get(frame->args, 1); - - if ( ERROR_SUCCESS == - RegQueryValueEx(key, field ? object_str( list_front( field ) ) : 0, 0, &type, data, &len) ) - { - switch (type) - { - - case REG_EXPAND_SZ: - { - long len; - string expanded[1]; - string_new(expanded); - - while ( - (len = ExpandEnvironmentStrings( - (LPCSTR)data, expanded->value, expanded->capacity)) - > expanded->capacity - ) - string_reserve(expanded, len); - - expanded->size = len - 1; - - result = list_push_back( result, object_new(expanded->value) ); - string_free( expanded ); - } - break; - - case REG_MULTI_SZ: - { - char* s; - - for (s = (char*)data; *s; s += strlen(s) + 1) - result = list_push_back( result, object_new(s) ); - - } - break; - - case REG_DWORD: - { - char buf[100]; - sprintf( buf, "%u", *(PDWORD)data ); - result = list_push_back( result, object_new(buf) ); - } - break; - - case REG_SZ: - result = list_push_back( result, object_new( (const char *)data ) ); - break; - } - } - RegCloseKey(key); - } - return result; -} - -static LIST* get_subkey_names(HKEY key, char const* path) -{ - LIST* result = 0; - - if ( ERROR_SUCCESS == - RegOpenKeyEx(key, path, 0, KEY_ENUMERATE_SUB_KEYS, &key) - ) - { - char name[MAX_REGISTRY_KEYNAME_LENGTH]; - DWORD name_size = sizeof(name); - DWORD index; - FILETIME last_write_time; - - for ( index = 0; - ERROR_SUCCESS == RegEnumKeyEx( - key, index, name, &name_size, 0, 0, 0, &last_write_time); - ++index, - name_size = sizeof(name) - ) - { - name[name_size] = 0; - result = list_append(result, list_new(object_new(name))); - } - - RegCloseKey(key); - } - - return result; -} - -static LIST* get_value_names(HKEY key, char const* path) -{ - LIST* result = 0; - - if ( ERROR_SUCCESS == RegOpenKeyEx(key, path, 0, KEY_QUERY_VALUE, &key) ) - { - char name[MAX_REGISTRY_VALUENAME_LENGTH]; - DWORD name_size = sizeof(name); - DWORD index; - - for ( index = 0; - ERROR_SUCCESS == RegEnumValue( - key, index, name, &name_size, 0, 0, 0, 0); - ++index, - name_size = sizeof(name) - ) - { - name[name_size] = 0; - result = list_append(result, list_new(object_new(name))); - } - - RegCloseKey(key); - } - - return result; -} - -LIST * builtin_system_registry_names( FRAME * frame, int flags ) -{ - char const* path = object_str( list_front( lol_get(frame->args, 0) ) ); - char const* result_type = object_str( list_front( lol_get(frame->args, 1) ) ); - - HKEY key = get_key(&path); - - if ( !strcmp(result_type, "subkeys") ) - return get_subkey_names(key, path); - if ( !strcmp(result_type, "values") ) - return get_value_names(key, path); - return 0; -} - -# endif http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/yyacc.c ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/jam-files/engine/yyacc.c b/ext/kenlm/jam-files/engine/yyacc.c deleted file mode 100644 index b5efc96..0000000 --- a/ext/kenlm/jam-files/engine/yyacc.c +++ /dev/null @@ -1,268 +0,0 @@ -/* Copyright 2002 Rene Rivera. -** Distributed under the Boost Software License, Version 1.0. -** (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) -*/ - -#include <stdio.h> -#include <string.h> -#include <ctype.h> -#include <stdlib.h> - -/* -# yyacc - yacc wrapper -# -# Allows tokens to be written as `literal` and then automatically -# substituted with #defined tokens. -# -# Usage: -# yyacc file.y filetab.h file.yy -# -# inputs: -# file.yy yacc grammar with ` literals -# -# outputs: -# file.y yacc grammar -# filetab.h array of string <-> token mappings -# -# 3-13-93 -# Documented and p moved in sed command (for some reason, -# s/x/y/p doesn't work). -# 10-12-93 -# Take basename as second argument. -# 12-31-96 -# reversed order of args to be compatible with GenFile rule -# 11-20-2002 -# Reimplemented as a C program for portability. (Rene Rivera) -*/ - -void print_usage(); -char * copy_string(char * s, int l); -char * tokenize_string(char * s); -int cmp_literal(const void * a, const void * b); - -typedef struct -{ - char * string; - char * token; -} literal; - -int main(int argc, char ** argv) -{ - int result = 0; - if (argc != 4) - { - print_usage(); - result = 1; - } - else - { - FILE * token_output_f = 0; - FILE * grammar_output_f = 0; - FILE * grammar_source_f = 0; - - grammar_source_f = fopen(argv[3],"r"); - if (grammar_source_f == 0) { result = 1; } - if (result == 0) - { - literal literals[1024]; - int t = 0; - char l[2048]; - while (1) - { - if (fgets(l,2048,grammar_source_f) != 0) - { - char * c = l; - while (1) - { - char * c1 = strchr(c,'`'); - if (c1 != 0) - { - char * c2 = strchr(c1+1,'`'); - if (c2 != 0) - { - literals[t].string = copy_string(c1+1,c2-c1-1); - literals[t].token = tokenize_string(literals[t].string); - t += 1; - c = c2+1; - } - else - break; - } - else - break; - } - } - else - { - break; - } - } - literals[t].string = 0; - literals[t].token = 0; - qsort(literals,t,sizeof(literal),cmp_literal); - { - int p = 1; - int i = 1; - while (literals[i].string != 0) - { - if (strcmp(literals[p-1].string,literals[i].string) != 0) - { - literals[p] = literals[i]; - p += 1; - } - i += 1; - } - literals[p].string = 0; - literals[p].token = 0; - t = p; - } - token_output_f = fopen(argv[2],"w"); - if (token_output_f != 0) - { - int i = 0; - while (literals[i].string != 0) - { - fprintf(token_output_f," { \"%s\", %s },\n",literals[i].string,literals[i].token); - i += 1; - } - fclose(token_output_f); - } - else - result = 1; - if (result == 0) - { - grammar_output_f = fopen(argv[1],"w"); - if (grammar_output_f != 0) - { - int i = 0; - while (literals[i].string != 0) - { - fprintf(grammar_output_f,"%%token %s\n",literals[i].token); - i += 1; - } - rewind(grammar_source_f); - while (1) - { - if (fgets(l,2048,grammar_source_f) != 0) - { - char * c = l; - while (1) - { - char * c1 = strchr(c,'`'); - if (c1 != 0) - { - char * c2 = strchr(c1+1,'`'); - if (c2 != 0) - { - literal key; - literal * replacement = 0; - key.string = copy_string(c1+1,c2-c1-1); - key.token = 0; - replacement = (literal*)bsearch( - &key,literals,t,sizeof(literal),cmp_literal); - *c1 = 0; - fprintf(grammar_output_f,"%s%s",c,replacement->token); - c = c2+1; - } - else - { - fprintf(grammar_output_f,"%s",c); - break; - } - } - else - { - fprintf(grammar_output_f,"%s",c); - break; - } - } - } - else - { - break; - } - } - fclose(grammar_output_f); - } - else - result = 1; - } - } - if (result != 0) - { - perror("yyacc"); - } - } - return result; -} - -static char * usage[] = { - "yyacc <grammar output.y> <token table output.h> <grammar source.yy>", - 0 }; - -void print_usage() -{ - char ** u; - for (u = usage; *u != 0; ++u) - { - fputs(*u,stderr); putc('\n',stderr); - } -} - -char * copy_string(char * s, int l) -{ - char * result = (char*)malloc(l+1); - strncpy(result,s,l); - result[l] = 0; - return result; -} - -char * tokenize_string(char * s) -{ - char * result; - char * literal = s; - int l; - int c; - - if (strcmp(s,":") == 0) literal = "_colon"; - else if (strcmp(s,"!") == 0) literal = "_bang"; - else if (strcmp(s,"!=") == 0) literal = "_bang_equals"; - else if (strcmp(s,"&&") == 0) literal = "_amperamper"; - else if (strcmp(s,"&") == 0) literal = "_amper"; - else if (strcmp(s,"+") == 0) literal = "_plus"; - else if (strcmp(s,"+=") == 0) literal = "_plus_equals"; - else if (strcmp(s,"||") == 0) literal = "_barbar"; - else if (strcmp(s,"|") == 0) literal = "_bar"; - else if (strcmp(s,";") == 0) literal = "_semic"; - else if (strcmp(s,"-") == 0) literal = "_minus"; - else if (strcmp(s,"<") == 0) literal = "_langle"; - else if (strcmp(s,"<=") == 0) literal = "_langle_equals"; - else if (strcmp(s,">") == 0) literal = "_rangle"; - else if (strcmp(s,">=") == 0) literal = "_rangle_equals"; - else if (strcmp(s,".") == 0) literal = "_period"; - else if (strcmp(s,"?") == 0) literal = "_question"; - else if (strcmp(s,"?=") == 0) literal = "_question_equals"; - else if (strcmp(s,"=") == 0) literal = "_equals"; - else if (strcmp(s,",") == 0) literal = "_comma"; - else if (strcmp(s,"[") == 0) literal = "_lbracket"; - else if (strcmp(s,"]") == 0) literal = "_rbracket"; - else if (strcmp(s,"{") == 0) literal = "_lbrace"; - else if (strcmp(s,"}") == 0) literal = "_rbrace"; - else if (strcmp(s,"(") == 0) literal = "_lparen"; - else if (strcmp(s,")") == 0) literal = "_rparen"; - l = strlen(literal)+2; - result = (char*)malloc(l+1); - for (c = 0; literal[c] != 0; ++c) - { - result[c] = toupper(literal[c]); - } - result[l-2] = '_'; - result[l-1] = 't'; - result[l] = 0; - return result; -} - -int cmp_literal(const void * a, const void * b) -{ - return strcmp(((const literal *)a)->string,((const literal *)b)->string); -} http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/fail/Jamroot ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/jam-files/fail/Jamroot b/ext/kenlm/jam-files/fail/Jamroot deleted file mode 100644 index c3584d8..0000000 --- a/ext/kenlm/jam-files/fail/Jamroot +++ /dev/null @@ -1,4 +0,0 @@ -actions fail { - false -} -make fail : : fail ; http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/sanity.jam ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/jam-files/sanity.jam b/ext/kenlm/jam-files/sanity.jam deleted file mode 100644 index 1851ece..0000000 --- a/ext/kenlm/jam-files/sanity.jam +++ /dev/null @@ -1,344 +0,0 @@ -import modules ; -import option ; -import os ; -import path ; -import project ; -import build-system ; -import version ; - -#Shell with trailing line removed http://lists.boost.org/boost-build/2007/08/17051.php -rule trim-nl ( str extras * ) { -return [ MATCH "([^ -]*)" : $(str) ] $(extras) ; -} -rule _shell ( cmd : extras * ) { - return [ trim-nl [ SHELL $(cmd) : $(extras) ] ] ; -} - -rule shell_or_fail ( cmd ) { - local ret = [ SHELL $(cmd) : exit-status ] ; - if $(ret[2]) != 0 { - exit $(cmd) failed : 1 ; - } -} - -rule shell_or_die ( cmd ) { - local ret = [ SHELL $(cmd) : exit-status ] ; - if $(ret[2]) != 0 { - exit $(cmd) failed : 1 ; - } - return [ trim-nl $(ret[1]) ] ; -} - -cxxflags = [ os.environ "CXXFLAGS" ] ; -cflags = [ os.environ "CFLAGS" ] ; -ldflags = [ os.environ "LDFLAGS" ] ; - -#Run g++ with empty main and these arguments to see if it passes. -rule test_flags ( flags * : main ? ) { - flags = $(cxxflags) $(ldflags) $(flags) ; - if ! $(main) { - main = "int main() {}" ; - } - local cmd = "bash -c \"g++ "$(flags:J=" ")" -x c++ - <<<'$(main)' -o $(TOP)/dummy >/dev/null 2>/dev/null && rm $(TOP)/dummy 2>/dev/null\"" ; - local ret = [ SHELL $(cmd) : exit-status ] ; - if --debug-configuration in [ modules.peek : ARGV ] { - echo $(cmd) ; - echo $(ret) ; - } - if $(ret[2]) = 0 { - return true ; - } else { - return ; - } -} - -rule test_header ( name ) { - return [ test_flags "-include $(name)" ] ; -} - -requirements = ; - -FORCE-STATIC = [ option.get "static" : : "yes" ] ; -if $(FORCE-STATIC) { - requirements += <link>static <runtime-link>static ; -} - -rule test_library ( name ) { - if $(FORCE-STATIC) { - return [ test_flags "-Wl,-Bstatic -l$(name) -Wl,-Bdynamic" ] ; - } else { - return [ test_flags "-l$(name)" ] ; - } -} - -{ - local cleaning = [ option.get "clean" : : yes ] ; - cleaning ?= [ option.get "clean-all" : no : yes ] ; - if "clean" in [ modules.peek : ARGV ] { - cleaning = yes ; - } - constant CLEANING : $(cleaning) ; -} - -shared-command-line = ; -local argv = [ modules.peek : ARGV ] ; -while $(argv) { - if $(argv[1]) = "link=shared" { - shared-command-line = <link>shared ; - } - argv = $(argv[2-]) ; -} - -#Determine if a library can be compiled statically. -rule auto-shared ( name : additional * ) { - - additional ?= "" ; - if $(shared-command-line) = "<link>shared" { - return "<link>shared" ; - } else { - if [ test_flags $(additional)" -Wl,-Bstatic -l"$(name)" -Wl,-Bdynamic" ] { - return ; - } else { - if $(FORCE-STATIC) { - echo "Could not statically link against lib $(name). Your build will probably fail." ; - return ; - } else { - return "<link>shared" ; - } - } - } -} - -# MacPorts' default location is /opt/local -- use this if no path is given. -with-macports = [ option.get "with-macports" : : "/opt/local" ] ; -if $(with-macports) { - using darwin ; - ECHO "Using --with-macports=$(with-macports), implying use of darwin GCC" ; - - L-boost-search = -L$(with-macports)/lib ; - boost-search = <search>$(with-macports)/lib ; - I-boost-include = -I$(with-macports)/include ; - boost-include = <include>$(with-macports)/include ; - requirements += $(boost-include) ; -} else { - with-boost = [ option.get "with-boost" ] ; - with-boost ?= [ os.environ "BOOST_ROOT" ] ; - if $(with-boost) { - L-boost-search = -L$(with-boost)/lib" "-L$(with-boost)/lib64 ; - boost-search = <search>$(with-boost)/lib <search>$(with-boost)/lib64 ; - I-boost-include = -I$(with-boost)/include ; - boost-include = <include>$(with-boost)/include ; - requirements += $(boost-include) ; - } else { - L-boost-search = "" ; - boost-search = ; - I-boost-include = "" ; - boost-include = ; - } -} - -#Convenience rule for boost libraries. Defines library boost_$(name). -rule boost-lib ( name macro : deps * ) { - lib boost_$(name)_static : $(deps) : $(boost-search) <name>boost_$(name)$(boost-lib-version) <link>static ; - lib boost_$(name)_shared : $(deps) : $(boost-search) <name>boost_$(name)$(boost-lib-version) <link>shared : : <define>BOOST_$(macro) ; - - alias boost_$(name)_default : $(deps) : <link>static:<source>boost_$(name)_static <link>shared:<source>boost_$(name)_shared ; - - alias boost_$(name)_static_works : $(deps) : [ check-target-builds empty_test_shared "Shared Boost" : <source>boost_$(name)_default : <source>boost_$(name)_static ] ; - alias boost_$(name) : $(deps) : [ check-target-builds empty_test_static "Static Boost" : <source>boost_$(name)_static_works : <source>boost_$(name)_shared ] ; -} - -#Argument is e.g. 103600 -rule boost ( min-version ) { - local cmd = "bash -c \"g++ "$(I-boost-include)" -dM -x c++ -E /dev/null -include boost/version.hpp 2>/dev/null |grep '#define BOOST_'\"" ; - local boost-shell = [ SHELL "$(cmd)" : exit-status ] ; - if $(boost-shell[2]) != 0 && $(CLEANING) = no { - echo Failed to run "$(cmd)" ; - exit Boost does not seem to be installed or g++ is confused. : 1 ; - } - constant BOOST-VERSION : [ MATCH "#define BOOST_VERSION ([0-9]*)" : $(boost-shell[1]) ] ; - if $(BOOST-VERSION) < $(min-version) && $(CLEANING) = no { - exit You have Boost $(BOOST-VERSION). This package requires Boost at least $(min-version) (and preferably newer). : 1 ; - } - # If matching version tags exist, use them. - boost-lib-version = [ MATCH "#define BOOST_LIB_VERSION \"([^\"]*)\"" : $(boost-shell[1]) ] ; - if [ test_flags $(L-boost-search)" -lboost_program_options-"$(boost-lib-version) ] { - boost-lib-version = "-"$(boost-lib-version) ; - } else { - boost-lib-version = "" ; - } - - #Crazy amount of testing to make sure that BOOST_TEST_DYN_LINK is defined properly. - lib boost_unit_test_framework_static_test : : $(boost-search) <name>boost_unit_test_framework$(boost-lib-version) <link>static ; - obj empty_test_static.o : jam-files/empty_test_main.cc boost_unit_test_framework_static_test : $(boost-include) ; - exe empty_test_static : empty_test_static.o boost_unit_test_framework_static_test ; - - lib boost_unit_test_framework_shared_test : : $(boost-search) <name>boost_unit_test_framework$(boost-lib-version) <link>shared : : <define>BOOST_TEST_DYN_LINK ; - obj empty_test_shared.o : jam-files/empty_test_main.cc boost_unit_test_framework_shared_test : $(boost-include) ; - exe empty_test_shared : empty_test_shared.o boost_unit_test_framework_shared_test ; - - explicit empty_test_static.o empty_test_static empty_test_shared.o empty_test_shared ; - - - #See tools/build/v2/contrib/boost.jam in a boost distribution for a table of macros to define. - boost-lib system SYSTEM_DYN_LINK ; - boost-lib thread THREAD_DYN_DLL : boost_system ; - boost-lib program_options PROGRAM_OPTIONS_DYN_LINK ; - boost-lib iostreams IOSTREAMS_DYN_LINK ; - boost-lib filesystem FILE_SYSTEM_DYN_LINK ; - boost-lib unit_test_framework TEST_DYN_LINK ; -# if $(BOOST-VERSION) >= 104800 { -# boost-lib chrono CHRONO_DYN_LINK ; -# boost-lib timer TIMER_DYN_LINK : boost_chrono ; -# } -} - -#Link normally to a library, but sometimes static isn't installed so fall back to dynamic. -rule external-lib ( name : search-path * : deps * ) { - lib $(name) : : [ auto-shared $(name) : "-L"$(search-path) ] <search>$(search-path) <use>$(deps) ; -} - -#Write the current command line to previous.sh. This does not do shell escaping. -{ - local build-log = $(TOP)/previous.sh ; - if ! [ path.exists $(build-log) ] { - SHELL "touch \"$(build-log)\" && chmod +x \"$(build-log)\"" ; - } - local script = [ modules.peek : ARGV ] ; - if $(script[1]) = "./jam-files/bjam" { - #The ./bjam shell script calls ./jam-files/bjam so that appears in argv but - #we want ./bjam to appear so the environment variables are set correctly. - script = "./bjam "$(script[2-]:J=" ") ; - } else { - script = $(script:J=" ") ; - } - script = "#!/bin/sh\n$(script)\n" ; - local ignored = @($(build-log):E=$(script)) ; -} - -#Boost jam's static clang for Linux is buggy. -requirements += <cxxflags>$(cxxflags) <cflags>$(cflags) <linkflags>$(ldflags) <os>LINUX,<toolset>clang:<link>shared ; - -if ! [ option.get "without-libsegfault" : : "yes" ] && ! $(FORCE-STATIC) { - #libSegFault prints a stack trace on segfault. Link against it if available. - if [ test_flags "-lSegFault" ] { - external-lib SegFault ; - requirements += <library>SegFault ; - } -} - -if [ option.get "git" : : "yes" ] { - local revision = [ _shell "git rev-parse --verify HEAD |head -c 7" ] ; - constant GITTAG : "/"$(revision) ; -} else { - constant GITTAG : "" ; -} - -local prefix = [ option.get "prefix" ] ; -if $(prefix) { - prefix = [ path.root $(prefix) [ path.pwd ] ] ; - prefix = $(prefix)$(GITTAG) ; -} else { - prefix = $(TOP)$(GITTAG) ; -} - -path-constant PREFIX : $(prefix) ; - -path-constant BINDIR : [ option.get "bindir" : $(PREFIX)/bin ] ; -path-constant LIBDIR : [ option.get "libdir" : $(PREFIX)/lib ] ; -rule install-bin-libs ( deps * ) { - install prefix-bin : $(deps) : <location>$(BINDIR) <install-dependencies>on <install-type>EXE <link>shared:<dll-path>$(LIBDIR) ; - install prefix-lib : $(deps) : <location>$(LIBDIR) <install-dependencies>on <install-type>LIB <link>shared:<dll-path>$(LIBDIR) ; -} -rule install-headers ( name : list * : source-root ? ) { - local includedir = [ option.get "includedir" : $(prefix)/include ] ; - source-root ?= "." ; - install $(name) : $(list) : <location>$(includedir) <install-source-root>$(source-root) ; -} - -rule build-projects ( projects * ) { - for local p in $(projects) { - build-project $(p) ; - } -} - -#Only one post build hook is allowed. Allow multiple. -post-hooks = ; -rule post-build ( ok ? ) { - for local r in $(post-hooks) { - $(r) $(ok) ; - } -} -IMPORT $(__name__) : post-build : : $(__name__).post-build ; -build-system.set-post-build-hook $(__name__).post-build ; -rule add-post-hook ( names * ) { - post-hooks += $(names) ; -} - -rule failure-message ( ok ? ) { - if $(ok) != "ok" { - local args = [ modules.peek : ARGV ] ; - local args = $(args:J=" ") ; - if --debug-configuration in [ modules.peek : ARGV ] { - echo "The build failed with command line: " ; - echo " $(args)" ; - echo "If you need support, attach the full output to your e-mail." ; - } else { - echo "The build failed. If you need support, run:" ; - echo " $(args) --debug-configuration -d2 |gzip >build.log.gz" ; - echo "then attach build.log.gz to your e-mail." ; - } - echo "ERROR" ; - } else { - echo "SUCCESS" ; - } -} -add-post-hook failure-message ; - -import feature : feature ; -feature options-to-write : : free ; -import toolset : flags ; -flags write-options OPTIONS-TO-WRITE <options-to-write> ; -actions write-options { - echo "$(OPTIONS-TO-WRITE)" > $(<) ; -} - -#Compare contents of file with current. If they're different, write to the -#file. This file can then be used with <dependency>$(file) to force -#recompilation. -rule update-if-changed ( file current ) { - if ( ! [ path.exists $(file) ] ) || ( [ _shell "cat $(file)" ] != $(current) ) { - make $(file) : : $(__name__).write-options : <options-to-write>$(current) ; - always $(file) ; - } -} - -if [ option.get "sanity-test" : : "yes" ] { - local current_version = [ modules.peek : JAM_VERSION ] ; - if ( $(current_version[0]) < 2000 && [ version.check-jam-version 3 1 16 ] ) || [ version.check-jam-version 2011 0 0 ] { - EXIT "Sane" : 0 ; - } else { - EXIT "Bad" : 1 ; - } -} - -#Hack to act like alias in the sense that no lib is built, but only build cpp files once. -import type ; -rule fakelib ( name : deps * : requirements * : default-build * : usage-requirements * ) { - local c-files = ; - local real-deps = ; - for local c in $(deps) { - if [ type.type $(c) ] = CPP { - c-files += $(c) ; - } else { - real-deps += $(c) ; - } - } - for local c in $(c-files) { - obj $(c:B).o : $(c) $(real-deps) : $(requirements) : $(default-build) : $(usage_requirements) ; - } - alias $(name) : $(c-files:B).o $(real-deps) : $(requirements) : $(default-build) : $(usage-requirements) ; -} - -use-project /top : . ; http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/lm/CMakeLists.txt b/ext/kenlm/lm/CMakeLists.txt deleted file mode 100644 index e3ef06f..0000000 --- a/ext/kenlm/lm/CMakeLists.txt +++ /dev/null @@ -1,90 +0,0 @@ -cmake_minimum_required(VERSION 2.8.8) -# -# The KenLM cmake files make use of add_library(... OBJECTS ...) -# -# This syntax allows grouping of source files when compiling -# (effectively creating "fake" libraries based on source subdirs). -# -# This syntax was only added in cmake version 2.8.8 -# -# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library - - -# This CMake file was created by Lane Schwartz <[email protected]> - - -set(KENLM_MAX_ORDER 6 CACHE STRING "Maximum supported ngram order") - -add_definitions(-DKENLM_MAX_ORDER=${KENLM_MAX_ORDER}) - - -# Explicitly list the source files for this subdirectory -# -# If you add any source files to this subdirectory -# that should be included in the kenlm library, -# (this excludes any unit test files) -# you should add them to the following list: -set(KENLM_SOURCE - bhiksha.cc - binary_format.cc - config.cc - lm_exception.cc - model.cc - quantize.cc - read_arpa.cc - search_hashed.cc - search_trie.cc - sizes.cc - trie.cc - trie_sort.cc - value_build.cc - virtual_interface.cc - vocab.cc -) - - -# Group these objects together for later use. -# -# Given add_library(foo OBJECT ${my_foo_sources}), -# refer to these objects as $<TARGET_OBJECTS:foo> -# -add_library(kenlm OBJECT ${KENLM_SOURCE}) - -# This directory has children that need to be processed -add_subdirectory(builder) -add_subdirectory(common) -add_subdirectory(filter) - - - -# Explicitly list the executable files to be compiled -set(EXE_LIST - query - fragment - build_binary -) - -AddExes(EXES ${EXE_LIST} - DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util> - LIBRARIES ${Boost_LIBRARIES} pthread) - -# Conditionally build the interpolation code -if(BUILD_INTERPOLATE) - add_subdirectory(interpolate) -endif() - -if(BUILD_TESTING) - - set(KENLM_BOOST_TESTS_LIST left_test partial_test) - AddTests(TESTS ${KENLM_BOOST_TESTS_LIST} - DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util> - LIBRARIES ${Boost_LIBRARIES} pthread - TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/test.arpa) - - # model_test requires an extra command line parameter - KenLMAddTest(TEST model_test - DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util> - LIBRARIES ${Boost_LIBRARIES} pthread - TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/test.arpa - ${CMAKE_CURRENT_SOURCE_DIR}/test_nounk.arpa) -endif() http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/Jamfile ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/lm/Jamfile b/ext/kenlm/lm/Jamfile deleted file mode 100644 index a479e2d..0000000 --- a/ext/kenlm/lm/Jamfile +++ /dev/null @@ -1,40 +0,0 @@ -# If you need higher order, change this option -# Having this limit means that State can be -# (KENLM_MAX_ORDER - 1) * sizeof(float) bytes instead of -# sizeof(float*) + (KENLM_MAX_ORDER - 1) * sizeof(float) + malloc overhead -max-order = [ option.get "max-kenlm-order" : 6 : 6 ] ; -if ( $(max-order) != 6 ) { - echo "Setting KenLM maximum n-gram order to $(max-order)" ; -} -max-order = <define>KENLM_MAX_ORDER=$(max-order) ; - -path-constant ORDER-LOG : bin/order.log ; -update-if-changed $(ORDER-LOG) $(max-order) ; - -max-order += <dependency>$(ORDER-LOG) ; - -wrappers = ; -local with-nplm = [ option.get "with-nplm" ] ; -if $(with-nplm) { - lib nplm : : <search>$(with-nplm)/src ; - obj nplm.o : wrappers/nplm.cc : <include>.. <include>$(with-nplm)/src <cxxflags>-fopenmp <include>$(with-nplm)/3rdparty/eigen <define>NPLM_DOUBLE_PRECISION=0 ; - alias nplm-all : nplm.o nplm ..//boost_thread : : : <cxxflags>-fopenmp <linkflags>-fopenmp <define>WITH_NPLM <library>..//boost_thread ; - wrappers += nplm-all ; -} - -fakelib kenlm : $(wrappers) [ glob *.cc : *main.cc *test.cc ] ../util//kenutil : <include>.. $(max-order) : : <include>.. $(max-order) ; - -import testing ; - -run left_test.cc kenlm /top//boost_unit_test_framework : : test.arpa ; -run model_test.cc kenlm /top//boost_unit_test_framework : : test.arpa test_nounk.arpa ; -run partial_test.cc kenlm /top//boost_unit_test_framework : : test.arpa ; - -exes = ; -for local p in [ glob *_main.cc ] { - local name = [ MATCH "(.*)\_main.cc" : $(p) ] ; - exe $(name) : $(p) kenlm ; - exes += $(name) ; -} - -alias programs : $(exes) filter//filter builder//dump_counts : <threading>multi:<source>builder//lmplz ; http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/bhiksha.cc ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/lm/bhiksha.cc b/ext/kenlm/lm/bhiksha.cc deleted file mode 100644 index 4262b61..0000000 --- a/ext/kenlm/lm/bhiksha.cc +++ /dev/null @@ -1,94 +0,0 @@ -#include "lm/bhiksha.hh" - -#include "lm/binary_format.hh" -#include "lm/config.hh" -#include "util/file.hh" -#include "util/exception.hh" - -#include <limits> - -namespace lm { -namespace ngram { -namespace trie { - -DontBhiksha::DontBhiksha(const void * /*base*/, uint64_t /*max_offset*/, uint64_t max_next, const Config &/*config*/) : - next_(util::BitsMask::ByMax(max_next)) {} - -const uint8_t kArrayBhikshaVersion = 0; - -// TODO: put this in binary file header instead when I change the binary file format again. -void ArrayBhiksha::UpdateConfigFromBinary(const BinaryFormat &file, uint64_t offset, Config &config) { - uint8_t buffer[2]; - file.ReadForConfig(buffer, 2, offset); - uint8_t version = buffer[0]; - uint8_t configured_bits = buffer[1]; - if (version != kArrayBhikshaVersion) UTIL_THROW(FormatLoadException, "This file has sorted array compression version " << (unsigned) version << " but the code expects version " << (unsigned)kArrayBhikshaVersion); - config.pointer_bhiksha_bits = configured_bits; -} - -namespace { - -// Find argmin_{chopped \in [0, RequiredBits(max_next)]} ChoppedDelta(max_offset) -uint8_t ChopBits(uint64_t max_offset, uint64_t max_next, const Config &config) { - uint8_t required = util::RequiredBits(max_next); - uint8_t best_chop = 0; - int64_t lowest_change = std::numeric_limits<int64_t>::max(); - // There are probably faster ways but I don't care because this is only done once per order at construction time. - for (uint8_t chop = 0; chop <= std::min(required, config.pointer_bhiksha_bits); ++chop) { - int64_t change = (max_next >> (required - chop)) * 64 /* table cost in bits */ - - max_offset * static_cast<int64_t>(chop); /* savings in bits*/ - if (change < lowest_change) { - lowest_change = change; - best_chop = chop; - } - } - return best_chop; -} - -std::size_t ArrayCount(uint64_t max_offset, uint64_t max_next, const Config &config) { - uint8_t required = util::RequiredBits(max_next); - uint8_t chopping = ChopBits(max_offset, max_next, config); - return (max_next >> (required - chopping)) + 1 /* we store 0 too */; -} -} // namespace - -uint64_t ArrayBhiksha::Size(uint64_t max_offset, uint64_t max_next, const Config &config) { - return sizeof(uint64_t) * (1 /* header */ + ArrayCount(max_offset, max_next, config)) + 7 /* 8-byte alignment */; -} - -uint8_t ArrayBhiksha::InlineBits(uint64_t max_offset, uint64_t max_next, const Config &config) { - return util::RequiredBits(max_next) - ChopBits(max_offset, max_next, config); -} - -namespace { - -void *AlignTo8(void *from) { - uint8_t *val = reinterpret_cast<uint8_t*>(from); - std::size_t remainder = reinterpret_cast<std::size_t>(val) & 7; - if (!remainder) return val; - return val + 8 - remainder; -} - -} // namespace - -ArrayBhiksha::ArrayBhiksha(void *base, uint64_t max_offset, uint64_t max_next, const Config &config) - : next_inline_(util::BitsMask::ByBits(InlineBits(max_offset, max_next, config))), - offset_begin_(reinterpret_cast<const uint64_t*>(AlignTo8(base)) + 1 /* 8-byte header */), - offset_end_(offset_begin_ + ArrayCount(max_offset, max_next, config)), - write_to_(reinterpret_cast<uint64_t*>(AlignTo8(base)) + 1 /* 8-byte header */ + 1 /* first entry is 0 */), - original_base_(base) {} - -void ArrayBhiksha::FinishedLoading(const Config &config) { - // *offset_begin_ = 0 but without a const_cast. - *(write_to_ - (write_to_ - offset_begin_)) = 0; - - if (write_to_ != offset_end_) UTIL_THROW(util::Exception, "Did not get all the array entries that were expected."); - - uint8_t *head_write = reinterpret_cast<uint8_t*>(original_base_); - *(head_write++) = kArrayBhikshaVersion; - *(head_write++) = config.pointer_bhiksha_bits; -} - -} // namespace trie -} // namespace ngram -} // namespace lm http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/bhiksha.hh ---------------------------------------------------------------------- diff --git a/ext/kenlm b/ext/kenlm new file mode 160000 index 0000000..56fdb5c --- /dev/null +++ b/ext/kenlm @@ -0,0 +1 @@ +Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5 diff --git a/ext/kenlm/lm/bhiksha.hh b/ext/kenlm/lm/bhiksha.hh deleted file mode 100644 index 36438f1..0000000 --- a/ext/kenlm/lm/bhiksha.hh +++ /dev/null @@ -1,122 +0,0 @@ -/* Simple implementation of - * @inproceedings{bhikshacompression, - * author={Bhiksha Raj and Ed Whittaker}, - * year={2003}, - * title={Lossless Compression of Language Model Structure and Word Identifiers}, - * booktitle={Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing}, - * pages={388--391}, - * } - * - * Currently only used for next pointers. - */ - -#ifndef LM_BHIKSHA_H -#define LM_BHIKSHA_H - -#include "lm/model_type.hh" -#include "lm/trie.hh" -#include "util/bit_packing.hh" -#include "util/sorted_uniform.hh" - -#include <algorithm> -#include <stdint.h> -#include <cassert> - -namespace lm { -namespace ngram { -struct Config; -class BinaryFormat; - -namespace trie { - -class DontBhiksha { - public: - static const ModelType kModelTypeAdd = static_cast<ModelType>(0); - - static void UpdateConfigFromBinary(const BinaryFormat &, uint64_t, Config &/*config*/) {} - - static uint64_t Size(uint64_t /*max_offset*/, uint64_t /*max_next*/, const Config &/*config*/) { return 0; } - - static uint8_t InlineBits(uint64_t /*max_offset*/, uint64_t max_next, const Config &/*config*/) { - return util::RequiredBits(max_next); - } - - DontBhiksha(const void *base, uint64_t max_offset, uint64_t max_next, const Config &config); - - void ReadNext(const void *base, uint64_t bit_offset, uint64_t /*index*/, uint8_t total_bits, NodeRange &out) const { - out.begin = util::ReadInt57(base, bit_offset, next_.bits, next_.mask); - out.end = util::ReadInt57(base, bit_offset + total_bits, next_.bits, next_.mask); - //assert(out.end >= out.begin); - } - - void WriteNext(void *base, uint64_t bit_offset, uint64_t /*index*/, uint64_t value) { - util::WriteInt57(base, bit_offset, next_.bits, value); - } - - void FinishedLoading(const Config &/*config*/) {} - - uint8_t InlineBits() const { return next_.bits; } - - private: - util::BitsMask next_; -}; - -class ArrayBhiksha { - public: - static const ModelType kModelTypeAdd = kArrayAdd; - - static void UpdateConfigFromBinary(const BinaryFormat &file, uint64_t offset, Config &config); - - static uint64_t Size(uint64_t max_offset, uint64_t max_next, const Config &config); - - static uint8_t InlineBits(uint64_t max_offset, uint64_t max_next, const Config &config); - - ArrayBhiksha(void *base, uint64_t max_offset, uint64_t max_value, const Config &config); - - void ReadNext(const void *base, uint64_t bit_offset, uint64_t index, uint8_t total_bits, NodeRange &out) const { - // Some assertions are commented out because they are expensive. - // assert(*offset_begin_ == 0); - // std::upper_bound returns the first element that is greater. Want the - // last element that is <= to the index. - const uint64_t *begin_it = std::upper_bound(offset_begin_, offset_end_, index) - 1; - // Since *offset_begin_ == 0, the position should be in range. - // assert(begin_it >= offset_begin_); - const uint64_t *end_it; - for (end_it = begin_it + 1; (end_it < offset_end_) && (*end_it <= index + 1); ++end_it) {} - // assert(end_it == std::upper_bound(offset_begin_, offset_end_, index + 1)); - --end_it; - // assert(end_it >= begin_it); - out.begin = ((begin_it - offset_begin_) << next_inline_.bits) | - util::ReadInt57(base, bit_offset, next_inline_.bits, next_inline_.mask); - out.end = ((end_it - offset_begin_) << next_inline_.bits) | - util::ReadInt57(base, bit_offset + total_bits, next_inline_.bits, next_inline_.mask); - // If this fails, consider rebuilding your model using KenLM after 1e333d786b748555e8f368d2bbba29a016c98052 - assert(out.end >= out.begin); - } - - void WriteNext(void *base, uint64_t bit_offset, uint64_t index, uint64_t value) { - uint64_t encode = value >> next_inline_.bits; - for (; write_to_ <= offset_begin_ + encode; ++write_to_) *write_to_ = index; - util::WriteInt57(base, bit_offset, next_inline_.bits, value & next_inline_.mask); - } - - void FinishedLoading(const Config &config); - - uint8_t InlineBits() const { return next_inline_.bits; } - - private: - const util::BitsMask next_inline_; - - const uint64_t *const offset_begin_; - const uint64_t *const offset_end_; - - uint64_t *write_to_; - - void *original_base_; -}; - -} // namespace trie -} // namespace ngram -} // namespace lm - -#endif // LM_BHIKSHA_H
