Hi all, Attached patches fix #1576. The first patch should be applied first, then you should build CHICKEN via a bootstrap CHICKEN. Then, install it and apply the second patch and rebuild again.
Aside from the bootstrapping dance, it is a relatively straightforward change: Keywords are now kept in a different hash table than regular symbols, which allows us to remove the NUL prefix and avoid name clashes. It also changes keywords so they no longer have a plist. Strictly speaking this *should* not be necessary, because you can just look up the symbol in the keyword table (and if you find it, it was a keyword), but the reason I decided to do it this way is that this is a O(1) operation while the hash table lookup is O(n) where n is the length of the bucket's chain, and even more importantly, the sanity checks in the GC which check if the symbol is persistable would break by doing the lookup, because the symbol table can contain forwarded pointers, which complicates things a lot. A few minor remaining questions: - What should the (initial) size of the keyword table be? - Should we use "symbols" in CHICKEN_initialize to determine the keyword table size as well? Cheers, Peter
From ddfe2e7112b1ff9f239d420b54c17a617567f3fe Mon Sep 17 00:00:00 2001 From: Peter Bex <[email protected]> Date: Sat, 6 Apr 2019 17:03:15 +0200 Subject: [PATCH 1/2] Change representation of keywords - Keywords are no longer encoded with a leading NUL byte. This allows us to have proper read/write invariance of symbols and keywords and ensures we don't return #t for keyword? on symbols starting with \0. - Keywords are now kept in symbol table completely separate from the one used for actual symbols. - The plist of a keyword is now #f instead of '(). This is the one thing we can use to differentiate a keyword without attempting to look it up in the keyword table (which won't work while GC'ing). - In order to be able to decide in which table to intern a symbol, when encoding literals, keywords and symbols are prefixed with a special byte: A \1-prefixed literal is decoded as a regular symbol, a \2-prefixed literal is read as a keyword. - When persisting or unpersisting a symbol, loop through *all* the symbol tables when trying to locate the symbol. Another small change is in how keywords are converted to nodes by the compiler; originally they would (accidentally) be represented as ##core#variable nodes. The intention was to auto-quote them in canonicalize-expression, but due to how the cond was placed this result would be thrown away and would be converted into ##core#variable instead. This is the first step towards fixing #1578. For bootstrapping reasons, this current implementation still accepts NUL-prefixed symbols as keywords. There is backwards compatible support code which detects such symbols and interns them into (or looks them up in) the keyword table instead. --- NEWS | 4 ++ c-backend.scm | 15 ++++-- chicken.h | 19 +++---- core.scm | 16 +++--- expand.scm | 8 +-- library.scm | 16 +++--- runtime.c | 162 +++++++++++++++++++++++++++++++++++++++++++++++++--------- 7 files changed, 182 insertions(+), 58 deletions(-) diff --git a/NEWS b/NEWS index 825acbfb..c401e86a 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,10 @@ than #!key, #!rest or #!optional is now preserved (#1572). - When using (set-file-position!) on a port, its EOF status will now be reset. + - Keywords are now interned in a separate keyword table, not in the + standard symbol table. This brings full read-write invariance + for symbols (they can now also start with NUL bytes). Keywords + no longer have plists. Fixes #1576. 5.0.1 diff --git a/c-backend.scm b/c-backend.scm index 4ad307d0..037eab3e 100644 --- a/c-backend.scm +++ b/c-backend.scm @@ -42,6 +42,7 @@ chicken.foreign chicken.format chicken.internal + chicken.keyword chicken.platform chicken.sort chicken.string @@ -739,11 +740,14 @@ ((char? lit) (gen #t to "=C_make_character(" (char->integer lit) ");") ) ((symbol? lit) ; handled slightly specially (see C_h_intern_in) - (let* ([str (##sys#slot lit 1)] - [cstr (c-ify-string str)] - [len (##sys#size str)] ) + (let* ((str (##sys#slot lit 1)) + (cstr (c-ify-string str)) + (len (##sys#size str)) + (intern (if (keyword? lit) + "C_h_intern_kw" + "C_h_intern"))) (gen #t to "=") - (gen "C_h_intern(&" to #\, len ", C_text(" cstr "));"))) + (gen intern "(&" to #\, len ", C_text(" cstr "));"))) ((null? lit) (gen #t to "=C_SCHEME_END_OF_LIST;") ) ((and (not (##sys#immediate? lit)) ; nop @@ -1483,8 +1487,9 @@ return((C_header_bits(lit) >> 24) & 0xff); ((symbol? lit) (let ((str (##sys#slot lit 1))) (string-append - "\x01" + "\x01" (encode-size (string-length str)) + (if (keyword? lit) "\x02" "\x01") str) ) ) ((##sys#immediate? lit) (bomb "invalid literal - cannot encode" lit)) diff --git a/chicken.h b/chicken.h index 7a2f3a14..0243e728 100644 --- a/chicken.h +++ b/chicken.h @@ -583,7 +583,7 @@ void *alloca (); #define C_BAD_MINIMUM_ARGUMENT_COUNT_ERROR 2 #define C_BAD_ARGUMENT_TYPE_ERROR 3 #define C_UNBOUND_VARIABLE_ERROR 4 -/* Unused: 5 */ +#define C_BAD_ARGUMENT_TYPE_SYMBOL_IS_KEYWORD_ERROR 5 #define C_OUT_OF_MEMORY_ERROR 6 #define C_DIVISION_BY_ZERO_ERROR 7 #define C_OUT_OF_RANGE_ERROR 8 @@ -1760,8 +1760,10 @@ C_fctexport C_word C_fcall C_string_aligned8(C_word **ptr, int len, C_char *str) C_fctexport C_word C_fcall C_string2(C_word **ptr, C_char *str) C_regparm; C_fctexport C_word C_fcall C_string2_safe(C_word **ptr, int max, C_char *str) C_regparm; C_fctexport C_word C_fcall C_intern(C_word **ptr, int len, C_char *str) C_regparm; +C_fctexport C_word C_fcall C_intern_kw(C_word **ptr, int len, C_char *str) C_regparm; C_fctexport C_word C_fcall C_intern_in(C_word **ptr, int len, C_char *str, C_SYMBOL_TABLE *stable) C_regparm; C_fctexport C_word C_fcall C_h_intern(C_word *slot, int len, C_char *str) C_regparm; +C_fctexport C_word C_fcall C_h_intern_kw(C_word *slot, int len, C_char *str) C_regparm; C_fctexport C_word C_fcall C_h_intern_in(C_word *slot, int len, C_char *str, C_SYMBOL_TABLE *stable) C_regparm; C_fctexport C_word C_fcall C_intern2(C_word **ptr, C_char *str) C_regparm; C_fctexport C_word C_fcall C_intern3(C_word **ptr, C_char *str, C_word value) C_regparm; @@ -1829,6 +1831,7 @@ C_fctexport void C_delete_symbol_table(C_SYMBOL_TABLE *st) C_regparm; C_fctexport void C_set_symbol_table(C_SYMBOL_TABLE *st) C_regparm; C_fctexport C_SYMBOL_TABLE *C_find_symbol_table(char *name) C_regparm; C_fctexport C_word C_find_symbol(C_word str, C_SYMBOL_TABLE *stable) C_regparm; +C_fctexport C_word C_find_keyword(C_word str, C_SYMBOL_TABLE *stable) C_regparm; C_fctexport C_word C_fcall C_lookup_symbol(C_word sym) C_regparm; C_fctexport void C_do_register_finalizer(C_word x, C_word proc); C_fctexport int C_do_unregister_finalizer(C_word x); @@ -1868,6 +1871,7 @@ C_fctexport C_cpsproc(C_gc) C_noret; C_fctexport C_cpsproc(C_open_file_port) C_noret; C_fctexport C_cpsproc(C_allocate_vector) C_noret; C_fctexport C_cpsproc(C_string_to_symbol) C_noret; +C_fctexport C_cpsproc(C_string_to_keyword) C_noret; C_fctexport C_cpsproc(C_build_symbol) C_noret; C_fctexport C_cpsproc(C_number_to_string) C_noret; C_fctexport C_cpsproc(C_fixnum_to_string) C_noret; @@ -2154,11 +2158,7 @@ inline static C_word C_u_i_namespaced_symbolp(C_word x) inline static C_word C_u_i_keywordp(C_word x) { - /* TODO: This representation is rather bogus */ - C_word n = C_symbol_name(x); - return C_mk_bool(C_symbol_value(x) == x && - C_header_size(n) > 0 && - ((C_byte *)C_data_pointer(n))[0] == '\0'); + return C_mk_bool(C_symbol_plist(x) == C_SCHEME_FALSE); } inline static C_word C_flonum(C_word **ptr, double n) @@ -2620,9 +2620,10 @@ inline static C_word C_i_symbolp(C_word x) inline static int C_persistable_symbol(C_word x) { - /* Symbol is bound (and not a keyword), or has a non-empty plist */ - return ((C_truep(C_boundp(x)) && !C_truep(C_u_i_keywordp(x))) || - C_symbol_plist(x) != C_SCHEME_END_OF_LIST); + /* Symbol is bound, or has a non-empty plist (but is not a keyword) */ + return ((C_truep(C_boundp(x)) || + C_symbol_plist(x) != C_SCHEME_END_OF_LIST) && + !C_truep(C_u_i_keywordp(x))); } inline static C_word C_i_pairp(C_word x) diff --git a/core.scm b/core.scm index 06a4cf7f..eabba538 100644 --- a/core.scm +++ b/core.scm @@ -522,7 +522,8 @@ (else (find-id id (cdr se))))) (define (lookup id) - (cond ((find-id id (##sys#current-environment))) + (cond ((keyword? id) id) + ((find-id id (##sys#current-environment))) ((##sys#get id '##core#macro-alias) symbol? => values) (else id))) @@ -560,6 +561,11 @@ x) ) (define (resolve-variable x0 e dest ldest h) + + (when (memq x0 unlikely-variables) + (warning + (sprintf "reference to variable `~s' possibly unintended" x0) )) + (let ((x (lookup x0))) (d `(RESOLVE-VARIABLE: ,x0 ,x ,(map (lambda (x) (car x)) (##sys#current-environment)))) (cond ((not (symbol? x)) x0) ; syntax? @@ -614,12 +620,8 @@ (print "\n;; END OF FILE"))))) ) ) (define (walk x e dest ldest h outer-ln tl?) - (cond ((symbol? x) - (cond ((keyword? x) `(quote ,x)) - ((memq x unlikely-variables) - (warning - (sprintf "reference to variable `~s' possibly unintended" x) ))) - (resolve-variable x e dest ldest h)) + (cond ((keyword? x) `(quote ,x)) + ((symbol? x) (resolve-variable x e dest ldest h)) ((not (pair? x)) (if (constant? x) `(quote ,x) diff --git a/expand.scm b/expand.scm index ec302d48..4986da48 100644 --- a/expand.scm +++ b/expand.scm @@ -111,6 +111,7 @@ (let ((seen '())) (let walk ((x exp)) (cond ((assq x seen) => cdr) + ((keyword? x) x) ((symbol? x) (let ((x2 (getp x '##core#macro-alias) ) ) (cond ((getp x '##core#real-name)) @@ -836,7 +837,7 @@ (cons (rename (car sym)) (rename (cdr sym)))) ((vector? sym) (list->vector (rename (vector->list sym)))) - ((not (symbol? sym)) sym) + ((or (not (symbol? sym)) (keyword? sym)) sym) ((assq sym renv) => (lambda (a) (dd `(RENAME/RENV: ,sym --> ,(cdr a))) @@ -859,7 +860,8 @@ (do ((i 0 (fx+ i 1)) (f #t (compare (vector-ref s1 i) (vector-ref s2 i)))) ((or (fx>= i len) (not f)) f)))))) - ((and (symbol? s1) (symbol? s2)) + ((and (symbol? s1) (not (keyword? s1)) + (symbol? s2) (not (keyword? s2))) (let ((ss1 (or (getp s1 '##core#macro-alias) (lookup2 1 s1 dse) s1) ) @@ -897,7 +899,7 @@ (cons (mirror-rename (car sym)) (mirror-rename (cdr sym)))) ((vector? sym) (list->vector (mirror-rename (vector->list sym)))) - ((not (symbol? sym)) sym) + ((or (not (symbol? sym)) (keyword? sym)) sym) (else ; Code stolen from strip-syntax (let ((renamed (lookup sym se) ) ) (cond ((assq-reverse sym renv) => diff --git a/library.scm b/library.scm index cba0f723..af4221c4 100644 --- a/library.scm +++ b/library.scm @@ -2666,6 +2666,7 @@ EOF (define ##sys#snafu '##sys#fnord) (define ##sys#intern-symbol (##core#primitive "C_string_to_symbol")) +(define ##sys#intern-keyword (##core#primitive "C_string_to_keyword")) (define (##sys#interned-symbol? x) (##core#inline "C_lookup_symbol" x)) (define (##sys#string->symbol str) @@ -2673,10 +2674,7 @@ EOF (##sys#intern-symbol str) ) (define (##sys#symbol->string s) - (let ((str (##sys#slot s 1))) - (if (##core#inline "C_u_i_keywordp" s) ; Keywords encoded as \000foo - (##sys#substring str 1 (string-length str)) - str))) + (##sys#slot s 1)) (set! scheme#symbol->string (lambda (s) @@ -2738,7 +2736,7 @@ EOF (let ([string string] ) (lambda (s) (##sys#check-string s 'string->keyword) - (##sys#intern-symbol (##sys#string-append (string (integer->char 0)) s)) ) ) ) + (##sys#intern-keyword s) ) ) ) (define keyword->string (let ([keyword? keyword?]) @@ -3709,8 +3707,7 @@ EOF (case-sensitive case-sensitive) (parentheses-synonyms parentheses-synonyms) (symbol-escape symbol-escape) - (current-read-table ##sys#current-read-table) - (kwprefix (string (integer->char 0)))) + (current-read-table ##sys#current-read-table)) (lambda (port infohandler) (let ((csp (case-sensitive)) (ksp (keyword-style)) @@ -4119,8 +4116,7 @@ EOF (##sys#intern-symbol tok) ) (define (build-keyword tok) - (##sys#intern-symbol - (##sys#string-append kwprefix tok))) + (##sys#intern-keyword tok)) ;; now have the state to make a decision. (set! reserved-characters @@ -5381,7 +5377,7 @@ EOF (if fn (list fn) '())))) ((3) (apply ##sys#signal-hook #:type-error loc "bad argument type" args)) ((4) (apply ##sys#signal-hook #:runtime-error loc "unbound variable" args)) - ;; ((5) ...unused...) + ((5) (apply ##sys#signal-hook #:type-error loc "symbol is a keyword, which has no plist" args)) ((6) (apply ##sys#signal-hook #:limit-error loc "out of memory" args)) ((7) (apply ##sys#signal-hook #:arithmetic-error loc "division by zero" args)) ((8) (apply ##sys#signal-hook #:bounds-error loc "out of range" args)) diff --git a/runtime.c b/runtime.c index 75cc8d41..2931f1ec 100644 --- a/runtime.c +++ b/runtime.c @@ -155,6 +155,7 @@ static C_TLS int timezone; #endif #define DEFAULT_SYMBOL_TABLE_SIZE 2999 +#define DEFAULT_KEYWORD_TABLE_SIZE 999 #define DEFAULT_HEAP_SIZE DEFAULT_STACK_SIZE #define MINIMAL_HEAP_SIZE DEFAULT_STACK_SIZE #define DEFAULT_SCRATCH_SPACE_SIZE 256 @@ -400,7 +401,8 @@ static C_TLS C_char *save_string; static C_TLS C_SYMBOL_TABLE *symbol_table, - *symbol_table_list; + *symbol_table_list, + *keyword_table; static C_TLS C_word **collectibles, **collectibles_top, @@ -703,6 +705,12 @@ int CHICKEN_initialize(int heap, int stack, int symbols, void *toplevel) if(symbol_table == NULL) return 0; + /* TODO: Should we use "symbols" here too? */ + keyword_table = C_new_symbol_table("kw", DEFAULT_KEYWORD_TABLE_SIZE); + + if(keyword_table == NULL) + return 0; + page_size = 0; stack_size = stack ? stack : DEFAULT_STACK_SIZE; C_set_or_change_heap_size(heap ? heap : DEFAULT_HEAP_SIZE, 0); @@ -877,7 +885,7 @@ static C_PTABLE_ENTRY *create_initial_ptable() { /* IMPORTANT: hardcoded table size - this must match the number of C_pte calls + 1 (NULL terminator)! */ - C_PTABLE_ENTRY *pt = (C_PTABLE_ENTRY *)C_malloc(sizeof(C_PTABLE_ENTRY) * 62); + C_PTABLE_ENTRY *pt = (C_PTABLE_ENTRY *)C_malloc(sizeof(C_PTABLE_ENTRY) * 63); int i = 0; if(pt == NULL) @@ -912,6 +920,7 @@ static C_PTABLE_ENTRY *create_initial_ptable() C_pte(C_number_to_string); C_pte(C_make_symbol); C_pte(C_string_to_symbol); + C_pte(C_string_to_keyword); C_pte(C_apply); C_pte(C_call_cc); C_pte(C_values); @@ -1116,6 +1125,22 @@ void initialize_symbol_table(void) } +C_regparm C_word C_find_keyword(C_word str, C_SYMBOL_TABLE *kwtable) +{ + C_char *sptr = C_c_string(str); + int len = C_header_size(str); + int key; + C_word s; + + if(kwtable == NULL) kwtable = keyword_table; + + key = hash_string(len, sptr, kwtable->size, kwtable->rand, 0); + + if(C_truep(s = lookup(key, len, sptr, kwtable))) return s; + else return C_SCHEME_FALSE; +} + + void C_ccall sigsegv_trampoline(C_word c, C_word *av) { barf(C_MEMORY_VIOLATION_ERROR, NULL); @@ -1349,7 +1374,6 @@ void CHICKEN_parse_command_line(int argc, char *argv[], C_word *heap, C_word *st " -:sSIZE set nursery (stack) size\n" " -:tSIZE set symbol-table size\n" " -:fSIZE set maximal number of pending finalizers\n" - " -:w enable garbage collection of unused symbols\n" " -:x deliver uncaught exceptions of other threads to primordial one\n" " -:b enter REPL on error\n" " -:B sound bell on major GC\n" @@ -1666,6 +1690,11 @@ void barf(int code, char *loc, ...) c = 1; break; + case C_BAD_ARGUMENT_TYPE_SYMBOL_IS_KEYWORD_ERROR: + msg = C_text("symbol is a keyword, which has no plist"); + c = 1; + break; + case C_OUT_OF_MEMORY_ERROR: msg = C_text("not enough memory"); c = 0; @@ -2262,16 +2291,41 @@ void C_unregister_lf(void *handle) C_regparm C_word C_fcall C_intern(C_word **ptr, int len, C_char *str) { - return C_intern_in(ptr, len, str, symbol_table); + if (*str == '\0') { /* OBSOLETE: Backwards compatibility */ + return C_intern_kw(ptr, len-1, str+1); + } else { + return C_intern_in(ptr, len, str, symbol_table); + } } C_regparm C_word C_fcall C_h_intern(C_word *slot, int len, C_char *str) { - return C_h_intern_in(slot, len, str, symbol_table); + if (*str == '\0') { /* OBSOLETE: Backwards compatibility */ + return C_h_intern_kw(slot, len-1, str+1); + } else { + return C_h_intern_in(slot, len, str, symbol_table); + } } +C_regparm C_word C_fcall C_intern_kw(C_word **ptr, int len, C_char *str) +{ + C_word kw = C_intern_in(ptr, len, str, keyword_table); + C_set_block_item(kw, 0, kw); /* Keywords evaluate to themselves */ + C_set_block_item(kw, 2, C_SCHEME_FALSE); /* Keywords have no plists */ + return kw; +} + + +C_regparm C_word C_fcall C_h_intern_kw(C_word *slot, int len, C_char *str) +{ + C_word kw = C_h_intern_in(slot, len, str, keyword_table); + C_set_block_item(kw, 0, kw); /* Keywords evaluate to themselves */ + C_set_block_item(kw, 2, C_SCHEME_FALSE); /* Keywords have no plists */ + return kw; +} + C_regparm C_word C_fcall C_intern_in(C_word **ptr, int len, C_char *str, C_SYMBOL_TABLE *stable) { int key; @@ -2391,15 +2445,19 @@ C_regparm C_word C_fcall lookup(C_word key, int len, C_char *str, C_SYMBOL_TABLE C_regparm C_word C_fcall C_i_persist_symbol(C_word sym) { C_word bucket; + C_SYMBOL_TABLE *stp; C_i_check_symbol(sym); - bucket = lookup_bucket(sym, NULL); - if (C_truep(bucket)) { /* It could be an uninterned symbol(?) */ - /* Change weak to strong ref to ensure long-term survival */ - C_block_header(bucket) = C_block_header(bucket) & ~C_SPECIALBLOCK_BIT; - /* Ensure survival on next minor GC */ - if (C_in_stackp(sym)) C_mutate_slot(&C_block_item(bucket, 0), sym); + for(stp = symbol_table_list; stp != NULL; stp = stp->next) { + bucket = lookup_bucket(sym, stp); + + if (C_truep(bucket)) { + /* Change weak to strong ref to ensure long-term survival */ + C_block_header(bucket) = C_block_header(bucket) & ~C_SPECIALBLOCK_BIT; + /* Ensure survival on next minor GC */ + if (C_in_stackp(sym)) C_mutate_slot(&C_block_item(bucket, 0), sym); + } } return C_SCHEME_UNDEFINED; } @@ -2411,6 +2469,7 @@ C_regparm C_word C_fcall C_i_persist_symbol(C_word sym) C_regparm C_word C_fcall C_i_unpersist_symbol(C_word sym) { C_word bucket; + C_SYMBOL_TABLE *stp; C_i_check_symbol(sym); @@ -2419,11 +2478,14 @@ C_regparm C_word C_fcall C_i_unpersist_symbol(C_word sym) return C_SCHEME_FALSE; } - bucket = lookup_bucket(sym, NULL); - if (C_truep(bucket)) { /* It could be an uninterned symbol(?) */ - /* Turn it into a weak ref */ - C_block_header(bucket) = C_block_header(bucket) | C_SPECIALBLOCK_BIT; - return C_SCHEME_TRUE; + for(stp = symbol_table_list; stp != NULL; stp = stp->next) { + bucket = lookup_bucket(sym, NULL); + + if (C_truep(bucket)) { + /* Turn it into a weak ref */ + C_block_header(bucket) = C_block_header(bucket) | C_SPECIALBLOCK_BIT; + return C_SCHEME_TRUE; + } } return C_SCHEME_FALSE; } @@ -2477,20 +2539,19 @@ double compute_symbol_table_load(double *avg_bucket_len, int *total_n) C_word add_symbol(C_word **ptr, C_word key, C_word string, C_SYMBOL_TABLE *stable) { C_word bucket, sym, b2, *p; - int keyw = C_header_size(string) > 0 && *((char *)C_data_pointer(string)) == 0; p = *ptr; sym = (C_word)p; p += C_SIZEOF_SYMBOL; C_block_header_init(sym, C_SYMBOL_TYPE | (C_SIZEOF_SYMBOL - 1)); - C_set_block_item(sym, 0, keyw ? sym : C_SCHEME_UNBOUND); /* keyword? */ + C_set_block_item(sym, 0, C_SCHEME_UNBOUND); C_set_block_item(sym, 1, string); C_set_block_item(sym, 2, C_SCHEME_END_OF_LIST); *ptr = p; b2 = stable->table[ key ]; /* previous bucket */ /* Create new weak or strong bucket depending on persistability */ - if (C_persistable_symbol(sym) || C_truep(C_permanentp(string))) { + if (C_truep(C_permanentp(string))) { bucket = C_a_pair(ptr, sym, b2); } else { bucket = C_a_weak_pair(ptr, sym, b2); @@ -9947,11 +10008,51 @@ void C_ccall C_string_to_symbol(C_word c, C_word *av) len = C_header_size(string); name = (C_char *)C_data_pointer(string); - key = hash_string(len, name, symbol_table->size, symbol_table->rand, 0); - if(!C_truep(s = lookup(key, len, name, symbol_table))) - s = add_symbol(&a, key, string, symbol_table); + if (*name == '\0' && len > 1) { /* OBSOLETE: Backwards compatibility */ + key = hash_string(len-1, name+1, keyword_table->size, keyword_table->rand, 0); + if(!C_truep(s = lookup(key, len-1, name+1, keyword_table))) { + C_word *a2 = C_alloc(C_bytestowords(len-1)+1); + C_word string2 = C_string(&a2, len-1, name+1); + s = add_symbol(&a, key, string, keyword_table); + C_set_block_item(s, 0, s); /* Keywords evaluate to themselves */ + C_set_block_item(s, 2, C_SCHEME_FALSE); /* Keywords have no plists */ + } + } else { + key = hash_string(len, name, symbol_table->size, symbol_table->rand, 0); + if(!C_truep(s = lookup(key, len, name, symbol_table))) + s = add_symbol(&a, key, string, symbol_table); + } + + C_kontinue(k, s); +} + +void C_ccall C_string_to_keyword(C_word c, C_word *av) +{ + C_word + /* closure = av[ 0 ] */ + k = av[ 1 ], + string; + int len, key; + C_word s, *a = C_alloc(C_SIZEOF_SYMBOL + C_SIZEOF_PAIR); + C_char *name; + + if(c != 3) C_bad_argc(c, 3); + string = av[ 2 ]; + + if(C_immediatep(string) || C_header_bits(string) != C_STRING_TYPE) + barf(C_BAD_ARGUMENT_TYPE_ERROR, "string->keyword", string); + + len = C_header_size(string); + name = (C_char *)C_data_pointer(string); + key = hash_string(len, name, keyword_table->size, keyword_table->rand, 0); + + if(!C_truep(s = lookup(key, len, name, keyword_table))) { + s = add_symbol(&a, key, string, keyword_table); + C_set_block_item(s, 0, s); /* Keywords evaluate to themselves */ + C_set_block_item(s, 2, C_SCHEME_FALSE); /* Keywords have no plists */ + } C_kontinue(k, s); } @@ -11920,7 +12021,14 @@ static C_regparm C_word C_fcall decode_literal2(C_word **ptr, C_char **str, if(dest == NULL) panic(C_text("invalid literal symbol destination")); - val = C_h_intern(dest, size, *str); + if (**str == '\1') { + val = C_h_intern(dest, size, ++*str); + } else if (**str == '\2') { + val = C_h_intern_kw(dest, size, ++*str); + } else { + /* Backwards compatibility */ + val = C_h_intern(dest, size, *str); + } *str += size; break; @@ -12125,7 +12233,10 @@ error: C_regparm C_word C_fcall C_i_getprop(C_word sym, C_word prop, C_word def) { - C_word pl = C_block_item(sym, 2); + C_word pl = C_symbol_plist(sym); + + if (pl == C_SCHEME_FALSE) + barf(C_BAD_ARGUMENT_TYPE_SYMBOL_IS_KEYWORD_ERROR, "get", sym); while(pl != C_SCHEME_END_OF_LIST) { if(C_block_item(pl, 0) == prop) @@ -12142,6 +12253,9 @@ C_putprop(C_word **ptr, C_word sym, C_word prop, C_word val) { C_word pl = C_symbol_plist(sym); + if (pl == C_SCHEME_FALSE) + barf(C_BAD_ARGUMENT_TYPE_SYMBOL_IS_KEYWORD_ERROR, "put", sym); + /* Newly added plist? Ensure the symbol stays! */ if (pl == C_SCHEME_END_OF_LIST) C_i_persist_symbol(sym); -- 2.11.0
From e4cef09f38577efc245ce080fde3d622215835cc Mon Sep 17 00:00:00 2001 From: Peter Bex <[email protected]> Date: Sat, 6 Apr 2019 18:50:09 +0200 Subject: [PATCH 2/2] Drop backwards compat support for keywords-as-NUL-prefixed-symbols Bump binary compatibility version to 11. After this change, #1576 has been fully fixed. --- NEWS | 4 ++++ defaults.make | 2 +- runtime.c | 33 +++++++-------------------------- tests/library-tests.scm | 5 +++-- 4 files changed, 15 insertions(+), 29 deletions(-) diff --git a/NEWS b/NEWS index c401e86a..781b1453 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,10 @@ for symbols (they can now also start with NUL bytes). Keywords no longer have plists. Fixes #1576. +- Runtime system + - Increased the "binary compatibility version" to 11. + + 5.0.1 - Type system diff --git a/defaults.make b/defaults.make index 4da4d633..9ea214c1 100644 --- a/defaults.make +++ b/defaults.make @@ -27,7 +27,7 @@ # basic parameters -BINARYVERSION = 10 +BINARYVERSION = 11 STACKDIRECTION ?= 1 CROSS_CHICKEN ?= 0 diff --git a/runtime.c b/runtime.c index 2931f1ec..75dec603 100644 --- a/runtime.c +++ b/runtime.c @@ -2291,21 +2291,13 @@ void C_unregister_lf(void *handle) C_regparm C_word C_fcall C_intern(C_word **ptr, int len, C_char *str) { - if (*str == '\0') { /* OBSOLETE: Backwards compatibility */ - return C_intern_kw(ptr, len-1, str+1); - } else { - return C_intern_in(ptr, len, str, symbol_table); - } + return C_intern_in(ptr, len, str, symbol_table); } C_regparm C_word C_fcall C_h_intern(C_word *slot, int len, C_char *str) { - if (*str == '\0') { /* OBSOLETE: Backwards compatibility */ - return C_h_intern_kw(slot, len-1, str+1); - } else { - return C_h_intern_in(slot, len, str, symbol_table); - } + return C_h_intern_in(slot, len, str, symbol_table); } @@ -10009,20 +10001,9 @@ void C_ccall C_string_to_symbol(C_word c, C_word *av) len = C_header_size(string); name = (C_char *)C_data_pointer(string); - if (*name == '\0' && len > 1) { /* OBSOLETE: Backwards compatibility */ - key = hash_string(len-1, name+1, keyword_table->size, keyword_table->rand, 0); - if(!C_truep(s = lookup(key, len-1, name+1, keyword_table))) { - C_word *a2 = C_alloc(C_bytestowords(len-1)+1); - C_word string2 = C_string(&a2, len-1, name+1); - s = add_symbol(&a, key, string, keyword_table); - C_set_block_item(s, 0, s); /* Keywords evaluate to themselves */ - C_set_block_item(s, 2, C_SCHEME_FALSE); /* Keywords have no plists */ - } - } else { - key = hash_string(len, name, symbol_table->size, symbol_table->rand, 0); - if(!C_truep(s = lookup(key, len, name, symbol_table))) - s = add_symbol(&a, key, string, symbol_table); - } + key = hash_string(len, name, symbol_table->size, symbol_table->rand, 0); + if(!C_truep(s = lookup(key, len, name, symbol_table))) + s = add_symbol(&a, key, string, symbol_table); C_kontinue(k, s); } @@ -12026,8 +12007,8 @@ static C_regparm C_word C_fcall decode_literal2(C_word **ptr, C_char **str, } else if (**str == '\2') { val = C_h_intern_kw(dest, size, ++*str); } else { - /* Backwards compatibility */ - val = C_h_intern(dest, size, *str); + C_snprintf(buffer, sizeof(buffer), C_text("Unknown symbol subtype: %d"), (int)**str); + panic(buffer); } *str += size; break; diff --git a/tests/library-tests.scm b/tests/library-tests.scm index f7a1d3ff..1e19a632 100644 --- a/tests/library-tests.scm +++ b/tests/library-tests.scm @@ -328,8 +328,9 @@ (assert (eq? '##foo#bar '|##foo#bar|)) (assert (string=? "|\\x0a|" (with-output-to-string (lambda () (write '|\n|))))) -;; NOT YET, keywords are still prefixed with \000: -; (assert (string=? "|\000foo|" (with-output-to-string (lambda () (write '|\000foo|))))) +;; #1576 +(assert (string=? "|\\x00foo|" (with-output-to-string (lambda () (write '|\000foo|))))) +(assert (not (keyword? '|\000foo|))) (assert (string=? "|###foo#bar|" (with-output-to-string (lambda () (write '|###foo#bar|))))) ;;; Paren synonyms -- 2.11.0
signature.asc
Description: PGP signature
_______________________________________________ Chicken-hackers mailing list [email protected] https://lists.nongnu.org/mailman/listinfo/chicken-hackers
