Author: coke Date: Tue Nov 15 21:20:11 2005 New Revision: 10014 Modified: trunk/languages/tcl/lib/commands/array.pir trunk/languages/tcl/lib/commands/proc.pir trunk/languages/tcl/lib/commands/puts.pir trunk/languages/tcl/lib/commands/string.pir trunk/languages/tcl/lib/commands/switch.pir trunk/languages/tcl/lib/parser.pir Log: Squash a few bugs related to the recent unicode work: strings are getting converted to utf16 that shouldn't: their utf16-ness borked various compilers (PIR and PGE) that were expecting plain ASCII.
Further, we were outputing code as utf16: added a last ditch "convert to utf8" to [puts] that forces all our output to be as our test suite intended. This doesn't fix PGE globbing: near as I can tell, I'm doing the right thing for glob'ing, but we're still not matching. Failing 8 tests, down from 70+. Modified: trunk/languages/tcl/lib/commands/array.pir ============================================================================== --- trunk/languages/tcl/lib/commands/array.pir (original) +++ trunk/languages/tcl/lib/commands/array.pir Tue Nov 15 21:20:11 2005 @@ -194,7 +194,6 @@ odd_args: argc = argv if argc > 1 goto bad_args - .local string match_str # ?pattern? defaults to matching everything. match_str = "*" @@ -215,20 +214,21 @@ no_args: globber = find_global "PGE", "glob" .local pmc rule - (rule, $P0, $P1) = globber(match_str) + match_str = escape match_str # escape unicode + rule = globber(match_str) iter = new .Iterator, the_array iter = .ITERATE_FROM_START retval = new .String - .local int count count = 0 push_loop: unless iter goto push_end str = shift iter + str = escape str # escape unicode # check for match $P2 = rule(str) Modified: trunk/languages/tcl/lib/commands/proc.pir ============================================================================== --- trunk/languages/tcl/lib/commands/proc.pir (original) +++ trunk/languages/tcl/lib/commands/proc.pir Tue Nov 15 21:20:11 2005 @@ -39,8 +39,8 @@ got_args: # Save the parsed body. .local pmc parsed_body $S0 = body_p - ($I0,$P0) = compiler(0,$S0) - $P0 = pir_compiler($I0,$P0) + ($I0,$S1) = compiler(0,$S0) + $P0 = pir_compiler($I0,$S1) parsed_body = $P0[0] # XXX these need to go away - for now, we'll just escape @@ -212,6 +212,11 @@ END_PIR .local pmc pir_compiler pir_compiler = compreg "PIR" + + # (see note on trans_charset in lib/parser.pir) XXX + $I0 = find_charset 'ascii' + proc_body = trans_charset $I0 + $P0 = pir_compiler(proc_body) # XXX because of the current implementation of the PIR compiler, we must save a reference Modified: trunk/languages/tcl/lib/commands/puts.pir ============================================================================== --- trunk/languages/tcl/lib/commands/puts.pir (original) +++ trunk/languages/tcl/lib/commands/puts.pir Tue Nov 15 21:20:11 2005 @@ -11,6 +11,9 @@ if argc == 0 goto error if argc > 3 goto error + .local int utf8 + utf8 = find_encoding 'utf8' + if argc == 1 goto one_arg if argc == 2 goto two_arg @@ -23,6 +26,7 @@ three_arg: $P1 = channels[$S2] if_null $P1, bad_channel $S3 = argv[2] + $S3 = trans_encoding $S3, utf8 print $P1, $S3 goto done @@ -33,6 +37,7 @@ two_arg: if $S2 != "-nonewline" goto two_arg_channel two_arg_nonewline: $S3 = argv[1] + $S3 = trans_encoding $S3, utf8 print $S3 goto done @@ -42,6 +47,7 @@ two_arg_channel: channels = find_global "_Tcl", "channels" $P1 = channels[$S2] if_null $P1, bad_channel + $S3 = trans_encoding $S3, utf8 print $P1, $S3 print $P1, "\n" goto done @@ -49,6 +55,7 @@ two_arg_channel: one_arg: $P0 = argv[0] $S1 = $P0 + $S1 = trans_encoding $S1, utf8 print $S1 print "\n" goto done Modified: trunk/languages/tcl/lib/commands/string.pir ============================================================================== --- trunk/languages/tcl/lib/commands/string.pir (original) +++ trunk/languages/tcl/lib/commands/string.pir Tue Nov 15 21:20:11 2005 @@ -179,19 +179,19 @@ match_next: pattern = argv[0] the_string = argv[1] unless nocase goto match_continue - pattern = downcase pattern - $I0 = find_encoding 'utf8' - trans_encoding pattern, $I0 + pattern = downcase pattern + the_string = downcase the_string match_continue: + pattern = escape pattern # escape unicode for PGE globs. + the_string = escape the_string + .local pmc globber globber = find_global "PGE", "glob" - .local pmc rule, pir, exp - (rule, pir, exp) = globber(pattern) - - .local pmc match + .local pmc rule, match + rule = globber(pattern) match = rule(the_string) .return match.__get_bool() Modified: trunk/languages/tcl/lib/commands/switch.pir ============================================================================== --- trunk/languages/tcl/lib/commands/switch.pir (original) +++ trunk/languages/tcl/lib/commands/switch.pir Tue Nov 15 21:20:11 2005 @@ -101,8 +101,8 @@ body_match: .local pmc compiler,pir_compiler compiler = find_global "_Tcl", "compile" pir_compiler = find_global "_Tcl", "pir_compiler" - ($I0,$P0) = compiler(0,code) - $P1 = pir_compiler($I0,$P0) + ($I0,$S0) = compiler(0,code) + $P1 = pir_compiler($I0,$S0) .return $P1() bad_args: Modified: trunk/languages/tcl/lib/parser.pir ============================================================================== --- trunk/languages/tcl/lib/parser.pir (original) +++ trunk/languages/tcl/lib/parser.pir Tue Nov 15 21:20:11 2005 @@ -28,7 +28,7 @@ Return register_num is the register numb .sub compile .param int register_num .param string tcl_code - + .local int len len = length tcl_code @@ -90,7 +90,7 @@ done_comment: push commands, command goto next_command - + done: .return "compile_dispatch"(commands,register_num) .end @@ -167,7 +167,7 @@ set_args: compile_it: .local pmc pir_compiler pir_compiler = compreg "PIR" - + .return pir_compiler(pir_code) .end @@ -739,6 +739,15 @@ set_args: pir_code = sprintf "$P%i = new .%s\n$P%i=%s%s%s\n", printf_args + # PIR's compiler can't deal with the utf16 code is generated as a result + # of the string manipulation that brings us to this point. So, we need + # to downcast it to ASCII. Which should be lossless, given the code that + # we're generating. It should be possible to move this trans_charset + # to where the upcasting is occuring instead of doing it once here. (XXX) + + $I0 = find_charset 'ascii' + pir_code = trans_charset $I0 + .return(register_num,pir_code) can_compile:
