Author: coke
Date: Tue Nov 15 21:20:11 2005
New Revision: 10014

Modified:
   trunk/languages/tcl/lib/commands/array.pir
   trunk/languages/tcl/lib/commands/proc.pir
   trunk/languages/tcl/lib/commands/puts.pir
   trunk/languages/tcl/lib/commands/string.pir
   trunk/languages/tcl/lib/commands/switch.pir
   trunk/languages/tcl/lib/parser.pir
Log:
Squash a few bugs related to the recent unicode work: strings are getting
converted to utf16 that shouldn't: their utf16-ness borked various compilers
(PIR and PGE) that were expecting plain ASCII.

Further, we were outputing code as utf16: added a last ditch "convert to
utf8" to [puts] that forces all our output to be as our test suite intended.

This doesn't fix PGE globbing: near as I can tell, I'm doing the right
thing for glob'ing, but we're still not matching. Failing 8 tests,
down from 70+.



Modified: trunk/languages/tcl/lib/commands/array.pir
==============================================================================
--- trunk/languages/tcl/lib/commands/array.pir  (original)
+++ trunk/languages/tcl/lib/commands/array.pir  Tue Nov 15 21:20:11 2005
@@ -194,7 +194,6 @@ odd_args:
   argc = argv
   if argc > 1 goto bad_args
 
-
   .local string match_str
   # ?pattern? defaults to matching everything.
   match_str = "*"
@@ -215,20 +214,21 @@ no_args:
 
   globber = find_global "PGE", "glob"
   .local pmc rule
-  (rule, $P0, $P1) = globber(match_str)
+  match_str = escape match_str # escape unicode
+  rule = globber(match_str)
 
   iter = new .Iterator, the_array
   iter = .ITERATE_FROM_START
 
   retval = new .String
 
-
   .local int count
   count = 0
 
 push_loop:
   unless iter goto push_end
   str = shift iter
+  str = escape str # escape unicode
 
   # check for match
   $P2 = rule(str)

Modified: trunk/languages/tcl/lib/commands/proc.pir
==============================================================================
--- trunk/languages/tcl/lib/commands/proc.pir   (original)
+++ trunk/languages/tcl/lib/commands/proc.pir   Tue Nov 15 21:20:11 2005
@@ -39,8 +39,8 @@ got_args:
   # Save the parsed body.
   .local pmc parsed_body
   $S0 = body_p
-  ($I0,$P0) = compiler(0,$S0)
-  $P0 = pir_compiler($I0,$P0)
+  ($I0,$S1) = compiler(0,$S0)
+  $P0 = pir_compiler($I0,$S1)
   parsed_body = $P0[0]
 
   # XXX these need to go away - for now, we'll just escape
@@ -212,6 +212,11 @@ END_PIR
 
   .local pmc pir_compiler
   pir_compiler = compreg "PIR"
+
+  # (see note on trans_charset in lib/parser.pir) XXX
+  $I0 = find_charset 'ascii'
+  proc_body = trans_charset $I0
+
   $P0 = pir_compiler(proc_body)
 
   # XXX because of the current implementation of the PIR compiler, we must 
save a reference

Modified: trunk/languages/tcl/lib/commands/puts.pir
==============================================================================
--- trunk/languages/tcl/lib/commands/puts.pir   (original)
+++ trunk/languages/tcl/lib/commands/puts.pir   Tue Nov 15 21:20:11 2005
@@ -11,6 +11,9 @@
   if argc == 0 goto error
   if argc > 3 goto error
 
+  .local int utf8
+  utf8 = find_encoding 'utf8'
+
   if argc == 1 goto one_arg
   if argc == 2 goto two_arg
 
@@ -23,6 +26,7 @@ three_arg: 
   $P1 = channels[$S2]
   if_null $P1, bad_channel
   $S3 = argv[2]
+  $S3 = trans_encoding $S3, utf8
   print $P1, $S3
   goto done
 
@@ -33,6 +37,7 @@ two_arg:
   if $S2 != "-nonewline" goto two_arg_channel
 two_arg_nonewline:
   $S3 = argv[1] 
+  $S3 = trans_encoding $S3, utf8
   print $S3
   goto done
 
@@ -42,6 +47,7 @@ two_arg_channel:  
   channels = find_global "_Tcl", "channels"
   $P1 = channels[$S2]
   if_null $P1, bad_channel
+  $S3 = trans_encoding $S3, utf8
   print $P1, $S3
   print $P1, "\n"
   goto done
@@ -49,6 +55,7 @@ two_arg_channel:  
 one_arg:
   $P0 = argv[0]
   $S1 = $P0
+  $S1 = trans_encoding $S1, utf8
   print $S1
   print "\n"
   goto done  

Modified: trunk/languages/tcl/lib/commands/string.pir
==============================================================================
--- trunk/languages/tcl/lib/commands/string.pir (original)
+++ trunk/languages/tcl/lib/commands/string.pir Tue Nov 15 21:20:11 2005
@@ -179,19 +179,19 @@ match_next:
   pattern = argv[0]
   the_string = argv[1]
   unless nocase goto match_continue
-  pattern    = downcase pattern
-  $I0 = find_encoding 'utf8'
-  trans_encoding pattern, $I0
+  pattern = downcase pattern
+
   the_string = downcase the_string
 
 match_continue:
+  pattern    = escape pattern # escape unicode for PGE globs.
+  the_string = escape the_string
+
   .local pmc globber
   globber = find_global "PGE", "glob"
 
-  .local pmc rule, pir, exp
-  (rule, pir, exp) = globber(pattern)
-
-  .local pmc match
+  .local pmc rule, match 
+  rule = globber(pattern)
   match = rule(the_string)
 
   .return match.__get_bool()

Modified: trunk/languages/tcl/lib/commands/switch.pir
==============================================================================
--- trunk/languages/tcl/lib/commands/switch.pir (original)
+++ trunk/languages/tcl/lib/commands/switch.pir Tue Nov 15 21:20:11 2005
@@ -101,8 +101,8 @@ body_match:
   .local pmc compiler,pir_compiler
   compiler = find_global "_Tcl", "compile"
   pir_compiler = find_global "_Tcl", "pir_compiler"
-  ($I0,$P0) = compiler(0,code)
-  $P1 = pir_compiler($I0,$P0)
+  ($I0,$S0) = compiler(0,code)
+  $P1 = pir_compiler($I0,$S0)
   .return $P1()
 
 bad_args:

Modified: trunk/languages/tcl/lib/parser.pir
==============================================================================
--- trunk/languages/tcl/lib/parser.pir  (original)
+++ trunk/languages/tcl/lib/parser.pir  Tue Nov 15 21:20:11 2005
@@ -28,7 +28,7 @@ Return register_num is the register numb
 .sub compile
   .param int register_num
   .param string tcl_code
-  
+
   .local int len
   len = length tcl_code
  
@@ -90,7 +90,7 @@ done_comment:
   
   push commands, command
   goto next_command
- 
+
 done:
   .return "compile_dispatch"(commands,register_num)
 .end
@@ -167,7 +167,7 @@ set_args:
 compile_it:
   .local pmc pir_compiler
   pir_compiler = compreg "PIR"
-  
+ 
   .return pir_compiler(pir_code)
 .end
 
@@ -739,6 +739,15 @@ set_args:
  
   pir_code = sprintf "$P%i = new .%s\n$P%i=%s%s%s\n", printf_args
 
+  # PIR's compiler can't deal with the utf16 code is generated as a result
+  # of the string manipulation that brings us to this point. So, we need
+  # to downcast it to ASCII. Which should be lossless, given the code that
+  # we're generating. It should be possible to move this trans_charset
+  # to where the upcasting is occuring instead of doing it once here. (XXX)
+
+  $I0 = find_charset 'ascii'
+  pir_code = trans_charset $I0
+
   .return(register_num,pir_code)
 
 can_compile:

Reply via email to