Author: olga
Date: Fri Oct 31 12:15:07 2008
New Revision: 709537

URL: http://svn.apache.org/viewvc?rev=709537&view=rev
Log:
PIG-497: UTF8 handling in BinStorage

Modified:
    hadoop/pig/branches/types/CHANGES.txt
    hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java
    hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java
    hadoop/pig/branches/types/test/org/apache/pig/test/Util.java

Modified: hadoop/pig/branches/types/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/types/CHANGES.txt?rev=709537&r1=709536&r2=709537&view=diff
==============================================================================
--- hadoop/pig/branches/types/CHANGES.txt (original)
+++ hadoop/pig/branches/types/CHANGES.txt Fri Oct 31 12:15:07 2008
@@ -303,3 +303,6 @@
     PIG-507: permission error not reported (pradeepk via olgan)
 
     PIG-508: problem with double joins (pradeepk via olgan)
+
+    PIG-497: problems with UTF8 handling in BinStorage (pradeepk via olgan)
+

Modified: hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java?rev=709537&r1=709536&r2=709537&view=diff
==============================================================================
--- hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java (original)
+++ hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java Fri Oct 31 12:15:07 2008
@@ -105,12 +105,8 @@
                 return new DataByteArray(ba);
                                      }
 
-            case DataType.CHARARRAY: {
-                int size = in.readInt();
-                byte[] ba = new byte[size];
-                in.readFully(ba);
-                return new String(ba);
-                                     }
+            case DataType.CHARARRAY:
+                return in.readUTF();
 
             case DataType.NULL:
                 return null;
@@ -194,9 +190,7 @@
 
             case DataType.CHARARRAY: {
                 out.writeByte(DataType.CHARARRAY);
-                String s = (String)val;
-                out.writeInt(s.length());
-                out.writeBytes(s);
+                out.writeUTF((String)val);
                 break;
                                      }
 

Modified: hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java?rev=709537&r1=709536&r2=709537&view=diff
==============================================================================
--- hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java (original)
+++ hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java Fri Oct 31 12:15:07 2008
@@ -693,4 +693,18 @@
             assertEquals(output.second, t.get(2));
         }
     }
+    
+    @Test
+    public void testUtf8Dump() throws IOException, ExecException {
+        
+        // Create input file with unicode data
+        File input = Util.createInputFile("tmp", "", 
+                new String[] {"wendyξ"});
+        pigServer.registerQuery("a = load 'file:" + Util.encodeEscape(input.toString()) + "' using PigStorage() " +
+        "as (name:chararray);");
+        Iterator<Tuple> it = pigServer.openIterator("a");
+        Tuple t = it.next();
+        assertEquals("wendyξ", t.get(0));
+        
+    }
 }

Modified: hadoop/pig/branches/types/test/org/apache/pig/test/Util.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/types/test/org/apache/pig/test/Util.java?rev=709537&r1=709536&r2=709537&view=diff
==============================================================================
--- hadoop/pig/branches/types/test/org/apache/pig/test/Util.java (original)
+++ hadoop/pig/branches/types/test/org/apache/pig/test/Util.java Fri Oct 31 12:15:07 2008
@@ -18,6 +18,8 @@
 package org.apache.pig.test;
 
 import java.io.File;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
 import java.io.IOException;
 import java.io.PrintWriter;
 import java.util.HashMap;
@@ -166,7 +168,7 @@
        throws IOException {
                File f = File.createTempFile(tmpFilenamePrefix, tmpFilenameSuffix);
         f.deleteOnExit();
-               PrintWriter pw = new PrintWriter(f);
+               PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream(f), "UTF-8"));
                for (int i=0; i<inputData.length; i++){
                        pw.println(inputData[i]);
                }


Reply via email to