bilbingham edited a comment on pull request #649: URL: https://github.com/apache/orc/pull/649#issuecomment-790733843
To reproduce: the attached file, [acid.orc.zip](https://github.com/apache/orc/files/6085038/acid.orc.zip), was created with the Hive v2 streaming API into the following table (5000k random values). A hedged sketch of the streaming write is included after the reproduction code below.

```sql
CREATE TABLE acidorc (i int, j int, k int)
STORED AS ORC
TBLPROPERTIES (
  "transactional"="true",
  "orc.compress"="SNAPPY",
  "orc.bloom.filter.columns"="i,j,k");
```

```java
package orc.apache.orc.test;

import com.google.gson.stream.JsonWriter;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.hive.streaming.StreamingException;
import org.apache.orc.OrcConf;
import org.apache.orc.mapred.OrcStruct;
import org.apache.orc.mapreduce.OrcInputFormat;

import java.io.*;

public class Break {
  public static void main(String[] args) throws StreamingException, IOException, InterruptedException {
    // CREATE TABLE acidorc (i int, j int, k int)
    // STORED AS ORC
    // TBLPROPERTIES ("transactional"="true", "orc.compress"="SNAPPY", "orc.bloom.filter.columns"="i,j,k");
    Path workDir = new Path(System.getProperty("test.tmp.dir",
        "target" + File.separator + "test" + File.separator + "tmp"));
    JobConf conf = new JobConf();
    FileSystem fs;

    // Commented-out write path kept from the original repro:
    // String typeStr = "struct<i:int,j:int,k:int>";
    // OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
    // conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    // conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000);
    // conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);

    TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1);
    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);

    // OutputFormat<NullWritable, OrcStruct> outputFormat = new OrcOutputFormat<OrcStruct>();
    // RecordWriter<NullWritable, OrcStruct> writer = outputFormat.getRecordWriter(attemptContext);
    //
    // // write 4000 rows with the integer and the binary string
    // TypeDescription type = TypeDescription.fromString(typeStr);
    // OrcStruct row = (OrcStruct) OrcStruct.createValue(type);
    // NullWritable nada = NullWritable.get();
    // for (int r = 0; r < 3000; ++r) {
    //   row.setFieldValue(0, new IntWritable(r));
    //   row.setFieldValue(1, new IntWritable(r * 2));
    //   row.setFieldValue(2, new IntWritable(r * 3));
    //   writer.write(nada, row);
    // }
    // writer.close(attemptContext);

    // Read the attached ACID file with the mapreduce OrcInputFormat, requesting only column index 5.
    conf.set(OrcConf.INCLUDE_COLUMNS.getAttribute(), "5");
    FileSplit split = new FileSplit(new Path("/tmp", "acid.orc"), 0, 1000000, new String[0]);
    RecordReader<NullWritable, OrcStruct> reader =
        new OrcInputFormat<OrcStruct>().createRecordReader(split, attemptContext);

    // the sarg should cause it to skip over the rows except 1000 to 2000
    // (note: no SearchArgument is actually set in this repro)
    int count = 0;
    while (reader.nextKeyValue() && count < 5) {
      count++;
      OutputStream outputStream = new ByteArrayOutputStream();
      JsonWriter jw = new JsonWriter(new OutputStreamWriter(outputStream, "UTF-8"));
      OrcStruct row = (OrcStruct) reader.getCurrentValue();

      // Print each row as a small JSON object of fieldName -> value.
      jw.beginObject();
      for (int i = 0; i < row.getNumFields(); i++) {
        jw.name(row.getSchema().getFieldNames().get(i));
        jw.value(String.valueOf(row.getFieldValue(i)));
      }
      jw.endObject();
      jw.close();
      System.out.println(outputStream.toString());
    }
  }
}
```
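For reference, here is a minimal sketch of how a file like the attached one can be produced with the Hive Streaming v2 API (`org.apache.hive.streaming`), assuming a Hive 3.x client classpath, a reachable metastore, and the `acidorc` table above. The database name, agent string, row values, and the single large transaction are illustrative, not necessarily how the attached file was actually generated.

```java
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hive.streaming.HiveStreamingConnection;
import org.apache.hive.streaming.StreamingException;
import org.apache.hive.streaming.StrictDelimitedInputWriter;

import java.nio.charset.StandardCharsets;
import java.util.Random;

public class StreamAcidOrc {
  public static void main(String[] args) throws StreamingException {
    HiveConf hiveConf = new HiveConf();

    // Rows are passed as delimited text; the writer turns them into ACID ORC deltas.
    StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
        .withFieldDelimiter(',')
        .build();

    HiveStreamingConnection connection = HiveStreamingConnection.newBuilder()
        .withDatabase("default")          // illustrative database name
        .withTable("acidorc")
        .withAgentInfo("orc-649-repro")   // illustrative agent string
        .withRecordWriter(writer)
        .withHiveConf(hiveConf)
        .build();

    Random random = new Random();
    connection.beginTransaction();
    for (int r = 0; r < 5_000_000; r++) {   // "5000k random values"; a real loader would commit in batches
      String row = random.nextInt() + "," + random.nextInt() + "," + random.nextInt();
      connection.write(row.getBytes(StandardCharsets.UTF_8));
    }
    connection.commitTransaction();
    connection.close();
  }
}
```

Each committed transaction writes ORC delta (bucket) files under the table's warehouse directory; the attached `acid.orc` is presumably one such file copied out to `/tmp`.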
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]
