Repository: tajo Updated Branches: refs/heads/master f868c0e23 -> f87f66729
TAJO-1743: Improve calculation of intermediate table statistics. Closes #678 Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/f87f6672 Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/f87f6672 Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/f87f6672 Branch: refs/heads/master Commit: f87f6672991cabfcb80332d0a0fb9869751ea665 Parents: f868c0e Author: Jinho Kim <[email protected]> Authored: Mon Aug 10 14:28:57 2015 +0900 Committer: Jinho Kim <[email protected]> Committed: Mon Aug 10 14:28:57 2015 +0900 ---------------------------------------------------------------------- CHANGES | 2 ++ .../apache/tajo/storage/StorageConstants.java | 3 +++ .../java/org/apache/tajo/util/StringUtils.java | 22 ++++++++++++++++++++ .../engine/planner/PhysicalPlannerImpl.java | 5 ++++- .../physical/RangeShuffleFileWriteExec.java | 19 +++++++++++------ .../tajo/engine/query/TestCreateTable.java | 2 +- .../org/apache/tajo/plan/util/PlannerUtil.java | 13 +++++++++++- .../java/org/apache/tajo/storage/RawFile.java | 9 +++++++- .../java/org/apache/tajo/storage/RowFile.java | 9 +++++++- .../apache/tajo/storage/avro/AvroAppender.java | 6 +----- .../tajo/storage/parquet/ParquetAppender.java | 5 ----- .../storage/rawfile/DirectRawFileWriter.java | 18 ++++++++++------ .../org/apache/tajo/storage/rcfile/RCFile.java | 14 ------------- .../sequencefile/SequenceFileAppender.java | 22 -------------------- 14 files changed, 86 insertions(+), 63 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index b478ffb..2b873b4 100644 --- a/CHANGES +++ b/CHANGES @@ -33,6 +33,8 @@ Release 0.11.0 - unreleased IMPROVEMENT + TAJO-1743: Improve calculation of intermediate table statistics. (jinho) + TAJO-1699: Tajo Java Client version 2. (hyunsik) TAJO-1721: Separate routine for CREATE TABLE from DDLExecutor. (hyunsik) http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-common/src/main/java/org/apache/tajo/storage/StorageConstants.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/storage/StorageConstants.java b/tajo-common/src/main/java/org/apache/tajo/storage/StorageConstants.java index 6df6228..75af292 100644 --- a/tajo-common/src/main/java/org/apache/tajo/storage/StorageConstants.java +++ b/tajo-common/src/main/java/org/apache/tajo/storage/StorageConstants.java @@ -96,6 +96,9 @@ public class StorageConstants { public static final String AVRO_SCHEMA_LITERAL = "avro.schema.literal"; public static final String AVRO_SCHEMA_URL = "avro.schema.url"; + // Internal storage properties ------------------------------------------------- + public static final String SHUFFLE_TYPE = "shuffle.type"; + static { PARQUET_DEFAULT_BLOCK_SIZE = Integer.toString(DEFAULT_BLOCK_SIZE); PARQUET_DEFAULT_PAGE_SIZE = Integer.toString(DEFAULT_PAGE_SIZE); http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-common/src/main/java/org/apache/tajo/util/StringUtils.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/util/StringUtils.java b/tajo-common/src/main/java/org/apache/tajo/util/StringUtils.java index 018c62a..61dc855 100644 --- a/tajo-common/src/main/java/org/apache/tajo/util/StringUtils.java +++ b/tajo-common/src/main/java/org/apache/tajo/util/StringUtils.java @@ -444,4 +444,26 @@ public class StringUtils { return sb.toString(); } + + /** + * <p>Checks if a String is empty ("") or null.</p> + * + * <pre> + * StringUtils.isEmpty(null) = true + * StringUtils.isEmpty("") = true + * StringUtils.isEmpty(" ") = false + * StringUtils.isEmpty("bob") = false + * StringUtils.isEmpty(" bob ") = false + * </pre> + * + * <p>NOTE: This method changed in Lang version 2.0. + * It no longer trims the String. + * That functionality is available in isBlank().</p> + * + * @param str the String to check, may be null + * @return <code>true</code> if the String is empty or null + */ + public static boolean isEmpty(String str) { + return str == null || str.length() == 0; + } } http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java index ef3d039..7b1b1d7 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java @@ -770,6 +770,9 @@ public class PhysicalPlannerImpl implements PhysicalPlanner { */ public PhysicalExec createShuffleFileWritePlan(TaskAttemptContext ctx, ShuffleFileWriteNode plan, PhysicalExec subOp) throws IOException { + plan.getOptions().set(StorageConstants.SHUFFLE_TYPE, + PlannerUtil.getShuffleType(ctx.getDataChannel().getShuffleType())); + switch (plan.getShuffleType()) { case HASH_SHUFFLE: case SCATTERED_HASH_SHUFFLE: @@ -788,7 +791,7 @@ public class PhysicalPlannerImpl implements PhysicalPlanner { specs[i] = new SortSpec(columns[i]); } } - return new RangeShuffleFileWriteExec(ctx, subOp, plan.getInSchema(), plan.getInSchema(), sortSpecs); + return new RangeShuffleFileWriteExec(ctx, plan, subOp, sortSpecs); case NONE_SHUFFLE: // if there is no given NULL CHAR property in the table property and the query is neither CTAS or INSERT, http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/RangeShuffleFileWriteExec.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/RangeShuffleFileWriteExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/RangeShuffleFileWriteExec.java index ac3d1b2..4d01b00 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/RangeShuffleFileWriteExec.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/RangeShuffleFileWriteExec.java @@ -30,6 +30,7 @@ import org.apache.tajo.catalog.SortSpec; import org.apache.tajo.catalog.TableMeta; import org.apache.tajo.conf.TajoConf; import org.apache.tajo.engine.planner.KeyProjector; +import org.apache.tajo.plan.logical.ShuffleFileWriteNode; import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.storage.*; import org.apache.tajo.storage.index.bst.BSTIndex; @@ -56,14 +57,19 @@ public class RangeShuffleFileWriteExec extends UnaryPhysicalExec { private KeyProjector keyProjector; public RangeShuffleFileWriteExec(final TaskAttemptContext context, - final PhysicalExec child, final Schema inSchema, final Schema outSchema, - final SortSpec[] sortSpecs) throws IOException { - super(context, inSchema, outSchema, child); + final ShuffleFileWriteNode plan, + final PhysicalExec child, final SortSpec[] sortSpecs) throws IOException { + super(context, plan.getInSchema(), plan.getInSchema(), child); this.sortSpecs = sortSpecs; + + if (plan.hasOptions()) { + this.meta = CatalogUtil.newTableMeta(plan.getStorageType(), plan.getOptions()); + } else { + this.meta = CatalogUtil.newTableMeta(plan.getStorageType()); + } } public void init() throws IOException { - super.init(); keySchema = PlannerUtil.sortSpecsToSchema(sortSpecs); keyProjector = new KeyProjector(inSchema, keySchema.toArray()); @@ -72,8 +78,7 @@ public class RangeShuffleFileWriteExec extends UnaryPhysicalExec { this.comp = new BaseTupleComparator(keySchema, sortSpecs); Path storeTablePath = new Path(context.getWorkDir(), "output"); LOG.info("Output data directory: " + storeTablePath); - this.meta = CatalogUtil.newTableMeta(context.getDataChannel() != null ? - context.getDataChannel().getStoreType() : "RAW"); + FileSystem fs = new RawLocalFileSystem(); fs.mkdirs(storeTablePath); this.appender = (FileAppender) ((FileTablespace) TablespaceManager.getDefault()) @@ -84,6 +89,8 @@ public class RangeShuffleFileWriteExec extends UnaryPhysicalExec { BSTIndex.TWO_LEVEL_INDEX, keySchema, comp); this.indexWriter.setLoadNum(100); this.indexWriter.open(); + + super.init(); } @Override http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCreateTable.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCreateTable.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCreateTable.java index 08aabf4..f34cbce 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCreateTable.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCreateTable.java @@ -448,7 +448,7 @@ public class TestCreateTable extends QueryTestCaseBase { res = executeString( "INSERT INTO LOCATION '/testCreateExternalTable1FromOnlyPath' SELECT * FROM default.lineitem"); res = executeString( - "CREATE EXTERNAL TABLE table1 (col1 INTEGER) USING CSV LOCATION '/testCreateExternalTable1FromOnlyPath';"); + "CREATE EXTERNAL TABLE table1 (col1 INTEGER) USING TEXT LOCATION '/testCreateExternalTable1FromOnlyPath';"); } catch (Throwable t) { if (res != null) { res.close(); http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-plan/src/main/java/org/apache/tajo/plan/util/PlannerUtil.java ---------------------------------------------------------------------- diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PlannerUtil.java b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PlannerUtil.java index c4a4367..445dc8a 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PlannerUtil.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PlannerUtil.java @@ -29,14 +29,15 @@ import org.apache.tajo.exception.TajoException; import org.apache.tajo.exception.TajoInternalError; import org.apache.tajo.plan.InvalidQueryException; import org.apache.tajo.plan.LogicalPlan; -import org.apache.tajo.plan.PlanningException; import org.apache.tajo.plan.Target; import org.apache.tajo.plan.expr.*; import org.apache.tajo.plan.logical.*; +import org.apache.tajo.plan.serder.PlanProto.ShuffleType; import org.apache.tajo.plan.visitor.BasicLogicalPlanVisitor; import org.apache.tajo.plan.visitor.ExplainLogicalPlanVisitor; import org.apache.tajo.plan.visitor.SimpleAlgebraVisitor; import org.apache.tajo.util.KeyValueSet; +import org.apache.tajo.util.StringUtils; import org.apache.tajo.util.TUtil; import java.io.IOException; @@ -864,6 +865,16 @@ public class PlannerUtil { return explains.toString(); } + public static String getShuffleType(ShuffleType shuffleType) { + if (shuffleType == null) return ShuffleType.NONE_SHUFFLE.toString(); + return shuffleType.toString(); + } + + public static ShuffleType getShuffleType(String shuffleType) { + if (StringUtils.isEmpty(shuffleType)) return ShuffleType.NONE_SHUFFLE; + return ShuffleType.valueOf(shuffleType); + } + public static boolean isFileStorageType(String storageType) { if (storageType.equalsIgnoreCase("hbase")) { return false; http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RawFile.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RawFile.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RawFile.java index e3594f9..20c5d6d 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RawFile.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RawFile.java @@ -35,6 +35,8 @@ import org.apache.tajo.datum.NullDatum; import org.apache.tajo.datum.ProtobufDatumFactory; import org.apache.tajo.exception.UnsupportedException; import org.apache.tajo.plan.expr.EvalNode; +import org.apache.tajo.plan.serder.PlanProto.ShuffleType; +import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.storage.fragment.Fragment; import org.apache.tajo.unit.StorageUnit; import org.apache.tajo.util.BitArray; @@ -472,6 +474,7 @@ public class RawFile { private int headerSize = 0; private static final int RECORD_SIZE = 4; private long pos; + private ShuffleType shuffleType; private TableStatistics stats; @@ -511,6 +514,9 @@ public class RawFile { if (enabledStats) { this.stats = new TableStatistics(this.schema); + this.shuffleType = PlannerUtil.getShuffleType( + meta.getOption(StorageConstants.SHUFFLE_TYPE, + PlannerUtil.getShuffleType(ShuffleType.NONE_SHUFFLE))); } super.init(); @@ -640,7 +646,8 @@ public class RawFile { // reset the null flags nullFlags.clear(); for (int i = 0; i < schema.size(); i++) { - if (enabledStats) { + if (shuffleType == ShuffleType.RANGE_SHUFFLE) { + // it is to calculate min/max values, and it is only used for the intermediate file. stats.analyzeField(i, t); } http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RowFile.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RowFile.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RowFile.java index ef597ea..dbe438c 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RowFile.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RowFile.java @@ -36,6 +36,8 @@ import org.apache.tajo.datum.Datum; import org.apache.tajo.datum.DatumFactory; import org.apache.tajo.exception.UnsupportedException; import org.apache.tajo.plan.expr.EvalNode; +import org.apache.tajo.plan.serder.PlanProto.ShuffleType; +import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.storage.exception.AlreadyExistsStorageException; import org.apache.tajo.storage.fragment.Fragment; import org.apache.tajo.util.BitArray; @@ -322,6 +324,7 @@ public class RowFile { private BitArray nullFlags; // statistics private TableStatistics stats; + private ShuffleType shuffleType; public RowFileAppender(Configuration conf, final TaskAttemptId taskAttemptId, final Schema schema, final TableMeta meta, final Path workDir) @@ -364,6 +367,9 @@ public class RowFile { if (enabledStats) { this.stats = new TableStatistics(this.schema); + this.shuffleType = PlannerUtil.getShuffleType( + meta.getOption(StorageConstants.SHUFFLE_TYPE, + PlannerUtil.getShuffleType(ShuffleType.NONE_SHUFFLE))); } } @@ -383,7 +389,8 @@ public class RowFile { nullFlags.clear(); for (int i = 0; i < schema.size(); i++) { - if (enabledStats) { + if (shuffleType == ShuffleType.RANGE_SHUFFLE) { + // it is to calculate min/max values, and it is only used for the intermediate file. stats.analyzeField(i, t); } http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/avro/AvroAppender.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/avro/AvroAppender.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/avro/AvroAppender.java index 2782955..0c67320 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/avro/AvroAppender.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/avro/AvroAppender.java @@ -30,7 +30,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.tajo.TaskAttemptId; -import org.apache.tajo.catalog.Column; import org.apache.tajo.catalog.TableMeta; import org.apache.tajo.catalog.statistics.TableStats; import org.apache.tajo.storage.FileAppender; @@ -138,10 +137,7 @@ public class AvroAppender extends FileAppender { public void addTuple(Tuple tuple) throws IOException { GenericRecord record = new GenericData.Record(avroSchema); for (int i = 0; i < schema.size(); ++i) { - Column column = schema.getColumn(i); - if (enabledStats) { - stats.analyzeField(i, tuple); - } + Object value; Schema.Field avroField = avroFields.get(i); Schema.Type avroType = avroField.schema().getType(); http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/ParquetAppender.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/ParquetAppender.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/ParquetAppender.java index 4a8b256..6cb99d1 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/ParquetAppender.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/ParquetAppender.java @@ -106,11 +106,6 @@ public class ParquetAppender extends FileAppender { */ @Override public void addTuple(Tuple tuple) throws IOException { - if (enabledStats) { - for (int i = 0; i < schema.size(); ++i) { - stats.analyzeField(i, tuple); - } - } writer.write(tuple); if (enabledStats) { stats.incrementRow(); http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/rawfile/DirectRawFileWriter.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/rawfile/DirectRawFileWriter.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/rawfile/DirectRawFileWriter.java index bb81d6e..912649f 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/rawfile/DirectRawFileWriter.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/rawfile/DirectRawFileWriter.java @@ -31,16 +31,17 @@ import org.apache.tajo.catalog.Schema; import org.apache.tajo.catalog.TableMeta; import org.apache.tajo.catalog.statistics.TableStats; import org.apache.tajo.common.TajoDataTypes; -import org.apache.tajo.storage.FileAppender; -import org.apache.tajo.storage.RowStoreUtil; -import org.apache.tajo.storage.TableStatistics; -import org.apache.tajo.storage.Tuple; +import org.apache.tajo.plan.serder.PlanProto.ShuffleType; +import org.apache.tajo.plan.util.PlannerUtil; +import org.apache.tajo.storage.*; import org.apache.tajo.tuple.BaseTupleBuilder; import org.apache.tajo.tuple.offheap.OffHeapRowBlock; import org.apache.tajo.tuple.offheap.UnSafeTuple; import org.apache.tajo.unit.StorageUnit; -import java.io.*; +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; @@ -56,6 +57,7 @@ public class DirectRawFileWriter extends FileAppender { private long pos; private TableStatistics stats; + private ShuffleType shuffleType; private BaseTupleBuilder builder; @@ -96,6 +98,9 @@ public class DirectRawFileWriter extends FileAppender { if (enabledStats) { this.stats = new TableStatistics(this.schema); + this.shuffleType = PlannerUtil.getShuffleType( + meta.getOption(StorageConstants.SHUFFLE_TYPE, + PlannerUtil.getShuffleType(ShuffleType.NONE_SHUFFLE))); } builder = new BaseTupleBuilder(schema); @@ -149,7 +154,8 @@ public class DirectRawFileWriter extends FileAppender { @Override public void addTuple(Tuple t) throws IOException { - if (enabledStats) { + if (shuffleType == ShuffleType.RANGE_SHUFFLE) { + // it is to calculate min/max values, and it is only used for the intermediate file. for (int i = 0; i < schema.size(); i++) { stats.analyzeField(i, t); } http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/rcfile/RCFile.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/rcfile/RCFile.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/rcfile/RCFile.java index 99c727c..4cd51e6 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/rcfile/RCFile.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/rcfile/RCFile.java @@ -32,7 +32,6 @@ import org.apache.hadoop.util.ReflectionUtils; import org.apache.tajo.TaskAttemptId; import org.apache.tajo.catalog.Schema; import org.apache.tajo.catalog.TableMeta; -import org.apache.tajo.catalog.proto.CatalogProtos; import org.apache.tajo.catalog.statistics.TableStats; import org.apache.tajo.conf.TajoConf; import org.apache.tajo.datum.Datum; @@ -731,15 +730,6 @@ public class RCFile { throw new FileNotFoundException(path.toString()); } - //determine the intermediate file type - String store = conf.get(TajoConf.ConfVars.SHUFFLE_FILE_FORMAT.varname, - TajoConf.ConfVars.SHUFFLE_FILE_FORMAT.defaultVal); - if (enabledStats && CatalogProtos.StoreType.RCFILE == CatalogProtos.StoreType.valueOf(store.toUpperCase())) { - isShuffle = true; - } else { - isShuffle = false; - } - if (this.meta.containsOption(StorageConstants.COMPRESSION_CODEC)) { String codecClassname = this.meta.getOption(StorageConstants.COMPRESSION_CODEC); try { @@ -900,10 +890,6 @@ public class RCFile { for (int i = 0; i < size; i++) { int length = columnBuffers[i].append(tuple, i); columnBufferSize += length; - if (isShuffle) { - // it is to calculate min/max values, and it is only used for the intermediate file. - stats.analyzeField(i, tuple); - } } if (size < columnNumber) { http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileAppender.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileAppender.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileAppender.java index 9b09d78..ad622fe 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileAppender.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileAppender.java @@ -32,9 +32,7 @@ import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.apache.tajo.TaskAttemptId; import org.apache.tajo.catalog.Schema; import org.apache.tajo.catalog.TableMeta; -import org.apache.tajo.catalog.proto.CatalogProtos; import org.apache.tajo.catalog.statistics.TableStats; -import org.apache.tajo.conf.TajoConf; import org.apache.tajo.datum.NullDatum; import org.apache.tajo.datum.ProtobufDatum; import org.apache.tajo.storage.*; @@ -86,15 +84,6 @@ public class SequenceFileAppender extends FileAppender { this.fs = path.getFileSystem(conf); - //determine the intermediate file type - String store = conf.get(TajoConf.ConfVars.SHUFFLE_FILE_FORMAT.varname, - TajoConf.ConfVars.SHUFFLE_FILE_FORMAT.defaultVal); - if (enabledStats && CatalogProtos.StoreType.SEQUENCEFILE == CatalogProtos.StoreType.valueOf(store.toUpperCase())) { - isShuffle = true; - } else { - isShuffle = false; - } - this.delimiter = StringEscapeUtils.unescapeJava(this.meta.getOption(StorageConstants.SEQUENCEFILE_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER)).charAt(0); this.columnNum = schema.size(); @@ -194,11 +183,6 @@ public class SequenceFileAppender extends FileAppender { } serde.serialize(j, tuple, os, nullChars); - - if (isShuffle) { - // it is to calculate min/max values, and it is only used for the intermediate file. - stats.analyzeField(j, tuple); - } } lasti = i + 1; nullByte = 0; @@ -216,12 +200,6 @@ public class SequenceFileAppender extends FileAppender { if (columnNum -1 > i) { os.write((byte) delimiter); } - - if (isShuffle) { - // it is to calculate min/max values, and it is only used for the intermediate file. - stats.analyzeField(i, tuple); - } - } writer.append(EMPTY_KEY, new Text(os.toByteArray())); }
