TAJO-1485: Datum 'Char' returned only 1byte. Signed-off-by: Jihoon Son <[email protected]>
Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/d3ca4bc1 Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/d3ca4bc1 Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/d3ca4bc1 Branch: refs/heads/index_support Commit: d3ca4bc1e59539896ddcb3b1075433a79f713114 Parents: 31c4630 Author: DaeMyung Kang <[email protected]> Authored: Thu May 14 16:39:13 2015 +0900 Committer: Jihoon Son <[email protected]> Committed: Thu May 14 16:39:13 2015 +0900 ---------------------------------------------------------------------- CHANGES | 3 ++ .../org/apache/tajo/storage/RowStoreUtil.java | 24 ++++++++-- .../ValueTooLongForTypeCharactersException.java | 27 +++++++++++ .../tajo/engine/query/TestInsertQuery.java | 45 +++++++++++++++++- .../apache/tajo/engine/util/TestTupleUtil.java | 18 ++++++++ .../queries/TestInsertQuery/test1_ddl.sql | 1 + .../TestInsertQuery/test1_nolength_ddl.sql | 1 + .../testInsertIntoSelectWithFixedSizeChar.sql | 4 ++ ...tIntoSelectWithFixedSizeCharWithNoLength.sql | 2 + .../org/apache/tajo/plan/LogicalPlanner.java | 4 ++ .../stream/TextFieldSerializerDeserializer.java | 8 +++- .../storage/BinarySerializerDeserializer.java | 10 ++++ .../org/apache/tajo/storage/RowStoreUtil.java | 20 ++++++-- .../storage/TextSerializerDeserializer.java | 10 ++-- .../tajo/storage/parquet/TajoWriteSupport.java | 7 +++ .../text/TextFieldSerializerDeserializer.java | 8 +++- .../org/apache/tajo/storage/TestStorages.java | 48 +++++++++++++++++++- 17 files changed, 225 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/d3ca4bc1/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index c9b2522..da16ffe 100644 --- a/CHANGES +++ b/CHANGES @@ -123,6 +123,9 @@ Release 0.11.0 - unreleased BUG FIXES + TAJO-1485: Datum 'Char' returned only 1byte. + (Contributed by DaeMyung Kang, Committed by jihoon) + TAJO-1586: TajoMaster HA startup failure on Yarn. (jaehwa) TAJO-1598: TableMeta should change equals mechanism. http://git-wip-us.apache.org/repos/asf/tajo/blob/d3ca4bc1/tajo-client/src/main/java/org/apache/tajo/storage/RowStoreUtil.java ---------------------------------------------------------------------- diff --git a/tajo-client/src/main/java/org/apache/tajo/storage/RowStoreUtil.java b/tajo-client/src/main/java/org/apache/tajo/storage/RowStoreUtil.java index 6e16095..95dd60e 100644 --- a/tajo-client/src/main/java/org/apache/tajo/storage/RowStoreUtil.java +++ b/tajo-client/src/main/java/org/apache/tajo/storage/RowStoreUtil.java @@ -25,6 +25,7 @@ import org.apache.tajo.datum.DatumFactory; import org.apache.tajo.datum.IntervalDatum; import org.apache.tajo.exception.UnknownDataTypeException; import org.apache.tajo.exception.UnsupportedException; +import org.apache.tajo.exception.ValueTooLongForTypeCharactersException; import org.apache.tajo.util.BitArray; import java.nio.ByteBuffer; @@ -100,8 +101,9 @@ public class RowStoreUtil { break; case CHAR: - byte c = bb.get(); - tuple.put(i, DatumFactory.createChar(c)); + byte [] _str = new byte[type.getLength()]; + bb.get(_str); + tuple.put(i, DatumFactory.createChar(_str)); break; case INT2: @@ -197,7 +199,17 @@ public class RowStoreUtil { case NULL_TYPE: nullFlags.set(i); break; case BOOLEAN: bb.put(tuple.get(i).asByte()); break; case BIT: bb.put(tuple.get(i).asByte()); break; - case CHAR: bb.put(tuple.get(i).asByte()); break; + case CHAR: + int charSize = col.getDataType().getLength(); + byte [] _char = new byte[charSize]; + byte [] src = tuple.get(i).asByteArray(); + if (charSize < src.length) { + throw new ValueTooLongForTypeCharactersException(charSize); + } + + System.arraycopy(src, 0, _char, 0, src.length); + bb.put(_char); + break; case INT2: bb.putShort(tuple.get(i).asInt2()); break; case INT4: bb.putInt(tuple.get(i).asInt4()); break; case INT8: bb.putLong(tuple.get(i).asInt8()); break; @@ -259,7 +271,11 @@ public class RowStoreUtil { switch (col.getDataType().getType()) { case BOOLEAN: case BIT: - case CHAR: size += 1; break; + size += 1; + break; + case CHAR: + size += col.getDataType().getLength(); + break; case INT2: size += 2; break; case DATE: case INT4: http://git-wip-us.apache.org/repos/asf/tajo/blob/d3ca4bc1/tajo-common/src/main/java/org/apache/tajo/exception/ValueTooLongForTypeCharactersException.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/exception/ValueTooLongForTypeCharactersException.java b/tajo-common/src/main/java/org/apache/tajo/exception/ValueTooLongForTypeCharactersException.java new file mode 100644 index 0000000..262b714 --- /dev/null +++ b/tajo-common/src/main/java/org/apache/tajo/exception/ValueTooLongForTypeCharactersException.java @@ -0,0 +1,27 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.exception; + +public class ValueTooLongForTypeCharactersException extends RuntimeException { + private static final long serialVersionUID = -7689027447969916150L; + + public ValueTooLongForTypeCharactersException(int size) { + super("value too long for type character(" + size + ")"); + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/d3ca4bc1/tajo-core/src/test/java/org/apache/tajo/engine/query/TestInsertQuery.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestInsertQuery.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestInsertQuery.java index b3e3402..b4334f6 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestInsertQuery.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestInsertQuery.java @@ -39,7 +39,6 @@ import java.sql.ResultSet; import java.util.List; import static org.junit.Assert.*; -import static org.junit.Assert.assertEquals; @Category(IntegrationTest.class) public class TestInsertQuery extends QueryTestCaseBase { @@ -836,4 +835,48 @@ public class TestInsertQuery extends QueryTestCaseBase { executeString("drop table nation_diff purge;"); } } + + @Test + public final void testFixedCharSelectWithNoLength() throws Exception { + ResultSet res = executeFile("test1_nolength_ddl.sql"); + res.close(); + + CatalogService catalog = testingCluster.getMaster().getCatalog(); + assertTrue(catalog.existsTable(getCurrentDatabase(), "test1")); + + res = executeFile("testInsertIntoSelectWithFixedSizeCharWithNoLength.sql"); + res.close(); + + //remove \0 + String resultDatas = getTableFileContents("test1").replaceAll("\0",""); + String expected = "a\n"; + + assertNotNull(resultDatas); + assertEquals(expected.length(), resultDatas.length()); + assertEquals(expected, resultDatas); + executeString("DROP TABLE test1 PURGE"); + } + + @Test + public final void testFixedCharSelect() throws Exception { + ResultSet res = executeFile("test1_ddl.sql"); + res.close(); + + CatalogService catalog = testingCluster.getMaster().getCatalog(); + assertTrue(catalog.existsTable(getCurrentDatabase(), "test1")); + + res = executeFile("testInsertIntoSelectWithFixedSizeChar.sql"); + res.close(); + + //remove \0 + String resultDatas = getTableFileContents("test1").replaceAll("\0",""); + String expected = "a\n" + + "abc\n" + + "abcde\n"; + + assertNotNull(resultDatas); + assertEquals(expected.length(), resultDatas.length()); + assertEquals(expected, resultDatas); + executeString("DROP TABLE test1 PURGE"); + } } http://git-wip-us.apache.org/repos/asf/tajo/blob/d3ca4bc1/tajo-core/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java b/tajo-core/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java index b8114e0..c1c07b8 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java @@ -37,6 +37,24 @@ import static org.junit.Assert.*; public class TestTupleUtil { @Test + public final void testFixedSizeChar() { + Schema schema = new Schema(); + schema.addColumn("col1", Type.CHAR, 5); + + Tuple tuple = new VTuple(1); + tuple.put(new Datum[] { + DatumFactory.createChar("abc\0\0") + }); + + RowStoreEncoder encoder = RowStoreUtil.createEncoder(schema); + RowStoreDecoder decoder = RowStoreUtil.createDecoder(schema); + byte [] bytes = encoder.toBytes(tuple); + Tuple tuple2 = decoder.toTuple(bytes); + + assertEquals(tuple, tuple2); + } + + @Test public final void testToBytesAndToTuple() { Schema schema = new Schema(); schema.addColumn("col1", Type.BOOLEAN); http://git-wip-us.apache.org/repos/asf/tajo/blob/d3ca4bc1/tajo-core/src/test/resources/queries/TestInsertQuery/test1_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestInsertQuery/test1_ddl.sql b/tajo-core/src/test/resources/queries/TestInsertQuery/test1_ddl.sql new file mode 100644 index 0000000..c02b080 --- /dev/null +++ b/tajo-core/src/test/resources/queries/TestInsertQuery/test1_ddl.sql @@ -0,0 +1 @@ +create table test1 (col1 char(5)); \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/d3ca4bc1/tajo-core/src/test/resources/queries/TestInsertQuery/test1_nolength_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestInsertQuery/test1_nolength_ddl.sql b/tajo-core/src/test/resources/queries/TestInsertQuery/test1_nolength_ddl.sql new file mode 100644 index 0000000..cbe3654 --- /dev/null +++ b/tajo-core/src/test/resources/queries/TestInsertQuery/test1_nolength_ddl.sql @@ -0,0 +1 @@ +create table test1 (col1 char); \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/d3ca4bc1/tajo-core/src/test/resources/queries/TestInsertQuery/testInsertIntoSelectWithFixedSizeChar.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestInsertQuery/testInsertIntoSelectWithFixedSizeChar.sql b/tajo-core/src/test/resources/queries/TestInsertQuery/testInsertIntoSelectWithFixedSizeChar.sql new file mode 100644 index 0000000..f7ec11c --- /dev/null +++ b/tajo-core/src/test/resources/queries/TestInsertQuery/testInsertIntoSelectWithFixedSizeChar.sql @@ -0,0 +1,4 @@ +insert into test1 select 'a'; +insert into test1 select 'abc'; +insert into test1 select 'abcde'; +select * from test1; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/d3ca4bc1/tajo-core/src/test/resources/queries/TestInsertQuery/testInsertIntoSelectWithFixedSizeCharWithNoLength.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestInsertQuery/testInsertIntoSelectWithFixedSizeCharWithNoLength.sql b/tajo-core/src/test/resources/queries/TestInsertQuery/testInsertIntoSelectWithFixedSizeCharWithNoLength.sql new file mode 100644 index 0000000..02a1d6c --- /dev/null +++ b/tajo-core/src/test/resources/queries/TestInsertQuery/testInsertIntoSelectWithFixedSizeCharWithNoLength.sql @@ -0,0 +1,2 @@ +insert into test1 select 'a'; +select * from test1; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/d3ca4bc1/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java ---------------------------------------------------------------------- diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java b/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java index ce1e4ec..cec0760 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java @@ -1913,6 +1913,10 @@ public class LogicalPlanner extends BaseAlgebraVisitor<LogicalPlanner.PlanContex if (dataType.hasLengthOrPrecision()) { builder.setLength(dataType.getLengthOrPrecision()); + } else { + if (type == TajoDataTypes.Type.CHAR) { + builder.setLength(1); + } } TypeDesc typeDesc; http://git-wip-us.apache.org/repos/asf/tajo/blob/d3ca4bc1/tajo-plan/src/main/java/org/apache/tajo/plan/function/stream/TextFieldSerializerDeserializer.java ---------------------------------------------------------------------- diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/function/stream/TextFieldSerializerDeserializer.java b/tajo-plan/src/main/java/org/apache/tajo/plan/function/stream/TextFieldSerializerDeserializer.java index b6d5020..879373b 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/function/stream/TextFieldSerializerDeserializer.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/function/stream/TextFieldSerializerDeserializer.java @@ -29,6 +29,7 @@ import org.apache.tajo.common.TajoDataTypes; import org.apache.tajo.datum.*; import org.apache.tajo.datum.protobuf.ProtobufJsonFormat; import org.apache.tajo.exception.UnsupportedException; +import org.apache.tajo.exception.ValueTooLongForTypeCharactersException; import org.apache.tajo.storage.StorageConstants; import org.apache.tajo.util.NumberUtil; @@ -84,7 +85,12 @@ public class TextFieldSerializerDeserializer implements FieldSerializerDeseriali length = trueBytes.length; break; case CHAR: - byte[] pad = new byte[dataType.getLength() - datum.size()]; + int size = dataType.getLength() - datum.size(); + if (size < 0){ + throw new ValueTooLongForTypeCharactersException(dataType.getLength()); + } + + byte[] pad = new byte[size]; bytes = datum.asTextBytes(); out.write(bytes); out.write(pad); http://git-wip-us.apache.org/repos/asf/tajo/blob/d3ca4bc1/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/BinarySerializerDeserializer.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/BinarySerializerDeserializer.java b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/BinarySerializerDeserializer.java index 00112e7..a3b8da8 100644 --- a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/BinarySerializerDeserializer.java +++ b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/BinarySerializerDeserializer.java @@ -22,6 +22,7 @@ import com.google.common.base.Preconditions; import com.google.protobuf.Message; import org.apache.tajo.catalog.Column; import org.apache.tajo.datum.*; +import org.apache.tajo.exception.ValueTooLongForTypeCharactersException; import org.apache.tajo.util.Bytes; import java.io.IOException; @@ -44,9 +45,18 @@ public class BinarySerializerDeserializer implements SerializerDeserializer { switch (col.getDataType().getType()) { case BOOLEAN: case BIT: + bytes = datum.asByteArray(); + length = bytes.length; + out.write(bytes, 0, length); + break; + case CHAR: bytes = datum.asByteArray(); length = bytes.length; + if (length > col.getDataType().getLength()) { + throw new ValueTooLongForTypeCharactersException(col.getDataType().getLength()); + } + out.write(bytes, 0, length); break; case INT2: http://git-wip-us.apache.org/repos/asf/tajo/blob/d3ca4bc1/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/RowStoreUtil.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/RowStoreUtil.java b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/RowStoreUtil.java index 9d69423..256bc78 100644 --- a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/RowStoreUtil.java +++ b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/RowStoreUtil.java @@ -26,6 +26,7 @@ import org.apache.tajo.datum.IntervalDatum; import org.apache.tajo.datum.ProtobufDatum; import org.apache.tajo.exception.UnknownDataTypeException; import org.apache.tajo.exception.UnsupportedException; +import org.apache.tajo.exception.ValueTooLongForTypeCharactersException; import org.apache.tajo.tuple.offheap.RowWriter; import org.apache.tajo.util.BitArray; @@ -99,8 +100,9 @@ public class RowStoreUtil { break; case CHAR: - byte c = bb.get(); - tuple.put(i, DatumFactory.createChar(c)); + byte [] _str = new byte[type.getLength()]; + bb.get(_str); + tuple.put(i, DatumFactory.createChar(_str)); break; case INT2: @@ -204,7 +206,15 @@ public class RowStoreUtil { bb.put(tuple.get(i).asByte()); break; case CHAR: - bb.put(tuple.get(i).asByte()); + int charSize = col.getDataType().getLength(); + byte [] _char = new byte[charSize]; + byte [] src = tuple.get(i).asByteArray(); + if (charSize < src.length) { + throw new ValueTooLongForTypeCharactersException(charSize); + } + + System.arraycopy(src, 0, _char, 0, src.length); + bb.put(_char); break; case INT2: bb.putShort(tuple.get(i).asInt2()); @@ -281,9 +291,11 @@ public class RowStoreUtil { switch (col.getDataType().getType()) { case BOOLEAN: case BIT: - case CHAR: size += 1; break; + case CHAR: + size += col.getDataType().getLength(); + break; case INT2: size += 2; break; http://git-wip-us.apache.org/repos/asf/tajo/blob/d3ca4bc1/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/TextSerializerDeserializer.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/TextSerializerDeserializer.java b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/TextSerializerDeserializer.java index ab8816b..954b62d 100644 --- a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/TextSerializerDeserializer.java +++ b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/TextSerializerDeserializer.java @@ -20,12 +20,11 @@ package org.apache.tajo.storage; import com.google.protobuf.Message; import org.apache.commons.codec.binary.Base64; -import org.apache.tajo.TajoConstants; import org.apache.tajo.catalog.Column; import org.apache.tajo.common.TajoDataTypes; -import org.apache.tajo.conf.TajoConf; import org.apache.tajo.datum.*; import org.apache.tajo.datum.protobuf.ProtobufJsonFormat; +import org.apache.tajo.exception.ValueTooLongForTypeCharactersException; import org.apache.tajo.util.Bytes; import org.apache.tajo.util.NumberUtil; @@ -66,7 +65,12 @@ public class TextSerializerDeserializer implements SerializerDeserializer { length = trueBytes.length; break; case CHAR: - byte[] pad = new byte[dataType.getLength() - datum.size()]; + int size = dataType.getLength() - datum.size(); + if (size < 0){ + throw new ValueTooLongForTypeCharactersException(dataType.getLength()); + } + + byte[] pad = new byte[size]; bytes = datum.asTextBytes(); out.write(bytes); out.write(pad); http://git-wip-us.apache.org/repos/asf/tajo/blob/d3ca4bc1/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java index e05aeaf..dd951e1 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java @@ -23,6 +23,7 @@ import org.apache.tajo.catalog.Column; import org.apache.tajo.catalog.Schema; import org.apache.tajo.common.TajoDataTypes; import org.apache.tajo.datum.Datum; +import org.apache.tajo.exception.ValueTooLongForTypeCharactersException; import org.apache.tajo.storage.Tuple; import parquet.hadoop.api.WriteSupport; import parquet.io.api.Binary; @@ -132,6 +133,12 @@ public class TajoWriteSupport extends WriteSupport<Tuple> { recordConsumer.addDouble(datum.asFloat8()); break; case CHAR: + if (datum.size() > column.getDataType().getLength()) { + throw new ValueTooLongForTypeCharactersException(column.getDataType().getLength()); + } + + recordConsumer.addBinary(Binary.fromByteArray(datum.asTextBytes())); + break; case TEXT: recordConsumer.addBinary(Binary.fromByteArray(datum.asTextBytes())); break; http://git-wip-us.apache.org/repos/asf/tajo/blob/d3ca4bc1/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/text/TextFieldSerializerDeserializer.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/text/TextFieldSerializerDeserializer.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/text/TextFieldSerializerDeserializer.java index e637c7f..d2eee9f 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/text/TextFieldSerializerDeserializer.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/text/TextFieldSerializerDeserializer.java @@ -28,6 +28,7 @@ import org.apache.tajo.catalog.TableMeta; import org.apache.tajo.common.TajoDataTypes; import org.apache.tajo.datum.*; import org.apache.tajo.datum.protobuf.ProtobufJsonFormat; +import org.apache.tajo.exception.ValueTooLongForTypeCharactersException; import org.apache.tajo.storage.FieldSerializerDeserializer; import org.apache.tajo.storage.StorageConstants; import org.apache.tajo.util.Bytes; @@ -86,7 +87,12 @@ public class TextFieldSerializerDeserializer implements FieldSerializerDeseriali length = trueBytes.length; break; case CHAR: - byte[] pad = new byte[dataType.getLength() - datum.size()]; + int size = dataType.getLength() - datum.size(); + if (size < 0){ + throw new ValueTooLongForTypeCharactersException(dataType.getLength()); + } + + byte[] pad = new byte[size]; bytes = datum.asTextBytes(); out.write(bytes); out.write(pad); http://git-wip-us.apache.org/repos/asf/tajo/blob/d3ca4bc1/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java index 949e2e8..6a0080c 100644 --- a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java +++ b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java @@ -30,7 +30,6 @@ import org.apache.tajo.TajoIdProtos; import org.apache.tajo.catalog.CatalogUtil; import org.apache.tajo.catalog.Schema; import org.apache.tajo.catalog.TableMeta; -import org.apache.tajo.catalog.proto.CatalogProtos.StoreType; import org.apache.tajo.catalog.statistics.TableStats; import org.apache.tajo.common.TajoDataTypes.Type; import org.apache.tajo.conf.TajoConf; @@ -38,6 +37,7 @@ import org.apache.tajo.datum.Datum; import org.apache.tajo.datum.DatumFactory; import org.apache.tajo.datum.NullDatum; import org.apache.tajo.datum.ProtobufDatumFactory; +import org.apache.tajo.exception.ValueTooLongForTypeCharactersException; import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.rcfile.RCFile; import org.apache.tajo.storage.sequencefile.SequenceFileScanner; @@ -1023,4 +1023,50 @@ public class TestStorages { assertEquals(NullDatum.get(), tuple.get(4)); } } + + @Test + public final void testInsertFixedCharTypeWithOverSize() throws Exception { + if (storeType.equalsIgnoreCase("CSV") == false && + storeType.equalsIgnoreCase("SEQUENCEFILE") == false && + storeType.equalsIgnoreCase("RCFILE") == false && + storeType.equalsIgnoreCase("PARQUET") == false) { + return; + } + + Schema dataSchema = new Schema(); + dataSchema.addColumn("col1", Type.CHAR); + + KeyValueSet options = new KeyValueSet(); + TableMeta meta = CatalogUtil.newTableMeta(storeType, options); + meta.setOptions(CatalogUtil.newPhysicalProperties(storeType)); + + Path tablePath = new Path(testDir, "test_storetype_oversize.data"); + FileStorageManager sm = (FileStorageManager) StorageManager.getFileStorageManager(conf); + Appender appender = sm.getAppender(meta, dataSchema, tablePath); + appender.init(); + + Tuple expect = new VTuple(dataSchema.size()); + expect.put(new Datum[]{ + DatumFactory.createChar("1"), + }); + + appender.addTuple(expect); + appender.flush(); + + Tuple expect2 = new VTuple(dataSchema.size()); + expect2.put(new Datum[]{ + DatumFactory.createChar("12"), + }); + + boolean ok = false; + try { + appender.addTuple(expect2); + appender.flush(); + appender.close(); + } catch (ValueTooLongForTypeCharactersException e) { + ok = true; + } + + assertTrue(ok); + } }
