TAJO-682: RangePartitionAlgorithm should be improved to handle empty texts. (Alvin Henrick via hyunsik)
Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/9c33b329 Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/9c33b329 Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/9c33b329 Branch: refs/heads/branch-0.8.0 Commit: 9c33b3294bbe7483d38aa2acd4397bc16616157f Parents: 801e5c9 Author: Hyunsik Choi <[email protected]> Authored: Tue Mar 25 12:19:34 2014 +0900 Committer: Hyunsik Choi <[email protected]> Committed: Tue Mar 25 12:21:09 2014 +0900 ---------------------------------------------------------------------- CHANGES.txt | 3 + .../engine/planner/RangePartitionAlgorithm.java | 7 +- .../engine/planner/UniformRangePartition.java | 5 +- .../planner/TestUniformRangePartition.java | 74 ++++++++++++++++++++ 4 files changed, 85 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/9c33b329/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 50719e4..e2795d2 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -826,6 +826,9 @@ Release 0.2.0 - released BUG FIXES + TAJO-682: RangePartitionAlgorithm should be improved to handle empty + texts. (hyunsik) + TAJO-511: Sometimes, a query progress becomes higher than 100%. (jihoon) TAJO-281: 'mvn package -Pdist' generates duplicate Tajo jar files. http://git-wip-us.apache.org/repos/asf/tajo/blob/9c33b329/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java index 5bff857..68438bd 100644 --- a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java +++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java @@ -22,6 +22,7 @@ import org.apache.tajo.catalog.Column; import org.apache.tajo.catalog.SortSpec; import org.apache.tajo.common.TajoDataTypes.DataType; import org.apache.tajo.datum.Datum; +import org.apache.tajo.datum.NullDatum; import org.apache.tajo.storage.Tuple; import org.apache.tajo.storage.TupleRange; @@ -113,10 +114,12 @@ public abstract class RangePartitionAlgorithm { } break; case TEXT: + final char textStart = start instanceof NullDatum ? '0' : start.asChars().charAt(0); + final char textEnd = end instanceof NullDatum ? '0' : end.asChars().charAt(0); if (isAscending) { - columnCard = new BigDecimal(end.asChars().charAt(0) - start.asChars().charAt(0)); + columnCard = new BigDecimal(textEnd - textStart); } else { - columnCard = new BigDecimal(start.asChars().charAt(0) - end.asChars().charAt(0)); + columnCard = new BigDecimal(textStart - textEnd); } break; case DATE: http://git-wip-us.apache.org/repos/asf/tajo/blob/9c33b329/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java index 948b19e..a7df3e8 100644 --- a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java +++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java @@ -24,6 +24,7 @@ import org.apache.tajo.catalog.Column; import org.apache.tajo.catalog.SortSpec; import org.apache.tajo.datum.Datum; import org.apache.tajo.datum.DatumFactory; +import org.apache.tajo.datum.NullDatum; import org.apache.tajo.engine.exception.RangeOverflowException; import org.apache.tajo.storage.Tuple; import org.apache.tajo.storage.TupleRange; @@ -194,7 +195,7 @@ public class UniformRangePartition extends RangePartitionAlgorithm { } case TEXT: { if (sortSpecs[colId].isAscending()) { - candidate = inc.add(new BigDecimal((int)(last.asChars().charAt(0)))); + candidate = inc.add(new BigDecimal((int)(last instanceof NullDatum ? '0' : last.asChars().charAt(0)))); return new BigDecimal(range.getEnd().get(colId).asChars().charAt(0)).compareTo(candidate) < 0; } else { candidate = new BigDecimal((int)(last.asChars().charAt(0))).subtract(inc); @@ -381,7 +382,7 @@ public class UniformRangePartition extends RangePartitionAlgorithm { + incs[i].longValue())) + "")); } else { end.put(i, DatumFactory.createText( - ((char) (last.get(i).asChars().charAt(0) + incs[i].longValue())) + "")); + ((char) ((last.get(i) instanceof NullDatum ? '0': last.get(i).asChars().charAt(0)) + incs[i].longValue())) + "")); } break; case DATE: http://git-wip-us.apache.org/repos/asf/tajo/blob/9c33b329/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java index 3d5cdf2..dc5108a 100644 --- a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java +++ b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java @@ -304,4 +304,78 @@ public class TestUniformRangePartition { assertEquals(expected, ranges[0]); } + + @Test + public void testPartitionForOnePartNumWithOneOfTheValueNull() { + Schema schema = new Schema() + .addColumn("l_returnflag", Type.TEXT) + .addColumn("l_linestatus", Type.TEXT); + + SortSpec [] sortSpecs = PlannerUtil.schemaToSortSpecs(schema); + + Tuple s = new VTuple(2); + s.put(0, DatumFactory.createNullDatum()); + s.put(1, DatumFactory.createText("F")); + Tuple e = new VTuple(2); + e.put(0, DatumFactory.createText("R")); + e.put(1, DatumFactory.createNullDatum()); + TupleRange expected = new TupleRange(sortSpecs, s, e); + RangePartitionAlgorithm partitioner = + new UniformRangePartition(expected, sortSpecs, true); + TupleRange [] ranges = partitioner.partition(1); + + assertEquals(expected, ranges[0]); + } + + @Test + public void testPartitionForOnePartNumWithBothValueNull() { + Schema schema = new Schema() + .addColumn("l_returnflag", Type.TEXT) + .addColumn("l_linestatus", Type.TEXT); + + SortSpec [] sortSpecs = PlannerUtil.schemaToSortSpecs(schema); + + Tuple s = new VTuple(2); + s.put(0, DatumFactory.createNullDatum()); + s.put(1, DatumFactory.createNullDatum()); + Tuple e = new VTuple(2); + e.put(0, DatumFactory.createNullDatum()); + e.put(1, DatumFactory.createNullDatum()); + TupleRange expected = new TupleRange(sortSpecs, s, e); + RangePartitionAlgorithm partitioner = + new UniformRangePartition(expected, sortSpecs, true); + TupleRange [] ranges = partitioner.partition(1); + + assertEquals(expected, ranges[0]); + } + + @Test + public void testPartitionWithNull() { + Schema schema = new Schema(); + schema.addColumn("l_returnflag", Type.TEXT); + schema.addColumn("l_linestatus", Type.TEXT); + + SortSpec [] sortSpecs = PlannerUtil.schemaToSortSpecs(schema); + + Tuple s = new VTuple(2); + s.put(0, DatumFactory.createNullDatum()); + s.put(1, DatumFactory.createText("F")); + Tuple e = new VTuple(2); + e.put(0, DatumFactory.createNullDatum()); + e.put(1, DatumFactory.createText("O")); + TupleRange expected = new TupleRange(sortSpecs, s, e); + RangePartitionAlgorithm partitioner + = new UniformRangePartition(expected, sortSpecs, true); + TupleRange [] ranges = partitioner.partition(10); + + + TupleRange prev = null; + for (TupleRange r : ranges) { + if (prev == null) { + prev = r; + } else { + assertTrue(prev.compareTo(r) > 0); + } + } + } }
