Github user jihoonson commented on a diff in the pull request:
https://github.com/apache/tajo/pull/91#discussion_r15740995
--- Diff:
tajo-core/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java
---
@@ -94,25 +100,81 @@ public UniformRangePartition(TupleRange range,
SortSpec [] sortSpecs) {
}
List<TupleRange> ranges = Lists.newArrayList();
- BigDecimal term = reverseCardsForDigit[0].divide(
- new BigDecimal(partNum), RoundingMode.CEILING);
- BigDecimal reminder = reverseCardsForDigit[0];
- Tuple last = range.getStart();
- while(reminder.compareTo(new BigDecimal(0)) > 0) {
+
+ BigDecimal x = new BigDecimal(reverseCardsForDigit[0]);
+
+ BigInteger term = x.divide(BigDecimal.valueOf(partNum),
RoundingMode.CEILING).toBigInteger();
+ BigInteger reminder = reverseCardsForDigit[0];
+ Tuple last = mergedRange.getStart();
+ TupleRange tupleRange;
+ while(reminder.compareTo(BigInteger.ZERO) > 0) {
if (reminder.compareTo(term) <= 0) { // final one is inclusive
- ranges.add(new TupleRange(sortSpecs, last, range.getEnd()));
+ tupleRange = new TupleRange(sortSpecs, last, mergedRange.getEnd());
} else {
- Tuple next = increment(last, term.longValue(), variableId);
- ranges.add(new TupleRange(sortSpecs, last, next));
+ Tuple next = increment(last, term, variableId);
+ tupleRange = new TupleRange(sortSpecs, last, next);
}
+
+ ranges.add(tupleRange);
last = ranges.get(ranges.size() - 1).getEnd();
reminder = reminder.subtract(term);
}
+ for (TupleRange r : ranges) {
+ denormalize(sortSpecs, r);
+ }
+
return ranges.toArray(new TupleRange[ranges.size()]);
}
/**
+ * It normalizes the start and end keys to have the same byte length if
they are text or bytes.
+ *
+ * @param sortSpecs The sort specs
+ * @param range Tuple range to be normalized
+ */
+ public static void normalize(final SortSpec [] sortSpecs, TupleRange
range) {
+ // normalize text fields to have same bytes length
+ for (int i = 0; i < sortSpecs.length; i++) {
+ if (sortSpecs[i].getSortKey().getDataType().getType() ==
TajoDataTypes.Type.TEXT) {
+ byte [] startBytes;
+ byte [] endBytes;
+ if (range.getStart().isNull(i)) {
+ startBytes = BigInteger.ZERO.toByteArray();
+ } else {
+ startBytes = range.getStart().getBytes(i);
+ }
+
+ if (range.getEnd().isNull(i)) {
+ endBytes = BigInteger.ZERO.toByteArray();
+ } else {
+ endBytes = range.getEnd().getBytes(i);
+ }
+
+ byte [][] padded = BytesUtils.padBytes(startBytes, endBytes);
+ range.getStart().put(i, DatumFactory.createText(padded[0]));
+ range.getEnd().put(i, DatumFactory.createText(padded[1]));
+ }
+ }
+ }
+
+ /**
+ * Normalized keys have padding values, which will cause a key
mismatch in the pull server.
+ * So, this method denormalizes the normalized keys again.
+ *
+ * @param sortSpecs The sort specs
+ * @param range Tuple range to be denormalized
+ */
+ public static void denormalize(SortSpec [] sortSpecs, TupleRange range) {
+ for (int i = 0; i < sortSpecs.length; i++) {
+ if (sortSpecs[i].getSortKey().getDataType().getType() ==
TajoDataTypes.Type.TEXT) {
--- End diff --
As commented above, it seems the type can also be BLOB.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working,
please contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---