Github user xuchuanyin commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2683#discussion_r216965853
--- Diff:
core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java ---
@@ -168,6 +168,65 @@ public static Object
getMeasureObjectBasedOnDataType(ColumnPage measurePage, int
}
}
+ /**
+ * Calculate data percentage in [min, max] scope based on data type
+ * @param data data to calculate the percentage
+ * @param min min value
+ * @param max max value
+ * @param column column schema including data type
+ * @return result
+ */
+ public static double computePercentage(byte[] data, byte[] min, byte[]
max, ColumnSchema column) {
+ if (column.getDataType() == DataTypes.STRING) {
+ // for string, we do not calculate
+ return 0;
+ } else if (DataTypes.isDecimal(column.getDataType())) {
+ BigDecimal minValue = DataTypeUtil.byteToBigDecimal(min);
+ BigDecimal dataValue =
DataTypeUtil.byteToBigDecimal(data).subtract(minValue);
+ BigDecimal factorValue =
DataTypeUtil.byteToBigDecimal(max).subtract(minValue);
+ return dataValue.divide(factorValue).doubleValue();
+ }
+ double dataValue, minValue, factorValue;
+ if (column.getDataType() == DataTypes.SHORT) {
+ minValue = ByteUtil.toShort(min, 0);
+ dataValue = ByteUtil.toShort(data, 0) - minValue;
+ factorValue = ByteUtil.toShort(max, 0) - ByteUtil.toShort(min, 0);
+ } else if (column.getDataType() == DataTypes.INT) {
+ if (column.isSortColumn()) {
+ minValue = ByteUtil.toXorInt(min, 0, min.length);
+ dataValue = ByteUtil.toXorInt(data, 0, data.length) - minValue;
+ factorValue = ByteUtil.toXorInt(max, 0, max.length) -
ByteUtil.toXorInt(min, 0, min.length);
+ } else {
+ minValue = ByteUtil.toLong(min, 0, min.length);
+ dataValue = ByteUtil.toLong(data, 0, data.length) - minValue;
+ factorValue = ByteUtil.toLong(max, 0, max.length) -
ByteUtil.toLong(min, 0, min.length);
+ }
+ } else if (column.getDataType() == DataTypes.LONG) {
+ minValue = ByteUtil.toLong(min, 0, min.length);
+ dataValue = ByteUtil.toLong(data, 0, data.length) - minValue;
+ factorValue = ByteUtil.toLong(max, 0, max.length) -
ByteUtil.toLong(min, 0, min.length);
+ } else if (column.getDataType() == DataTypes.DATE) {
+ minValue = ByteUtil.toInt(min, 0, min.length);
+ dataValue = ByteUtil.toInt(data, 0, data.length) - minValue;
+ factorValue = ByteUtil.toInt(max, 0, max.length) -
ByteUtil.toInt(min, 0, min.length);
+ } else if (column.getDataType() == DataTypes.TIMESTAMP) {
+ minValue = ByteUtil.toLong(min, 0, min.length);
+ dataValue = ByteUtil.toLong(data, 0, data.length) - minValue;
+ factorValue = ByteUtil.toLong(max, 0, max.length) -
ByteUtil.toLong(min, 0, min.length);
+ } else if (column.getDataType() == DataTypes.DOUBLE) {
+ minValue = ByteUtil.toDouble(min, 0, min.length);
+ dataValue = ByteUtil.toDouble(data, 0, data.length) - minValue;
+ factorValue = ByteUtil.toDouble(max, 0, max.length) -
ByteUtil.toDouble(min, 0, min.length);
+ } else {
+ throw new UnsupportedOperationException("data type: " +
column.getDataType());
+ }
+
+ if (factorValue == 0d) {
+ return Double.MIN_VALUE;
--- End diff --
If the value of the column is constant, the 'factorValue' here will be
'0'. In that case, I think the percentage should be '1' instead of 'Double.MIN_VALUE'.
---