[
https://issues.apache.org/jira/browse/PHOENIX-1981?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14561915#comment-14561915
]
ASF GitHub Bot commented on PHOENIX-1981:
-----------------------------------------
Github user prashantkommireddi commented on a diff in the pull request:
https://github.com/apache/phoenix/pull/85#discussion_r31189047
--- Diff:
phoenix-pig/src/main/java/org/apache/phoenix/pig/util/TypeUtil.java ---
@@ -68,258 +67,250 @@
import com.google.common.collect.ImmutableMap.Builder;
public final class TypeUtil {
-
+
private static final Log LOG = LogFactory.getLog(TypeUtil.class);
- private static final HBaseBinaryConverter binaryConverter = new
HBaseBinaryConverter ();
- private static final ImmutableMap<PDataType,Byte>
phoenixTypeToPigDataType = init();
-
- private TypeUtil(){
- }
-
- /**
- * A map of Phoenix to Pig data types.
- * @return
- */
- private static ImmutableMap<PDataType, Byte> init() {
- final ImmutableMap.Builder<PDataType,Byte> builder = new
Builder<PDataType,Byte> ();
- builder.put(PLong.INSTANCE,DataType.LONG);
- builder.put(PVarbinary.INSTANCE,DataType.BYTEARRAY);
- builder.put(PChar.INSTANCE,DataType.CHARARRAY);
- builder.put(PVarchar.INSTANCE,DataType.CHARARRAY);
- builder.put(PDouble.INSTANCE,DataType.DOUBLE);
- builder.put(PFloat.INSTANCE,DataType.FLOAT);
- builder.put(PInteger.INSTANCE,DataType.INTEGER);
- builder.put(PTinyint.INSTANCE,DataType.INTEGER);
- builder.put(PSmallint.INSTANCE,DataType.INTEGER);
- builder.put(PDecimal.INSTANCE,DataType.BIGDECIMAL);
- builder.put(PTime.INSTANCE,DataType.DATETIME);
- builder.put(PTimestamp.INSTANCE,DataType.DATETIME);
- builder.put(PBoolean.INSTANCE,DataType.BOOLEAN);
- builder.put(PDate.INSTANCE,DataType.DATETIME);
- builder.put(PUnsignedDate.INSTANCE,DataType.DATETIME);
- builder.put(PUnsignedDouble.INSTANCE,DataType.DOUBLE);
- builder.put(PUnsignedFloat.INSTANCE,DataType.FLOAT);
- builder.put(PUnsignedInt.INSTANCE,DataType.INTEGER);
- builder.put(PUnsignedLong.INSTANCE,DataType.LONG);
- builder.put(PUnsignedSmallint.INSTANCE,DataType.INTEGER);
- builder.put(PUnsignedTime.INSTANCE,DataType.DATETIME);
- builder.put(PUnsignedTimestamp.INSTANCE,DataType.DATETIME);
- builder.put(PUnsignedTinyint.INSTANCE,DataType.INTEGER);
+ private static final HBaseBinaryConverter BINARY_CONVERTER = new
HBaseBinaryConverter();
+ private static final ImmutableMap<PDataType, Byte> PHOENIX_TO_PIG_TYPE
= init();
+ private static final TupleFactory TUPLE_FACTORY =
TupleFactory.getInstance();
+
+ private TypeUtil() {}
+
+ /**
+ * A map of Phoenix to Pig data types.
+ *
+ * @return
+ */
+ private static ImmutableMap<PDataType, Byte> init() {
+ final ImmutableMap.Builder<PDataType, Byte> builder = new
Builder<PDataType, Byte>();
+ builder.put(PLong.INSTANCE, DataType.LONG);
+ builder.put(PVarbinary.INSTANCE, DataType.BYTEARRAY);
+ builder.put(PChar.INSTANCE, DataType.CHARARRAY);
+ builder.put(PVarchar.INSTANCE, DataType.CHARARRAY);
+ builder.put(PDouble.INSTANCE, DataType.DOUBLE);
+ builder.put(PFloat.INSTANCE, DataType.FLOAT);
+ builder.put(PInteger.INSTANCE, DataType.INTEGER);
+ builder.put(PTinyint.INSTANCE, DataType.INTEGER);
+ builder.put(PSmallint.INSTANCE, DataType.INTEGER);
+ builder.put(PDecimal.INSTANCE, DataType.BIGDECIMAL);
+ builder.put(PTime.INSTANCE, DataType.DATETIME);
+ builder.put(PTimestamp.INSTANCE, DataType.DATETIME);
+ builder.put(PBoolean.INSTANCE, DataType.BOOLEAN);
+ builder.put(PDate.INSTANCE, DataType.DATETIME);
+ builder.put(PUnsignedDate.INSTANCE, DataType.DATETIME);
+ builder.put(PUnsignedDouble.INSTANCE, DataType.DOUBLE);
+ builder.put(PUnsignedFloat.INSTANCE, DataType.FLOAT);
+ builder.put(PUnsignedInt.INSTANCE, DataType.INTEGER);
+ builder.put(PUnsignedLong.INSTANCE, DataType.LONG);
+ builder.put(PUnsignedSmallint.INSTANCE, DataType.INTEGER);
+ builder.put(PUnsignedTime.INSTANCE, DataType.DATETIME);
+ builder.put(PUnsignedTimestamp.INSTANCE, DataType.DATETIME);
+ builder.put(PUnsignedTinyint.INSTANCE, DataType.INTEGER);
return builder.build();
}
+
+ /**
+ * This method returns the most appropriate PDataType associated with
the incoming Pig type. Note for Pig DataType
+ * DATETIME, returns DATE as inferredSqlType. This is later used to
make a cast to targetPhoenixType accordingly.
+ * See {@link #castPigTypeToPhoenix(Object, byte, PDataType)}
+ *
+ * @param obj
+ * @return PDataType
+ */
+ public static PDataType getType(Object obj, byte type) {
+ if (obj == null) { return null; }
+ PDataType sqlType;
+
+ switch (type) {
+ case DataType.BYTEARRAY:
+ sqlType = PVarbinary.INSTANCE;
+ break;
+ case DataType.CHARARRAY:
+ sqlType = PVarchar.INSTANCE;
+ break;
+ case DataType.DOUBLE:
+ case DataType.BIGDECIMAL:
+ sqlType = PDouble.INSTANCE;
+ break;
+ case DataType.FLOAT:
+ sqlType = PFloat.INSTANCE;
+ break;
+ case DataType.INTEGER:
+ sqlType = PInteger.INSTANCE;
+ break;
+ case DataType.LONG:
+ case DataType.BIGINTEGER:
+ sqlType = PLong.INSTANCE;
+ break;
+ case DataType.BOOLEAN:
+ sqlType = PBoolean.INSTANCE;
+ break;
+ case DataType.DATETIME:
+ sqlType = PDate.INSTANCE;
+ break;
+ case DataType.BYTE:
+ sqlType = PTinyint.INSTANCE;
+ break;
+ default:
+ throw new RuntimeException("Unknown type " +
obj.getClass().getName() + " passed to PhoenixHBaseStorage");
+ }
+
+ return sqlType;
+
+ }
+
/**
- * This method returns the most appropriate PDataType associated with
- * the incoming Pig type. Note for Pig DataType DATETIME, returns DATE
as
- * inferredSqlType.
- *
- * This is later used to make a cast to targetPhoenixType accordingly.
See
- * {@link #castPigTypeToPhoenix(Object, byte, PDataType)}
- *
- * @param obj
- * @return PDataType
- */
- public static PDataType getType(Object obj, byte type) {
- if (obj == null) {
- return null;
- }
- PDataType sqlType;
+ * This method encodes a value with Phoenix data type. It begins with
checking whether an object is BINARY and makes
+ * a call to {@link #castBytes(Object, PDataType)} to convert bytes to
targetPhoenixType
+ *
+ * @param o
+ * @param targetPhoenixType
+ * @return Object
+ */
+ public static Object castPigTypeToPhoenix(Object o, byte objectType,
PDataType targetPhoenixType) {
+ PDataType inferredPType = getType(o, objectType);
- switch (type) {
- case DataType.BYTEARRAY:
- sqlType = PVarbinary.INSTANCE;
- break;
- case DataType.CHARARRAY:
- sqlType = PVarchar.INSTANCE;
- break;
- case DataType.DOUBLE:
- case DataType.BIGDECIMAL:
- sqlType = PDouble.INSTANCE;
- break;
- case DataType.FLOAT:
- sqlType = PFloat.INSTANCE;
- break;
- case DataType.INTEGER:
- sqlType = PInteger.INSTANCE;
- break;
- case DataType.LONG:
- case DataType.BIGINTEGER:
- sqlType = PLong.INSTANCE;
- break;
- case DataType.BOOLEAN:
- sqlType = PBoolean.INSTANCE;
- break;
- case DataType.DATETIME:
- sqlType = PDate.INSTANCE;
- break;
- case DataType.BYTE:
- sqlType = PTinyint.INSTANCE;
- break;
- default:
- throw new RuntimeException("Unknown type " +
obj.getClass().getName()
- + " passed to PhoenixHBaseStorage");
- }
+ if (inferredPType == null) { return null; }
- return sqlType;
+ if (inferredPType == PVarbinary.INSTANCE) {
+ try {
+ o = castBytes(o, targetPhoenixType);
+ if (targetPhoenixType != PVarbinary.INSTANCE &&
targetPhoenixType != PBinary.INSTANCE) {
+ inferredPType = getType(o, DataType.findType(o));
+ }
+ } catch (IOException e) {
+ throw new RuntimeException("Error while casting bytes for
object " + o);
+ }
+ }
+ if (inferredPType == PDate.INSTANCE) {
+ int inferredSqlType = targetPhoenixType.getSqlType();
- }
+ if (inferredSqlType == Types.DATE) { return new
Date(((DateTime)o).getMillis()); }
+ if (inferredSqlType == Types.TIME) { return new
Time(((DateTime)o).getMillis()); }
+ if (inferredSqlType == Types.TIMESTAMP) { return new
Timestamp(((DateTime)o).getMillis()); }
+ }
- /**
- * This method encodes a value with Phoenix data type. It begins
- * with checking whether an object is BINARY and makes a call to
- * {@link #castBytes(Object, PDataType)} to convery bytes to
- * targetPhoenixType
- *
- * @param o
- * @param targetPhoenixType
- * @return Object
- */
- public static Object castPigTypeToPhoenix(Object o, byte objectType,
PDataType targetPhoenixType) {
- PDataType inferredPType = getType(o, objectType);
-
- if(inferredPType == null) {
- return null;
- }
+ if (targetPhoenixType == inferredPType ||
inferredPType.isCoercibleTo(targetPhoenixType)) { return inferredPType
+ .toObject(o, targetPhoenixType); }
- if(inferredPType == PVarbinary.INSTANCE) {
- try {
- o = castBytes(o, targetPhoenixType);
- if(targetPhoenixType != PVarbinary.INSTANCE &&
targetPhoenixType != PBinary.INSTANCE) {
- inferredPType = getType(o,
DataType.findType(o));
- }
- } catch (IOException e) {
- throw new RuntimeException("Error while casting
bytes for object " +o);
- }
- }
- if(inferredPType == PDate.INSTANCE) {
- int inferredSqlType = targetPhoenixType.getSqlType();
+ throw new RuntimeException(o.getClass().getName() + " cannot be
coerced to " + targetPhoenixType.toString());
+ }
- if(inferredSqlType == Types.DATE) {
- return new Date(((DateTime)o).getMillis());
- }
- if(inferredSqlType == Types.TIME) {
- return new Time(((DateTime)o).getMillis());
- }
- if(inferredSqlType == Types.TIMESTAMP) {
- return new Timestamp(((DateTime)o).getMillis());
- }
- }
-
- if (targetPhoenixType == inferredPType ||
inferredPType.isCoercibleTo(targetPhoenixType)) {
- return inferredPType.toObject(o, targetPhoenixType);
- }
-
- throw new RuntimeException(o.getClass().getName()
- + " cannot be coerced to
"+targetPhoenixType.toString());
- }
-
- /**
- * This method converts bytes to the target type required
- * for Phoenix. It uses {@link Utf8StorageConverter} for
- * the conversion.
- *
- * @param o
- * @param targetPhoenixType
- * @return Object
- * @throws IOException
- */
- private static Object castBytes(Object o, PDataType targetPhoenixType)
throws IOException {
+ /**
+ * This method converts bytes to the target type required for Phoenix.
It uses {@link HBaseBinaryConverter} for the
+ * conversion.
+ *
+ * @param o
+ * @param targetPhoenixType
+ * @return Object
+ * @throws IOException
+ */
+ private static Object castBytes(Object o, PDataType targetPhoenixType)
throws IOException {
byte[] bytes = ((DataByteArray)o).get();
if (PDataType.equalsAny(targetPhoenixType, PChar.INSTANCE,
PVarchar.INSTANCE)) {
- return binaryConverter.bytesToCharArray(bytes);
+ return BINARY_CONVERTER.bytesToCharArray(bytes);
} else if (PDataType.equalsAny(targetPhoenixType,
PUnsignedSmallint.INSTANCE, PSmallint.INSTANCE)) {
- return binaryConverter.bytesToInteger(bytes).shortValue();
+ return BINARY_CONVERTER.bytesToInteger(bytes).shortValue();
} else if (PDataType.equalsAny(targetPhoenixType,
PUnsignedTinyint.INSTANCE, PTinyint.INSTANCE)) {
- return binaryConverter.bytesToInteger(bytes).byteValue();
+ return BINARY_CONVERTER.bytesToInteger(bytes).byteValue();
} else if (PDataType.equalsAny(targetPhoenixType,
PUnsignedInt.INSTANCE, PInteger.INSTANCE)) {
- return binaryConverter.bytesToInteger(bytes);
+ return BINARY_CONVERTER.bytesToInteger(bytes);
} else if (targetPhoenixType.equals(PBoolean.INSTANCE)) {
- return binaryConverter.bytesToBoolean(bytes);
+ return BINARY_CONVERTER.bytesToBoolean(bytes);
} else if (PDataType.equalsAny(targetPhoenixType, PFloat.INSTANCE,
PUnsignedFloat.INSTANCE)) {
- return binaryConverter.bytesToFloat(bytes);
+ return BINARY_CONVERTER.bytesToFloat(bytes);
} else if (PDataType.equalsAny(targetPhoenixType,
PDouble.INSTANCE, PUnsignedDouble.INSTANCE)) {
- return binaryConverter.bytesToDouble(bytes);
+ return BINARY_CONVERTER.bytesToDouble(bytes);
} else if (PDataType.equalsAny(targetPhoenixType,
PUnsignedLong.INSTANCE, PLong.INSTANCE)) {
- return binaryConverter.bytesToLong(bytes);
+ return BINARY_CONVERTER.bytesToLong(bytes);
} else if (PDataType.equalsAny(targetPhoenixType,
PVarbinary.INSTANCE, PBinary.INSTANCE)) {
return bytes;
} else {
return o;
- }
+ }
}
-
+
/**
* Transforms the PhoenixRecord to Pig {@link Tuple}.
+ *
* @param record
* @param projectedColumns
* @return
* @throws IOException
*/
- public static Tuple transformToTuple(final PhoenixPigDBWritable
record, final ResourceFieldSchema[] projectedColumns) throws IOException {
-
+ public static Tuple transformToTuple(final PhoenixPigDBWritable
record, final ResourceFieldSchema[] projectedColumns)
+ throws IOException {
+
List<Object> columnValues = record.getValues();
- if(columnValues == null || columnValues.size() == 0 ||
projectedColumns == null || projectedColumns.length != columnValues.size()) {
- return null;
- }
- int columns = columnValues.size();
- Tuple tuple = TupleFactory.getInstance().newTuple(columns);
+ if (columnValues == null || columnValues.size() == 0 ||
projectedColumns == null
+ || projectedColumns.length != columnValues.size()) {
return null; }
+ int numColumns = columnValues.size();
+ Tuple tuple = TUPLE_FACTORY.newTuple(numColumns);
try {
- for(int i = 0 ; i < columns ; i++) {
+ for (int i = 0; i < numColumns; i++) {
final ResourceFieldSchema fieldSchema =
projectedColumns[i];
Object object = columnValues.get(i);
if (object == null) {
tuple.set(i, null);
continue;
}
-
- switch(fieldSchema.getType()) {
- case DataType.BYTEARRAY:
- byte[] bytes =
PDataType.fromTypeId(PBinary.INSTANCE.getSqlType()).toBytes(object);
- tuple.set(i,new
DataByteArray(bytes,0,bytes.length));
- break;
- case DataType.CHARARRAY:
- tuple.set(i,DataType.toString(object));
- break;
- case DataType.DOUBLE:
- tuple.set(i,DataType.toDouble(object));
- break;
- case DataType.FLOAT:
- tuple.set(i,DataType.toFloat(object));
- break;
- case DataType.INTEGER:
- tuple.set(i,DataType.toInteger(object));
- break;
- case DataType.LONG:
- tuple.set(i,DataType.toLong(object));
- break;
- case DataType.BOOLEAN:
- tuple.set(i,DataType.toBoolean(object));
- break;
- case DataType.DATETIME:
- tuple.set(i,DataType.toDateTime(object));
- break;
- default:
- throw new RuntimeException(String.format(" Not
supported [%s] pig type" , fieldSchema));
+
+ switch (fieldSchema.getType()) {
+ case DataType.BYTEARRAY:
+ byte[] bytes =
PDataType.fromTypeId(PBinary.INSTANCE.getSqlType()).toBytes(object);
+ tuple.set(i, new DataByteArray(bytes, 0,
bytes.length));
+ break;
+ case DataType.CHARARRAY:
+ tuple.set(i, DataType.toString(object));
+ break;
+ case DataType.DOUBLE:
+ tuple.set(i, DataType.toDouble(object));
+ break;
+ case DataType.FLOAT:
+ tuple.set(i, DataType.toFloat(object));
+ break;
+ case DataType.INTEGER:
+ tuple.set(i, DataType.toInteger(object));
+ break;
+ case DataType.LONG:
+ tuple.set(i, DataType.toLong(object));
+ break;
+ case DataType.BOOLEAN:
+ tuple.set(i, DataType.toBoolean(object));
+ break;
+ case DataType.DATETIME:
+ tuple.set(i, DataType.toDateTime(object));
+ break;
+ case DataType.BIGDECIMAL:
--- End diff --
Yes.
> PhoenixHBase Load and Store Funcs should handle all Pig data types
> ------------------------------------------------------------------
>
> Key: PHOENIX-1981
> URL: https://issues.apache.org/jira/browse/PHOENIX-1981
> Project: Phoenix
> Issue Type: Improvement
> Reporter: Prashant Kommireddi
> Assignee: Prashant Kommireddi
>
> The load and store funcs (Pig integration) currently do not handle all Pig
> types. Here is a complete list:
> http://pig.apache.org/docs/r0.13.0/basic.html#data-types
> In addition to handling all simple types (BigInteger and BigDecimal are
> currently missing in the LoadFunc), we should also look into handling complex
> Pig types.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)