[ 
https://issues.apache.org/jira/browse/PHOENIX-1981?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14560599#comment-14560599
 ] 

ASF GitHub Bot commented on PHOENIX-1981:
-----------------------------------------

Github user elilevine commented on a diff in the pull request:

    https://github.com/apache/phoenix/pull/85#discussion_r31110903
  
    --- Diff: 
phoenix-pig/src/main/java/org/apache/phoenix/pig/util/TypeUtil.java ---
    @@ -68,258 +67,250 @@
     import com.google.common.collect.ImmutableMap.Builder;
     
     public final class TypeUtil {
    -   
    +
         private static final Log LOG = LogFactory.getLog(TypeUtil.class);
    -    private static final HBaseBinaryConverter binaryConverter = new 
HBaseBinaryConverter ();
    -   private static final ImmutableMap<PDataType,Byte> 
phoenixTypeToPigDataType = init();
    -   
    -   private TypeUtil(){
    -   }
    -   
    -   /**
    -    * A map of Phoenix to Pig data types.
    -    * @return
    -    */
    -   private static ImmutableMap<PDataType, Byte> init() {
    -        final ImmutableMap.Builder<PDataType,Byte> builder = new 
Builder<PDataType,Byte> ();
    -        builder.put(PLong.INSTANCE,DataType.LONG);
    -        builder.put(PVarbinary.INSTANCE,DataType.BYTEARRAY);
    -        builder.put(PChar.INSTANCE,DataType.CHARARRAY);
    -        builder.put(PVarchar.INSTANCE,DataType.CHARARRAY);
    -        builder.put(PDouble.INSTANCE,DataType.DOUBLE);
    -        builder.put(PFloat.INSTANCE,DataType.FLOAT);
    -        builder.put(PInteger.INSTANCE,DataType.INTEGER);
    -        builder.put(PTinyint.INSTANCE,DataType.INTEGER);
    -        builder.put(PSmallint.INSTANCE,DataType.INTEGER);
    -        builder.put(PDecimal.INSTANCE,DataType.BIGDECIMAL);
    -        builder.put(PTime.INSTANCE,DataType.DATETIME);
    -        builder.put(PTimestamp.INSTANCE,DataType.DATETIME);
    -        builder.put(PBoolean.INSTANCE,DataType.BOOLEAN);
    -        builder.put(PDate.INSTANCE,DataType.DATETIME);
    -        builder.put(PUnsignedDate.INSTANCE,DataType.DATETIME);
    -        builder.put(PUnsignedDouble.INSTANCE,DataType.DOUBLE);
    -        builder.put(PUnsignedFloat.INSTANCE,DataType.FLOAT);
    -        builder.put(PUnsignedInt.INSTANCE,DataType.INTEGER);
    -        builder.put(PUnsignedLong.INSTANCE,DataType.LONG);
    -        builder.put(PUnsignedSmallint.INSTANCE,DataType.INTEGER);
    -        builder.put(PUnsignedTime.INSTANCE,DataType.DATETIME);
    -        builder.put(PUnsignedTimestamp.INSTANCE,DataType.DATETIME);
    -        builder.put(PUnsignedTinyint.INSTANCE,DataType.INTEGER);
    +    private static final HBaseBinaryConverter BINARY_CONVERTER = new 
HBaseBinaryConverter();
    +    private static final ImmutableMap<PDataType, Byte> PHOENIX_TO_PIG_TYPE 
= init();
    +    private static final TupleFactory TUPLE_FACTORY = 
TupleFactory.getInstance();
    +
    +    private TypeUtil() {}
    +
    +    /**
    +     * A map of Phoenix to Pig data types.
    +     * 
    +     * @return
    +     */
    +    private static ImmutableMap<PDataType, Byte> init() {
    +        final ImmutableMap.Builder<PDataType, Byte> builder = new 
Builder<PDataType, Byte>();
    +        builder.put(PLong.INSTANCE, DataType.LONG);
    +        builder.put(PVarbinary.INSTANCE, DataType.BYTEARRAY);
    +        builder.put(PChar.INSTANCE, DataType.CHARARRAY);
    +        builder.put(PVarchar.INSTANCE, DataType.CHARARRAY);
    +        builder.put(PDouble.INSTANCE, DataType.DOUBLE);
    +        builder.put(PFloat.INSTANCE, DataType.FLOAT);
    +        builder.put(PInteger.INSTANCE, DataType.INTEGER);
    +        builder.put(PTinyint.INSTANCE, DataType.INTEGER);
    +        builder.put(PSmallint.INSTANCE, DataType.INTEGER);
    +        builder.put(PDecimal.INSTANCE, DataType.BIGDECIMAL);
    +        builder.put(PTime.INSTANCE, DataType.DATETIME);
    +        builder.put(PTimestamp.INSTANCE, DataType.DATETIME);
    +        builder.put(PBoolean.INSTANCE, DataType.BOOLEAN);
    +        builder.put(PDate.INSTANCE, DataType.DATETIME);
    +        builder.put(PUnsignedDate.INSTANCE, DataType.DATETIME);
    +        builder.put(PUnsignedDouble.INSTANCE, DataType.DOUBLE);
    +        builder.put(PUnsignedFloat.INSTANCE, DataType.FLOAT);
    +        builder.put(PUnsignedInt.INSTANCE, DataType.INTEGER);
    +        builder.put(PUnsignedLong.INSTANCE, DataType.LONG);
    +        builder.put(PUnsignedSmallint.INSTANCE, DataType.INTEGER);
    +        builder.put(PUnsignedTime.INSTANCE, DataType.DATETIME);
    +        builder.put(PUnsignedTimestamp.INSTANCE, DataType.DATETIME);
    +        builder.put(PUnsignedTinyint.INSTANCE, DataType.INTEGER);
             return builder.build();
         }
    +
    +    /**
    +     * This method returns the most appropriate PDataType associated with 
the incoming Pig type. Note for Pig DataType
    +     * DATETIME, returns DATE as inferredSqlType. This is later used to 
make a cast to targetPhoenixType accordingly.
    +     * See {@link #castPigTypeToPhoenix(Object, byte, PDataType)}
    +     * 
    +     * @param obj
    +     * @return PDataType
    +     */
    +    public static PDataType getType(Object obj, byte type) {
    +        if (obj == null) { return null; }
    +        PDataType sqlType;
    +
    +        switch (type) {
    +        case DataType.BYTEARRAY:
    +            sqlType = PVarbinary.INSTANCE;
    +            break;
    +        case DataType.CHARARRAY:
    +            sqlType = PVarchar.INSTANCE;
    +            break;
    +        case DataType.DOUBLE:
    +        case DataType.BIGDECIMAL:
    +            sqlType = PDouble.INSTANCE;
    +            break;
    +        case DataType.FLOAT:
    +            sqlType = PFloat.INSTANCE;
    +            break;
    +        case DataType.INTEGER:
    +            sqlType = PInteger.INSTANCE;
    +            break;
    +        case DataType.LONG:
    +        case DataType.BIGINTEGER:
    +            sqlType = PLong.INSTANCE;
    +            break;
    +        case DataType.BOOLEAN:
    +            sqlType = PBoolean.INSTANCE;
    +            break;
    +        case DataType.DATETIME:
    +            sqlType = PDate.INSTANCE;
    +            break;
    +        case DataType.BYTE:
    +            sqlType = PTinyint.INSTANCE;
    +            break;
    +        default:
    +            throw new RuntimeException("Unknown type " + 
obj.getClass().getName() + " passed to PhoenixHBaseStorage");
    +        }
    +
    +        return sqlType;
    +
    +    }
    +
         /**
    -    * This method returns the most appropriate PDataType associated with 
    -    * the incoming Pig type. Note for Pig DataType DATETIME, returns DATE 
as 
    -    * inferredSqlType. 
    -    * 
    -    * This is later used to make a cast to targetPhoenixType accordingly. 
See
    -    * {@link #castPigTypeToPhoenix(Object, byte, PDataType)}
    -    * 
    -    * @param obj
    -    * @return PDataType
    -    */
    -   public static PDataType getType(Object obj, byte type) {
    -           if (obj == null) {
    -                   return null;
    -           }
    -           PDataType sqlType;
    +     * This method encodes a value with Phoenix data type. It begins with 
checking whether an object is BINARY and makes
    +     * a call to {@link #castBytes(Object, PDataType)} to convery bytes to 
targetPhoenixType
    +     * 
    +     * @param o
    +     * @param targetPhoenixType
    +     * @return Object
    +     */
    +    public static Object castPigTypeToPhoenix(Object o, byte objectType, 
PDataType targetPhoenixType) {
    +        PDataType inferredPType = getType(o, objectType);
     
    -           switch (type) {
    -           case DataType.BYTEARRAY:
    -                   sqlType = PVarbinary.INSTANCE;
    -                   break;
    -           case DataType.CHARARRAY:
    -                   sqlType = PVarchar.INSTANCE;
    -                   break;
    -           case DataType.DOUBLE:
    -           case DataType.BIGDECIMAL:
    -                   sqlType = PDouble.INSTANCE;
    -                   break;
    -           case DataType.FLOAT:
    -                   sqlType = PFloat.INSTANCE;
    -                   break;
    -           case DataType.INTEGER:
    -                   sqlType = PInteger.INSTANCE;
    -                   break;
    -           case DataType.LONG:
    -           case DataType.BIGINTEGER:
    -                   sqlType = PLong.INSTANCE;
    -                   break;
    -           case DataType.BOOLEAN:
    -                   sqlType = PBoolean.INSTANCE;
    -                   break;
    -           case DataType.DATETIME:
    -                   sqlType = PDate.INSTANCE;
    -                   break;
    -           case DataType.BYTE:
    -                   sqlType = PTinyint.INSTANCE;
    -                   break;
    -           default:
    -                   throw new RuntimeException("Unknown type " + 
obj.getClass().getName()
    -                                   + " passed to PhoenixHBaseStorage");
    -           }
    +        if (inferredPType == null) { return null; }
     
    -           return sqlType;
    +        if (inferredPType == PVarbinary.INSTANCE) {
    +            try {
    +                o = castBytes(o, targetPhoenixType);
    +                if (targetPhoenixType != PVarbinary.INSTANCE && 
targetPhoenixType != PBinary.INSTANCE) {
    +                    inferredPType = getType(o, DataType.findType(o));
    +                }
    +            } catch (IOException e) {
    +                throw new RuntimeException("Error while casting bytes for 
object " + o);
    +            }
    +        }
    +        if (inferredPType == PDate.INSTANCE) {
    +            int inferredSqlType = targetPhoenixType.getSqlType();
     
    -   }
    +            if (inferredSqlType == Types.DATE) { return new 
Date(((DateTime)o).getMillis()); }
    +            if (inferredSqlType == Types.TIME) { return new 
Time(((DateTime)o).getMillis()); }
    +            if (inferredSqlType == Types.TIMESTAMP) { return new 
Timestamp(((DateTime)o).getMillis()); }
    +        }
     
    -   /**
    -    * This method encodes a value with Phoenix data type. It begins
    -    * with checking whether an object is BINARY and makes a call to
    -    * {@link #castBytes(Object, PDataType)} to convery bytes to
    -    * targetPhoenixType
    -    * 
    -    * @param o
    -    * @param targetPhoenixType
    -    * @return Object
    -    */
    -   public static Object castPigTypeToPhoenix(Object o, byte objectType, 
PDataType targetPhoenixType) {
    -           PDataType inferredPType = getType(o, objectType);
    -           
    -           if(inferredPType == null) {
    -                   return null;
    -           }
    +        if (targetPhoenixType == inferredPType || 
inferredPType.isCoercibleTo(targetPhoenixType)) { return inferredPType
    +                .toObject(o, targetPhoenixType); }
     
    -           if(inferredPType == PVarbinary.INSTANCE) {
    -                   try {
    -                           o = castBytes(o, targetPhoenixType);
    -                           if(targetPhoenixType != PVarbinary.INSTANCE && 
targetPhoenixType != PBinary.INSTANCE) {
    -                                   inferredPType = getType(o, 
DataType.findType(o));       
    -                           }
    -                   } catch (IOException e) {
    -                           throw new RuntimeException("Error while casting 
bytes for object " +o);
    -                   }
    -           }
    -           if(inferredPType == PDate.INSTANCE) {
    -                   int inferredSqlType = targetPhoenixType.getSqlType();
    +        throw new RuntimeException(o.getClass().getName() + " cannot be 
coerced to " + targetPhoenixType.toString());
    +    }
     
    -                   if(inferredSqlType == Types.DATE) {
    -                           return new Date(((DateTime)o).getMillis());
    -                   } 
    -                   if(inferredSqlType == Types.TIME) {
    -                           return new Time(((DateTime)o).getMillis());
    -                   }
    -                   if(inferredSqlType == Types.TIMESTAMP) {
    -                           return new Timestamp(((DateTime)o).getMillis());
    -                   }
    -           }
    -           
    -           if (targetPhoenixType == inferredPType || 
inferredPType.isCoercibleTo(targetPhoenixType)) {
    -                   return inferredPType.toObject(o, targetPhoenixType);
    -           }
    -           
    -           throw new RuntimeException(o.getClass().getName()
    -                           + " cannot be coerced to 
"+targetPhoenixType.toString());
    -   }
    -   
    -   /**
    -    * This method converts bytes to the target type required
    -    * for Phoenix. It uses {@link Utf8StorageConverter} for
    -    * the conversion.
    -    * 
    -    * @param o
    -    * @param targetPhoenixType
    -    * @return Object
    -    * @throws IOException
    -    */
    -   private static Object castBytes(Object o, PDataType targetPhoenixType) 
throws IOException {
    +    /**
    +     * This method converts bytes to the target type required for Phoenix. 
It uses {@link HBaseBinaryConverter} for the
    +     * conversion.
    +     * 
    +     * @param o
    +     * @param targetPhoenixType
    +     * @return Object
    +     * @throws IOException
    +     */
    +    private static Object castBytes(Object o, PDataType targetPhoenixType) 
throws IOException {
             byte[] bytes = ((DataByteArray)o).get();
     
             if (PDataType.equalsAny(targetPhoenixType, PChar.INSTANCE, 
PVarchar.INSTANCE)) {
    -            return binaryConverter.bytesToCharArray(bytes);
    +            return BINARY_CONVERTER.bytesToCharArray(bytes);
             } else if (PDataType.equalsAny(targetPhoenixType, 
PUnsignedSmallint.INSTANCE, PSmallint.INSTANCE)) {
    -            return binaryConverter.bytesToInteger(bytes).shortValue();
    +            return BINARY_CONVERTER.bytesToInteger(bytes).shortValue();
             } else if (PDataType.equalsAny(targetPhoenixType, 
PUnsignedTinyint.INSTANCE, PTinyint.INSTANCE)) {
    -            return binaryConverter.bytesToInteger(bytes).byteValue();
    +            return BINARY_CONVERTER.bytesToInteger(bytes).byteValue();
             } else if (PDataType.equalsAny(targetPhoenixType, 
PUnsignedInt.INSTANCE, PInteger.INSTANCE)) {
    -            return binaryConverter.bytesToInteger(bytes);
    +            return BINARY_CONVERTER.bytesToInteger(bytes);
             } else if (targetPhoenixType.equals(PBoolean.INSTANCE)) {
    -            return binaryConverter.bytesToBoolean(bytes);
    +            return BINARY_CONVERTER.bytesToBoolean(bytes);
             } else if (PDataType.equalsAny(targetPhoenixType, PFloat.INSTANCE, 
PUnsignedFloat.INSTANCE)) {
    -            return binaryConverter.bytesToFloat(bytes);
    +            return BINARY_CONVERTER.bytesToFloat(bytes);
             } else if (PDataType.equalsAny(targetPhoenixType, 
PDouble.INSTANCE, PUnsignedDouble.INSTANCE)) {
    -            return binaryConverter.bytesToDouble(bytes);
    +            return BINARY_CONVERTER.bytesToDouble(bytes);
             } else if (PDataType.equalsAny(targetPhoenixType, 
PUnsignedLong.INSTANCE, PLong.INSTANCE)) {
    -            return binaryConverter.bytesToLong(bytes);
    +            return BINARY_CONVERTER.bytesToLong(bytes);
             } else if (PDataType.equalsAny(targetPhoenixType, 
PVarbinary.INSTANCE, PBinary.INSTANCE)) {
                 return bytes;
             } else {
                 return o;
    -        }        
    +        }
         }
    -    
    +
         /**
          * Transforms the PhoenixRecord to Pig {@link Tuple}.
    +     * 
          * @param record
          * @param projectedColumns
          * @return
          * @throws IOException
          */
    -    public static Tuple transformToTuple(final PhoenixPigDBWritable 
record, final ResourceFieldSchema[] projectedColumns) throws IOException {
    -        
    +    public static Tuple transformToTuple(final PhoenixPigDBWritable 
record, final ResourceFieldSchema[] projectedColumns)
    +            throws IOException {
    +
             List<Object> columnValues = record.getValues();
    -        if(columnValues == null || columnValues.size() == 0 || 
projectedColumns == null || projectedColumns.length != columnValues.size()) {
    -            return null;
    -        }
    -        int columns = columnValues.size();
    -        Tuple tuple = TupleFactory.getInstance().newTuple(columns);
    +        if (columnValues == null || columnValues.size() == 0 || 
projectedColumns == null
    +                || projectedColumns.length != columnValues.size()) { 
return null; }
    +        int numColumns = columnValues.size();
    +        Tuple tuple = TUPLE_FACTORY.newTuple(numColumns);
             try {
    -            for(int i = 0 ; i < columns ; i++) {
    +            for (int i = 0; i < numColumns; i++) {
                     final ResourceFieldSchema fieldSchema = 
projectedColumns[i];
                     Object object = columnValues.get(i);
                     if (object == null) {
                         tuple.set(i, null);
                         continue;
                     }
    -                
    -                switch(fieldSchema.getType()) {
    -                    case DataType.BYTEARRAY:
    -                        byte[] bytes = 
PDataType.fromTypeId(PBinary.INSTANCE.getSqlType()).toBytes(object);
    -                        tuple.set(i,new 
DataByteArray(bytes,0,bytes.length));
    -                        break;
    -                    case DataType.CHARARRAY:
    -                        tuple.set(i,DataType.toString(object));
    -                        break;
    -                    case DataType.DOUBLE:
    -                        tuple.set(i,DataType.toDouble(object));
    -                        break;
    -                    case DataType.FLOAT:
    -                        tuple.set(i,DataType.toFloat(object));
    -                        break;
    -                    case DataType.INTEGER:
    -                        tuple.set(i,DataType.toInteger(object));
    -                        break;
    -                    case DataType.LONG:
    -                        tuple.set(i,DataType.toLong(object));
    -                        break;
    -                    case DataType.BOOLEAN:
    -                        tuple.set(i,DataType.toBoolean(object));
    -                        break;
    -                    case DataType.DATETIME:
    -                        tuple.set(i,DataType.toDateTime(object));
    -                        break;
    -                    default:
    -                        throw new RuntimeException(String.format(" Not 
supported [%s] pig type" , fieldSchema));
    +
    +                switch (fieldSchema.getType()) {
    +                case DataType.BYTEARRAY:
    +                    byte[] bytes = 
PDataType.fromTypeId(PBinary.INSTANCE.getSqlType()).toBytes(object);
    +                    tuple.set(i, new DataByteArray(bytes, 0, 
bytes.length));
    +                    break;
    +                case DataType.CHARARRAY:
    +                    tuple.set(i, DataType.toString(object));
    +                    break;
    +                case DataType.DOUBLE:
    +                    tuple.set(i, DataType.toDouble(object));
    +                    break;
    +                case DataType.FLOAT:
    +                    tuple.set(i, DataType.toFloat(object));
    +                    break;
    +                case DataType.INTEGER:
    +                    tuple.set(i, DataType.toInteger(object));
    +                    break;
    +                case DataType.LONG:
    +                    tuple.set(i, DataType.toLong(object));
    +                    break;
    +                case DataType.BOOLEAN:
    +                    tuple.set(i, DataType.toBoolean(object));
    +                    break;
    +                case DataType.DATETIME:
    +                    tuple.set(i, DataType.toDateTime(object));
    +                    break;
    +                case DataType.BIGDECIMAL:
    --- End diff --
    
    Is this the only functional change in this PR? Is everything else 
formatting?


> PhoenixHBase Load and Store Funcs should handle all Pig data types
> ------------------------------------------------------------------
>
>                 Key: PHOENIX-1981
>                 URL: https://issues.apache.org/jira/browse/PHOENIX-1981
>             Project: Phoenix
>          Issue Type: Improvement
>            Reporter: Prashant Kommireddi
>            Assignee: Prashant Kommireddi
>
> The load and store func (Pig integration) currently do not handle all Pig 
> types. Here is a complete list 
> http://pig.apache.org/docs/r0.13.0/basic.html#data-types
> In addition to handling all simple types (BigInteger and BigDecimal are 
> missing in the LoadFunc currently), we should also look into handling complex 
> Pig types.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to