Modified: hive/branches/cbo/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestSemanticAnalysis.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestSemanticAnalysis.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestSemanticAnalysis.java (original) +++ hive/branches/cbo/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestSemanticAnalysis.java Fri Aug 22 21:36:47 2014 @@ -37,6 +37,7 @@ import org.apache.hadoop.hive.metastore. import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.ql.CommandNeedRetryException; import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat; import org.apache.hadoop.hive.ql.io.RCFileInputFormat; import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; @@ -120,7 +121,7 @@ public class TestSemanticAnalysis extend @Test public void testUsNonExistentDB() throws CommandNeedRetryException { CommandProcessorResponse resp = hcatDriver.run("use no_such_db"); - assertEquals(1, resp.getResponseCode()); + assertEquals(ErrorMsg.DATABASE_NOT_EXISTS.getErrorCode(), resp.getResponseCode()); } @Test
Modified: hive/branches/cbo/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/AbstractRecordWriter.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/AbstractRecordWriter.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/AbstractRecordWriter.java (original) +++ hive/branches/cbo/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/AbstractRecordWriter.java Fri Aug 22 21:36:47 2014 @@ -53,7 +53,7 @@ abstract class AbstractRecordWriter impl private int currentBucketId = 0; private final Path partitionPath; - final AcidOutputFormat<?> outf; + final AcidOutputFormat<?,?> outf; protected AbstractRecordWriter(HiveEndPoint endPoint, HiveConf conf) throws ConnectionError, StreamingException { @@ -70,7 +70,7 @@ abstract class AbstractRecordWriter impl + endPoint); } String outFormatName = this.tbl.getSd().getOutputFormat(); - outf = (AcidOutputFormat<?>) ReflectionUtils.newInstance(Class.forName(outFormatName), conf); + outf = (AcidOutputFormat<?,?>) ReflectionUtils.newInstance(Class.forName(outFormatName), conf); } catch (MetaException e) { throw new ConnectionError(endPoint, e); } catch (NoSuchObjectException e) { Modified: hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatAddPartitionDesc.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatAddPartitionDesc.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatAddPartitionDesc.java (original) +++ 
hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatAddPartitionDesc.java Fri Aug 22 21:36:47 2014 @@ -18,18 +18,8 @@ */ package org.apache.hive.hcatalog.api; -import java.util.ArrayList; -import java.util.List; import java.util.Map; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hive.hcatalog.common.HCatException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,16 +30,33 @@ import org.slf4j.LoggerFactory; public class HCatAddPartitionDesc { private static final Logger LOG = LoggerFactory.getLogger(HCatAddPartitionDesc.class); - private String tableName; - private String dbName; - private String location; - private Map<String, String> partSpec; + private HCatPartition hcatPartition; - private HCatAddPartitionDesc(String dbName, String tbl, String loc, Map<String, String> spec) { + // The following data members are only required to support the deprecated constructor (and builder). 
+ String dbName, tableName, location; + Map<String, String> partitionKeyValues; + + private HCatAddPartitionDesc(HCatPartition hcatPartition) { + this.hcatPartition = hcatPartition; + } + + private HCatAddPartitionDesc(String dbName, String tableName, String location, Map<String, String> partitionKeyValues) { + this.hcatPartition = null; this.dbName = dbName; - this.tableName = tbl; - this.location = loc; - this.partSpec = spec; + this.tableName = tableName; + this.location = location; + this.partitionKeyValues = partitionKeyValues; + } + + HCatPartition getHCatPartition() { + return hcatPartition; + } + + HCatPartition getHCatPartition(HCatTable hcatTable) throws HCatException { + assert hcatPartition == null : "hcatPartition should have been null at this point."; + assert dbName.equalsIgnoreCase(hcatTable.getDbName()) : "DB names don't match."; + assert tableName.equalsIgnoreCase(hcatTable.getTableName()) : "Table names don't match."; + return new HCatPartition(hcatTable, partitionKeyValues, location); } /** @@ -57,18 +64,19 @@ public class HCatAddPartitionDesc { * * @return the location */ + @Deprecated // @deprecated in favour of {@link HCatPartition.#getLocation()}. To be removed in Hive 0.16. public String getLocation() { - return this.location; + return hcatPartition == null? location : hcatPartition.getLocation(); } - /** * Gets the partition spec. * * @return the partition spec */ + @Deprecated // @deprecated in favour of {@link HCatPartition.#getPartitionKeyValMap()}. To be removed in Hive 0.16. public Map<String, String> getPartitionSpec() { - return this.partSpec; + return hcatPartition == null? partitionKeyValues : hcatPartition.getPartitionKeyValMap(); } /** @@ -76,8 +84,9 @@ public class HCatAddPartitionDesc { * * @return the table name */ + @Deprecated // @deprecated in favour of {@link HCatPartition.#getTableName()}. To be removed in Hive 0.16. public String getTableName() { - return this.tableName; + return hcatPartition == null? 
tableName : hcatPartition.getTableName(); } /** @@ -85,17 +94,14 @@ public class HCatAddPartitionDesc { * * @return the database name */ + @Deprecated // @deprecated in favour of {@link HCatPartition.#getDatabaseName()}. To be removed in Hive 0.16. public String getDatabaseName() { - return this.dbName; + return hcatPartition == null? dbName : hcatPartition.getDatabaseName(); } @Override public String toString() { - return "HCatAddPartitionDesc [" - + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") - + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") - + (location != null ? "location=" + location + ", " : "location=null") - + (partSpec != null ? "partSpec=" + partSpec : "partSpec=null") + "]"; + return "HCatAddPartitionDesc [" + hcatPartition + "]"; } /** @@ -108,61 +114,48 @@ public class HCatAddPartitionDesc { * @return the builder * @throws HCatException */ - public static Builder create(String dbName, String tableName, String location, - Map<String, String> partSpec) throws HCatException { + @Deprecated // @deprecated in favour of {@link HCatAddPartitionDesc.#create(HCatPartition)}. To be removed in Hive 0.16. + public static Builder create(String dbName, + String tableName, + String location, + Map<String, String> partSpec + ) throws HCatException { + LOG.error("Unsupported! 
HCatAddPartitionDesc requires HCatTable to be specified explicitly."); return new Builder(dbName, tableName, location, partSpec); } - Partition toHivePartition(Table hiveTable) throws HCatException { - Partition hivePtn = new Partition(); - hivePtn.setDbName(this.dbName); - hivePtn.setTableName(this.tableName); - - List<String> pvals = new ArrayList<String>(); - for (FieldSchema field : hiveTable.getPartitionKeys()) { - String val = partSpec.get(field.getName()); - if (val == null || val.length() == 0) { - throw new HCatException("create partition: Value for key " - + field.getName() + " is null or empty"); - } - pvals.add(val); - } - - hivePtn.setValues(pvals); - StorageDescriptor sd = new StorageDescriptor(hiveTable.getSd()); - hivePtn.setSd(sd); - hivePtn.setParameters(hiveTable.getParameters()); - if (this.location != null) { - hivePtn.getSd().setLocation(this.location); - } else { - String partName; - try { - partName = Warehouse.makePartName( - hiveTable.getPartitionKeys(), pvals); - LOG.info("Setting partition location to :" + partName); - } catch (MetaException e) { - throw new HCatException("Exception while creating partition name.", e); - } - Path partPath = new Path(hiveTable.getSd().getLocation(), partName); - hivePtn.getSd().setLocation(partPath.toString()); - } - hivePtn.setCreateTime((int) (System.currentTimeMillis() / 1000)); - hivePtn.setLastAccessTimeIsSet(false); - return hivePtn; + /** + * Constructs a Builder instance, using an HCatPartition object. + * @param partition An HCatPartition instance. + * @return A Builder object that can build an appropriate HCatAddPartitionDesc. + * @throws HCatException + */ + public static Builder create(HCatPartition partition) throws HCatException { + return new Builder(partition); } + /** + * Builder class for constructing an HCatAddPartition instance. 
+ */ public static class Builder { - private String tableName; - private String location; - private Map<String, String> values; - private String dbName; + private HCatPartition hcatPartition; + + // The following data members are only required to support the deprecated constructor (and builder). + String dbName, tableName, location; + Map<String, String> partitionSpec; + + private Builder(HCatPartition hcatPartition) { + this.hcatPartition = hcatPartition; + } - private Builder(String dbName, String tableName, String location, Map<String, String> values) { + @Deprecated // To be removed in Hive 0.16. + private Builder(String dbName, String tableName, String location, Map<String, String> partitionSpec) { + this.hcatPartition = null; this.dbName = dbName; this.tableName = tableName; this.location = location; - this.values = values; + this.partitionSpec = partitionSpec; } /** @@ -172,13 +165,9 @@ public class HCatAddPartitionDesc { * @throws HCatException */ public HCatAddPartitionDesc build() throws HCatException { - if (this.dbName == null) { - this.dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; - } - HCatAddPartitionDesc desc = new HCatAddPartitionDesc( - this.dbName, this.tableName, this.location, - this.values); - return desc; + return hcatPartition == null? 
+ new HCatAddPartitionDesc(dbName, tableName, location, partitionSpec) + : new HCatAddPartitionDesc(hcatPartition); } } Modified: hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClient.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClient.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClient.java (original) +++ hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClient.java Fri Aug 22 21:36:47 2014 @@ -155,6 +155,64 @@ public abstract class HCatClient { throws HCatException; /** + * Updates the Table's whole schema (including column schema, I/O Formats, SerDe definitions, etc.) + * @param dbName The name of the database. + * @param tableName The name of the table. + * @param newTableDefinition The (new) definition of the table. + * @throws HCatException + */ + public abstract void updateTableSchema(String dbName, String tableName, HCatTable newTableDefinition) + throws HCatException; + + /** + * Serializer for HCatTable. + * @param hcatTable The HCatTable to be serialized into string form + * @return String representation of the HCatTable. + * @throws HCatException, on failure to serialize. + */ + public abstract String serializeTable(HCatTable hcatTable) throws HCatException; + + /** + * Deserializer for HCatTable. + * @param hcatTableStringRep The String representation of an HCatTable, presumably retrieved from {@link #serializeTable(HCatTable)} + * @return HCatTable reconstructed from the string + * throws HCatException, on failure to deserialize. + */ + public abstract HCatTable deserializeTable(String hcatTableStringRep) throws HCatException; + + /** + * Serializer for HCatPartition. 
+ * @param hcatPartition The HCatPartition instance to be serialized. + * @return String representation of the HCatPartition. + * @throws HCatException, on failure to serialize. + */ + public abstract String serializePartition(HCatPartition hcatPartition) throws HCatException; + + /** + * Serializer for a list of HCatPartition. + * @param hcatPartitions The HCatPartitions to be serialized. + * @return A list of Strings, each representing an HCatPartition. + * @throws HCatException, on failure to serialize. + */ + public abstract List<String> serializePartitions(List<HCatPartition> hcatPartitions) throws HCatException; + + /** + * Deserializer for an HCatPartition. + * @param hcatPartitionStringRep The String representation of the HCatPartition, presumably retrieved from {@link #serializePartition(HCatPartition)} + * @return HCatPartition instance reconstructed from the string. + * @throws HCatException, on failure to deserialize. + */ + public abstract HCatPartition deserializePartition(String hcatPartitionStringRep) throws HCatException; + + /** + * Deserializer for a list of HCatPartition strings. + * @param hcatPartitionStringReps The list of HCatPartition strings to be deserialized. + * @return A list of HCatPartition instances, each reconstructed from an entry in the string-list. + * @throws HCatException, on failure to deserialize. + */ + public abstract List<HCatPartition> deserializePartitions(List<String> hcatPartitionStringReps) throws HCatException; + + /** + * Creates the table like an existing table. * * @param dbName The name of the database. 
Modified: hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java (original) +++ hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java Fri Aug 22 21:36:47 2014 @@ -172,7 +172,7 @@ public class HCatClientHMSImpl extends H public void createTable(HCatCreateTableDesc createTableDesc) throws HCatException { try { - hmsClient.createTable(createTableDesc.toHiveTable(hiveConfig)); + hmsClient.createTable(createTableDesc.getHCatTable().toHiveTable()); } catch (AlreadyExistsException e) { if (!createTableDesc.getIfNotExists()) { throw new HCatException( @@ -220,6 +220,27 @@ public class HCatClientHMSImpl extends H } @Override + public void updateTableSchema(String dbName, String tableName, HCatTable newTableDefinition) throws HCatException { + try { + hmsClient.alter_table(dbName, tableName, newTableDefinition.toHiveTable()); + } + catch (InvalidOperationException e) { + throw new HCatException("InvalidOperationException while updating table schema.", e); + } + catch (MetaException e) { + throw new HCatException("MetaException while updating table schema.", e); + } + catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while updating table schema.", e); + } + catch (TException e) { + throw new ConnectionFailureException( + "TException while updating table schema.", e); + } + } + + @Override public void createTableLike(String dbName, String existingTblName, String newTableName, boolean ifNotExists, boolean isExternal, String location) 
throws HCatException { @@ -308,10 +329,12 @@ public class HCatClientHMSImpl extends H throws HCatException { List<HCatPartition> hcatPtns = new ArrayList<HCatPartition>(); try { + Table table = hmsClient.getTable(dbName, tblName); + HCatTable hcatTable = new HCatTable(table); List<Partition> hivePtns = hmsClient.listPartitions( checkDB(dbName), tblName, (short) -1); for (Partition ptn : hivePtns) { - hcatPtns.add(new HCatPartition(ptn)); + hcatPtns.add(new HCatPartition(hcatTable, ptn)); } } catch (NoSuchObjectException e) { throw new ObjectNotFoundException( @@ -351,7 +374,8 @@ public class HCatClientHMSImpl extends H Map<String, String> partitionSpec) throws HCatException { HCatPartition partition = null; try { - List<HCatFieldSchema> partitionColumns = getTable(checkDB(dbName), tableName).getPartCols(); + HCatTable hcatTable = getTable(checkDB(dbName), tableName); + List<HCatFieldSchema> partitionColumns = hcatTable.getPartCols(); if (partitionColumns.size() != partitionSpec.size()) { throw new HCatException("Partition-spec doesn't have the right number of partition keys."); } @@ -369,7 +393,7 @@ public class HCatClientHMSImpl extends H Partition hivePartition = hmsClient.getPartition(checkDB(dbName), tableName, ptnValues); if (hivePartition != null) { - partition = new HCatPartition(hivePartition); + partition = new HCatPartition(hcatTable, hivePartition); } } catch (MetaException e) { throw new HCatException( @@ -397,7 +421,17 @@ public class HCatClientHMSImpl extends H + " is not partitioned."); } - hmsClient.add_partition(partInfo.toHivePartition(tbl)); + HCatTable hcatTable = new HCatTable(tbl); + + HCatPartition hcatPartition = partInfo.getHCatPartition(); + + // TODO: Remove in Hive 0.16. + // This is only required to support the deprecated methods in HCatAddPartitionDesc.Builder. 
+ if (hcatPartition == null) { + hcatPartition = partInfo.getHCatPartition(hcatTable); + } + + hmsClient.add_partition(hcatPartition.toHivePartition()); } catch (InvalidObjectException e) { throw new HCatException( "InvalidObjectException while adding partition.", e); @@ -458,10 +492,11 @@ public class HCatClientHMSImpl extends H String tblName, String filter) throws HCatException { List<HCatPartition> hcatPtns = new ArrayList<HCatPartition>(); try { + HCatTable table = getTable(dbName, tblName); List<Partition> hivePtns = hmsClient.listPartitionsByFilter( checkDB(dbName), tblName, filter, (short) -1); for (Partition ptn : hivePtns) { - hcatPtns.add(new HCatPartition(ptn)); + hcatPtns.add(new HCatPartition(table, ptn)); } } catch (MetaException e) { throw new HCatException("MetaException while fetching partitions.", @@ -682,9 +717,18 @@ public class HCatClientHMSImpl extends H try { tbl = hmsClient.getTable(partInfoList.get(0).getDatabaseName(), partInfoList.get(0).getTableName()); + HCatTable hcatTable = new HCatTable(tbl); ArrayList<Partition> ptnList = new ArrayList<Partition>(); for (HCatAddPartitionDesc desc : partInfoList) { - ptnList.add(desc.toHivePartition(tbl)); + HCatPartition hCatPartition = desc.getHCatPartition(); + + // TODO: Remove in Hive 0.16. + // This is required only to support the deprecated HCatAddPartitionDesc.Builder interfaces. 
+ if (hCatPartition == null) { + hCatPartition = desc.getHCatPartition(hcatTable); + } + + ptnList.add(hCatPartition.toHivePartition()); } numPartitions = hmsClient.add_partitions(ptnList); } catch (InvalidObjectException e) { @@ -720,4 +764,65 @@ public class HCatClientHMSImpl extends H "TException while retrieving JMS Topic name.", e); } } + + @Override + public String serializeTable(HCatTable hcatTable) throws HCatException { + return MetadataSerializer.get().serializeTable(hcatTable); + } + + @Override + public HCatTable deserializeTable(String hcatTableStringRep) throws HCatException { + return MetadataSerializer.get().deserializeTable(hcatTableStringRep); + } + + @Override + public String serializePartition(HCatPartition hcatPartition) throws HCatException { + return MetadataSerializer.get().serializePartition(hcatPartition); + } + + @Override + public List<String> serializePartitions(List<HCatPartition> hcatPartitions) throws HCatException { + List<String> partStrings = new ArrayList<String>(hcatPartitions.size()); + MetadataSerializer serializer = MetadataSerializer.get(); + + for (HCatPartition partition : hcatPartitions) { + partStrings.add(serializer.serializePartition(partition)); + } + + return partStrings; + } + + @Override + public HCatPartition deserializePartition(String hcatPartitionStringRep) throws HCatException { + HCatPartition hcatPartition = MetadataSerializer.get().deserializePartition(hcatPartitionStringRep); + hcatPartition.hcatTable(getTable(hcatPartition.getDatabaseName(), hcatPartition.getTableName())); + return hcatPartition; + } + + @Override + public List<HCatPartition> deserializePartitions(List<String> hcatPartitionStringReps) throws HCatException { + List<HCatPartition> partitions = new ArrayList<HCatPartition>(hcatPartitionStringReps.size()); + MetadataSerializer deserializer = MetadataSerializer.get(); + HCatTable table = null; + for (String partString : hcatPartitionStringReps) { + HCatPartition partition; + if (table == null) 
{ + partition = deserializePartition(partString); + table = partition.hcatTable(); + } + else { + partition = deserializer.deserializePartition(partString); + if (partition.getDatabaseName().equals(table.getDbName()) + && partition.getTableName().equals(table.getTableName())) { + partition.hcatTable(table); + } + else { + throw new HCatException("All partitions are not of the same table: " + + table.getDbName() + "." + table.getTableName()); + } + } + partitions.add(partition); + } + return partitions; + } } Modified: hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateTableDesc.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateTableDesc.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateTableDesc.java (original) +++ hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateTableDesc.java Fri Aug 22 21:36:47 2014 @@ -19,38 +19,13 @@ package org.apache.hive.hcatalog.api; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; import java.util.Map; -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.TableType; -import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Order; -import org.apache.hadoop.hive.metastore.api.SerDeInfo; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat; -import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat; -import 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; -import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat; -import org.apache.hadoop.hive.ql.io.orc.OrcSerde; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler; -import org.apache.hadoop.hive.ql.metadata.HiveUtils; import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; -import org.apache.hadoop.mapred.SequenceFileInputFormat; -import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hive.hcatalog.common.HCatException; import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * The Class HCatCreateTableDesc for defining attributes for a new table. @@ -58,31 +33,12 @@ import org.slf4j.LoggerFactory; @SuppressWarnings("deprecation") public class HCatCreateTableDesc { - private static final Logger LOG = LoggerFactory.getLogger(HCatCreateTableDesc.class); - - private String tableName; - private String dbName; - private boolean isExternal; - private String comment; - private String location; - private List<HCatFieldSchema> cols; - private List<HCatFieldSchema> partCols; - private List<String> bucketCols; - private int numBuckets; - private List<Order> sortCols; - private Map<String, String> tblProps; private boolean ifNotExists; - private String fileFormat; - private String inputformat; - private String outputformat; - private String serde; - private String storageHandler; - private Map<String, String> serdeParams; - - private HCatCreateTableDesc(String dbName, String tableName, List<HCatFieldSchema> columns) { - this.dbName = dbName; - this.tableName = tableName; - this.cols = columns; + private HCatTable hcatTable; + + private 
HCatCreateTableDesc(HCatTable hcatTable, boolean ifNotExists) { + this.hcatTable = hcatTable; + this.ifNotExists = ifNotExists; } /** @@ -93,109 +49,36 @@ public class HCatCreateTableDesc { * @param columns the columns * @return the builder */ + @Deprecated // @deprecated in favour of {@link #create(HCatTable)}. To be removed in Hive 0.16. public static Builder create(String dbName, String tableName, List<HCatFieldSchema> columns) { return new Builder(dbName, tableName, columns); } - Table toHiveTable(HiveConf conf) throws HCatException { + /** + * Getter for HCatCreateTableDesc.Builder instance. + * @param table Spec for HCatTable to be created. + * @param ifNotExists Only create the table if it doesn't already exist. + * @return Builder instance. + */ + public static Builder create(HCatTable table, boolean ifNotExists) { + return new Builder(table, ifNotExists); + } + + /** + * Getter for HCatCreateTableDesc.Builder instance. By default, ifNotExists is false. + * So the attempt to create the table is made even if the table already exists. + * @param table Spec for HCatTable to be created. + * @return Builder instance. + */ + public static Builder create(HCatTable table) { + return new Builder(table, false); + } - /* - * get the same defaults as are set when a Table is created via the Hive Driver. 
- */ - Table newTable = org.apache.hadoop.hive.ql.metadata.Table.getEmptyTable(dbName, tableName); - newTable.setDbName(dbName); - newTable.setTableName(tableName); - if (tblProps != null) { - for ( Map.Entry<String,String> e : tblProps.entrySet()){ - newTable.getParameters().put(e.getKey(), e.getValue()); - } - } - - if (isExternal) { - newTable.putToParameters("EXTERNAL", "TRUE"); - newTable.setTableType(TableType.EXTERNAL_TABLE.toString()); - } else { - newTable.setTableType(TableType.MANAGED_TABLE.toString()); - } - - // Initialize an sd if one does not exist - if (newTable.getSd() == null) { - newTable.setSd(new StorageDescriptor()); - } - StorageDescriptor sd = newTable.getSd(); - - if (sd.getSerdeInfo() == null){ - sd.setSerdeInfo(new SerDeInfo()); - } - if (location != null) { - sd.setLocation(location); - } - if (this.comment != null) { - newTable.putToParameters("comment", comment); - } - if (!StringUtils.isEmpty(fileFormat)) { - sd.setInputFormat(inputformat); - sd.setOutputFormat(outputformat); - if (serde != null) { - sd.getSerdeInfo().setSerializationLib(serde); - } else { - LOG.info("Using LazySimpleSerDe for table " + tableName); - sd.getSerdeInfo() - .setSerializationLib( - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class - .getName()); - } - } else { - try { - LOG.info("Creating instance of storage handler to get input/output, serder info."); - HiveStorageHandler sh = HiveUtils.getStorageHandler(conf, - storageHandler); - sd.setInputFormat(sh.getInputFormatClass().getName()); - sd.setOutputFormat(sh.getOutputFormatClass().getName()); - sd.getSerdeInfo().setSerializationLib( - sh.getSerDeClass().getName()); - newTable.putToParameters( - org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, - storageHandler); - } catch (HiveException e) { - throw new HCatException( - "Exception while creating instance of storage handler", - e); - } - } - if(serdeParams != null) { - for(Map.Entry<String, String> param : 
serdeParams.entrySet()) { - sd.getSerdeInfo().putToParameters(param.getKey(), param.getValue()); - } - } - if (this.partCols != null) { - ArrayList<FieldSchema> hivePtnCols = new ArrayList<FieldSchema>(); - for (HCatFieldSchema fs : this.partCols) { - hivePtnCols.add(HCatSchemaUtils.getFieldSchema(fs)); - } - newTable.setPartitionKeys(hivePtnCols); - } - - if (this.cols != null) { - ArrayList<FieldSchema> hiveTblCols = new ArrayList<FieldSchema>(); - for (HCatFieldSchema fs : this.cols) { - hiveTblCols.add(HCatSchemaUtils.getFieldSchema(fs)); - } - newTable.getSd().setCols(hiveTblCols); - } - - if (this.bucketCols != null) { - newTable.getSd().setBucketCols(bucketCols); - newTable.getSd().setNumBuckets(numBuckets); - } - - if (this.sortCols != null) { - newTable.getSd().setSortCols(sortCols); - } - - newTable.setCreateTime((int) (System.currentTimeMillis() / 1000)); - newTable.setLastAccessTimeIsSet(false); - return newTable; + /** + * Getter for underlying HCatTable instance. + */ + public HCatTable getHCatTable() { + return this.hcatTable; } /** @@ -212,8 +95,9 @@ public class HCatCreateTableDesc { * * @return the table name */ + @Deprecated // @deprecated in favour of {@link HCatTable.#getTableName()}. To be removed in Hive 0.16. public String getTableName() { - return this.tableName; + return this.hcatTable.getTableName(); } /** @@ -221,8 +105,9 @@ public class HCatCreateTableDesc { * * @return the cols */ + @Deprecated // @deprecated in favour of {@link HCatTable.#getCols()}. To be removed in Hive 0.16. public List<HCatFieldSchema> getCols() { - return this.cols; + return this.hcatTable.getCols(); } /** @@ -230,8 +115,9 @@ public class HCatCreateTableDesc { * * @return the partition cols */ + @Deprecated // @deprecated in favour of {@link HCatTable.#getPartCols()}. To be removed in Hive 0.16. 
public List<HCatFieldSchema> getPartitionCols() { - return this.partCols; + return this.hcatTable.getPartCols(); } /** @@ -239,12 +125,14 @@ public class HCatCreateTableDesc { * * @return the bucket cols */ + @Deprecated // @deprecated in favour of {@link HCatTable.#getBucketCols()}. To be removed in Hive 0.16. public List<String> getBucketCols() { - return this.bucketCols; + return this.hcatTable.getBucketCols(); } + @Deprecated // @deprecated in favour of {@link HCatTable.#getNumBuckets()}. public int getNumBuckets() { - return this.numBuckets; + return this.hcatTable.getNumBuckets(); } /** @@ -252,8 +140,9 @@ public class HCatCreateTableDesc { * * @return the comments */ + @Deprecated // @deprecated in favour of {@link HCatTable.#comment()}. To be removed in Hive 0.16. public String getComments() { - return this.comment; + return this.hcatTable.comment(); } /** @@ -261,8 +150,9 @@ public class HCatCreateTableDesc { * * @return the storage handler */ + @Deprecated // @deprecated in favour of {@link HCatTable.#getStorageHandler()}. To be removed in Hive 0.16. public String getStorageHandler() { - return this.storageHandler; + return this.hcatTable.getStorageHandler(); } /** @@ -270,8 +160,9 @@ public class HCatCreateTableDesc { * * @return the location */ + @Deprecated // @deprecated in favour of {@link HCatTable.#getLocation()}. To be removed in Hive 0.16. public String getLocation() { - return this.location; + return this.hcatTable.getLocation(); } /** @@ -279,8 +170,11 @@ public class HCatCreateTableDesc { * * @return the external */ + @Deprecated // @deprecated in favour of {@link HCatTable.#getTableType()}. To be removed in Hive 0.16. public boolean getExternal() { - return this.isExternal; + + return this.hcatTable.getTabletype() + .equalsIgnoreCase(HCatTable.Type.EXTERNAL_TABLE.toString()); } /** @@ -288,8 +182,9 @@ public class HCatCreateTableDesc { * * @return the sort cols */ + @Deprecated // @deprecated in favour of {@link HCatTable.#getSortCols()}. 
To be removed in Hive 0.16. public List<Order> getSortCols() { - return this.sortCols; + return this.hcatTable.getSortCols(); } /** @@ -297,8 +192,9 @@ public class HCatCreateTableDesc { * * @return the tbl props */ + @Deprecated // @deprecated in favour of {@link HCatTable.#getTblProps()}. To be removed in Hive 0.16. public Map<String, String> getTblProps() { - return this.tblProps; + return this.hcatTable.getTblProps(); } /** @@ -306,8 +202,9 @@ public class HCatCreateTableDesc { * * @return the file format */ + @Deprecated // @deprecated in favour of {@link HCatTable.#fileFormat()}. To be removed in Hive 0.16. public String getFileFormat() { - return this.fileFormat; + return this.hcatTable.fileFormat(); } /** @@ -315,74 +212,39 @@ public class HCatCreateTableDesc { * * @return the database name */ + @Deprecated // @deprecated in favour of {@link HCatTable.#getDbName()}. To be removed in Hive 0.16. public String getDatabaseName() { - return this.dbName; + return this.hcatTable.getDbName(); } + /** * Gets the SerDe parameters; for example see {@link org.apache.hive.hcatalog.api.HCatCreateTableDesc.Builder#fieldsTerminatedBy(char)} */ + @Deprecated public Map<String, String> getSerdeParams() { - return serdeParams; + return this.hcatTable.getSerdeParams(); } @Override public String toString() { - return "HCatCreateTableDesc [" - + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") - + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") - + "isExternal=" - + isExternal - + ", " - + (comment != null ? "comment=" + comment + ", " : "comment=null") - + (location != null ? "location=" + location + ", " : "location=null") - + (cols != null ? "cols=" + cols + ", " : "cols=null") - + (partCols != null ? "partCols=" + partCols + ", " : "partCols=null") - + (bucketCols != null ? "bucketCols=" + bucketCols + ", " : "bucketCols=null") - + "numBuckets=" - + numBuckets - + ", " - + (sortCols != null ? 
"sortCols=" + sortCols + ", " : "sortCols=null") - + (tblProps != null ? "tblProps=" + tblProps + ", " : "tblProps=null") - + "ifNotExists=" - + ifNotExists - + ", " - + (fileFormat != null ? "fileFormat=" + fileFormat + ", " : "fileFormat=null") - + (inputformat != null ? "inputformat=" + inputformat + ", " - : "inputformat=null") - + (outputformat != null ? "outputformat=" + outputformat + ", " - : "outputformat=null") - + (serde != null ? "serde=" + serde + ", " : "serde=null") - + (storageHandler != null ? "storageHandler=" + storageHandler - : "storageHandler=null") - + ",serdeParams=" + (serdeParams == null ? "null" : serdeParams) - + "]"; + return "HCatCreateTableDesc [ " + hcatTable.toString() + + ", ifNotExists = " + ifNotExists + "]"; } public static class Builder { - private String tableName; - private boolean isExternal; - private List<HCatFieldSchema> cols; - private List<HCatFieldSchema> partCols; - private List<String> bucketCols; - private List<Order> sortCols; - private int numBuckets; - private String comment; - private String fileFormat; - private String location; - private String storageHandler; - private Map<String, String> tblProps; private boolean ifNotExists; - private String dbName; - private Map<String, String> serdeParams; - + private HCatTable hcatTable; + @Deprecated // @deprecated in favour of {@link #Builder(HCatTable, boolean)}. To be removed in Hive 0.16. private Builder(String dbName, String tableName, List<HCatFieldSchema> columns) { - this.dbName = dbName; - this.tableName = tableName; - this.cols = columns; + hcatTable = new HCatTable(dbName, tableName).cols(columns); } + private Builder(HCatTable hcatTable, boolean ifNotExists) { + this.hcatTable = hcatTable; + this.ifNotExists = ifNotExists; + } /** * If not exists. @@ -403,8 +265,9 @@ public class HCatCreateTableDesc { * @param partCols the partition cols * @return the builder */ + @Deprecated // @deprecated in favour of {@link HCatTable.#partCols(List<FieldSchema>)}. 
To be removed in Hive 0.16. public Builder partCols(List<HCatFieldSchema> partCols) { - this.partCols = partCols; + this.hcatTable.partCols(partCols); return this; } @@ -415,9 +278,10 @@ public class HCatCreateTableDesc { * @param bucketCols the bucket cols * @return the builder */ + @Deprecated // @deprecated in favour of {@link HCatTable.#bucketCols(List<FieldSchema>) and HCatTable.#numBuckets(int)}. + // To be removed in Hive 0.16. public Builder bucketCols(List<String> bucketCols, int buckets) { - this.bucketCols = bucketCols; - this.numBuckets = buckets; + this.hcatTable.bucketCols(bucketCols).numBuckets(buckets); return this; } @@ -427,8 +291,9 @@ public class HCatCreateTableDesc { * @param storageHandler the storage handler * @return the builder */ - public Builder storageHandler(String storageHandler) { - this.storageHandler = storageHandler; + @Deprecated // @deprecated in favour of {@link HCatTable.#storageHandler(String)}. To be removed in Hive 0.16. + public Builder storageHandler(String storageHandler) throws HCatException { + this.hcatTable.storageHandler(storageHandler); return this; } @@ -438,8 +303,9 @@ public class HCatCreateTableDesc { * @param location the location * @return the builder */ + @Deprecated // @deprecated in favour of {@link HCatTable.#location(String)}. To be removed in Hive 0.16. public Builder location(String location) { - this.location = location; + this.hcatTable.location(location); return this; } @@ -449,8 +315,9 @@ public class HCatCreateTableDesc { * @param comment the comment * @return the builder */ + @Deprecated // @deprecated in favour of {@link HCatTable.#comment(String)}. To be removed in Hive 0.16. public Builder comments(String comment) { - this.comment = comment; + this.hcatTable.comment(comment); return this; } @@ -460,8 +327,9 @@ public class HCatCreateTableDesc { * @param isExternal the is external * @return the builder */ + @Deprecated // @deprecated in favour of {@link HCatTable.#tableType(HCatTable.Type)}. 
To be removed in Hive 0.16. public Builder isTableExternal(boolean isExternal) { - this.isExternal = isExternal; + this.hcatTable.tableType(isExternal? HCatTable.Type.EXTERNAL_TABLE : HCatTable.Type.MANAGED_TABLE); return this; } @@ -471,8 +339,9 @@ public class HCatCreateTableDesc { * @param sortCols the sort cols * @return the builder */ + @Deprecated // @deprecated in favour of {@link HCatTable.#sortCols(ArrayList<Order>)}. To be removed in Hive 0.16. public Builder sortCols(ArrayList<Order> sortCols) { - this.sortCols = sortCols; + this.hcatTable.sortCols(sortCols); return this; } @@ -482,8 +351,10 @@ public class HCatCreateTableDesc { * @param tblProps the tbl props * @return the builder */ + @Deprecated // @deprecated in favour of {@link HCatTable.#sortCols(Map<String, String>)}. + // To be removed in Hive 0.16. public Builder tblProps(Map<String, String> tblProps) { - this.tblProps = tblProps; + this.hcatTable.tblProps(tblProps); return this; } @@ -493,54 +364,60 @@ public class HCatCreateTableDesc { * @param format the format * @return the builder */ + @Deprecated // @deprecated in favour of {@link HCatTable.#fileFormat(String)}. To be removed in Hive 0.16. public Builder fileFormat(String format) { - this.fileFormat = format; + this.hcatTable.fileFormat(format); return this; } /** * See <i>row_format</i> element of CREATE_TABLE DDL for Hive. */ + @Deprecated // @deprecated in favour of {@link HCatTable.#fieldsTerminatedBy()}. To be removed in Hive 0.16. public Builder fieldsTerminatedBy(char delimiter) { return serdeParam(serdeConstants.FIELD_DELIM, Character.toString(delimiter)); } /** * See <i>row_format</i> element of CREATE_TABLE DDL for Hive. */ + @Deprecated // @deprecated in favour of {@link HCatTable.#escapeChar()}. public Builder escapeChar(char escapeChar) { return serdeParam(serdeConstants.ESCAPE_CHAR, Character.toString(escapeChar)); } /** * See <i>row_format</i> element of CREATE_TABLE DDL for Hive. 
*/ + @Deprecated // @deprecated in favour of {@link HCatTable.#collectionItemsTerminatedBy()}. To be removed in Hive 0.16. public Builder collectionItemsTerminatedBy(char delimiter) { return serdeParam(serdeConstants.COLLECTION_DELIM, Character.toString(delimiter)); } /** * See <i>row_format</i> element of CREATE_TABLE DDL for Hive. */ + @Deprecated // @deprecated in favour of {@link HCatTable.#mapKeysTerminatedBy()}. To be removed in Hive 0.16. public Builder mapKeysTerminatedBy(char delimiter) { return serdeParam(serdeConstants.MAPKEY_DELIM, Character.toString(delimiter)); } /** * See <i>row_format</i> element of CREATE_TABLE DDL for Hive. */ + @Deprecated // @deprecated in favour of {@link HCatTable.#linesTerminatedBy()}. To be removed in Hive 0.16. public Builder linesTerminatedBy(char delimiter) { return serdeParam(serdeConstants.LINE_DELIM, Character.toString(delimiter)); } /** * See <i>row_format</i> element of CREATE_TABLE DDL for Hive. */ + @Deprecated // @deprecated in favour of {@link HCatTable.#nullDefinedAs()}. To be removed in Hive 0.16. public Builder nullDefinedAs(char nullChar) { return serdeParam(serdeConstants.SERIALIZATION_NULL_FORMAT, Character.toString(nullChar)); } /** * used for setting arbitrary SerDe parameter */ + @Deprecated // @deprecated in favour of {@link HCatTable.#serdeParam(Map<String, String>)}. + // To be removed in Hive 0.16. public Builder serdeParam(String paramName, String value) { - if(serdeParams == null) { - serdeParams = new HashMap<String, String>(); - } - serdeParams.put(paramName, value); + hcatTable.serdeParam(paramName, value); return this; } /** @@ -550,52 +427,9 @@ public class HCatCreateTableDesc { * @throws HCatException */ public HCatCreateTableDesc build() throws HCatException { - if (this.dbName == null) { - LOG.info("Database name found null. 
Setting db to :" - + MetaStoreUtils.DEFAULT_DATABASE_NAME); - this.dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; - } - HCatCreateTableDesc desc = new HCatCreateTableDesc(this.dbName, - this.tableName, this.cols); - desc.ifNotExists = this.ifNotExists; - desc.isExternal = this.isExternal; - desc.comment = this.comment; - desc.partCols = this.partCols; - desc.bucketCols = this.bucketCols; - desc.numBuckets = this.numBuckets; - desc.location = this.location; - desc.tblProps = this.tblProps; - desc.sortCols = this.sortCols; - desc.serde = null; - if (!StringUtils.isEmpty(fileFormat)) { - desc.fileFormat = fileFormat; - if ("SequenceFile".equalsIgnoreCase(fileFormat)) { - desc.inputformat = SequenceFileInputFormat.class.getName(); - desc.outputformat = HiveSequenceFileOutputFormat.class - .getName(); - } else if ("RCFile".equalsIgnoreCase(fileFormat)) { - desc.inputformat = RCFileInputFormat.class.getName(); - desc.outputformat = RCFileOutputFormat.class.getName(); - desc.serde = ColumnarSerDe.class.getName(); - } else if ("orcfile".equalsIgnoreCase(fileFormat)) { - desc.inputformat = OrcInputFormat.class.getName(); - desc.outputformat = OrcOutputFormat.class.getName(); - desc.serde = OrcSerde.class.getName(); - } - desc.storageHandler = StringUtils.EMPTY; - } else if (!StringUtils.isEmpty(storageHandler)) { - desc.storageHandler = storageHandler; - } else { - desc.fileFormat = "TextFile"; - LOG.info("Using text file format for the table."); - desc.inputformat = TextInputFormat.class.getName(); - LOG.info("Table input format:" + desc.inputformat); - desc.outputformat = HiveIgnoreKeyTextOutputFormat.class - .getName(); - LOG.info("Table output format:" + desc.outputformat); - } - desc.serdeParams = this.serdeParams; - return desc; + return new HCatCreateTableDesc(this.hcatTable, this.ifNotExists); } - } -} + + } // class Builder; + +} // class HCatAddPartitionDesc; Modified: 
hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatPartition.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatPartition.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatPartition.java (original) +++ hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatPartition.java Fri Aug 22 21:36:47 2014 @@ -19,43 +19,134 @@ package org.apache.hive.hcatalog.api; import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hive.hcatalog.common.HCatException; import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * The HCatPartition is a wrapper around org.apache.hadoop.hive.metastore.api.Partition. 
*/ public class HCatPartition { + private static final Logger LOG = LoggerFactory.getLogger(HCatPartition.class); + + private HCatTable hcatTable; private String tableName; - private String dbName; + private String dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; private List<String> values; - private List<HCatFieldSchema> tableCols; private int createTime; private int lastAccessTime; private StorageDescriptor sd; private Map<String, String> parameters; - HCatPartition(Partition partition) throws HCatException { + // For use from within HCatClient.getPartitions(). + HCatPartition(HCatTable hcatTable, Partition partition) throws HCatException { + this.hcatTable = hcatTable; this.tableName = partition.getTableName(); this.dbName = partition.getDbName(); this.createTime = partition.getCreateTime(); this.lastAccessTime = partition.getLastAccessTime(); this.parameters = partition.getParameters(); this.values = partition.getValues(); + if (hcatTable != null && partition.getValuesSize() != hcatTable.getPartCols().size()) { + throw new HCatException("Mismatched number of partition columns between table:" + hcatTable.getDbName() + "." + hcatTable.getTableName() + + " and partition " + partition.getValues()); + } + this.sd = partition.getSd(); - this.tableCols = new ArrayList<HCatFieldSchema>(); - for (FieldSchema fs : this.sd.getCols()) { - this.tableCols.add(HCatSchemaUtils.getHCatFieldSchema(fs)); + } + + // For constructing HCatPartitions afresh, as an argument to HCatClient.addPartitions(). 
+ public HCatPartition(HCatTable hcatTable, Map<String, String> partitionKeyValues, String location) throws HCatException { + this.hcatTable = hcatTable; + this.tableName = hcatTable.getTableName(); + this.dbName = hcatTable.getDbName(); + this.sd = new StorageDescriptor(hcatTable.getSd()); + this.sd.setLocation(location); + this.createTime = (int)(System.currentTimeMillis()/1000); + this.lastAccessTime = -1; + this.values = new ArrayList<String>(hcatTable.getPartCols().size()); + for (HCatFieldSchema partField : hcatTable.getPartCols()) { + if (!partitionKeyValues.containsKey(partField.getName())) { + throw new HCatException("Missing value for partition-key \'" + partField.getName() + + "\' in table: " + hcatTable.getDbName() + "." + hcatTable.getTableName()); + } + else { + values.add(partitionKeyValues.get(partField.getName())); + } + } + } + + // For replicating an HCatPartition definition. + public HCatPartition(HCatPartition rhs, Map<String, String> partitionKeyValues, String location) throws HCatException { + this.hcatTable = rhs.hcatTable; + this.tableName = rhs.tableName; + this.dbName = rhs.dbName; + this.sd = new StorageDescriptor(rhs.sd); + this.sd.setLocation(location); + + this.createTime = (int) (System.currentTimeMillis() / 1000); + this.lastAccessTime = -1; + this.values = new ArrayList<String>(hcatTable.getPartCols().size()); + for (HCatFieldSchema partField : hcatTable.getPartCols()) { + if (!partitionKeyValues.containsKey(partField.getName())) { + throw new HCatException("Missing value for partition-key \'" + partField.getName() + + "\' in table: " + hcatTable.getDbName() + "." + hcatTable.getTableName()); + } else { + values.add(partitionKeyValues.get(partField.getName())); + } + } + } + + // For use from HCatClient.addPartitions(), to construct from user-input. 
+ Partition toHivePartition() throws HCatException { + Partition hivePtn = new Partition(); + hivePtn.setDbName(dbName); + hivePtn.setTableName(tableName); + hivePtn.setValues(values); + + hivePtn.setParameters(parameters); + if (sd.getLocation() == null) { + LOG.warn("Partition location is not set! Attempting to construct default partition location."); + try { + String partName = Warehouse.makePartName(HCatSchemaUtils.getFieldSchemas(hcatTable.getPartCols()), values); + sd.setLocation(new Path(hcatTable.getSd().getLocation(), partName).toString()); + } + catch(MetaException exception) { + throw new HCatException("Could not construct default partition-path for " + + hcatTable.getDbName() + "." + hcatTable.getTableName() + "[" + values + "]"); + } } + hivePtn.setSd(sd); + + hivePtn.setCreateTime((int) (System.currentTimeMillis() / 1000)); + hivePtn.setLastAccessTimeIsSet(false); + return hivePtn; + } + + public HCatTable hcatTable() { + return hcatTable; + } + + public HCatPartition hcatTable(HCatTable hcatTable) { + this.hcatTable = hcatTable; + this.tableName = hcatTable.getTableName(); + this.dbName = hcatTable.getDbName(); + return this; } /** @@ -81,8 +172,12 @@ public class HCatPartition { * * @return the columns */ - public List<HCatFieldSchema> getColumns() { - return this.tableCols; + public List<HCatFieldSchema> getColumns() throws HCatException { + ArrayList<HCatFieldSchema> columns = new ArrayList<HCatFieldSchema>(sd.getColsSize()); + for (FieldSchema fieldSchema : sd.getCols()) { + columns.add(HCatSchemaUtils.getHCatFieldSchema(fieldSchema)); + } + return columns; } /** @@ -124,6 +219,14 @@ public class HCatPartition { } /** + * Setter for partition directory location. + */ + public HCatPartition location(String location) { + this.sd.setLocation(location); + return this; + } + + /** * Gets the serde. 
* * @return the serde @@ -132,6 +235,14 @@ public class HCatPartition { return this.sd.getSerdeInfo().getSerializationLib(); } + /** + * Getter for SerDe parameters. + * @return The SerDe parameters. + */ + public Map<String, String> getSerdeParams() { + return this.sd.getSerdeInfo().getParameters(); + } + public Map<String, String> getParameters() { return this.parameters; } @@ -164,6 +275,34 @@ public class HCatPartition { } /** + * Getter for partition-spec map. + */ + public LinkedHashMap<String, String> getPartitionKeyValMap() { + LinkedHashMap<String, String> map = new LinkedHashMap<String, String>(hcatTable.getPartCols().size()); + for (int i=0; i<hcatTable.getPartCols().size(); ++i) { + map.put(hcatTable.getPartCols().get(i).getName(), values.get(i)); + } + return map; + } + + /** + * Setter for partition key-values. + */ + public HCatPartition setPartitionKeyValues(Map<String, String> partitionKeyValues) throws HCatException { + for (HCatFieldSchema partField : hcatTable.getPartCols()) { + if (!partitionKeyValues.containsKey(partField.getName())) { + throw new HCatException("Missing value for partition-key \'" + partField.getName() + + "\' in table: " + hcatTable.getDbName() + "." + hcatTable.getTableName()); + } + else { + values.add(partitionKeyValues.get(partField.getName())); + // Keep partKeyValMap in synch as well. + } + } + return this; + } + + /** * Gets the bucket columns. * * @return the bucket columns @@ -192,13 +331,14 @@ public class HCatPartition { @Override public String toString() { - return "HCatPartition [" - + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") - + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") - + (values != null ? "values=" + values + ", " : "values=null") - + "createTime=" + createTime + ", lastAccessTime=" - + lastAccessTime + ", " + (sd != null ? "sd=" + sd + ", " : "sd=null") - + (parameters != null ? 
"parameters=" + parameters : "parameters=null") + "]"; + return "HCatPartition [ " + + "tableName=" + tableName + "," + + "dbName=" + dbName + "," + + "values=" + values + "," + + "createTime=" + createTime + "," + + "lastAccessTime=" + lastAccessTime + "," + + "sd=" + sd + "," + + "parameters=" + parameters + "]"; } } Modified: hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatTable.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatTable.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatTable.java (original) +++ hive/branches/cbo/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatTable.java Fri Aug 22 21:36:47 2014 @@ -19,62 +19,200 @@ package org.apache.hive.hcatalog.api; import java.util.ArrayList; +import java.util.EnumSet; +import java.util.HashMap; import java.util.List; import java.util.Map; +import com.google.common.collect.Maps; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat; +import org.apache.hadoop.hive.ql.io.RCFileInputFormat; +import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; +import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; +import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat; 
+import org.apache.hadoop.hive.ql.io.orc.OrcSerde; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler; +import org.apache.hadoop.hive.ql.metadata.HiveUtils; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hive.hcatalog.common.HCatException; import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * The HCatTable is a wrapper around org.apache.hadoop.hive.metastore.api.Table. */ public class HCatTable { + private static final Logger LOG = LoggerFactory.getLogger(HCatTable.class); + public static enum Type { + MANAGED_TABLE, + EXTERNAL_TABLE, + VIRTUAL_VIEW, + INDEX_TABLE + } + + /** + * Attributes that can be compared between HCatTables. + */ + public static enum TableAttribute { + COLUMNS, + PARTITION_COLUMNS, + INPUT_FORMAT, + OUTPUT_FORMAT, + SERDE, + SERDE_PROPERTIES, + STORAGE_HANDLER, + LOCATION, + TABLE_PROPERTIES, + STATS // TODO: Handle replication of changes to Table-STATS. + } + + /** + * The default set of attributes that can be diffed between HCatTables. 
+ */ + public static final EnumSet<TableAttribute> DEFAULT_COMPARISON_ATTRIBUTES + = EnumSet.of(TableAttribute.COLUMNS, + TableAttribute.INPUT_FORMAT, + TableAttribute.OUTPUT_FORMAT, + TableAttribute.SERDE, + TableAttribute.SERDE_PROPERTIES, + TableAttribute.STORAGE_HANDLER, + TableAttribute.TABLE_PROPERTIES); + + /** + * 2 HCatTables are considered equivalent if {@code lhs.diff(rhs).equals(NO_DIFF) == true; } + */ + public static final EnumSet<TableAttribute> NO_DIFF = EnumSet.noneOf(TableAttribute.class); + + public static final String DEFAULT_SERDE_CLASS = org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName(); + public static final String DEFAULT_INPUT_FORMAT_CLASS = org.apache.hadoop.mapred.TextInputFormat.class.getName(); + public static final String DEFAULT_OUTPUT_FORMAT_CLASS = org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat.class.getName(); + + private String dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; private String tableName; - private String tabletype; - private List<HCatFieldSchema> cols; - private List<HCatFieldSchema> partCols; - private List<String> bucketCols; - private List<Order> sortCols; - private int numBuckets; - private String inputFileFormat; - private String outputFileFormat; - private String storageHandler; - private Map<String, String> tblProps; - private String dbName; - private String serde; - private String location; - private Map<String, String> serdeParams; + private HiveConf conf; + private String tableType; + private boolean isExternal; + private List<HCatFieldSchema> cols = new ArrayList<HCatFieldSchema>(); + private List<HCatFieldSchema> partCols = new ArrayList<HCatFieldSchema>(); + private StorageDescriptor sd; + private String fileFormat; + private Map<String, String> tblProps = new HashMap<String, String>(); + private String comment = ""; + private String owner; + + public HCatTable(String dbName, String tableName) { + this.dbName = StringUtils.isBlank(dbName)? 
MetaStoreUtils.DEFAULT_DATABASE_NAME : dbName; + this.tableName = tableName; + this.sd = new StorageDescriptor(); + this.sd.setInputFormat(DEFAULT_INPUT_FORMAT_CLASS); + this.sd.setOutputFormat(DEFAULT_OUTPUT_FORMAT_CLASS); + this.sd.setSerdeInfo(new SerDeInfo()); + this.sd.getSerdeInfo().setSerializationLib(DEFAULT_SERDE_CLASS); + this.sd.getSerdeInfo().setParameters(new HashMap<String, String>()); + this.sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); // Default serialization format. + } HCatTable(Table hiveTable) throws HCatException { - this.tableName = hiveTable.getTableName(); - this.dbName = hiveTable.getDbName(); - this.tabletype = hiveTable.getTableType(); - cols = new ArrayList<HCatFieldSchema>(); - for (FieldSchema colFS : hiveTable.getSd().getCols()) { + tableName = hiveTable.getTableName(); + dbName = hiveTable.getDbName(); + tableType = hiveTable.getTableType(); + isExternal = hiveTable.getTableType().equals(TableType.EXTERNAL_TABLE.toString()); + sd = hiveTable.getSd(); + for (FieldSchema colFS : sd.getCols()) { cols.add(HCatSchemaUtils.getHCatFieldSchema(colFS)); } partCols = new ArrayList<HCatFieldSchema>(); for (FieldSchema colFS : hiveTable.getPartitionKeys()) { partCols.add(HCatSchemaUtils.getHCatFieldSchema(colFS)); } - bucketCols = hiveTable.getSd().getBucketCols(); - sortCols = hiveTable.getSd().getSortCols(); - numBuckets = hiveTable.getSd().getNumBuckets(); - inputFileFormat = hiveTable.getSd().getInputFormat(); - outputFileFormat = hiveTable.getSd().getOutputFormat(); - storageHandler = hiveTable - .getSd() - .getParameters() - .get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE); - tblProps = hiveTable.getParameters(); - serde = hiveTable.getSd().getSerdeInfo().getSerializationLib(); - location = hiveTable.getSd().getLocation(); - serdeParams = hiveTable.getSd().getSerdeInfo().getParameters(); + if (hiveTable.getParameters() != null) { + 
tblProps.putAll(hiveTable.getParameters()); + } + + if (StringUtils.isNotBlank(tblProps.get("comment"))) { + comment = tblProps.get("comment"); + } + + owner = hiveTable.getOwner(); + } + + Table toHiveTable() throws HCatException { + Table newTable = new Table(); + newTable.setDbName(dbName); + newTable.setTableName(tableName); + if (tblProps != null) { + newTable.setParameters(tblProps); + } + + if (isExternal) { + newTable.putToParameters("EXTERNAL", "TRUE"); + newTable.setTableType(TableType.EXTERNAL_TABLE.toString()); + } else { + newTable.setTableType(TableType.MANAGED_TABLE.toString()); + } + + if (this.comment != null) { + newTable.putToParameters("comment", comment); + } + + newTable.setSd(sd); + if (partCols != null) { + ArrayList<FieldSchema> hivePtnCols = new ArrayList<FieldSchema>(); + for (HCatFieldSchema fs : partCols) { + hivePtnCols.add(HCatSchemaUtils.getFieldSchema(fs)); + } + newTable.setPartitionKeys(hivePtnCols); + } + + newTable.setCreateTime((int) (System.currentTimeMillis() / 1000)); + newTable.setLastAccessTimeIsSet(false); + try { + // TODO: Verify that this works for systems using UGI.doAs() (e.g. Oozie). + newTable.setOwner(owner == null? getConf().getUser() : owner); + } + catch (Exception exception) { + throw new HCatException("Unable to determine owner of table (" + dbName + "." + tableName + + ") from HiveConf."); + } + return newTable; + } + + void setConf(Configuration conf) { + if (conf instanceof HiveConf) { + this.conf = (HiveConf)conf; + } + else { + this.conf = new HiveConf(conf, getClass()); + } + } + + HiveConf getConf() { + if (conf == null) { + LOG.warn("Conf hasn't been set yet. Using defaults."); + conf = new HiveConf(); + } + return conf; + } + + StorageDescriptor getSd() { + return sd; } /** @@ -87,6 +225,14 @@ public class HCatTable { } /** + * Setter for TableName. + */ + public HCatTable tableName(String tableName) { + this.tableName = tableName; + return this; + } + + /** * Gets the db name. 
* * @return the db name @@ -96,6 +242,14 @@ public class HCatTable { } /** + * Setter for db-name. + */ + public HCatTable dbName(String dbName) { + this.dbName = dbName; + return this; + } + + /** * Gets the columns. * * @return the columns @@ -105,6 +259,18 @@ public class HCatTable { } /** + * Setter for Column schemas. + */ + public HCatTable cols(List<HCatFieldSchema> cols) { + if (!this.cols.equals(cols)) { + this.cols.clear(); + this.cols.addAll(cols); + this.sd.setCols(HCatSchemaUtils.getFieldSchemas(cols)); + } + return this; + } + + /** * Gets the part columns. * * @return the part columns @@ -114,12 +280,40 @@ public class HCatTable { } /** + * Setter for list of partition columns. + */ + public HCatTable partCols(List<HCatFieldSchema> partCols) { + this.partCols = partCols; + return this; + } + + /** + * Setter for individual partition columns. + */ + public HCatTable partCol(HCatFieldSchema partCol) { + if (this.partCols == null) { + this.partCols = new ArrayList<HCatFieldSchema>(); + } + + this.partCols.add(partCol); + return this; + } + + /** * Gets the bucket columns. * * @return the bucket columns */ public List<String> getBucketCols() { - return bucketCols; + return this.sd.getBucketCols(); + } + + /** + * Setter for list of bucket columns. + */ + public HCatTable bucketCols(List<String> bucketCols) { + this.sd.setBucketCols(bucketCols); + return this; } /** @@ -128,7 +322,15 @@ public class HCatTable { * @return the sort columns */ public List<Order> getSortCols() { - return sortCols; + return this.sd.getSortCols(); + } + + /** + * Setter for Sort-cols. + */ + public HCatTable sortCols(List<Order> sortCols) { + this.sd.setSortCols(sortCols); + return this; } /** @@ -137,7 +339,15 @@ public class HCatTable { * @return the number of buckets */ public int getNumBuckets() { - return numBuckets; + return this.sd.getNumBuckets(); + } + + /** + * Setter for number of buckets. 
+ */ + public HCatTable numBuckets(int numBuckets) { + this.sd.setNumBuckets(numBuckets); + return this; } /** @@ -146,7 +356,29 @@ public class HCatTable { * @return the storage handler */ public String getStorageHandler() { - return storageHandler; + return this.tblProps.get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE); + } + + /** + * Setter for StorageHandler class. + */ + public HCatTable storageHandler(String storageHandler) throws HCatException { + this.tblProps.put( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, + storageHandler); + LOG.warn("HiveStorageHandlers can't be reliably instantiated on the client-side. " + + "Attempting to derive Input/OutputFormat settings from StorageHandler, on best effort: "); + try { + HiveStorageHandler sh = HiveUtils.getStorageHandler(getConf(), storageHandler); + this.sd.setInputFormat(sh.getInputFormatClass().getName()); + this.sd.setOutputFormat(sh.getOutputFormatClass().getName()); + this.sd.getSerdeInfo().setSerializationLib(sh.getSerDeClass().getName()); + } catch (HiveException e) { + LOG.warn("Could not derive Input/OutputFormat and SerDe settings from storageHandler. " + + "These values need to be set explicitly.", e); + } + + return this; } /** @@ -159,21 +391,83 @@ public class HCatTable { } /** - * Gets the tabletype. + * Setter for TableProperty map. + */ + public HCatTable tblProps(Map<String, String> tblProps) { + if (!this.tblProps.equals(tblProps)) { + this.tblProps.clear(); + this.tblProps.putAll(tblProps); + } + return this; + } + + /** + * Gets the tableType. * - * @return the tabletype + * @return the tableType */ public String getTabletype() { - return tabletype; + return tableType; + } + + /** + * Setter for table-type. 
+ */ + public HCatTable tableType(Type tableType) { + this.tableType = tableType.name(); + this.isExternal = tableType.equals(Type.EXTERNAL_TABLE); + return this; + } + + private SerDeInfo getSerDeInfo() { + if (!sd.isSetSerdeInfo()) { + sd.setSerdeInfo(new SerDeInfo()); + } + return sd.getSerdeInfo(); + } + + public HCatTable fileFormat(String fileFormat) { + this.fileFormat = fileFormat; + + if (fileFormat.equalsIgnoreCase("sequencefile")) { + inputFileFormat(SequenceFileInputFormat.class.getName()); + outputFileFormat(HiveSequenceFileOutputFormat.class.getName()); + serdeLib(LazySimpleSerDe.class.getName()); + } + else + if (fileFormat.equalsIgnoreCase("rcfile")) { + inputFileFormat(RCFileInputFormat.class.getName()); + outputFileFormat(RCFileOutputFormat.class.getName()); + serdeLib(LazyBinaryColumnarSerDe.class.getName()); + } + else + if (fileFormat.equalsIgnoreCase("orcfile")) { + inputFileFormat(OrcInputFormat.class.getName()); + outputFileFormat(OrcOutputFormat.class.getName()); + serdeLib(OrcSerde.class.getName()); + } + + return this; } + public String fileFormat() { + return fileFormat; + } /** * Gets the input file format. * * @return the input file format */ public String getInputFileFormat() { - return inputFileFormat; + return sd.getInputFormat(); + } + + /** + * Setter for InputFormat class. + */ + public HCatTable inputFileFormat(String inputFileFormat) { + sd.setInputFormat(inputFileFormat); + return this; } /** @@ -182,7 +476,15 @@ public class HCatTable { * @return the output file format */ public String getOutputFileFormat() { - return outputFileFormat; + return sd.getOutputFormat(); + } + + /** + * Setter for OutputFormat class. 
+ */ + public HCatTable outputFileFormat(String outputFileFormat) { + this.sd.setOutputFormat(outputFileFormat); + return this; } /** @@ -191,7 +493,37 @@ public class HCatTable { * @return the serde lib */ public String getSerdeLib() { - return serde; + return getSerDeInfo().getSerializationLib(); + } + + /** + * Setter for SerDe class name. + */ + public HCatTable serdeLib(String serde) { + getSerDeInfo().setSerializationLib(serde); + return this; + } + + public HCatTable serdeParams(Map<String, String> serdeParams) { + getSerDeInfo().setParameters(serdeParams); + return this; + } + + public HCatTable serdeParam(String paramName, String value) { + SerDeInfo serdeInfo = getSerDeInfo(); + if (serdeInfo.getParameters() == null) { + serdeInfo.setParameters(new HashMap<String, String>()); + } + serdeInfo.getParameters().put(paramName, value); + + return this; + } + + /** + * Returns parameters such as field delimiter,etc. + */ + public Map<String, String> getSerdeParams() { + return getSerDeInfo().getParameters(); } /** @@ -200,38 +532,230 @@ public class HCatTable { * @return the location */ public String getLocation() { - return location; + return sd.getLocation(); } + /** - * Returns parameters such as field delimiter,etc. + * Setter for location. */ - public Map<String, String> getSerdeParams() { - return serdeParams; + public HCatTable location(String location) { + this.sd.setLocation(location); + return this; + } + + /** + * Getter for table-owner. + */ + public String owner() { + return owner; + } + + /** + * Setter for table-owner. + */ + public HCatTable owner(String owner) { + this.owner = owner; + return this; + } + + public String comment() { + return this.comment; + } + + /** + * Setter for table-level comment. + */ + public HCatTable comment(String comment) { + this.comment = comment; + return this; + } + + /** + * See <i>row_format</i> element of CREATE_TABLE DDL for Hive. 
+ */ + public HCatTable fieldsTerminatedBy(char delimiter) { + return serdeParam(serdeConstants.FIELD_DELIM, Character.toString(delimiter)); + } + /** + * See <i>row_format</i> element of CREATE_TABLE DDL for Hive. + */ + public HCatTable escapeChar(char escapeChar) { + return serdeParam(serdeConstants.ESCAPE_CHAR, Character.toString(escapeChar)); + } + /** + * See <i>row_format</i> element of CREATE_TABLE DDL for Hive. + */ + public HCatTable collectionItemsTerminatedBy(char delimiter) { + return serdeParam(serdeConstants.COLLECTION_DELIM, Character.toString(delimiter)); + } + /** + * See <i>row_format</i> element of CREATE_TABLE DDL for Hive. + */ + public HCatTable mapKeysTerminatedBy(char delimiter) { + return serdeParam(serdeConstants.MAPKEY_DELIM, Character.toString(delimiter)); + } + /** + * See <i>row_format</i> element of CREATE_TABLE DDL for Hive. + */ + public HCatTable linesTerminatedBy(char delimiter) { + return serdeParam(serdeConstants.LINE_DELIM, Character.toString(delimiter)); + } + /** + * See <i>row_format</i> element of CREATE_TABLE DDL for Hive. + */ + public HCatTable nullDefinedAs(char nullChar) { + return serdeParam(serdeConstants.SERIALIZATION_NULL_FORMAT, Character.toString(nullChar)); } @Override public String toString() { - return "HCatTable [" - + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") - + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") - + (tabletype != null ? "tabletype=" + tabletype + ", " : "tabletype=null") - + (cols != null ? "cols=" + cols + ", " : "cols=null") - + (partCols != null ? "partCols=" + partCols + ", " : "partCols==null") - + (bucketCols != null ? "bucketCols=" + bucketCols + ", " : "bucketCols=null") - + (sortCols != null ? "sortCols=" + sortCols + ", " : "sortCols=null") - + "numBuckets=" - + numBuckets - + ", " - + (inputFileFormat != null ? "inputFileFormat=" - + inputFileFormat + ", " : "inputFileFormat=null") - + (outputFileFormat != null ? 
"outputFileFormat=" - + outputFileFormat + ", " : "outputFileFormat=null") - + (storageHandler != null ? "storageHandler=" + storageHandler - + ", " : "storageHandler=null") - + (tblProps != null ? "tblProps=" + tblProps + ", " : "tblProps=null") - + (serde != null ? "serde=" + serde + ", " : "serde=") - + (location != null ? "location=" + location : "location=") - + ",serdeParams=" + (serdeParams == null ? "null" : serdeParams) - + "]"; + return "HCatTable [ " + + "tableName=" + tableName + ", " + + "dbName=" + dbName + ", " + + "tableType=" + tableType + ", " + + "cols=" + cols + ", " + + "partCols=" + partCols + ", " + + "bucketCols=" + getBucketCols() + ", " + + "numBuckets=" + getNumBuckets() + ", " + + "sortCols=" + getSortCols() + ", " + + "inputFormat=" + getInputFileFormat() + ", " + + "outputFormat=" + getOutputFileFormat() + ", " + + "storageHandler=" + getStorageHandler() + ", " + + "serde=" + getSerdeLib() + ", " + + "tblProps=" + getTblProps() + ", " + + "location=" + getLocation() + ", " + + "owner=" + owner() + " ]"; + + } + + /** + * Method to compare the attributes of 2 HCatTable instances. + * @param rhs The other table being compared against. Can't be null. + * @param attributesToCheck The list of TableAttributes being compared. + * @return {@code EnumSet<TableAttribute>} containing all the attribute that differ between {@code this} and rhs. + * Subset of {@code attributesToCheck}. 
+ */ + public EnumSet<TableAttribute> diff(HCatTable rhs, EnumSet<TableAttribute> attributesToCheck) { + EnumSet<TableAttribute> theDiff = EnumSet.noneOf(TableAttribute.class); + + for (TableAttribute attribute : attributesToCheck) { + + if (attribute.equals(TableAttribute.COLUMNS)) { + if (!rhs.getCols().containsAll(getCols()) || + !getCols().containsAll(rhs.getCols())) { + theDiff.add(TableAttribute.COLUMNS); + } + } + + if (attribute.equals(TableAttribute.INPUT_FORMAT)) { + if ((getInputFileFormat() == null && rhs.getInputFileFormat() != null) + || (getInputFileFormat() != null && (rhs.getInputFileFormat() == null || !rhs.getInputFileFormat().equals(getInputFileFormat())))) { + theDiff.add(TableAttribute.INPUT_FORMAT); + } + } + + if (attribute.equals(TableAttribute.OUTPUT_FORMAT)) { + if ((getOutputFileFormat() == null && rhs.getOutputFileFormat() != null) + || (getOutputFileFormat() != null && (rhs.getOutputFileFormat() == null || !rhs.getOutputFileFormat().equals(getOutputFileFormat())))) { + theDiff.add(TableAttribute.OUTPUT_FORMAT); + } + } + + if (attribute.equals(TableAttribute.STORAGE_HANDLER)) { + if ((getStorageHandler() == null && rhs.getStorageHandler() != null) + || (getStorageHandler() != null && (rhs.getStorageHandler() == null || !rhs.getStorageHandler().equals(getStorageHandler())))) { + theDiff.add(TableAttribute.STORAGE_HANDLER); + } + } + + if (attribute.equals(TableAttribute.SERDE)) { + if ((getSerdeLib() == null && rhs.getSerdeLib() != null) + || (getSerdeLib() != null && (rhs.getSerdeLib() == null || !rhs.getSerdeLib().equals(getSerdeLib())))) { + theDiff.add(TableAttribute.SERDE); + } + } + + if (attribute.equals(TableAttribute.SERDE_PROPERTIES)) { + if (!equivalent(sd.getSerdeInfo().getParameters(), rhs.sd.getSerdeInfo().getParameters())) { + theDiff.add(TableAttribute.SERDE_PROPERTIES); + } + } + + if (attribute.equals(TableAttribute.TABLE_PROPERTIES)) { + if (!equivalent(tblProps, rhs.tblProps)) { + 
theDiff.add(TableAttribute.TABLE_PROPERTIES); + } + } + + } + + return theDiff; + } + + /** + * Helper method to compare 2 Map instances, for equivalence. + * @param lhs First map to be compared. + * @param rhs Second map to be compared. + * @return true, if the 2 Maps contain the same entries. + */ + private static boolean equivalent(Map<String, String> lhs, Map<String, String> rhs) { + return lhs.size() == rhs.size() && Maps.difference(lhs, rhs).areEqual(); + } + + /** + * Method to compare the attributes of 2 HCatTable instances. + * Only the {@code DEFAULT_COMPARISON_ATTRIBUTES} are compared. + * @param rhs The other table being compared against. Can't be null. + * @return {@code EnumSet<TableAttribute>} containing all the attributes that differ between {@code this} and rhs. + * Subset of {@code DEFAULT_COMPARISON_ATTRIBUTES}. + */ + public EnumSet<TableAttribute> diff (HCatTable rhs) { + return diff(rhs, DEFAULT_COMPARISON_ATTRIBUTES); + } + + /** + * Method to "adopt" the specified attributes from rhs into this HCatTable object. + * @param rhs The "source" table from which attributes are to be copied. + * @param attributes The set of attributes to be copied from rhs. Usually the result of {@code this.diff(rhs)}. 
+ * @return This HCatTable + * @throws HCatException + */ + public HCatTable resolve(HCatTable rhs, EnumSet<TableAttribute> attributes) throws HCatException { + + if (rhs == this) + return this; + + for (TableAttribute attribute : attributes) { + + if (attribute.equals(TableAttribute.COLUMNS)) { + cols(rhs.cols); + } + + if (attribute.equals(TableAttribute.INPUT_FORMAT)) { + inputFileFormat(rhs.getInputFileFormat()); + } + + if (attribute.equals(TableAttribute.OUTPUT_FORMAT)) { + outputFileFormat(rhs.getOutputFileFormat()); + } + + if (attribute.equals(TableAttribute.SERDE)) { + serdeLib(rhs.getSerdeLib()); + } + + if (attribute.equals(TableAttribute.SERDE_PROPERTIES)) { + serdeParams(rhs.getSerdeParams()); + } + + if (attribute.equals(TableAttribute.STORAGE_HANDLER)) { + storageHandler(rhs.getStorageHandler()); + } + + if (attribute.equals(TableAttribute.TABLE_PROPERTIES)) { + tblProps(rhs.tblProps); + } + } + + return this; } }