Repository: hive Updated Branches: refs/heads/master fe81a3760 -> 01d06aa83
HIVE-12907 : Improve dynamic partition loading - II (Ashutosh Chauhan via Prasanth J) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/01d06aa8 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/01d06aa8 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/01d06aa8 Branch: refs/heads/master Commit: 01d06aa83ec6b55805a6529c470a978f76a8b49b Parents: fe81a37 Author: Ashutosh Chauhan <[email protected]> Authored: Thu Jan 21 16:56:30 2016 -0800 Committer: Ashutosh Chauhan <[email protected]> Committed: Wed Jan 27 09:36:48 2016 -0800 ---------------------------------------------------------------------- .../hadoop/hive/common/StatsSetupConst.java | 6 +- .../listener/TestDbNotificationListener.java | 30 ++++----- .../apache/hadoop/hive/ql/metadata/Hive.java | 71 +++++++++++++------- .../hadoop/hive/ql/metadata/Partition.java | 3 +- 4 files changed, 64 insertions(+), 46 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/01d06aa8/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java index 029d415..c9ef647 100644 --- a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java +++ b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java @@ -192,7 +192,7 @@ public class StatsSetupConst { // note that set basic stats false will wipe out column stats too. public static void setBasicStatsState(Map<String, String> params, String setting) { if (setting.equals(FALSE)) { - if (params.containsKey(COLUMN_STATS_ACCURATE)) { + if (params != null && params.containsKey(COLUMN_STATS_ACCURATE)) { params.remove(COLUMN_STATS_ACCURATE); } } else { @@ -299,8 +299,8 @@ public class StatsSetupConst { } public static void clearColumnStatsState(Map<String, String> params) { - String statsAcc = params.get(COLUMN_STATS_ACCURATE); - if (statsAcc != null) { + String statsAcc; + if (params != null && (statsAcc = params.get(COLUMN_STATS_ACCURATE)) != null) { // statsAcc may not be jason format, which will throw exception JSONObject stats; try { http://git-wip-us.apache.org/repos/asf/hive/blob/01d06aa8/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/TestDbNotificationListener.java ---------------------------------------------------------------------- diff --git a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/TestDbNotificationListener.java b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/TestDbNotificationListener.java index 56f5c8b..36b624e 100644 --- a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/TestDbNotificationListener.java +++ b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/TestDbNotificationListener.java @@ -534,35 +534,35 @@ public class TestDbNotificationListener { for (NotificationEvent ne : rsp.getEvents()) LOG.debug("EVENT: " + ne.getMessage()); // For reasons not clear to me there's one or more alter partitions after add partition and // insert. - assertEquals(25, rsp.getEventsSize()); + assertEquals(19, rsp.getEventsSize()); NotificationEvent event = rsp.getEvents().get(1); assertEquals(firstEventId + 2, event.getEventId()); assertEquals(HCatConstants.HCAT_ADD_PARTITION_EVENT, event.getEventType()); - event = rsp.getEvents().get(5); - assertEquals(firstEventId + 6, event.getEventId()); + event = rsp.getEvents().get(3); + assertEquals(firstEventId + 4, event.getEventId()); assertEquals(HCatConstants.HCAT_INSERT_EVENT, event.getEventType()); // Make sure the files are listed in the insert assertTrue(event.getMessage().matches(".*\"files\":\\[\"pfile.*")); - event = rsp.getEvents().get(9); - assertEquals(firstEventId + 10, event.getEventId()); + event = rsp.getEvents().get(6); + assertEquals(firstEventId + 7, event.getEventId()); assertEquals(HCatConstants.HCAT_INSERT_EVENT, event.getEventType()); assertTrue(event.getMessage().matches(".*\"files\":\\[\"pfile.*")); - event = rsp.getEvents().get(12); - assertEquals(firstEventId + 13, event.getEventId()); + event = rsp.getEvents().get(9); + assertEquals(firstEventId + 10, event.getEventId()); assertEquals(HCatConstants.HCAT_ADD_PARTITION_EVENT, event.getEventType()); - event = rsp.getEvents().get(14); - assertEquals(firstEventId + 15, event.getEventId()); + event = rsp.getEvents().get(10); + assertEquals(firstEventId + 11, event.getEventId()); assertEquals(HCatConstants.HCAT_INSERT_EVENT, event.getEventType()); assertTrue(event.getMessage().matches(".*\"files\":\\[\"pfile.*")); - event = rsp.getEvents().get(18); - assertEquals(firstEventId + 19, event.getEventId()); + event = rsp.getEvents().get(13); + assertEquals(firstEventId + 14, event.getEventId()); assertEquals(HCatConstants.HCAT_INSERT_EVENT, event.getEventType()); assertTrue(event.getMessage().matches(".*\"files\":\\[\"pfile.*")); - event = rsp.getEvents().get(21); - assertEquals(firstEventId + 22, event.getEventId()); + event = rsp.getEvents().get(16); + assertEquals(firstEventId + 17, event.getEventId()); assertEquals(HCatConstants.HCAT_ADD_PARTITION_EVENT, event.getEventType()); - event = rsp.getEvents().get(24); - assertEquals(firstEventId + 25, event.getEventId()); + event = rsp.getEvents().get(18); + assertEquals(firstEventId + 19, event.getEventId()); assertEquals(HCatConstants.HCAT_DROP_PARTITION_EVENT, event.getEventType()); } } http://git-wip-us.apache.org/repos/asf/hive/blob/01d06aa8/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 50681c1..0bab769 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -84,6 +84,7 @@ import org.apache.hadoop.hive.metastore.api.HiveObjectRef; import org.apache.hadoop.hive.metastore.api.HiveObjectType; import org.apache.hadoop.hive.metastore.api.Index; import org.apache.hadoop.hive.metastore.api.InsertEventRequestData; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; import org.apache.hadoop.hive.metastore.api.InvalidOperationException; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; @@ -631,11 +632,7 @@ public class Hive { public void alterPartition(String dbName, String tblName, Partition newPart) throws InvalidOperationException, HiveException { try { - // Remove the DDL time so that it gets refreshed - if (newPart.getParameters() != null) { - newPart.getParameters().remove(hive_metastoreConstants.DDL_TIME); - } - newPart.checkValidity(); + validatePartition(newPart); getMSC().alter_partition(dbName, tblName, newPart.getTPartition()); } catch (MetaException e) { @@ -645,6 +642,14 @@ public class Hive { } } + private void validatePartition(Partition newPart) throws HiveException { + // Remove the DDL time so that it gets refreshed + if (newPart.getParameters() != null) { + newPart.getParameters().remove(hive_metastoreConstants.DDL_TIME); + } + newPart.checkValidity(); + } + /** * Updates the existing table metadata with the new metadata. * @@ -1427,14 +1432,12 @@ public class Hive { * @param isSrcLocal * If the source directory is LOCAL * @param isAcid true if this is an ACID operation - * @throws JSONException */ public Partition loadPartition(Path loadPath, Table tbl, Map<String, String> partSpec, boolean replace, boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir, boolean isSrcLocal, boolean isAcid) throws HiveException { Path tblDataLocationPath = tbl.getDataLocation(); - Partition newTPart = null; try { /** * Move files before creating the partition since down stream processes @@ -1474,19 +1477,25 @@ public class Hive { } else { newPartPath = oldPartPath; } - List<Path> newFiles = null; if (replace) { Hive.replaceFiles(tbl.getPath(), loadPath, newPartPath, oldPartPath, getConf(), isSrcLocal); } else { - newFiles = new ArrayList<Path>(); + if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary()) { + newFiles = new ArrayList<>(); + } + FileSystem fs = tbl.getDataLocation().getFileSystem(conf); Hive.copyFiles(conf, loadPath, newPartPath, fs, isSrcLocal, isAcid, newFiles); } + Partition newTPart = oldPart != null ? oldPart : new Partition(tbl, partSpec, newPartPath); + alterPartitionSpecInMemory(tbl, partSpec, newTPart.getTPartition(), inheritTableSpecs, newPartPath.toString()); + validatePartition(newTPart); + if (oldPart != null && null != newFiles) { + fireInsertEvent(tbl, partSpec, newFiles); + } - newTPart = getPartition(tbl, partSpec, true, newPartPath.toString(), - inheritTableSpecs, newFiles); //column stats will be inaccurate StatsSetupConst.clearColumnStatsState(newTPart.getParameters()); @@ -1500,18 +1509,18 @@ public class Hive { /* Add list bucketing location mappings. */ skewedInfo.setSkewedColValueLocationMaps(skewedColValueLocationMaps); newCreatedTpart.getSd().setSkewedInfo(skewedInfo); - if(!this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { - StatsSetupConst.setBasicStatsState(newTPart.getParameters(), StatsSetupConst.FALSE); - } - alterPartition(tbl.getDbName(), tbl.getTableName(), new Partition(tbl, newCreatedTpart)); - newTPart = getPartition(tbl, partSpec, true, newPartPath.toString(), inheritTableSpecs, - newFiles); - return new Partition(tbl, newCreatedTpart); } if(!this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { StatsSetupConst.setBasicStatsState(newTPart.getParameters(), StatsSetupConst.FALSE); } - alterPartition(tbl.getDbName(), tbl.getTableName(), new Partition(tbl, newTPart.getTPartition())); + if (oldPart == null) { + newTPart.getTPartition().setParameters(new HashMap<String,String>()); + MetaStoreUtils.populateQuickStats(HiveStatsUtils.getFileStatusRecurse(newPartPath, -1, newPartPath.getFileSystem(conf)), newTPart.getParameters()); + getMSC().add_partition(newTPart.getTPartition()); + } else { + alterPartition(tbl.getDbName(), tbl.getTableName(), new Partition(tbl, newTPart.getTPartition())); + } + return newTPart; } catch (IOException e) { LOG.error(StringUtils.stringifyException(e)); throw new HiveException(e); @@ -1521,8 +1530,10 @@ public class Hive { } catch (InvalidOperationException e) { LOG.error(StringUtils.stringifyException(e)); throw new HiveException(e); + } catch (TException e) { + LOG.error(StringUtils.stringifyException(e)); + throw new HiveException(e); } - return newTPart; } /** @@ -1622,7 +1633,6 @@ private void constructOneLBLocationMap(FileStatus fSta, * @param txnId txnId, can be 0 unless isAcid == true * @return partition map details (PartitionSpec and Partition) * @throws HiveException - * @throws JSONException */ public Map<Map<String, String>, Partition> loadDynamicPartitions(Path loadPath, String tableName, Map<String, String> partSpec, boolean replace, @@ -1985,6 +1995,20 @@ private void constructOneLBLocationMap(FileStatus fSta, org.apache.hadoop.hive.metastore.api.Partition tpart, boolean inheritTableSpecs, String partPath) throws HiveException, InvalidOperationException { + + alterPartitionSpecInMemory(tbl, partSpec, tpart, inheritTableSpecs, partPath); + String fullName = tbl.getTableName(); + if (!org.apache.commons.lang.StringUtils.isEmpty(tbl.getDbName())) { + fullName = tbl.getDbName() + "." + tbl.getTableName(); + } + alterPartition(fullName, new Partition(tbl, tpart)); + } + + private void alterPartitionSpecInMemory(Table tbl, + Map<String, String> partSpec, + org.apache.hadoop.hive.metastore.api.Partition tpart, + boolean inheritTableSpecs, + String partPath) throws HiveException, InvalidOperationException { LOG.debug("altering partition for table " + tbl.getTableName() + " with partition spec : " + partSpec); if (inheritTableSpecs) { @@ -2001,11 +2025,6 @@ private void constructOneLBLocationMap(FileStatus fSta, throw new HiveException("new partition path should not be null or empty."); } tpart.getSd().setLocation(partPath); - String fullName = tbl.getTableName(); - if (!org.apache.commons.lang.StringUtils.isEmpty(tbl.getDbName())) { - fullName = tbl.getDbName() + "." + tbl.getTableName(); - } - alterPartition(fullName, new Partition(tbl, tpart)); } private void fireInsertEvent(Table tbl, Map<String, String> partitionSpec, List<Path> newFiles) http://git-wip-us.apache.org/repos/asf/hive/blob/01d06aa8/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java index c8895c2..c0edde9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java @@ -164,12 +164,11 @@ public class Partition implements Serializable { if (table.isPartitioned()) { try { - String partName = Warehouse.makePartName(table.getPartCols(), tPartition.getValues()); if (tPartition.getSd().getLocation() == null) { // set default if location is not set and this is a physical // table partition (not a view partition) if (table.getDataLocation() != null) { - Path partPath = new Path(table.getDataLocation(), partName); + Path partPath = new Path(table.getDataLocation(), Warehouse.makePartName(table.getPartCols(), tPartition.getValues())); tPartition.getSd().setLocation(partPath.toString()); } }
