Repository: hive Updated Branches: refs/heads/master 79e474005 -> 62ae11188
HIVE-18587 : insert DML event may attempt to calculate a checksum on directories (Sergey Shelukhin, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/62ae1118 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/62ae1118 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/62ae1118 Branch: refs/heads/master Commit: 62ae11188f3fbb4092385f02937c2b268d1c04a3 Parents: 79e4740 Author: sergey <[email protected]> Authored: Wed Jan 31 14:35:01 2018 -0800 Committer: sergey <[email protected]> Committed: Wed Jan 31 14:35:01 2018 -0800 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/metadata/Hive.java | 91 +++++++++++--------- 1 file changed, 52 insertions(+), 39 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/62ae1118/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 63bcedc..c8299e2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -2591,28 +2591,7 @@ private void constructOneLBLocationMap(FileStatus fSta, public Partition getPartition(Table tbl, Map<String, String> partSpec, boolean forceCreate) throws HiveException { - return getPartition(tbl, partSpec, forceCreate, null, true, null); - } - - /** - * Returns partition metadata - * - * @param tbl - * the partition's table - * @param partSpec - * partition keys and values - * @param forceCreate - * if this is true and partition doesn't exist then a partition is - * created - * @param partPath the path where the partition data is located - * @param inheritTableSpecs whether to copy over the table specs for if/of/serde - * @return result partition object or null if there is no partition - * @throws HiveException - */ - public Partition getPartition(Table tbl, Map<String, String> partSpec, boolean forceCreate, - String partPath, boolean inheritTableSpecs) - throws HiveException { - return getPartition(tbl, partSpec, forceCreate, partPath, inheritTableSpecs, null); + return getPartition(tbl, partSpec, forceCreate, null, true); } /** @@ -2633,8 +2612,7 @@ private void constructOneLBLocationMap(FileStatus fSta, * @throws HiveException */ public Partition getPartition(Table tbl, Map<String, String> partSpec, - boolean forceCreate, String partPath, boolean inheritTableSpecs, List<Path> newFiles) - throws HiveException { + boolean forceCreate, String partPath, boolean inheritTableSpecs) throws HiveException { tbl.validatePartColumnNames(partSpec, true); List<String> pvals = new ArrayList<String>(); for (FieldSchema field : tbl.getPartCols()) { @@ -2694,7 +2672,7 @@ private void constructOneLBLocationMap(FileStatus fSta, } else { alterPartitionSpec(tbl, partSpec, tpart, inheritTableSpecs, partPath); - fireInsertEvent(tbl, partSpec, true, newFiles); + fireInsertEvent(tbl, partSpec, true, null); } } if (tpart == null) { @@ -2758,20 +2736,8 @@ private void constructOneLBLocationMap(FileStatus fSta, InsertEventRequestData insertData = new InsertEventRequestData(); insertData.setReplace(replace); data.setInsertData(insertData); - if (newFiles != null && newFiles.size() > 0) { - for (Path p : newFiles) { - insertData.addToFilesAdded(p.toString()); - FileChecksum cksum = fileSystem.getFileChecksum(p); - // File checksum is not implemented for local filesystem (RawLocalFileSystem) - if (cksum != null) { - String checksumString = - StringUtils.byteToHexString(cksum.getBytes(), 0, cksum.getLength()); - insertData.addToFilesAddedChecksum(checksumString); - } else { - // Add an empty checksum string for filesystems that don't generate one - insertData.addToFilesAddedChecksum(""); - } - } + if (newFiles != null && !newFiles.isEmpty()) { + addInsertFileInformation(newFiles, fileSystem, insertData); } else { insertData.setFilesAdded(new ArrayList<String>()); } @@ -2792,6 +2758,53 @@ private void constructOneLBLocationMap(FileStatus fSta, } } + + private static void addInsertFileInformation(List<Path> newFiles, FileSystem fileSystem, + InsertEventRequestData insertData) throws IOException { + LinkedList<Path> directories = null; + for (Path p : newFiles) { + if (fileSystem.isDirectory(p)) { + if (directories == null) { + directories = new LinkedList<>(); + } + directories.add(p); + continue; + } + addInsertNonDirectoryInformation(p, fileSystem, insertData); + } + if (directories == null) return; + // We don't expect any nesting in most cases, or a lot of it if it is present; union and LB + // are some examples where we would have 1, or few, levels respectively. + while (!directories.isEmpty()) { + Path dir = directories.poll(); + FileStatus[] contents = fileSystem.listStatus(dir); + if (contents == null) continue; + for (FileStatus status : contents) { + if (status.isDirectory()) { + directories.add(status.getPath()); + continue; + } + addInsertNonDirectoryInformation(status.getPath(), fileSystem, insertData); + } + } + } + + + private static void addInsertNonDirectoryInformation(Path p, FileSystem fileSystem, + InsertEventRequestData insertData) throws IOException { + insertData.addToFilesAdded(p.toString()); + FileChecksum cksum = fileSystem.getFileChecksum(p); + // File checksum is not implemented for local filesystem (RawLocalFileSystem) + if (cksum != null) { + String checksumString = + StringUtils.byteToHexString(cksum.getBytes(), 0, cksum.getLength()); + insertData.addToFilesAddedChecksum(checksumString); + } else { + // Add an empty checksum string for filesystems that don't generate one + insertData.addToFilesAddedChecksum(""); + } + } + public boolean dropPartition(String tblName, List<String> part_vals, boolean deleteData) throws HiveException { String[] names = Utilities.getDbTableName(tblName);
