This is an automated email from the ASF dual-hosted git repository.
weiz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new dbe2a323351 HIVE-26685: Improve path name escaping/unescaping (#3721)
dbe2a323351 is described below
commit dbe2a323351b7a0196fc7834023b9bc28cd3244e
Author: James Petty <[email protected]>
AuthorDate: Mon Dec 5 13:04:54 2022 -0500
HIVE-26685: Improve path name escaping/unescaping (#3721)
---
.../org/apache/hadoop/hive/common/FileUtils.java | 38 +++++++++++++++++++---
.../apache/hadoop/hive/common/TestFileUtils.java | 8 +++++
2 files changed, 42 insertions(+), 4 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
index 37ff2c04dc2..17169d6e184 100644
--- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
@@ -258,6 +258,11 @@ public final class FileUtils {
}
}
+ /**
+ * Hex encoding characters indexed by integer value
+ */
+ private static final char[] HEX_UPPER_CHARS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
+
static boolean needsEscaping(char c) {
return c < charToEscape.size() && charToEscape.get(c);
}
@@ -287,12 +292,28 @@ public final class FileUtils {
}
}
- StringBuilder sb = new StringBuilder();
+ // Fast-path detection, no escaping and therefore no copying necessary
+ int firstEscapeIndex = -1;
for (int i = 0; i < path.length(); i++) {
+ if (needsEscaping(path.charAt(i))) {
+ firstEscapeIndex = i;
+ break;
+ }
+ }
+ if (firstEscapeIndex == -1) {
+ return path;
+ }
+
+ // slow path, escape beyond the first required escape character into a new string
+ StringBuilder sb = new StringBuilder();
+ if (firstEscapeIndex > 0) {
+ sb.append(path, 0, firstEscapeIndex);
+ }
+
+ for (int i = firstEscapeIndex; i < path.length(); i++) {
char c = path.charAt(i);
if (needsEscaping(c)) {
- sb.append('%');
- sb.append(String.format("%1$02X", (int) c));
+ sb.append('%').append(HEX_UPPER_CHARS[(0xF0 & c) >>> 4]).append(HEX_UPPER_CHARS[(0x0F & c)]);
} else {
sb.append(c);
}
@@ -301,8 +322,17 @@ public final class FileUtils {
}
public static String unescapePathName(String path) {
+ int firstUnescapeIndex = path.indexOf('%');
+ if (firstUnescapeIndex == -1) {
+ return path;
+ }
+
StringBuilder sb = new StringBuilder();
- for (int i = 0; i < path.length(); i++) {
+ if (firstUnescapeIndex > 0) {
+ sb.append(path, 0, firstUnescapeIndex);
+ }
+
+ for (int i = firstUnescapeIndex; i < path.length(); i++) {
char c = path.charAt(i);
if (c == '%' && i + 2 < path.length()) {
int code = -1;
diff --git a/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java b/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java
index 2721deb7a03..9ffb52ba5f9 100644
--- a/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java
+++ b/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java
@@ -303,6 +303,14 @@ public class TestFileUtils {
assertEquals(1, assertExpectedFilePaths(itr, Collections.singletonList("mock:/tmp/dummy")));
}
+ @Test
+ public void testPathEscapeChars() {
+ StringBuilder sb = new StringBuilder();
+ FileUtils.charToEscape.stream().forEach(integer -> sb.append((char) integer));
+ String path = sb.toString();
+ assertEquals(path, FileUtils.unescapePathName(FileUtils.escapePathName(path)));
+ }
+
private int assertExpectedFilePaths(RemoteIterator<? extends FileStatus> lfs, List<String> expectedPaths)
throws Exception {
int count = 0;