Repository: tajo Updated Branches: refs/heads/master ddfc3f330 -> 87e7ba214
TAJO-947: ColPartitionStoreExec can cause URISyntaxException due to special characters. (Mai Hai Thanh via hyunsik) Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/87e7ba21 Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/87e7ba21 Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/87e7ba21 Branch: refs/heads/master Commit: 87e7ba21491ac8a5a6a56357d7b4185c94f5dfd6 Parents: ddfc3f3 Author: Hyunsik Choi <[email protected]> Authored: Mon Aug 11 14:04:07 2014 +0900 Committer: Hyunsik Choi <[email protected]> Committed: Mon Aug 11 14:04:07 2014 +0900 ---------------------------------------------------------------------- CHANGES | 3 + .../java/org/apache/tajo/util/StringUtils.java | 96 ++++++++++++++++++++ .../HashBasedColPartitionStoreExec.java | 3 +- .../SortBasedColPartitionStoreExec.java | 7 +- .../org/apache/tajo/engine/utils/TupleUtil.java | 3 +- .../tajo/engine/query/TestTablePartitions.java | 43 +++++++++ .../TestTablePartitions/lineitemspecial.tbl | 5 + .../TestTablePartitions/lineitemspecial_ddl.sql | 3 + .../TestSpecialCharPartitionKeys1.result | 4 + .../TestSpecialCharPartitionKeys2.result | 3 + 10 files changed, 165 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/87e7ba21/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index 182d07d..8788b17 100644 --- a/CHANGES +++ b/CHANGES @@ -109,6 +109,9 @@ Release 0.9.0 - unreleased BUG FIXES + TAJO-947: ColPartitionStoreExec can cause URISyntaxException due + to special characters. (Mai Hai Thanh via hyunsik) + TAJO-999: SequenceFile key class need to be compatible. (jaehwa) TAJO-994: 'count(distinct x)' function counts first null value. (hyunsik) http://git-wip-us.apache.org/repos/asf/tajo/blob/87e7ba21/tajo-common/src/main/java/org/apache/tajo/util/StringUtils.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/util/StringUtils.java b/tajo-common/src/main/java/org/apache/tajo/util/StringUtils.java index 41ea153..90391a8 100644 --- a/tajo-common/src/main/java/org/apache/tajo/util/StringUtils.java +++ b/tajo-common/src/main/java/org/apache/tajo/util/StringUtils.java @@ -21,10 +21,12 @@ package org.apache.tajo.util; import org.apache.commons.lang.CharUtils; import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.lang.SystemUtils; +import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.ShutdownHookManager; import org.apache.hadoop.util.SignalLogger; import java.util.Arrays; +import java.util.BitSet; public class StringUtils { @@ -180,4 +182,98 @@ public class StringUtils { public static String unicodeEscapedDelimiter(char c) { return CharUtils.unicodeEscaped(c); } + + /** + * The following lines of code that deals with escape characters is mostly copied from HIVE's FileUtils.java + */ + + static BitSet charToEscape = new BitSet(128); + static { + for (char c = 0; c < ' '; c++) { + charToEscape.set(c); + } + + /** + * ASCII 01-1F are HTTP control characters that need to be escaped. + * \u000A and \u000D are \n and \r, respectively. + */ + char[] clist = new char[] {'\u0001', '\u0002', '\u0003', '\u0004', + '\u0005', '\u0006', '\u0007', '\u0008', '\u0009', '\n', '\u000B', + '\u000C', '\r', '\u000E', '\u000F', '\u0010', '\u0011', '\u0012', + '\u0013', '\u0014', '\u0015', '\u0016', '\u0017', '\u0018', '\u0019', + '\u001A', '\u001B', '\u001C', '\u001D', '\u001E', '\u001F', + '"', '#', '%', '\'', '*', '/', ':', '=', '?', '\\', '\u007F', '{', + '[', ']', '^'}; + + for (char c : clist) { + charToEscape.set(c); + } + + if(Shell.WINDOWS){ + // On windows, following chars need to be escaped as well + char [] winClist = {' ', '<','>','|'}; + for (char c : winClist) { + charToEscape.set(c); + } + } + } + + static boolean needsEscaping(char c) { + return c >= 0 && c < charToEscape.size() && charToEscape.get(c); + } + + public static String escapePathName(String path) { + return escapePathName(path, null); + } + + /** + * Escapes a path name. + * @param path The path to escape. + * @param defaultPath + * The default name for the path, if the given path is empty or null. + * @return An escaped path name. + */ + public static String escapePathName(String path, String defaultPath) { + if (path == null || path.length() == 0) { + if (defaultPath == null) { + return "__TAJO_DEFAULT_PARTITION__"; + } else { + return defaultPath; + } + } + + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < path.length(); i++) { + char c = path.charAt(i); + if (needsEscaping(c)) { + sb.append('%'); + sb.append(String.format("%1$02X", (int) c)); + } else { + sb.append(c); + } + } + return sb.toString(); + } + + public static String unescapePathName(String path) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < path.length(); i++) { + char c = path.charAt(i); + if (c == '%' && i + 2 < path.length()) { + int code = -1; + try { + code = Integer.valueOf(path.substring(i + 1, i + 3), 16); + } catch (Exception e) { + code = -1; + } + if (code >= 0) { + sb.append((char) code); + i += 2; + continue; + } + } + sb.append(c); + } + return sb.toString(); + } } http://git-wip-us.apache.org/repos/asf/tajo/blob/87e7ba21/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/HashBasedColPartitionStoreExec.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/HashBasedColPartitionStoreExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/HashBasedColPartitionStoreExec.java index 6cef22e..44d1270 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/HashBasedColPartitionStoreExec.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/HashBasedColPartitionStoreExec.java @@ -26,6 +26,7 @@ import org.apache.tajo.datum.Datum; import org.apache.tajo.engine.planner.logical.StoreTableNode; import org.apache.tajo.storage.Appender; import org.apache.tajo.storage.Tuple; +import org.apache.tajo.util.StringUtils; import org.apache.tajo.worker.TaskAttemptContext; import java.io.IOException; @@ -79,7 +80,7 @@ public class HashBasedColPartitionStoreExec extends ColPartitionStoreExec { if(i > 0) sb.append("/"); sb.append(keyNames[i]).append("="); - sb.append(datum.asChars()); + sb.append(StringUtils.escapePathName(datum.asChars())); } } http://git-wip-us.apache.org/repos/asf/tajo/blob/87e7ba21/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortBasedColPartitionStoreExec.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortBasedColPartitionStoreExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortBasedColPartitionStoreExec.java index d09e296..9ce455f 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortBasedColPartitionStoreExec.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortBasedColPartitionStoreExec.java @@ -30,6 +30,7 @@ import org.apache.tajo.engine.planner.logical.StoreTableNode; import org.apache.tajo.storage.Appender; import org.apache.tajo.storage.Tuple; import org.apache.tajo.storage.VTuple; +import org.apache.tajo.util.StringUtils; import org.apache.tajo.worker.TaskAttemptContext; import java.io.IOException; @@ -78,8 +79,8 @@ public class SortBasedColPartitionStoreExec extends ColPartitionStoreExec { if(i > 0) { sb.append("/"); } - sb.append(keyNames[i]).append("="); - sb.append(datum.asChars()); + sb.append(keyNames[i]).append("="); + sb.append(StringUtils.escapePathName(datum.asChars())); } return sb.toString(); } @@ -95,7 +96,7 @@ public class SortBasedColPartitionStoreExec extends ColPartitionStoreExec { appender = getAppender(getSubdirectory(currentKey)); prevKey = new VTuple(currentKey); } else { - if (!prevKey.equals(currentKey)) { + if (!prevKey.equals(currentKey) && !getSubdirectory(prevKey).equalsIgnoreCase(getSubdirectory(currentKey))) { appender.close(); StatisticsUtil.aggregateTableStat(aggregated, appender.getStats()); http://git-wip-us.apache.org/repos/asf/tajo/blob/87e7ba21/tajo-core/src/main/java/org/apache/tajo/engine/utils/TupleUtil.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/utils/TupleUtil.java b/tajo-core/src/main/java/org/apache/tajo/engine/utils/TupleUtil.java index f2e47bc..0752e11 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/utils/TupleUtil.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/utils/TupleUtil.java @@ -37,6 +37,7 @@ import org.apache.tajo.storage.RowStoreUtil.RowStoreEncoder; import org.apache.tajo.storage.Tuple; import org.apache.tajo.storage.TupleRange; import org.apache.tajo.storage.VTuple; +import org.apache.tajo.util.StringUtils; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; @@ -258,7 +259,7 @@ public class TupleUtil { } int columnId = partitionColumnSchema.getColumnIdByName(parts[0]); Column keyColumn = partitionColumnSchema.getColumn(columnId); - tuple.put(columnId, DatumFactory.createFromString(keyColumn.getDataType(), parts[1])); + tuple.put(columnId, DatumFactory.createFromString(keyColumn.getDataType(), StringUtils.unescapePathName(parts[1]))); } for (; i < partitionColumnSchema.size(); i++) { tuple.put(i, NullDatum.get()); http://git-wip-us.apache.org/repos/asf/tajo/blob/87e7ba21/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java index d9aea53..d80fdb5 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java @@ -786,4 +786,47 @@ public class TestTablePartitions extends QueryTestCaseBase { fail("Can't find query from workers" + queryId); return null; } + + @Test + public final void TestSpecialCharPartitionKeys1() throws Exception { + // See - TAJO-947: ColPartitionStoreExec can cause URISyntaxException due to special characters. + + executeDDL("lineitemspecial_ddl.sql", "lineitemspecial.tbl"); + + executeString("CREATE TABLE IF NOT EXISTS pTable947 (id int, name text) PARTITION BY COLUMN (type text)") + .close(); + executeString("INSERT OVERWRITE INTO pTable947 SELECT l_orderkey, l_shipinstruct, l_shipmode FROM lineitemspecial") + .close(); + ResultSet res = executeString("select * from pTable947 where type='RA:*?><I/L#%S' or type='AIR'"); + + String resStr = resultSetToString(res); + String expected = + "id,name,type\n" + + "-------------------------------\n" + + "3,NONE,AIR\n" + + "3,TEST SPECIAL CHARS,RA:*?><I/L#%S\n"; + + assertEquals(expected, resStr); + cleanupQuery(res); + } + + @Test + public final void TestSpecialCharPartitionKeys2() throws Exception { + // See - TAJO-947: ColPartitionStoreExec can cause URISyntaxException due to special characters. + + executeDDL("lineitemspecial_ddl.sql", "lineitemspecial.tbl"); + + executeString("CREATE TABLE IF NOT EXISTS pTable947 (id int, name text) PARTITION BY COLUMN (type text)") + .close(); + executeString("INSERT OVERWRITE INTO pTable947 SELECT l_orderkey, l_shipinstruct, l_shipmode FROM lineitemspecial") + .close(); + + ResultSet res = executeString("select * from pTable947 where type='RA:*?><I/L#%S'"); + assertResultSet(res); + cleanupQuery(res); + + res = executeString("select * from pTable947 where type='RA:*?><I/L#%S' or type='AIR01'"); + assertResultSet(res); + cleanupQuery(res); + } } http://git-wip-us.apache.org/repos/asf/tajo/blob/87e7ba21/tajo-core/src/test/resources/dataset/TestTablePartitions/lineitemspecial.tbl ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/dataset/TestTablePartitions/lineitemspecial.tbl b/tajo-core/src/test/resources/dataset/TestTablePartitions/lineitemspecial.tbl new file mode 100644 index 0000000..2241213 --- /dev/null +++ b/tajo-core/src/test/resources/dataset/TestTablePartitions/lineitemspecial.tbl @@ -0,0 +1,5 @@ +1|DELIVER IN PERSON|TRUCK| +1|TAKE BACK RETURN|MAIL| +2|TAKE BACK RETURN|RAIL| +3|NONE|AIR| +3|TEST SPECIAL CHARS|RA:*?><I/L#%S| http://git-wip-us.apache.org/repos/asf/tajo/blob/87e7ba21/tajo-core/src/test/resources/queries/TestTablePartitions/lineitemspecial_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestTablePartitions/lineitemspecial_ddl.sql b/tajo-core/src/test/resources/queries/TestTablePartitions/lineitemspecial_ddl.sql new file mode 100644 index 0000000..77e76d5 --- /dev/null +++ b/tajo-core/src/test/resources/queries/TestTablePartitions/lineitemspecial_ddl.sql @@ -0,0 +1,3 @@ +create external table if not exists lineitemspecial ( + l_orderkey INT4, l_shipinstruct TEXT, l_shipmode TEXT) +using csv with ('csvfile.delimiter'='|', 'csvfile.null'='NULL') location ${table.path}; http://git-wip-us.apache.org/repos/asf/tajo/blob/87e7ba21/tajo-core/src/test/resources/results/TestTablePartitions/TestSpecialCharPartitionKeys1.result ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/TestTablePartitions/TestSpecialCharPartitionKeys1.result b/tajo-core/src/test/resources/results/TestTablePartitions/TestSpecialCharPartitionKeys1.result new file mode 100644 index 0000000..98af4ec --- /dev/null +++ b/tajo-core/src/test/resources/results/TestTablePartitions/TestSpecialCharPartitionKeys1.result @@ -0,0 +1,4 @@ +id,name,type +------------------------------- +3,NONE,AIR +3,TEST SPECIAL CHARS,RA:*?><I/L#%S \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/87e7ba21/tajo-core/src/test/resources/results/TestTablePartitions/TestSpecialCharPartitionKeys2.result ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/TestTablePartitions/TestSpecialCharPartitionKeys2.result b/tajo-core/src/test/resources/results/TestTablePartitions/TestSpecialCharPartitionKeys2.result new file mode 100644 index 0000000..d20fff7 --- /dev/null +++ b/tajo-core/src/test/resources/results/TestTablePartitions/TestSpecialCharPartitionKeys2.result @@ -0,0 +1,3 @@ +id,name,type +------------------------------- +3,TEST SPECIAL CHARS,RA:*?><I/L#%S \ No newline at end of file
