prashantvithani commented on a change in pull request #498: PHOENIX-5258 Add
support for parsing CSV header as columns
URL: https://github.com/apache/phoenix/pull/498#discussion_r280677047
##########
File path:
phoenix-core/src/it/java/org/apache/phoenix/end2end/CsvBulkLoadToolIT.java
##########
@@ -530,4 +530,91 @@ public void testIgnoreCsvHeader() throws Exception {
}
}
}
+
+ @Test
+ public void testParseCsvHeaderAsInputColumns() throws Exception {
+ try (Statement stmt = conn.createStatement()) {
+ stmt.execute("CREATE TABLE S.TABLE14 (ID INTEGER NOT NULL PRIMARY
KEY, NAME VARCHAR, TYPE VARCHAR, CATEGORY VARCHAR)");
+
+ final Configuration conf = new
Configuration(getUtility().getConfiguration());
+ FileSystem fs = FileSystem.get(getUtility().getConfiguration());
+ FSDataOutputStream outputStream1 = fs.create(new
Path("/tmp/input14-1.csv"));
+ FSDataOutputStream outputStream2 = fs.create(new
Path("/tmp/input14-2.csv"));
+ FSDataOutputStream outputStream3 = fs.create(new
Path("/tmp/input14-3.csv"));
+ try (PrintWriter printWriter = new PrintWriter(outputStream1)) {
+ printWriter.println("id,name");
+ printWriter.println("1,Name 1");
+ printWriter.println("2,Name 2");
+ printWriter.println("3,Name 3");
+ }
+ try (PrintWriter printWriter = new PrintWriter(outputStream2)) {
+ printWriter.println("id,name,type");
+ printWriter.println("1,Name 1,Type 1");
+ printWriter.println("2,Name 2,Type 2");
+ printWriter.println("4,Name 4,Type 4");
+ }
+ try (PrintWriter printWriter = new PrintWriter(outputStream3)) {
+ printWriter.println("id,name");
+ printWriter.println("4,Name 4");
+ printWriter.println("2,Name 5");
+ printWriter.println("5,Name 6");
+ }
+
+ CsvBulkLoadTool csvBulkLoadTool = new CsvBulkLoadTool();
+ csvBulkLoadTool.setConf(conf);
+
+ int exitCode = csvBulkLoadTool.run(new String[] {
+ "--input", "/tmp/input14-1.csv,/tmp/input14-2.csv",
+ "--table", "table14",
+ "--schema", "s",
+ "--zookeeper", zkQuorum});
+ assertEquals(-1, exitCode);
+ try (ResultSet rs = stmt.executeQuery("SELECT COUNT(1) FROM
S.TABLE14")) {
+ assertTrue(rs.next());
+ assertEquals(0, rs.getInt(1));
+ assertFalse(rs.next());
+ }
+
+ exitCode = csvBulkLoadTool.run(new String[] {
+ "--input", "/tmp/input14-1.csv",
+ "--table", "table14",
+ "--schema", "s",
+ "--header",
+ "--zookeeper", zkQuorum});
+ assertEquals(0, exitCode);
+
+ try (ResultSet rs = stmt.executeQuery("SELECT COUNT(1) FROM
S.TABLE14")) {
+ assertTrue(rs.next());
+ assertEquals(3, rs.getInt(1));
+ assertFalse(rs.next());
+ }
+
+ try {
Review comment:
The previous scenario covers the case where the `--header` argument is not
passed. The table has 4 columns in total, and the input files contain fewer
than that, so the scenario expects the upsert executor to exit abnormally.
Here, `--header` is passed, but the two input files have different
headers, which is not supported, so an exception is expected.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services