This is an automated email from the ASF dual-hosted git repository.

nitiraj pushed a commit to branch branch-2.5
in repository https://gitbox.apache.org/repos/asf/ambari.git


The following commit(s) were added to refs/heads/branch-2.5 by this push:
     new f37a37b  AMBARI-22999 : Ambari Hive View 2.0 'Upload Table' does not support UTF8 files with BOM (nitirajrathore) (#510) (#527)
f37a37b is described below

commit f37a37b0f62b87aa01ac8243f92600200d399a74
Author: nitirajrathore <nitiraj.rath...@gmail.com>
AuthorDate: Mon Mar 5 12:58:55 2018 +0530

    AMBARI-22999 : Ambari Hive View 2.0 'Upload Table' does not support UTF8 files with BOM (nitirajrathore) (#510) (#527)
    
    * AMBARI-22999 : Ambari Hive View 2.0 'Upload Table' does not support UTF8 files with BOM (nitirajrathore)
    
    * AMBARI-22999 : Added relevant test cases (nitirajrathore)
---
 .../hive20/resources/uploads/UploadService.java    | 25 +++++++--
 .../resources/uploads/parsers/PreviewData.java     |  8 +++
 .../InsertFromQueryGeneratorSpecTest.groovy        | 13 +++--
 .../resources/uploads/UploadServiceTest.java       | 60 ++++++++++++++++++++++
 4 files changed, 98 insertions(+), 8 deletions(-)

diff --git 
a/contrib/views/hive20/src/main/java/org/apache/ambari/view/hive20/resources/uploads/UploadService.java
 
b/contrib/views/hive20/src/main/java/org/apache/ambari/view/hive20/resources/uploads/UploadService.java
index 3164da0..b0ce4bb 100644
--- 
a/contrib/views/hive20/src/main/java/org/apache/ambari/view/hive20/resources/uploads/UploadService.java
+++ 
b/contrib/views/hive20/src/main/java/org/apache/ambari/view/hive20/resources/uploads/UploadService.java
@@ -18,6 +18,7 @@
 
 package org.apache.ambari.view.hive20.resources.uploads;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Optional;
 import com.sun.jersey.core.header.FormDataContentDisposition;
 import com.sun.jersey.multipart.FormDataParam;
@@ -46,6 +47,8 @@ import 
org.apache.ambari.view.hive20.resources.uploads.query.InsertFromQueryInpu
 import org.apache.ambari.view.hive20.utils.ServiceFormattedException;
 import org.apache.ambari.view.hive20.utils.SharedObjectsFactory;
 import org.apache.ambari.view.utils.ambari.AmbariApi;
+import org.apache.commons.io.ByteOrderMark;
+import org.apache.commons.io.input.BOMInputStream;
 import org.apache.commons.io.input.ReaderInputStream;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
@@ -195,6 +198,19 @@ public class UploadService extends BaseService {
     }
   }
 
+  private Reader getInputStreamReader(InputStream is) throws IOException {
+    BOMInputStream bomInputStream = new BOMInputStream(is,
+        ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
+        ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE
+    );
+    if (bomInputStream.hasBOM()) {
+      String charSetName = bomInputStream.getBOMCharsetName();
+      return new InputStreamReader(bomInputStream, charSetName); // return with the encoded charset encoding.
+    } else {
+      return new InputStreamReader(bomInputStream); //return with default charset
+    }
+  }
+
+
   private CSVParams getCsvParams(String csvDelimiter, String csvQuote, String 
csvEscape) {
     char csvq =  CSVParams.DEFAULT_QUOTE_CHAR;
     char csvd =  CSVParams.DEFAULT_DELIMITER_CHAR;
@@ -455,7 +471,8 @@ public class UploadService extends BaseService {
     else return e.getMessage();
   }
 
-  private PreviewData generatePreview(Boolean isFirstRowHeader, String inputFileType, CSVParams csvParams, InputStream uploadedInputStream) throws Exception {
+  @VisibleForTesting
+  PreviewData generatePreview(Boolean isFirstRowHeader, String inputFileType, CSVParams csvParams, InputStream uploadedInputStream) throws Exception {
     ParseOptions parseOptions = new ParseOptions();
     parseOptions.setOption(ParseOptions.OPTIONS_FILE_TYPE, inputFileType);
     if (inputFileType.equals(ParseOptions.InputFileType.CSV.toString())){
@@ -473,7 +490,8 @@ public class UploadService extends BaseService {
 
     LOG.info("isFirstRowHeader : {}, inputFileType : {}", isFirstRowHeader, 
inputFileType);
 
-    DataParser dataParser = new DataParser(new InputStreamReader(uploadedInputStream), parseOptions);
+    Reader reader = getInputStreamReader(uploadedInputStream);
+    DataParser dataParser = new DataParser(reader, parseOptions);
 
     return dataParser.parsePreview();
   }
@@ -519,7 +537,8 @@ public class UploadService extends BaseService {
       parseOptions.setOption(ParseOptions.OPTIONS_CSV_QUOTE, 
csvParams.getCsvQuote());
     }
 
-    DataParser dataParser = new DataParser(new InputStreamReader(uploadedInputStream), parseOptions);
+    Reader reader = getInputStreamReader(uploadedInputStream);
+    DataParser dataParser = new DataParser(reader, parseOptions);
 
     Reader csvReader = new TableDataReader(dataParser.iterator(), header, 
containsEndlines); // encode column values into HEX so that \n etc dont appear 
in the hive table data
     String path = uploadIntoTable(csvReader, databaseName, tableName);
diff --git 
a/contrib/views/hive20/src/main/java/org/apache/ambari/view/hive20/resources/uploads/parsers/PreviewData.java
 
b/contrib/views/hive20/src/main/java/org/apache/ambari/view/hive20/resources/uploads/parsers/PreviewData.java
index dd31457..c68e3a6 100644
--- 
a/contrib/views/hive20/src/main/java/org/apache/ambari/view/hive20/resources/uploads/parsers/PreviewData.java
+++ 
b/contrib/views/hive20/src/main/java/org/apache/ambari/view/hive20/resources/uploads/parsers/PreviewData.java
@@ -54,4 +54,12 @@ public class PreviewData {
   public void setPreviewRows(List<Row> previewRows) {
     this.previewRows = previewRows;
   }
+
+  @Override
+  public String toString() {
+    return "PreviewData{" +
+        "header=" + header +
+        ", previewRows=" + previewRows +
+        '}';
+  }
 }
diff --git 
a/contrib/views/hive20/src/test/java/org/apache/ambari/view/hive20/internal/query/generators/InsertFromQueryGeneratorSpecTest.groovy
 
b/contrib/views/hive20/src/test/java/org/apache/ambari/view/hive20/internal/query/generators/InsertFromQueryGeneratorSpecTest.groovy
index dfdf9df..5fed745 100644
--- 
a/contrib/views/hive20/src/test/java/org/apache/ambari/view/hive20/internal/query/generators/InsertFromQueryGeneratorSpecTest.groovy
+++ 
b/contrib/views/hive20/src/test/java/org/apache/ambari/view/hive20/internal/query/generators/InsertFromQueryGeneratorSpecTest.groovy
@@ -28,7 +28,7 @@ class InsertFromQueryGeneratorSpecTest extends Specification {
     setup:
     List<ColumnInfo> colInfos = Arrays.asList(new ColumnInfo("col1", 
"STRING"), new ColumnInfo("col2", "INT"), new ColumnInfo("col3", "VARCHAR", 
255),
             new ColumnInfo("col4", "CHAR", 25))
-    InsertFromQueryInput insertFromQueryInput = new InsertFromQueryInput("d1", 
"t1", "d2", "t2", colInfos, false)
+    InsertFromQueryInput insertFromQueryInput = new InsertFromQueryInput("d1", 
"t1", "d2", "t2", Collections.emptyList(), colInfos, null, false)
     InsertFromQueryGenerator generator = new 
InsertFromQueryGenerator(insertFromQueryInput);
 
     when:
@@ -41,14 +41,16 @@ class InsertFromQueryGeneratorSpecTest extends 
Specification {
     String queryStr = query.get();
 
     then:
-    queryStr == "INSERT INTO TABLE `d2`.`t2` SELECT `col1`, `col2`, `col3`, 
`col4` FROM `d1.t1` ;"
+    queryStr == "set hive.exec.dynamic.partition.mode=nonstrict;\n" +
+            " FROM `d1`.`t1` tempTable INSERT INTO TABLE `d2`.`t2` SELECT 
tempTable.`col1`, tempTable.`col2`, tempTable.`col3`, tempTable.`col4`;"
   }
 
-  def "insert from with unhexing"() {
+  def "insert from with unhexing and partitioned columns"() {
     setup:
     List<ColumnInfo> colInfos = Arrays.asList(new ColumnInfo("col1", 
"STRING"), new ColumnInfo("col2", "INT"), new ColumnInfo("col3", "VARCHAR", 
255),
             new ColumnInfo("col4", "CHAR", 25))
-    InsertFromQueryInput insertFromQueryInput = new InsertFromQueryInput("d1", 
"t1", "d2", "t2", colInfos, true)
+    List<ColumnInfo> partititionedCols = Arrays.asList(new ColumnInfo("col5", 
"STRING"), new ColumnInfo("col6", "INT"))
+    InsertFromQueryInput insertFromQueryInput = new InsertFromQueryInput("d1", 
"t1", "d2", "t2", partititionedCols, colInfos, null, true)
     InsertFromQueryGenerator generator = new 
InsertFromQueryGenerator(insertFromQueryInput);
 
     when:
@@ -61,6 +63,7 @@ class InsertFromQueryGeneratorSpecTest extends Specification {
     String queryStr = query.get();
 
     then:
-    queryStr == "INSERT INTO TABLE `d2`.`t2` SELECT UNHEX(`col1`), `col2`, 
UNHEX(`col3`), UNHEX(`col4`) FROM `d1.t1` ;"
+    queryStr ==  "set hive.exec.dynamic.partition.mode=nonstrict;\n" +
+            " FROM `d1`.`t1` tempTable INSERT INTO TABLE `d2`.`t2` PARTITION 
(`col5`,`col6` )  SELECT UNHEX(tempTable.`col1`), tempTable.`col2`, 
UNHEX(tempTable.`col3`), UNHEX(tempTable.`col4`), UNHEX(tempTable.`col5`), 
tempTable.`col6`;"
   }
 }
diff --git 
a/contrib/views/hive20/src/test/java/org/apache/ambari/view/hive20/resources/uploads/UploadServiceTest.java
 
b/contrib/views/hive20/src/test/java/org/apache/ambari/view/hive20/resources/uploads/UploadServiceTest.java
new file mode 100644
index 0000000..01921c3
--- /dev/null
+++ 
b/contrib/views/hive20/src/test/java/org/apache/ambari/view/hive20/resources/uploads/UploadServiceTest.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ambari.view.hive20.resources.uploads;
+
+import org.apache.ambari.view.hive20.client.Row;
+import org.apache.ambari.view.hive20.internal.dto.ColumnInfo;
+import org.apache.ambari.view.hive20.resources.uploads.parsers.PreviewData;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.util.Arrays;
+
+public class UploadServiceTest {
+
+  @Test
+  public void generatePreviewWithBOM() throws Exception {
+    UploadService uploadService = new UploadService();
+    // convert String into InputStream
+    String str = "\ufeffCol1\tCol2\nA\tB\n";
+    InputStream inputStream = new ByteArrayInputStream(str.getBytes());
+    PreviewData previewData = uploadService.generatePreview(true, "CSV", new 
CSVParams('\t', '\"', '\\'), inputStream);
+
+    Assert.assertEquals("Incorrect number of columns detected.", 2, 
previewData.getHeader().size() );
+    Assert.assertEquals("incorrect col objects.", Arrays.asList(new 
ColumnInfo("Col1", "CHAR", null, null, null),
+        new ColumnInfo("Col2", "CHAR", null, null, null)), 
previewData.getHeader());
+    Assert.assertEquals("incorrect row objects.", Arrays.asList(new Row(new 
Object[]{"A", "B"})), previewData.getPreviewRows());
+  }
+
+  @Test
+  public void generatePreviewWithoutBOM() throws Exception {
+    UploadService uploadService = new UploadService();
+    // convert String into InputStream
+    String str = "Col1\tCol2\nA\tB\n";
+    InputStream inputStream = new ByteArrayInputStream(str.getBytes());
+    PreviewData previewData = uploadService.generatePreview(true, "CSV", new 
CSVParams('\t', '\"', '\\'), inputStream);
+
+    Assert.assertEquals("Incorrect number of columns detected.", 2, 
previewData.getHeader().size() );
+    Assert.assertEquals("incorrect col objects.", Arrays.asList(new 
ColumnInfo("Col1", "CHAR", null, null, null),
+        new ColumnInfo("Col2", "CHAR", null, null, null)), 
previewData.getHeader());
+    Assert.assertEquals("incorrect row objects.", Arrays.asList(new Row(new 
Object[]{"A", "B"})), previewData.getPreviewRows());
+  }
+}

-- 
To stop receiving notification emails like this one, please contact
niti...@apache.org.

Reply via email to