AMBARI-18583 : Hive view : handled BOM characters in upload table feature (nitirajrathore)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/5da18ae7
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/5da18ae7
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/5da18ae7

Branch: refs/heads/branch-dev-patch-upgrade
Commit: 5da18ae7896edfbcdabd6c140199e1139d414048
Parents: b537833
Author: Nitiraj Rathore <[email protected]>
Authored: Mon Nov 7 20:51:17 2016 +0530
Committer: Nitiraj Rathore <[email protected]>
Committed: Mon Nov 7 20:51:17 2016 +0530

----------------------------------------------------------------------
 .../hive2/resources/uploads/UploadService.java  | 21 +++++++++++++--
 .../hive/resources/uploads/UploadService.java   | 27 ++++++++++++++++++--
 2 files changed, 44 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/5da18ae7/contrib/views/hive-next/src/main/java/org/apache/ambari/view/hive2/resources/uploads/UploadService.java
----------------------------------------------------------------------
diff --git a/contrib/views/hive-next/src/main/java/org/apache/ambari/view/hive2/resources/uploads/UploadService.java b/contrib/views/hive-next/src/main/java/org/apache/ambari/view/hive2/resources/uploads/UploadService.java
index 9800c22..0826945 100644
--- a/contrib/views/hive-next/src/main/java/org/apache/ambari/view/hive2/resources/uploads/UploadService.java
+++ b/contrib/views/hive-next/src/main/java/org/apache/ambari/view/hive2/resources/uploads/UploadService.java
@@ -42,6 +42,8 @@ import org.apache.ambari.view.hive2.resources.uploads.query.TableInfo;
 import org.apache.ambari.view.hive2.utils.ServiceFormattedException;
 import org.apache.ambari.view.hive2.utils.SharedObjectsFactory;
 import org.apache.ambari.view.utils.ambari.AmbariApi;
+import org.apache.commons.io.ByteOrderMark;
+import org.apache.commons.io.input.BOMInputStream;
 import org.apache.commons.io.input.ReaderInputStream;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
@@ -496,7 +498,8 @@ public class UploadService extends BaseService {
 
     LOG.info("isFirstRowHeader : {}, inputFileType : {}", isFirstRowHeader, inputFileType);
 
-    DataParser dataParser = new DataParser(new InputStreamReader(uploadedInputStream), parseOptions);
+    Reader reader = getInputStreamReader(uploadedInputStream);
+    DataParser dataParser = new DataParser(reader, parseOptions);
 
     return dataParser.parsePreview();
   }
@@ -542,13 +545,27 @@ public class UploadService extends BaseService {
       parseOptions.setOption(ParseOptions.OPTIONS_CSV_QUOTE, csvParams.getCsvQuote());
     }
 
-    DataParser dataParser = new DataParser(new InputStreamReader(uploadedInputStream), parseOptions);
+    Reader reader = getInputStreamReader(uploadedInputStream);
+    DataParser dataParser = new DataParser(reader, parseOptions);
 
     Reader csvReader = new TableDataReader(dataParser.iterator(), header, containsEndlines); // encode column values into HEX so that \n etc dont appear in the hive table data
     String path = uploadIntoTable(csvReader, databaseName, tableName);
     return path;
   }
 
+  private Reader getInputStreamReader(InputStream is) throws IOException {
+    BOMInputStream bomInputStream = new BOMInputStream(is,
+      ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
+      ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE
+    );
+    if(bomInputStream.hasBOM()){
+      String charSetName = bomInputStream.getBOMCharsetName();
+      return new InputStreamReader(bomInputStream, charSetName); // return with the encoded charset encoding.
+    }else{
+      return new InputStreamReader(bomInputStream); //return with default charset
+    }
+  }
+
   private String getBasenameFromPath(String path) {
     String fileName = new File(path).getName();
     return getBasename(fileName);

http://git-wip-us.apache.org/repos/asf/ambari/blob/5da18ae7/contrib/views/hive/src/main/java/org/apache/ambari/view/hive/resources/uploads/UploadService.java
----------------------------------------------------------------------
diff --git a/contrib/views/hive/src/main/java/org/apache/ambari/view/hive/resources/uploads/UploadService.java b/contrib/views/hive/src/main/java/org/apache/ambari/view/hive/resources/uploads/UploadService.java
index 7dccbd4..2dceadf 100644
--- a/contrib/views/hive/src/main/java/org/apache/ambari/view/hive/resources/uploads/UploadService.java
+++ b/contrib/views/hive/src/main/java/org/apache/ambari/view/hive/resources/uploads/UploadService.java
@@ -36,6 +36,8 @@ import org.apache.ambari.view.hive.resources.uploads.query.TableInfo;
 import org.apache.ambari.view.hive.utils.ServiceFormattedException;
 import org.apache.ambari.view.hive.utils.SharedObjectsFactory;
 import org.apache.ambari.view.utils.ambari.AmbariApi;
+import org.apache.commons.io.ByteOrderMark;
+import org.apache.commons.io.input.BOMInputStream;
 import org.apache.commons.io.input.ReaderInputStream;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
@@ -464,7 +466,8 @@ public class UploadService extends BaseService {
 
     LOG.info("isFirstRowHeader : {}, inputFileType : {}", isFirstRowHeader, inputFileType);
 
-    DataParser dataParser = new DataParser(new InputStreamReader(uploadedInputStream), parseOptions);
+    Reader reader = getInputStreamReader(uploadedInputStream);
+    DataParser dataParser = new DataParser(reader, parseOptions);
 
     return dataParser.parsePreview();
   }
@@ -510,13 +513,33 @@ public class UploadService extends BaseService {
       parseOptions.setOption(ParseOptions.OPTIONS_CSV_QUOTE, csvParams.getCsvQuote());
     }
 
-    DataParser dataParser = new DataParser(new InputStreamReader(uploadedInputStream), parseOptions);
+    Reader reader = getInputStreamReader(uploadedInputStream);
+    DataParser dataParser = new DataParser(reader, parseOptions);
 
     Reader csvReader = new TableDataReader(dataParser.iterator(), header, containsEndlines); // encode column values into HEX so that \n etc dont appear in the hive table data
     String path = uploadIntoTable(csvReader, databaseName, tableName);
     return path;
   }
 
+  /**
+   * takes care of any BOM in the stream
+   * @param is : the input stream
+   * @return : the reader from the stream
+   * @throws IOException
+   */
+  private Reader getInputStreamReader(InputStream is) throws IOException {
+    BOMInputStream bomInputStream = new BOMInputStream(is,
+      ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
+      ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE
+    );
+    if(bomInputStream.hasBOM()){
+      String charSetName = bomInputStream.getBOMCharsetName();
+      return new InputStreamReader(bomInputStream, charSetName); // return with the encoded charset encoding.
+    }else{
+      return new InputStreamReader(bomInputStream); //return with default charset
+    }
+  }
+
   private String getBasenameFromPath(String path) {
     String fileName = new File(path).getName();
     return getBasename(fileName);

Reply via email to