This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch branch-0.x
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit 83ee6cf493d9af3db1b716b78344920b9390a25f
Author: xuzifu666 <[email protected]>
AuthorDate: Fri May 10 16:00:13 2024 +0800

    [HUDI-7738] Set FileStreamReader Charset as UTF-8 (#11181)
---
 .../src/main/java/org/apache/hudi/cli/utils/InputStreamConsumer.java | 3 ++-
 .../org/apache/hudi/common/config/DFSPropertiesConfiguration.java    | 5 +++--
 .../java/org/apache/hudi/utilities/HoodieWithTimelineServer.java     | 3 ++-
 .../src/main/java/org/apache/hudi/utilities/TableSizeStats.java      | 3 ++-
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/utils/InputStreamConsumer.java b/hudi-cli/src/main/java/org/apache/hudi/cli/utils/InputStreamConsumer.java
index e99a499c69e..5209465d8a9 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/utils/InputStreamConsumer.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/utils/InputStreamConsumer.java
@@ -24,6 +24,7 @@ import org.slf4j.LoggerFactory;
 import java.io.BufferedReader;
 import java.io.InputStream;
 import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
 
 /**
  * This class is responsible to read a Process output.
@@ -40,7 +41,7 @@ public class InputStreamConsumer extends Thread {
   @Override
   public void run() {
     try {
-      InputStreamReader isr = new InputStreamReader(is);
+      InputStreamReader isr = new InputStreamReader(is, StandardCharsets.UTF_8);
       BufferedReader br = new BufferedReader(isr);
       br.lines().forEach(LOG::info);
     } catch (Exception e) {
diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java
index cc706dfd719..662c2ffe35a 100644
--- a/hudi-hadoop-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java
+++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java
@@ -42,6 +42,7 @@ import java.io.InputStreamReader;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.net.URL;
+import java.nio.charset.StandardCharsets;
 import java.util.HashSet;
 import java.util.Set;
 
@@ -104,7 +105,7 @@ public class DFSPropertiesConfiguration extends PropertiesConfig {
     // First try loading the external config file from class loader
     URL configFile = Thread.currentThread().getContextClassLoader().getResource(DEFAULT_PROPERTIES_FILE);
     if (configFile != null) {
-      try (BufferedReader br = new BufferedReader(new InputStreamReader(configFile.openStream()))) {
+      try (BufferedReader br = new BufferedReader(new InputStreamReader(configFile.openStream(), StandardCharsets.UTF_8))) {
         conf.addPropsFromStream(br, new StoragePath(configFile.toURI()));
         return conf.getProps();
       } catch (URISyntaxException e) {
@@ -160,7 +161,7 @@ public class DFSPropertiesConfiguration extends PropertiesConfig {
       throw new HoodieIOException("Cannot check if the properties file exist: " + filePath, ioe);
     }
 
-    try (BufferedReader reader = new BufferedReader(new InputStreamReader(storage.open(filePath)))) {
+    try (BufferedReader reader = new BufferedReader(new InputStreamReader(storage.open(filePath), StandardCharsets.UTF_8))) {
       visitedFilePaths.add(filePath.toString());
       addPropsFromStream(reader, filePath);
     } catch (IOException ioe) {
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java
index fdcb806b434..9957c621545 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java
@@ -37,6 +37,7 @@ import java.io.InputStreamReader;
 import java.io.Serializable;
 import java.net.InetAddress;
 import java.net.UnknownHostException;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.stream.IntStream;
@@ -111,7 +112,7 @@ public class HoodieWithTimelineServer implements Serializable {
 
       System.out.println("Response Code from(" + url + ") : " + response.getStatusLine().getStatusCode());
 
-      try (BufferedReader rd = new BufferedReader(new InputStreamReader(response.getEntity().getContent()))) {
+      try (BufferedReader rd = new BufferedReader(new InputStreamReader(response.getEntity().getContent(), StandardCharsets.UTF_8))) {
         StringBuilder result = new StringBuilder();
         rd.lines().forEach(result::append);
         System.out.println("Got result (" + result + ")");
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/TableSizeStats.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/TableSizeStats.java
index c5c1d2aabad..1a6a1ba4f82 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/TableSizeStats.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/TableSizeStats.java
@@ -55,6 +55,7 @@ import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.Serializable;
+import java.nio.charset.StandardCharsets;
 import java.time.LocalDate;
 import java.time.format.DateTimeFormatter;
 import java.time.format.DateTimeFormatterBuilder;
@@ -364,7 +365,7 @@ public class TableSizeStats implements Serializable {
         Option.ofNullable(hadoopConf).orElseGet(Configuration::new)
     );
 
-    try (BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(new Path(propsPath))))) {
+    try (BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(new Path(propsPath)), StandardCharsets.UTF_8))) {
       String line = reader.readLine();
       while (line != null) {
         filePaths.add(line);

Reply via email to