This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 0a6338b86e7 [HUDI-7738] Set FileStreamReader Charset as UTF-8 (#11181)
0a6338b86e7 is described below
commit 0a6338b86e7ee62b72679a7b56ab65d58cef6fab
Author: xuzifu666 <[email protected]>
AuthorDate: Fri May 10 16:00:13 2024 +0800
[HUDI-7738] Set FileStreamReader Charset as UTF-8 (#11181)
---
.../src/main/java/org/apache/hudi/cli/utils/InputStreamConsumer.java | 3 ++-
.../org/apache/hudi/common/config/DFSPropertiesConfiguration.java | 5 +++--
.../java/org/apache/hudi/utilities/HoodieWithTimelineServer.java | 3 ++-
.../src/main/java/org/apache/hudi/utilities/TableSizeStats.java | 3 ++-
4 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/utils/InputStreamConsumer.java b/hudi-cli/src/main/java/org/apache/hudi/cli/utils/InputStreamConsumer.java
index e99a499c69e..5209465d8a9 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/utils/InputStreamConsumer.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/utils/InputStreamConsumer.java
@@ -24,6 +24,7 @@ import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
/**
* This class is responsible to read a Process output.
@@ -40,7 +41,7 @@ public class InputStreamConsumer extends Thread {
@Override
public void run() {
try {
- InputStreamReader isr = new InputStreamReader(is);
+ InputStreamReader isr = new InputStreamReader(is, StandardCharsets.UTF_8);
BufferedReader br = new BufferedReader(isr);
br.lines().forEach(LOG::info);
} catch (Exception e) {
diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java
index cc706dfd719..662c2ffe35a 100644
--- a/hudi-hadoop-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java
+++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java
@@ -42,6 +42,7 @@ import java.io.InputStreamReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
+import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Set;
@@ -104,7 +105,7 @@ public class DFSPropertiesConfiguration extends PropertiesConfig {
// First try loading the external config file from class loader
URL configFile = Thread.currentThread().getContextClassLoader().getResource(DEFAULT_PROPERTIES_FILE);
if (configFile != null) {
- try (BufferedReader br = new BufferedReader(new InputStreamReader(configFile.openStream()))) {
+ try (BufferedReader br = new BufferedReader(new InputStreamReader(configFile.openStream(), StandardCharsets.UTF_8))) {
conf.addPropsFromStream(br, new StoragePath(configFile.toURI()));
return conf.getProps();
} catch (URISyntaxException e) {
@@ -160,7 +161,7 @@ public class DFSPropertiesConfiguration extends PropertiesConfig {
throw new HoodieIOException("Cannot check if the properties file exist: " + filePath, ioe);
}
- try (BufferedReader reader = new BufferedReader(new InputStreamReader(storage.open(filePath)))) {
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(storage.open(filePath), StandardCharsets.UTF_8))) {
visitedFilePaths.add(filePath.toString());
addPropsFromStream(reader, filePath);
} catch (IOException ioe) {
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java
index fdcb806b434..9957c621545 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java
@@ -37,6 +37,7 @@ import java.io.InputStreamReader;
import java.io.Serializable;
import java.net.InetAddress;
import java.net.UnknownHostException;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.IntStream;
@@ -111,7 +112,7 @@ public class HoodieWithTimelineServer implements Serializable {
System.out.println("Response Code from(" + url + ") : " + response.getStatusLine().getStatusCode());
- try (BufferedReader rd = new BufferedReader(new InputStreamReader(response.getEntity().getContent()))) {
+ try (BufferedReader rd = new BufferedReader(new InputStreamReader(response.getEntity().getContent(), StandardCharsets.UTF_8))) {
StringBuilder result = new StringBuilder();
rd.lines().forEach(result::append);
System.out.println("Got result (" + result + ")");
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/TableSizeStats.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/TableSizeStats.java
index c5c1d2aabad..1a6a1ba4f82 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/TableSizeStats.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/TableSizeStats.java
@@ -55,6 +55,7 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Serializable;
+import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
@@ -364,7 +365,7 @@ public class TableSizeStats implements Serializable {
Option.ofNullable(hadoopConf).orElseGet(Configuration::new)
);
- try (BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(new Path(propsPath))))) {
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(new Path(propsPath)), StandardCharsets.UTF_8))) {
String line = reader.readLine();
while (line != null) {
filePaths.add(line);