This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push:
new 3ce846ddc ORC-1791: Remove `commons-lang3` dependency
3ce846ddc is described below
commit 3ce846ddc7571f87134a34ba83eaaa166505e184
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Fri Oct 18 17:10:54 2024 -0700
ORC-1791: Remove `commons-lang3` dependency
### What changes were proposed in this pull request?
This PR aims to remove `commons-lang3` dependency from ORC library.
### Why are the changes needed?
To reduce the public dependency. We still allow `test` scope dependency.
### How was this patch tested?
Pass the CIs.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #2053 from dongjoon-hyun/ORC-1791.
Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
java/core/pom.xml | 9 ++--
java/core/src/java/org/apache/orc/OrcConf.java | 7 ++-
.../apache/orc/impl/ConvertTreeReaderFactory.java | 4 +-
.../java/org/apache/orc/impl/RecordReaderImpl.java | 10 +++-
.../org/apache/orc/impl/RecordReaderUtils.java | 5 +-
.../apache/orc/impl/mask/RedactMaskFactory.java | 5 +-
.../orc/impl/TestPredicatePushDownBounds.java | 53 ++++++++--------------
java/mapreduce/pom.xml | 4 --
.../java/org/apache/orc/mapred/OrcInputFormat.java | 3 +-
java/tools/pom.xml | 4 --
.../src/java/org/apache/orc/tools/FileDump.java | 3 +-
.../apache/orc/tools/json/JsonSchemaFinder.java | 14 +++++-
12 files changed, 54 insertions(+), 67 deletions(-)
diff --git a/java/core/pom.xml b/java/core/pom.xml
index 4cafffc71..dc9fd4ad5 100644
--- a/java/core/pom.xml
+++ b/java/core/pom.xml
@@ -43,10 +43,6 @@
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
</dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-lang3</artifactId>
- </dependency>
<dependency>
<groupId>io.airlift</groupId>
<artifactId>aircompressor</artifactId>
@@ -86,6 +82,11 @@
</dependency>
<!-- test inter-project -->
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-lang3</artifactId>
+ <scope>test</scope>
+ </dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
diff --git a/java/core/src/java/org/apache/orc/OrcConf.java
b/java/core/src/java/org/apache/orc/OrcConf.java
index 9bc2b4492..6516517ba 100644
--- a/java/core/src/java/org/apache/orc/OrcConf.java
+++ b/java/core/src/java/org/apache/orc/OrcConf.java
@@ -18,7 +18,6 @@
package org.apache.orc;
-import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import java.util.ArrayList;
@@ -353,12 +352,12 @@ public enum OrcConf {
public List<String> getStringAsList(Configuration conf) {
String value = getString(null, conf);
List<String> confList = new ArrayList<>();
- if (StringUtils.isEmpty(value)) {
+ if (value == null || value.isEmpty()) {
return confList;
}
for (String str: value.split(",")) {
- String trimStr = StringUtils.trim(str);
- if (StringUtils.isNotEmpty(trimStr)) {
+ String trimStr = str.trim();
+ if (!trimStr.isEmpty()) {
confList.add(trimStr);
}
}
diff --git
a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
index 4635973ab..6886b551e 100644
--- a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
@@ -17,7 +17,6 @@
*/
package org.apache.orc.impl;
-import org.apache.commons.lang3.ArrayUtils;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
@@ -1446,6 +1445,7 @@ public class ConvertTreeReaderFactory extends
TreeReaderFactory {
}
public static class StringGroupFromBinaryTreeReader extends
ConvertTreeReader {
+ public static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
private final TypeDescription readerType;
private BytesColumnVector inBytesColVector;
private BytesColumnVector outBytesColVector;
@@ -1461,7 +1461,7 @@ public class ConvertTreeReaderFactory extends
TreeReaderFactory {
byte[] bytes = inBytesColVector.vector[elementNum];
int start = inBytesColVector.start[elementNum];
int length = inBytesColVector.length[elementNum];
- final byte[] string = (length == 0) ? ArrayUtils.EMPTY_BYTE_ARRAY : new
byte[3 * length - 1];
+ final byte[] string = (length == 0) ? EMPTY_BYTE_ARRAY : new byte[3 *
length - 1];
for(int p = 0; p < string.length; p += 2) {
if (p != 0) {
string[p++] = ' ';
diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
index 323f24247..c9256964e 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
@@ -17,7 +17,6 @@
*/
package org.apache.orc.impl;
-import org.apache.commons.lang3.ArrayUtils;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -340,7 +339,14 @@ public class RecordReaderImpl implements RecordReader {
this.startReadPhase = TypeReader.ReadPhase.ALL;
}
- this.rowIndexColsToRead = ArrayUtils.contains(rowIndexCols, true) ?
rowIndexCols : null;
+ var hasTrue = false;
+ for (boolean value: rowIndexCols) {
+ if (value) {
+ hasTrue = true;
+ break;
+ }
+ }
+ this.rowIndexColsToRead = hasTrue ? rowIndexCols : null;
TreeReaderFactory.ReaderContext readerContext =
new TreeReaderFactory.ReaderContext()
.setSchemaEvolution(evolution)
diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java
b/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java
index 52dc25788..b4155ada8 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java
@@ -17,7 +17,6 @@
*/
package org.apache.orc.impl;
-import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileRange;
import org.apache.hadoop.fs.FileSystem;
@@ -637,8 +636,8 @@ public class RecordReaderUtils {
@Override
public int hashCode() {
- return new
HashCodeBuilder().append(capacity).append(insertionGeneration)
- .toHashCode();
+ // This is identical to the previous hashCode from HashCodeBuilder
+ return (17 * 37 + capacity) * 37 + (int) (insertionGeneration ^
insertionGeneration >> 32);
}
}
diff --git a/java/core/src/java/org/apache/orc/impl/mask/RedactMaskFactory.java
b/java/core/src/java/org/apache/orc/impl/mask/RedactMaskFactory.java
index c6b65c3e8..1debb9349 100644
--- a/java/core/src/java/org/apache/orc/impl/mask/RedactMaskFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/mask/RedactMaskFactory.java
@@ -17,7 +17,6 @@
*/
package org.apache.orc.impl.mask;
-import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
@@ -135,7 +134,7 @@ public class RedactMaskFactory extends MaskFactory {
OTHER_NUMBER_REPLACEMENT = getNextCodepoint(param, DEFAULT_NUMBER_OTHER);
OTHER_REPLACEMENT = getNextCodepoint(param, DEFAULT_OTHER);
String[] timeParams;
- if (params.length < 2 || StringUtils.isBlank(params[1])) {
+ if (params.length < 2 || params[1].isBlank()) {
timeParams = null;
} else {
timeParams = params[1].split("\\W+");
@@ -154,7 +153,7 @@ public class RedactMaskFactory extends MaskFactory {
(SECOND_REPLACEMENT != UNMASKED_DATE);
/* un-mask range */
- if(!(params.length < 3 || StringUtils.isBlank(params[2]))) {
+ if(!(params.length < 3 || params[2].isBlank())) {
String[] unmaskIndexes = params[2].split(",");
for(int i=0; i < unmaskIndexes.length; i++ ) {
diff --git
a/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java
b/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java
index c2799ff90..aec865201 100644
--- a/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java
+++ b/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java
@@ -17,7 +17,6 @@
*/
package org.apache.orc.impl;
-import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
@@ -54,26 +53,20 @@ public class TestPredicatePushDownBounds {
BloomFilter bf = new BloomFilter(100);
// FFF... to PPP...
for (int i = 70; i <= 80; i++) {
- final String inputString = StringUtils
- .repeat(Character.toString((char) i), stringLength);
+ final String inputString = Character.toString((char)
i).repeat(stringLength);
bf.addString(inputString);
}
- final String longStringF = StringUtils
- .repeat(Character.toString('F'), stringLength);
- final String longStringP = StringUtils
- .repeat(Character.toString('P'), stringLength);
+ final String longStringF = Character.toString('F').repeat(stringLength);
+ final String longStringP = Character.toString('P').repeat(stringLength);
/* String that matches the upperbound value after truncation */
- final String upperboundString =
- StringUtils.repeat(Character.toString('P'), 1023) + "Q";
+ final String upperboundString = Character.toString('P').repeat(1023) + "Q";
/* String that matches the lower value after truncation */
- final String lowerboundString = StringUtils
- .repeat(Character.toString('F'), 1024);
+ final String lowerboundString = Character.toString('F').repeat(1024);
- final String shortStringF = StringUtils.repeat(Character.toString('F'),
50);
- final String shortStringP =
- StringUtils.repeat(Character.toString('P'), 50) + "Q";
+ final String shortStringF = Character.toString('F').repeat(50);
+ final String shortStringP = Character.toString('P').repeat(50) + "Q";
/* Test for a case EQUALS where only upperbound is set */
final PredicateLeaf predicateUpperBoundEquals = TestRecordReaderImpl
@@ -165,17 +158,13 @@ public class TestPredicatePushDownBounds {
BloomFilter bf = new BloomFilter(100);
// FFF... to PPP...
for (int i = 70; i <= 80; i++) {
- final String inputString = StringUtils
- .repeat(Character.toString((char) i), bfStringLength);
+ final String inputString = Character.toString((char)
i).repeat(bfStringLength);
bf.addString(inputString);
}
- final String longStringF = StringUtils
- .repeat(Character.toString('F'), stringLength);
- final String longStringP = StringUtils
- .repeat(Character.toString('P'), stringLength);
- final String predicateString = StringUtils
- .repeat(Character.toString('I'), 50);
+ final String longStringF = Character.toString('F').repeat(stringLength);
+ final String longStringP = Character.toString('P').repeat(stringLength);
+ final String predicateString = Character.toString('I').repeat(50);
/* Test for a case where only upperbound is set */
@@ -215,26 +204,20 @@ public class TestPredicatePushDownBounds {
final BloomFilter bf = new BloomFilter(100);
// FFF... to PPP...
for (int i = 70; i <= 80; i++) {
- final String inputString = StringUtils
- .repeat(Character.toString((char) i), stringLength);
+ final String inputString = Character.toString((char)
i).repeat(stringLength);
bf.addString(inputString);
}
- final String longStringF = StringUtils
- .repeat(Character.toString('F'), stringLength);
- final String longStringP = StringUtils
- .repeat(Character.toString('P'), stringLength);
+ final String longStringF = Character.toString('F').repeat(stringLength);
+ final String longStringP = Character.toString('P').repeat(stringLength);
/* String that matches the upperbound value after truncation */
- final String upperboundString =
- StringUtils.repeat(Character.toString('P'), 1023) + "Q";
+ final String upperboundString = Character.toString('P').repeat(1023) + "Q";
/* String that matches the lower value after truncation */
- final String lowerboundString = StringUtils
- .repeat(Character.toString('F'), 1024);
+ final String lowerboundString = Character.toString('F').repeat(1024);
- final String shortStringF = StringUtils.repeat(Character.toString('F'),
50);
- final String shortStringP =
- StringUtils.repeat(Character.toString('P'), 50) + "Q";
+ final String shortStringF = Character.toString('F').repeat(50);
+ final String shortStringP = Character.toString('P').repeat(50) + "Q";
final List<Object> args = new ArrayList<Object>();
args.add(upperboundString);
diff --git a/java/mapreduce/pom.xml b/java/mapreduce/pom.xml
index 30bd83e5b..a738e145f 100644
--- a/java/mapreduce/pom.xml
+++ b/java/mapreduce/pom.xml
@@ -39,10 +39,6 @@
<groupId>com.esotericsoftware</groupId>
<artifactId>kryo-shaded</artifactId>
</dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-lang3</artifactId>
- </dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
diff --git a/java/mapreduce/src/java/org/apache/orc/mapred/OrcInputFormat.java
b/java/mapreduce/src/java/org/apache/orc/mapred/OrcInputFormat.java
index ddb6a6ac2..947d9b6f8 100644
--- a/java/mapreduce/src/java/org/apache/orc/mapred/OrcInputFormat.java
+++ b/java/mapreduce/src/java/org/apache/orc/mapred/OrcInputFormat.java
@@ -21,7 +21,6 @@ package org.apache.orc.mapred;
import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;
-import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
@@ -68,7 +67,7 @@ public class OrcInputFormat<V extends WritableComparable>
boolean[] result = new boolean[schema.getMaximumId() + 1];
result[0] = true;
- if (StringUtils.isBlank(columnsStr)) {
+ if (columnsStr.isBlank()) {
return result;
}
diff --git a/java/tools/pom.xml b/java/tools/pom.xml
index cc7cdd34f..231e5bd0b 100644
--- a/java/tools/pom.xml
+++ b/java/tools/pom.xml
@@ -60,10 +60,6 @@
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
</dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-lang3</artifactId>
- </dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-storage-api</artifactId>
diff --git a/java/tools/src/java/org/apache/orc/tools/FileDump.java
b/java/tools/src/java/org/apache/orc/tools/FileDump.java
index 790352e66..b8acb1cae 100644
--- a/java/tools/src/java/org/apache/orc/tools/FileDump.java
+++ b/java/tools/src/java/org/apache/orc/tools/FileDump.java
@@ -22,7 +22,6 @@ import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
-import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
@@ -61,7 +60,7 @@ import java.util.List;
*/
public final class FileDump {
public static final String UNKNOWN = "UNKNOWN";
- public static final String SEPARATOR = StringUtils.repeat("_", 120) + "\n";
+ public static final String SEPARATOR = "_".repeat(120) + "\n";
public static final String RECOVER_READ_SIZE = "orc.recover.read.size"; //
only for testing
public static final int DEFAULT_BLOCK_SIZE = 256 * 1024 * 1024;
public static final String DEFAULT_BACKUP_PATH =
System.getProperty("java.io.tmpdir");
diff --git
a/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java
b/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java
index 358eb21a5..7a0765049 100644
--- a/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java
+++ b/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java
@@ -29,7 +29,6 @@ import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
-import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.orc.TypeDescription;
import org.apache.orc.TypeDescriptionPrettyPrint;
@@ -40,6 +39,8 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
+import java.io.PrintWriter;
+import java.io.StringWriter;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.util.List;
@@ -264,6 +265,15 @@ public class JsonSchemaFinder {
}
}
+ public static String getStackTrace(final Throwable throwable) {
+ if (throwable == null) {
+ return "";
+ }
+ final StringWriter sw = new StringWriter();
+ throwable.printStackTrace(new PrintWriter(sw, true));
+ return sw.toString();
+ }
+
private void printParseExceptionMsg(JsonParseException e, String filename) {
System.err.printf(
"A JsonParseException was thrown while processing the %dth record of
file %s.%n",
@@ -282,7 +292,7 @@ public class JsonSchemaFinder {
System.exit(1);
}
}
- System.err.printf("Please check the file.%n%n%s%n",
ExceptionUtils.getStackTrace(e));
+ System.err.printf("Please check the file.%n%n%s%n", getStackTrace(e));
System.exit(1);
}