This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new a5303acd6 [orc] Remove useless methods for orc writer
a5303acd6 is described below
commit a5303acd6473fee2d78c9ede53f9464e0b300a99
Author: Jingsong <[email protected]>
AuthorDate: Wed Apr 10 14:49:12 2024 +0800
[orc] Remove useless methods for orc writer
---
.../paimon/format/orc/writer/Vectorizer.java | 15 ----
.../paimon/format/orc/OrcBulkWriterTestUtil.java | 96 ----------------------
.../apache/paimon/format/orc/RecordVectorizer.java | 55 -------------
3 files changed, 166 deletions(-)
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/writer/Vectorizer.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/writer/Vectorizer.java
index bd2967bcc..f1341b0b8 100644
--- a/paimon-format/src/main/java/org/apache/paimon/format/orc/writer/Vectorizer.java
+++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/writer/Vectorizer.java
@@ -24,7 +24,6 @@ import org.apache.orc.Writer;
import java.io.IOException;
import java.io.Serializable;
-import java.nio.ByteBuffer;
import static org.apache.paimon.utils.Preconditions.checkNotNull;
@@ -67,20 +66,6 @@ public abstract class Vectorizer<T> implements Serializable {
this.writer = writer;
}
- /**
- * Adds arbitrary user metadata to the outgoing ORC file.
- *
- * <p>Users who want to dynamically add new metadata either based on either the input or from an
- * external system can do so by calling <code>addUserMetadata(...)</code> inside the overridden
- * vectorize() method.
- *
- * @param key a key to label the data with.
- * @param value the contents of the metadata.
- */
- public void addUserMetadata(String key, ByteBuffer value) {
- this.writer.addUserMetadata(key, value);
- }
-
/**
* Transforms the provided element to ColumnVectors and sets them in the exposed
* VectorizedRowBatch.
diff --git a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcBulkWriterTestUtil.java b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcBulkWriterTestUtil.java
deleted file mode 100644
index b24f3d1ba..000000000
--- a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcBulkWriterTestUtil.java
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.paimon.format.orc;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.orc.CompressionKind;
-import org.apache.orc.OrcFile;
-import org.apache.orc.Reader;
-import org.apache.orc.RecordReader;
-
-import java.io.File;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.List;
-
-import static org.assertj.core.api.Assertions.assertThat;
-
-/** Util class for the OrcBulkWriter tests. */
-public class OrcBulkWriterTestUtil {
-
- public static final String USER_METADATA_KEY = "userKey";
- public static final ByteBuffer USER_METADATA_VALUE = ByteBuffer.wrap("hello".getBytes());
-
- public static void validate(File files, List<Record> expected) throws IOException {
- final File[] buckets = files.listFiles();
- assertThat(buckets).isNotNull();
- assertThat(buckets).hasSize(1);
-
- final File[] partFiles = buckets[0].listFiles();
- assertThat(partFiles).isNotNull();
-
- for (File partFile : partFiles) {
- assertThat(partFile.length()).isGreaterThan(0);
-
- OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(new Configuration());
- Reader reader =
- OrcFile.createReader(
- new org.apache.hadoop.fs.Path(partFile.toURI()), readerOptions);
-
- assertThat(reader.getNumberOfRows()).isEqualTo(3);
- assertThat(reader.getSchema().getFieldNames()).hasSize(2);
- assertThat(reader.getCompressionKind()).isSameAs(CompressionKind.LZ4);
- assertThat(reader.hasMetadataValue(USER_METADATA_KEY)).isTrue();
- assertThat(reader.getMetadataKeys()).contains(USER_METADATA_KEY);
-
- List<Record> results = getResults(reader);
-
- assertThat(results).hasSize(3).isEqualTo(expected);
- }
- }
-
- private static List<Record> getResults(Reader reader) throws IOException {
- List<Record> results = new ArrayList<>();
-
- RecordReader recordReader = reader.rows();
- VectorizedRowBatch batch = reader.getSchema().createRowBatch();
-
- while (recordReader.nextBatch(batch)) {
- BytesColumnVector stringVector = (BytesColumnVector) batch.cols[0];
- LongColumnVector intVector = (LongColumnVector) batch.cols[1];
- for (int r = 0; r < batch.size; r++) {
- String name =
- new String(
- stringVector.vector[r],
- stringVector.start[r],
- stringVector.length[r]);
- int age = (int) intVector.vector[r];
-
- results.add(new Record(name, age));
- }
- recordReader.close();
- }
-
- return results;
- }
-}
diff --git a/paimon-format/src/test/java/org/apache/paimon/format/orc/RecordVectorizer.java b/paimon-format/src/test/java/org/apache/paimon/format/orc/RecordVectorizer.java
deleted file mode 100644
index 624fa6cc1..000000000
--- a/paimon-format/src/test/java/org/apache/paimon/format/orc/RecordVectorizer.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.paimon.format.orc;
-
-import org.apache.paimon.format.orc.writer.Vectorizer;
-
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.nio.charset.StandardCharsets;
-
-/**
- * A Vectorizer implementation used for tests.
- *
- * <p>It transforms an input element which is of type {@link Record} to a VectorizedRowBatch.
- */
-public class RecordVectorizer extends Vectorizer<Record> implements Serializable {
-
- public RecordVectorizer(String schema) {
- super(schema);
- }
-
- @Override
- public void vectorize(Record element, VectorizedRowBatch batch) throws IOException {
- BytesColumnVector stringVector = (BytesColumnVector) batch.cols[0];
- LongColumnVector intColVector = (LongColumnVector) batch.cols[1];
-
- int row = batch.size++;
-
- stringVector.setVal(row, element.getName().getBytes(StandardCharsets.UTF_8));
- intColVector.vector[row] = element.getAge();
-
- this.addUserMetadata(
- OrcBulkWriterTestUtil.USER_METADATA_KEY, OrcBulkWriterTestUtil.USER_METADATA_VALUE);
- }
-}