This is an automated email from the ASF dual-hosted git repository.

dzamo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git


The following commit(s) were added to refs/heads/master by this push:
     new a9e84a114e DRILL-8495: Tried to remove unmanaged buffer (#2913)
a9e84a114e is described below

commit a9e84a114ec8eb193ebfa8173de693e567b1c27a
Author: Maksym Rymar <rym...@apache.org>
AuthorDate: Fri May 17 17:00:10 2024 +0300

    DRILL-8495: Tried to remove unmanaged buffer (#2913)
---
 .../hive/readers/HiveDefaultRecordReader.java      |  6 +++---
 .../store/hive/writers/HiveValueWriterFactory.java | 20 ++++++++++----------
 .../apache/drill/exec/hive/TestHiveStorage.java    | 19 +++++++++++++++++++
 .../exec/hive/TestInfoSchemaOnHiveStorage.java     |  2 ++
 .../exec/store/hive/HiveTestDataGenerator.java     | 22 ++++++++++++++++++++++
 5 files changed, 56 insertions(+), 13 deletions(-)
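
The gist of the change, as I read the patch: HiveDefaultRecordReader used to size one managed DrillBuf to 256 bytes via reallocIfNeeded and hand that single buffer to every Hive value writer. The likely failure mode, given the commit title, is that once any writer grew the shared buffer past its capacity, the other writers were left holding a stale reference, and a later realloc on that stale buffer failed with "Tried to remove unmanaged buffer". The fix passes the fragment's BufferManager instead, so each writer draws its own managed buffer. A condensed sketch of the before/after wiring (variable and inspector names are illustrative, not the literal Drill code):

    // Before: a single 256-byte managed buffer shared by all writers.
    DrillBuf shared = context.getManagedBuffer().reallocIfNeeded(256);
    new HiveStringWriter(stringInspector, varCharWriter, shared);
    new HiveBinaryWriter(binaryInspector, varBinaryWriter, shared);
    // One writer growing 'shared' invalidates the reference the other still holds.

    // After: each writer pulls its own buffer from the fragment's BufferManager.
    BufferManager mgr = fragmentContext.getManagedBufferManager();
    new HiveStringWriter(stringInspector, varCharWriter, mgr.getManagedBuffer());
    new HiveBinaryWriter(binaryInspector, varBinaryWriter, mgr.getManagedBuffer());

The accompanying test reads back CHAR, VARCHAR, BINARY, and STRING values of 255 to 2200 bytes, all beyond the old 256-byte initial allocation.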

diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/readers/HiveDefaultRecordReader.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/readers/HiveDefaultRecordReader.java
index 535fa2f087..0e5d54ef13 100644
--- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/readers/HiveDefaultRecordReader.java
+++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/readers/HiveDefaultRecordReader.java
@@ -168,7 +168,7 @@ public class HiveDefaultRecordReader extends AbstractRecordReader {
   protected boolean empty;
 
   /**
-   * Buffer used for population of partition vectors  and to fill in data into vectors via writers
+   * Buffer used for population of partition vectors
    */
   private final DrillBuf drillBuf;
 
@@ -238,7 +238,7 @@ public class HiveDefaultRecordReader extends AbstractRecordReader {
     this.proxyUserGroupInfo = proxyUgi;
     this.empty = inputSplits == null || inputSplits.isEmpty();
     this.inputSplitsIterator = empty ? Collections.emptyIterator() : inputSplits.iterator();
-    this.drillBuf = context.getManagedBuffer().reallocIfNeeded(256);
+    this.drillBuf = context.getManagedBuffer();
     this.partitionVectors = new ValueVector[0];
     this.partitionValues = new Object[0];
     setColumns(projectedColumns);
@@ -333,7 +333,7 @@ public class HiveDefaultRecordReader extends AbstractRecordReader {
       this.selectedStructFieldRefs = new StructField[selectedColumnNames.size()];
       this.columnValueWriters = new HiveValueWriter[selectedColumnNames.size()];
       this.outputWriter = new VectorContainerWriter(output, /*enabled union*/ false);
-      HiveValueWriterFactory hiveColumnValueWriterFactory = new HiveValueWriterFactory(drillBuf, outputWriter.getWriter());
+      HiveValueWriterFactory hiveColumnValueWriterFactory = new HiveValueWriterFactory(fragmentContext.getManagedBufferManager(), outputWriter.getWriter());
       for (int refIdx = 0; refIdx < selectedStructFieldRefs.length; refIdx++) {
         String columnName = selectedColumnNames.get(refIdx);
         StructField fieldRef = finalObjInspector.getStructFieldRef(columnName);
diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/writers/HiveValueWriterFactory.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/writers/HiveValueWriterFactory.java
index fcb89ce7a4..d430b8370a 100644
--- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/writers/HiveValueWriterFactory.java
+++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/writers/HiveValueWriterFactory.java
@@ -21,8 +21,8 @@ import java.util.List;
 import java.util.function.BiFunction;
 import java.util.function.Function;
 
-import io.netty.buffer.DrillBuf;
 import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.exec.ops.BufferManager;
 import org.apache.drill.exec.store.hive.writers.complex.HiveListWriter;
 import org.apache.drill.exec.store.hive.writers.complex.HiveMapWriter;
 import org.apache.drill.exec.store.hive.writers.complex.HiveStructWriter;
@@ -97,18 +97,18 @@ public final class HiveValueWriterFactory {
  private static final Logger logger = LoggerFactory.getLogger(HiveValueWriterFactory.class);
 
   /**
-   * Buffer shared across created Hive writers. May be used by writer for reading data
-   * to buffer than from buffer to vector.
+   * Buffer manager used to create buffers for Hive writers for reading data
+   * to buffer then from buffer to vector if needed.
    */
-  private final DrillBuf drillBuf;
+  private final BufferManager bufferManager;
 
   /**
    * Used to manage and create column writers.
    */
   private final SingleMapWriter rootWriter;
 
-  public HiveValueWriterFactory(DrillBuf drillBuf, SingleMapWriter rootWriter) {
-    this.drillBuf = drillBuf;
+  public HiveValueWriterFactory(BufferManager bufferManager, SingleMapWriter rootWriter) {
+    this.bufferManager = bufferManager;
     this.rootWriter = rootWriter;
   }
 
@@ -200,7 +200,7 @@ public final class HiveValueWriterFactory {
       case BINARY: {
         VarBinaryWriter writer = extractWriter(name, parentWriter,
            MapWriter::varBinary, ListWriter::varBinary, UnionVectorWriter::varBinary);
-        return new HiveBinaryWriter((BinaryObjectInspector) inspector, writer, drillBuf);
+        return new HiveBinaryWriter((BinaryObjectInspector) inspector, writer, bufferManager.getManagedBuffer());
       }
       case BOOLEAN: {
         BitWriter writer = extractWriter(name, parentWriter,
@@ -240,12 +240,12 @@ public final class HiveValueWriterFactory {
       case STRING: {
         VarCharWriter writer = extractWriter(name, parentWriter,
            MapWriter::varChar, ListWriter::varChar, UnionVectorWriter::varChar);
-        return new HiveStringWriter((StringObjectInspector) inspector, writer, drillBuf);
+        return new HiveStringWriter((StringObjectInspector) inspector, writer, bufferManager.getManagedBuffer());
       }
       case VARCHAR: {
         VarCharWriter writer = extractWriter(name, parentWriter,
            MapWriter::varChar, ListWriter::varChar, UnionVectorWriter::varChar);
-        return new HiveVarCharWriter((HiveVarcharObjectInspector) inspector, writer, drillBuf);
+        return new HiveVarCharWriter((HiveVarcharObjectInspector) inspector, writer, bufferManager.getManagedBuffer());
       }
       case TIMESTAMP: {
         TimeStampWriter writer = extractWriter(name, parentWriter,
@@ -260,7 +260,7 @@ public final class HiveValueWriterFactory {
       case CHAR: {
         VarCharWriter writer = extractWriter(name, parentWriter,
            MapWriter::varChar, ListWriter::varChar, UnionVectorWriter::varChar);
-        return new HiveCharWriter((HiveCharObjectInspector) inspector, writer, drillBuf);
+        return new HiveCharWriter((HiveCharObjectInspector) inspector, writer, bufferManager.getManagedBuffer());
       }
       case DECIMAL: {
         DecimalTypeInfo decimalType = (DecimalTypeInfo) typeInfo;
diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
index ecfda55df9..deea6982b1 100644
--- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
+++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
@@ -17,6 +17,7 @@
  */
 package org.apache.drill.exec.hive;
 
+import static com.google.common.base.Strings.repeat;
 import static org.hamcrest.CoreMatchers.containsString;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
@@ -462,6 +463,24 @@ public class TestHiveStorage extends HiveTestBase {
       .go();
   }
 
+  @Test // see DRILL-8495
+  public void testReadingHiveDataBiggerThan256Bytes() throws Exception {
+    testBuilder()
+        .sqlQuery("select * from hive.`256_bytes_plus_table`")
+        .unOrdered()
+        .baselineColumns(
+            "char_col",
+            "varchar_col",
+            "binary_col",
+            "string_col")
+        .baselineValues(
+            repeat("A", 255),
+            repeat("B", 1200),
+            repeat("C", 320).getBytes(),
+            repeat("D", 2200))
+        .go();
+  }
+
  private void verifyColumnsMetadata(List<UserProtos.ResultColumnMetadata> columnsList, Map<String, Integer> expectedResult) {
    for (UserProtos.ResultColumnMetadata columnMetadata : columnsList) {
      assertTrue("Column should be present in result set", expectedResult.containsKey(columnMetadata.getColumnName()));
diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestInfoSchemaOnHiveStorage.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestInfoSchemaOnHiveStorage.java
index 1d4c96e498..6383b3ce61 100644
--- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestInfoSchemaOnHiveStorage.java
+++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestInfoSchemaOnHiveStorage.java
@@ -56,6 +56,7 @@ public class TestInfoSchemaOnHiveStorage extends HiveTestBase {
         .baselineValues("hive.default", "hive_view_m")
         .baselineValues("hive.default", "view_over_hive_view")
         .baselineValues("hive.default", "table_with_empty_parquet")
+        .baselineValues("hive.default", "256_bytes_plus_table")
         .go();
 
     testBuilder()
@@ -268,6 +269,7 @@ public class TestInfoSchemaOnHiveStorage extends HiveTestBase {
         .baselineValues("DRILL", "hive.default", "hive_view_m", "TABLE")
         .baselineValues("DRILL", "hive.default", "view_over_hive_view", "VIEW")
         .baselineValues("DRILL", "hive.default", "table_with_empty_parquet", 
"TABLE")
+        .baselineValues("DRILL", "hive.default", "256_bytes_plus_table", 
"TABLE")
         .baselineValues("DRILL", "hive.skipper", "kv_text_small", "TABLE")
         .baselineValues("DRILL", "hive.skipper", "kv_text_large", "TABLE")
         .baselineValues("DRILL", "hive.skipper", "kv_incorrect_skip_header", 
"TABLE")
diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
index ac8a133f26..106fb22963 100644
--- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
+++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
@@ -23,6 +23,7 @@ import java.nio.file.Paths;
 import java.sql.Date;
 import java.sql.Timestamp;
 
+import com.google.common.base.Strings;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.drill.exec.hive.HiveTestUtilities;
@@ -99,6 +100,9 @@ public class HiveTestDataGenerator {
       FileUtils.forceDelete(emptyTableLocation);
     }
 
+    // generate a table with variable length columns and populate it with data of different sizes
+    generateTableWithVariableLengthColumns(hiveDriver);
+
    // create a Hive table that has columns with data types which are supported for reading in Drill.
     testDataFile = generateAllTypesDataFile();
     executeQuery(hiveDriver,
@@ -609,4 +613,22 @@ public class HiveTestDataGenerator {
 
     return sb.toString();
   }
+
+  private void generateTableWithVariableLengthColumns(Driver hiveDriver) {
+    executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS 256_bytes_plus_table 
(" +
+        "  char_col CHAR(255)," +
+        "  varchar_col VARCHAR(1500)," +
+        "  binary_col BINARY," +
+        "  string_col STRING" +
+        ")");
+
+    String insertQuery = String.format("INSERT INTO 256_bytes_plus_table VALUES\n" +
+            "  ('%s', '%s', '%s', '%s')",
+        Strings.repeat("A", 255),
+        Strings.repeat("B", 1200),
+        Strings.repeat("C", 320),
+        Strings.repeat("D", 2200));
+
+    executeQuery(hiveDriver, insertQuery);
+  }
 }
