This is an automated email from the ASF dual-hosted git repository.

dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 7c15c5d7f6d HIVE-27647: NullPointerException in LowLevelCacheImpl#putFileData when maxAlloc = chunkLength
7c15c5d7f6d is described below

commit 7c15c5d7f6d065c0b248247c490300a53db62644
Author: Tanishq Chugh <[email protected]>
AuthorDate: Fri Oct 31 16:43:06 2025 +0530

    HIVE-27647: NullPointerException in LowLevelCacheImpl#putFileData when maxAlloc = chunkLength
---
 .../test/resources/testconfiguration.properties    |  1 +
 .../apache/hadoop/hive/llap/LlapCacheAwareFs.java  | 19 +++----
 ql/src/test/queries/clientpositive/llap_io_cache.q | 26 +++++++++
 .../clientpositive/llap/llap_io_cache.q.out        | 66 ++++++++++++++++++++++
 4 files changed, 101 insertions(+), 11 deletions(-)
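
For context, a minimal standalone sketch of the failure arithmetic, reconstructed from the hunks below. The integer-division derivation of largeBufCount and smallSize is an assumption; it is not visible in this diff.

    // Sketch only, not the actual LLAP code: when the chunk length equals
    // the allocator maximum, the old guard (maxAlloc < chunkLength) skips
    // the large-buffer path, the remainder is zero so the small-buffer path
    // is skipped too, and the array handed to putFileData is still null.
    public class MaxAllocEqualsChunkLength {
      public static void main(String[] args) {
        int maxAlloc = 16 * 1024 * 1024;            // allocator max (16 MB)
        int chunkLength = maxAlloc;                 // chunk exactly at the max

        int largeBufCount = chunkLength / maxAlloc; // 1 (assumed derivation)
        int smallSize = chunkLength % maxAlloc;     // 0 (assumed derivation)

        Object[] newCacheData = null;               // stands in for MemoryBuffer[]
        if (maxAlloc < chunkLength) {               // false when they are equal
          newCacheData = new Object[largeBufCount];
        }
        if (smallSize > 0) {                        // false, remainder is zero
          newCacheData = new Object[] { new Object() };
        }
        System.out.println(newCacheData.length);    // NullPointerException
      }
    }

The fix below removes this hole by pre-sizing newCacheData unconditionally.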

diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 336a190c358..a6b04b23fa2 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -111,6 +111,7 @@ minillap.query.files=\
   intersect_distinct.q,\
   intersect_merge.q,\
   limit_bailout.q,\
+  llap_io_cache.q,\
   llap_nullscan.q,\
   llap_stats.q,\
   llap_udf.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/llap/LlapCacheAwareFs.java b/ql/src/java/org/apache/hadoop/hive/llap/LlapCacheAwareFs.java
index 28fa415b43e..0b94c775dc2 100644
--- a/ql/src/java/org/apache/hadoop/hive/llap/LlapCacheAwareFs.java
+++ b/ql/src/java/org/apache/hadoop/hive/llap/LlapCacheAwareFs.java
@@ -283,7 +283,10 @@ public DiskRangeList createCacheChunk(
               int chunkPartCount = largeBufCount + ((smallSize > 0) ? 1 : 0);
               DiskRange[] cacheRanges = new DiskRange[chunkPartCount];
               int extraOffsetInChunk = 0;
-              if (maxAlloc < chunkLength) {
+              newCacheData = new MemoryBuffer[chunkPartCount];
+              int index = 0;
+              
+              if (largeBufCount > 0) {
                 largeBuffers = new MemoryBuffer[largeBufCount];
                // Note: we don't use StoppableAllocator here - this is not on an IO thread.
                allocator.allocateMultiple(largeBuffers, maxAlloc, cache.getDataBufferFactory());
@@ -298,8 +301,10 @@ public DiskRangeList createCacheChunk(
                   extraDiskDataOffset += remaining;
                   extraOffsetInChunk += remaining;
                 }
+                for (MemoryBuffer buf : largeBuffers) {
+                  newCacheData[index++] = buf;
+                }
               }
-              newCacheData = largeBuffers;
               largeBuffers = null;
               if (smallSize > 0) {
                 smallBuffer = new MemoryBuffer[1];
@@ -311,15 +316,7 @@ public DiskRangeList createCacheChunk(
                    smallSize, bb, cacheRanges, largeBufCount, chunkFrom + extraOffsetInChunk);
                 extraDiskDataOffset += smallSize;
                extraOffsetInChunk += smallSize; // Not strictly necessary, no one will look at it.
-                if (newCacheData == null) {
-                  newCacheData = smallBuffer;
-                } else {
-                  // TODO: add allocate overload with an offset and length
-                  MemoryBuffer[] combinedCacheData = new MemoryBuffer[largeBufCount + 1];
-                  System.arraycopy(newCacheData, 0, combinedCacheData, 0, largeBufCount);
-                  newCacheData = combinedCacheData;
-                  newCacheData[largeBufCount] = smallBuffer[0];
-                }
+                newCacheData[index] = smallBuffer[0];
                 smallBuffer = null;
               }
               cache.putFileData(fileKey, cacheRanges, newCacheData, 0, tag);
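
With this change, newCacheData is sized to chunkPartCount before either branch runs, the large-buffer path is gated on largeBufCount > 0 rather than maxAlloc < chunkLength, and both branches write into the pre-sized array through a shared index. The array therefore reaches putFileData non-null even when the chunk divides evenly into max-sized allocations, and the post-hoc System.arraycopy merge of the small buffer is no longer needed.
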
diff --git a/ql/src/test/queries/clientpositive/llap_io_cache.q b/ql/src/test/queries/clientpositive/llap_io_cache.q
new file mode 100644
index 00000000000..b5ab5b25bae
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/llap_io_cache.q
@@ -0,0 +1,26 @@
+set hive.llap.io.enabled=true;
+set hive.llap.io.memory.mode=cache;
+set hive.llap.io.allocator.alloc.max=16Mb;
+set hive.vectorized.execution.enabled=true;
+
+DROP TABLE IF EXISTS tbl_parq;
+
+CREATE TABLE tbl_parq (
+  id INT,
+  payload STRING
+)
+STORED AS PARQUET
+TBLPROPERTIES (
+  'parquet.block.size'='16777216',
+  'parquet.page.size'='16777216',
+  'parquet.compression'='UNCOMPRESSED'
+);
+
+INSERT INTO TABLE tbl_parq
+SELECT
+  1 AS id,
+  RPAD('x', 16777177, 'x') AS payload;
+
+SELECT LENGTH(payload) FROM tbl_parq;
+
+SELECT SUM(LENGTH(payload)) FROM tbl_parq;
\ No newline at end of file
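
The test pairs a 16,777,177-character RPAD payload with 16 MB Parquet block/page sizes, UNCOMPRESSED storage, and hive.llap.io.allocator.alloc.max=16Mb; this sizing appears intended to make the cached chunk length land exactly on the 16 MB allocator maximum, i.e. the maxAlloc == chunkLength case fixed above.
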
diff --git a/ql/src/test/results/clientpositive/llap/llap_io_cache.q.out b/ql/src/test/results/clientpositive/llap/llap_io_cache.q.out
new file mode 100644
index 00000000000..765ae221631
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/llap_io_cache.q.out
@@ -0,0 +1,66 @@
+PREHOOK: query: DROP TABLE IF EXISTS tbl_parq
+PREHOOK: type: DROPTABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: DROP TABLE IF EXISTS tbl_parq
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: database:default
+PREHOOK: query: CREATE TABLE tbl_parq (
+  id INT,
+  payload STRING
+)
+STORED AS PARQUET
+TBLPROPERTIES (
+  'parquet.block.size'='16777216',
+  'parquet.page.size'='16777216',
+  'parquet.compression'='UNCOMPRESSED'
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_parq
+POSTHOOK: query: CREATE TABLE tbl_parq (
+  id INT,
+  payload STRING
+)
+STORED AS PARQUET
+TBLPROPERTIES (
+  'parquet.block.size'='16777216',
+  'parquet.page.size'='16777216',
+  'parquet.compression'='UNCOMPRESSED'
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_parq
+PREHOOK: query: INSERT INTO TABLE tbl_parq
+SELECT
+  1 AS id,
+  RPAD('x', 16777177, 'x') AS payload
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_parq
+POSTHOOK: query: INSERT INTO TABLE tbl_parq
+SELECT
+  1 AS id,
+  RPAD('x', 16777177, 'x') AS payload
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_parq
+POSTHOOK: Lineage: tbl_parq.id SIMPLE []
+POSTHOOK: Lineage: tbl_parq.payload SIMPLE []
+PREHOOK: query: SELECT LENGTH(payload) FROM tbl_parq
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_parq
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT LENGTH(payload) FROM tbl_parq
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_parq
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+16777177
+PREHOOK: query: SELECT SUM(LENGTH(payload)) FROM tbl_parq
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_parq
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT SUM(LENGTH(payload)) FROM tbl_parq
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_parq
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+16777177
