This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit e9758db48da3276f0a88143a463eaef904381468
Author: Ritik Raj <[email protected]>
AuthorDate: Tue Nov 18 02:46:41 2025 +0530

    [ASTERIXDB-3652][STO] Consider isColumnMissingForCurrentTuple while assembling
    
    - user model changes: no
    - storage format changes: no
    - interface changes: yes
    
    Details:
    There can be two cases where a column is missing:
    1. For a disk component, if a particular column was not
       present in all the leaves of the disk component, then the column
       is treated as allMissing.
    2. Across disk components, when Component1 has an array field
       with itemType Int, but Component2 has the same array field
       with itemType String, then once Component1 and Component2 are
       merged, the new merged Component1_2 will have both columns
       as not "allMissing". This can mislead the EndOfRepeatedGroup
       Assembler, which should only pick a non-missing column to end the
       array for the tuple. In this case, for tuple1 the Integer column
       should be the reader in EoGAssembler responsible for closing the
       array, not the missing String Column Assembler, and vice versa.
    
    Ext-ref:MB-69414
    Change-Id: I34d490a2089598b55e7cd8981a20f98b841fd998
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20584
    Reviewed-by: Peeyush Gupta <[email protected]>
    Integration-Tests: Jenkins <[email protected]>
    Tested-by: Ritik Raj <[email protected]>
---
 .../ASTERIXDB-3652-4/ASTERIXDB-3652.001.ddl.sqlpp  | 28 +++++++++++++++++
 .../ASTERIXDB-3652.002.update.sqlpp                | 25 ++++++++++++++++
 .../ASTERIXDB-3652.003.update.sqlpp                | 25 ++++++++++++++++
 .../ASTERIXDB-3652.004.query.sqlpp                 | 23 ++++++++++++++
 .../ASTERIXDB-3652-5/ASTERIXDB-3652.001.ddl.sqlpp  | 28 +++++++++++++++++
 .../ASTERIXDB-3652.002.update.sqlpp                | 25 ++++++++++++++++
 .../ASTERIXDB-3652.003.update.sqlpp                | 25 ++++++++++++++++
 .../ASTERIXDB-3652.004.query.sqlpp                 | 23 ++++++++++++++
 .../ASTERIXDB-3652-4/ASTERIXDB-3652.004.adm        |  2 ++
 .../ASTERIXDB-3652-5/ASTERIXDB-3652.004.adm        |  2 ++
 .../runtimets/testsuite_single_partition_sqlpp.xml | 10 +++++++
 .../assembler/AbstractPrimitiveValueAssembler.java |  6 +++-
 .../assembler/EndOfRepeatedGroupAssembler.java     | 11 +++----
 .../column/assembler/PrimitiveValueAssembler.java  |  2 +-
 .../assembler/RepeatedPrimitiveValueAssembler.java | 13 ++++----
 .../column/operation/query/ColumnAssembler.java    |  3 +-
 .../asterix/column/values/IColumnValuesReader.java | 23 ++++++++++----
 .../values/reader/AbstractColumnValuesReader.java  | 35 ++++++++++++++++++++--
 .../reader/AbstractDummyColumnValuesReader.java    | 15 ++++++----
 19 files changed, 297 insertions(+), 27 deletions(-)

diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.001.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.001.ddl.sqlpp
new file mode 100644
index 0000000000..b5514cfdf3
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.001.ddl.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+CREATE DATASET ColumnDataset
+PRIMARY KEY (name: String) WITH {
+ "storage-format": {"format": "column"}
+};
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.002.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.002.update.sqlpp
new file mode 100644
index 0000000000..462a581a02
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.002.update.sqlpp
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+UPSERT INTO ColumnDataset ({
+    "name": "A",
+    "public_likes": []
+});
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.003.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.003.update.sqlpp
new file mode 100644
index 0000000000..3722a07b2f
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.003.update.sqlpp
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+UPSERT INTO ColumnDataset ({
+    "name": "B",
+    "public_likes": ["A", "B"]
+});
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.004.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.004.query.sqlpp
new file mode 100644
index 0000000000..c368f8ae80
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.004.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+SELECT VALUE c
+FROM ColumnDataset c;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.001.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.001.ddl.sqlpp
new file mode 100644
index 0000000000..b5514cfdf3
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.001.ddl.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+CREATE DATASET ColumnDataset
+PRIMARY KEY (name: String) WITH {
+ "storage-format": {"format": "column"}
+};
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.002.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.002.update.sqlpp
new file mode 100644
index 0000000000..e113524800
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.002.update.sqlpp
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+UPSERT INTO ColumnDataset ({
+    "name": "A",
+    "public_likes": [1, 2, 3]
+});
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.003.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.003.update.sqlpp
new file mode 100644
index 0000000000..3722a07b2f
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.003.update.sqlpp
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+UPSERT INTO ColumnDataset ({
+    "name": "B",
+    "public_likes": ["A", "B"]
+});
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.004.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.004.query.sqlpp
new file mode 100644
index 0000000000..c368f8ae80
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.004.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+SELECT VALUE c
+FROM ColumnDataset c;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.004.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.004.adm
new file mode 100644
index 0000000000..a26bc2b98d
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.004.adm
@@ -0,0 +1,2 @@
+{ "name": "A", "public_likes": [  ] }
+{ "name": "B", "public_likes": [ "A", "B" ] }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.004.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.004.adm
new file mode 100644
index 0000000000..180d25fc13
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.004.adm
@@ -0,0 +1,2 @@
+{ "name": "A", "public_likes": [ 1, 2, 3 ] }
+{ "name": "B", "public_likes": [ "A", "B" ] }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_single_partition_sqlpp.xml
 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_single_partition_sqlpp.xml
index 4935ccec94..b248475c14 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_single_partition_sqlpp.xml
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_single_partition_sqlpp.xml
@@ -99,6 +99,16 @@
         <output-dir compare="Text">assembly/ASTERIXDB-3652-3</output-dir>
       </compilation-unit>
     </test-case>
+    <test-case FilePath="column">
+      <compilation-unit name="assembly/ASTERIXDB-3652-4">
+        <output-dir compare="Text">assembly/ASTERIXDB-3652-4</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="column">
+      <compilation-unit name="assembly/ASTERIXDB-3652-5">
+        <output-dir compare="Text">assembly/ASTERIXDB-3652-5</output-dir>
+      </compilation-unit>
+    </test-case>
     <test-case FilePath="column">
       <compilation-unit name="assembly/missing-inner-array">
         <output-dir compare="Text">assembly/missing-inner-array</output-dir>
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractPrimitiveValueAssembler.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractPrimitiveValueAssembler.java
index c565ea8550..9bf8ebc573 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractPrimitiveValueAssembler.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractPrimitiveValueAssembler.java
@@ -98,5 +98,9 @@ public abstract class AbstractPrimitiveValueAssembler extends 
AbstractValueAssem
      *
      * @return the index of the next value
      */
-    public abstract int next(AssemblerState state) throws HyracksDataException;
+    public abstract int next(int tupleIndex, AssemblerState state) throws 
HyracksDataException;
+
+    public void notifyCurrentTuple(int tupleIndex) {
+        reader.registerCurrentTuple(tupleIndex);
+    }
 }
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/EndOfRepeatedGroupAssembler.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/EndOfRepeatedGroupAssembler.java
index 63204d71b7..389176e3ec 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/EndOfRepeatedGroupAssembler.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/EndOfRepeatedGroupAssembler.java
@@ -47,11 +47,12 @@ public class EndOfRepeatedGroupAssembler extends 
AbstractPrimitiveValueAssembler
         // NoOp
     }
 
-    private IColumnValuesReader getNonMissingReader() {
+    private IColumnValuesReader getNonMissingReader(int tupleIndex) {
         IColumnValuesReader nonMissingReader = null;
         for (IColumnValuesReader r : readers) {
-            if (!r.areAllMissing()) {
+            if (!r.isColumnMissingForCurrentTuple(tupleIndex)) {
                 nonMissingReader = r;
+                break;
             }
         }
         if (nonMissingReader == null) {
@@ -61,9 +62,9 @@ public class EndOfRepeatedGroupAssembler extends 
AbstractPrimitiveValueAssembler
     }
 
     @Override
-    public int next(AssemblerState state) throws HyracksDataException {
-        if (reader.areAllMissing()) {
-            reader = getNonMissingReader();
+    public int next(int tupleIndex, AssemblerState state) throws 
HyracksDataException {
+        if (reader.isColumnMissingForCurrentTuple(tupleIndex)) {
+            reader = getNonMissingReader(tupleIndex);
             this.delimiterIndex = reader.getNumberOfDelimiters() - 
numDelimiters;
         }
         // Get the current delimiter index from the reader
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/PrimitiveValueAssembler.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/PrimitiveValueAssembler.java
index f5edc3158e..d799439056 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/PrimitiveValueAssembler.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/PrimitiveValueAssembler.java
@@ -44,7 +44,7 @@ final class PrimitiveValueAssembler extends 
AbstractPrimitiveValueAssembler {
     }
 
     @Override
-    public int next(AssemblerState state) throws HyracksDataException {
+    public int next(int tupleIndex, AssemblerState state) throws 
HyracksDataException {
         // Do not call next on PK readers as they are maintained by the cursor
         if (!primaryKey && !reader.next()) {
             throw createException();
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/RepeatedPrimitiveValueAssembler.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/RepeatedPrimitiveValueAssembler.java
index 67379c0125..e2eced8bae 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/RepeatedPrimitiveValueAssembler.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/RepeatedPrimitiveValueAssembler.java
@@ -39,15 +39,16 @@ final class RepeatedPrimitiveValueAssembler extends 
AbstractPrimitiveValueAssemb
     }
 
     @Override
-    public int next(AssemblerState state) throws HyracksDataException {
+    public int next(int tupleIndex, AssemblerState state) throws 
HyracksDataException {
         /*
          * Move to the next value if one of the following is true
          * - It is the first time we access this assembler (i.e., the first 
round)
          * - We are in an array (i.e., the parent array assembler is active)
          * - The value is a delimiter (i.e., the last round)
          */
-        if (!state.isInGroup() || reader.isRepeatedValue() || 
reader.isDelimiter() || reader.areAllMissing()) {
-            next();
+        if (!state.isInGroup() || reader.isRepeatedValue() || 
reader.isDelimiter()
+                || reader.isColumnMissingForCurrentTuple(tupleIndex)) {
+            next(tupleIndex);
         }
 
         if (isDelegate()) {
@@ -59,8 +60,8 @@ final class RepeatedPrimitiveValueAssembler extends 
AbstractPrimitiveValueAssemb
         return NEXT_ASSEMBLER;
     }
 
-    private void next() throws HyracksDataException {
-        if (reader.areAllMissing()) {
+    private void next(int tupleIndex) throws HyracksDataException {
+        if (reader.isColumnMissingForCurrentTuple(tupleIndex)) {
             // If all values are missing, we add missing to the ancestor at 
the lowest missing level
             addMissingToAncestor(reader.getLevel());
             return;
@@ -75,7 +76,7 @@ final class RepeatedPrimitiveValueAssembler extends 
AbstractPrimitiveValueAssemb
              * (i.e., arrayDelegate is true)
              */
             addNullToAncestor(reader.getLevel());
-        } else if (reader.isMissing() && reader.getLevel() < level) {
+        } else if ((reader.isMissing() && reader.getLevel() < level)) {
             /*
              * Add a missing item in either
              * - the array item is MISSING
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/ColumnAssembler.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/ColumnAssembler.java
index e6ff021168..cbfe14c4f5 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/ColumnAssembler.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/ColumnAssembler.java
@@ -81,10 +81,11 @@ public final class ColumnAssembler {
         int index = 0;
         while (index < assemblers.length) {
             AbstractPrimitiveValueAssembler assembler = assemblers[index];
+            assembler.notifyCurrentTuple(tupleIndex);
             int groupIndex;
 
             try {
-                groupIndex = assembler.next(state);
+                groupIndex = assembler.next(tupleIndex, state);
             } catch (ColumnarValueException e) {
                 appendInformation(e);
                 throw e;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/IColumnValuesReader.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/IColumnValuesReader.java
index 22cbb9fdde..76588e1a6f 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/IColumnValuesReader.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/IColumnValuesReader.java
@@ -26,12 +26,6 @@ import org.apache.hyracks.data.std.api.IValueReference;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 
 public interface IColumnValuesReader extends Comparable<IColumnValuesReader> {
-    /**
-     * Indicates if the column is missing in the leaf
-     * @return
-     */
-    boolean areAllMissing();
-
     /**
      * Reset the reader
      *
@@ -156,4 +150,21 @@ public interface IColumnValuesReader extends 
Comparable<IColumnValuesReader> {
      * @param node container for the reader's information
      */
     void appendReaderInformation(ObjectNode node);
+
+    /**
+     * There can be two cases when a column is missing for tuple:
+     * 1. column is not at all present in the leaf
+     * 2. column is present in the leaf, but for this tuple it was missing
+     * @param tupleIndex
+     * @return
+     */
+    boolean isColumnMissingForCurrentTuple(int tupleIndex);
+
+    /**
+     * Registers the current tuple index. This index is used to determine
+     * whether the value being read is the first value associated with
+     * the given tuple.
+     * @param tupleIndex
+     */
+    void registerCurrentTuple(int tupleIndex);
 }
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java
index 3dead4aed1..62d0aadea9 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java
@@ -60,6 +60,10 @@ abstract class AbstractColumnValuesReader implements 
IColumnValuesReader {
     private boolean nullLevel;
     private boolean allMissing;
 
+    private int previousTupleIndex;
+    private int currentTupleIndex;
+    private boolean firstValueForCurrentTuple;
+
     // For logging purposes only
     private int numberOfEncounteredMissing;
     private int numberOfEncounteredNull;
@@ -74,6 +78,7 @@ abstract class AbstractColumnValuesReader implements 
IColumnValuesReader {
         currentDefinitionLevels = definitionLevels.get(maxLevel);
         valuesStream = primaryKey ? new ByteBufferInputStream() : new 
MultiByteBufferInputStream();
         this.primaryKey = primaryKey;
+        this.previousTupleIndex = -1;
     }
 
     final void nextLevel() throws HyracksDataException {
@@ -89,6 +94,13 @@ abstract class AbstractColumnValuesReader implements 
IColumnValuesReader {
             nullLevel = ColumnValuesUtil.isNull(nullBitMask, actualLevel);
             //Clear the null bit to allow repeated value readers determine the 
correct delimiter for null values
             level = ColumnValuesUtil.clearNullBit(nullBitMask, actualLevel);
+            if (currentTupleIndex != previousTupleIndex) {
+                // We are at the first value for the current tuple
+                firstValueForCurrentTuple = true;
+                previousTupleIndex = currentTupleIndex;
+            } else {
+                firstValueForCurrentTuple = false;
+            }
 
             // For logging purposes only
             numberOfEncounteredMissing += isMissing() ? 1 : 0;
@@ -104,8 +116,24 @@ abstract class AbstractColumnValuesReader implements 
IColumnValuesReader {
     }
 
     @Override
-    public boolean areAllMissing() {
-        return allMissing;
+    public boolean isColumnMissingForCurrentTuple(int tupleIndex) {
+        return allMissing || missingColumnForCurrentTuple(tupleIndex);
+    }
+
+    private boolean missingColumnForCurrentTuple(int tupleIndex) {
+        // A column is considered missing for the current tuple if:
+        // 1- The level is 0 (indicating that there are no values present for 
this column)
+        // 2- This is the first value for the current tuple (to avoid false 
positives in repeated structures)
+        // 3- The previous tuple index matches the current tuple
+
+        // The last condition ensures that we are correctly checking for the 
same tuple index.
+        // and not using the "firstValueForCurrentTuple" from previous tuple.
+        return (level == 0 && firstValueForCurrentTuple && previousTupleIndex 
== tupleIndex);
+    }
+
+    @Override
+    public void registerCurrentTuple(int tupleIndex) {
+        currentTupleIndex = tupleIndex;
     }
 
     abstract void resetValues();
@@ -123,6 +151,9 @@ abstract class AbstractColumnValuesReader implements 
IColumnValuesReader {
         }
         allMissing = false;
         try {
+            previousTupleIndex = -1;
+            firstValueForCurrentTuple = false;
+
             maxLevel = BytesUtils.readZigZagVarInt(in);
             nullBitMask = ColumnValuesUtil.getNullMask(maxLevel);
 
diff --git 
a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/values/reader/AbstractDummyColumnValuesReader.java
 
b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/values/reader/AbstractDummyColumnValuesReader.java
index ed76cbdf53..8dbd63dd5c 100644
--- 
a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/values/reader/AbstractDummyColumnValuesReader.java
+++ 
b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/values/reader/AbstractDummyColumnValuesReader.java
@@ -85,11 +85,6 @@ public abstract class AbstractDummyColumnValuesReader 
implements IColumnValuesRe
         //noOp
     }
 
-    @Override
-    public boolean areAllMissing() {
-        return false;
-    }
-
     @Override
     public final ATypeTag getTypeTag() {
         return typeTag;
@@ -174,4 +169,14 @@ public abstract class AbstractDummyColumnValuesReader 
implements IColumnValuesRe
         node.put("level", level);
         node.put("maxLevel", maxLevel);
     }
+
+    @Override
+    public void registerCurrentTuple(int tupleIndex) {
+
+    }
+
+    @Override
+    public boolean isColumnMissingForCurrentTuple(int tupleIndex) {
+        return false;
+    }
 }

Reply via email to