This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit 69b31add738949df2fbb10f62956e7f6bb77530e
Author: Ritik Raj <[email protected]>
AuthorDate: Wed Oct 8 11:50:15 2025 +0530

    [ASTERIXDB-3652][STO] Fixed column assembler issues
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    There were a few issues related to union + objects + array.
    
    In case of a union in objects, introduced a logical barrier
    which, in case of missing, collects all the missing values and
    only propagates them once it has encountered all the children
    for the current level, preventing an extra null or missing from
    being propagated above.
    
    There can be cases where a particular column is not present
    and the EndOfGroupAssembler uses that column as a delegate;
    it is then unable to correctly determine the end of the array,
    so the value is not propagated to the parent.
    
    Ext-ref: MB-68881
    Change-Id: I782d17f81d00210a4ca8673cc5fbcf556fc4758c
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20469
    Integration-Tests: Jenkins <[email protected]>
    Reviewed-by: Peeyush Gupta <[email protected]>
    Tested-by: Jenkins <[email protected]>
---
 .../ASTERIXDB-3652-1/ASTERIXDB-3652.001.ddl.sqlpp  |  28 +++++
 .../ASTERIXDB-3652.002.update.sqlpp                |  32 +++++
 .../ASTERIXDB-3652.003.query.sqlpp                 |  23 ++++
 .../ASTERIXDB-3652-2/ASTERIXDB-3652.001.ddl.sqlpp  |  28 +++++
 .../ASTERIXDB-3652.002.update.sqlpp                |  31 +++++
 .../ASTERIXDB-3652.003.query.sqlpp                 |  23 ++++
 .../ASTERIXDB-3652-3/ASTERIXDB-3652.001.ddl.sqlpp  |  28 +++++
 .../ASTERIXDB-3652.002.update.sqlpp                |  30 +++++
 .../ASTERIXDB-3652.003.query.sqlpp                 |  24 ++++
 .../ASTERIXDB-3652-1/ASTERIXDB-3652.003.adm        |   1 +
 .../ASTERIXDB-3652-2/ASTERIXDB-3652.003.adm        |   1 +
 .../ASTERIXDB-3652-3/ASTERIXDB-3652.003.adm        |   2 +
 .../src/test/resources/runtimets/sqlpp_queries.xml |  15 +++
 .../runtimets/testsuite_single_partition_sqlpp.xml |  15 +++
 .../assembler/AbstractNestedValueAssembler.java    |  10 +-
 .../assembler/AbstractPrimitiveValueAssembler.java |   2 +-
 .../assembler/ArrayWithUnionValueAssembler.java    | 133 +++++++++++++++++---
 .../column/assembler/AssemblerBuilderVisitor.java  |  22 +++-
 .../assembler/EndOfRepeatedGroupAssembler.java     |  36 +++++-
 .../column/assembler/ObjectValueAssembler.java     | 138 ++++++++++++++++++---
 .../column/assembler/PrimitiveValueAssembler.java  |   4 +-
 .../assembler/RepeatedPrimitiveValueAssembler.java |  33 ++---
 .../column/metadata/schema/ObjectSchemaNode.java   |   5 +-
 .../column/metadata/schema/UnionSchemaNode.java    |   2 +-
 .../collection/AbstractCollectionSchemaNode.java   |   2 +-
 .../operation/lsm/flush/BatchFinalizerVisitor.java |   1 -
 .../operation/lsm/flush/FlushColumnMetadata.java   |  18 ++-
 .../asterix/column/values/IColumnValuesReader.java |   6 +
 .../values/reader/AbstractColumnValuesReader.java  |   9 +-
 .../reader/AbstractDummyColumnValuesReader.java    |   5 +
 .../column/cloud/sweep/ColumnSweepPlanner.java     |   4 +
 31 files changed, 634 insertions(+), 77 deletions(-)

diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.001.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.001.ddl.sqlpp
new file mode 100644
index 0000000000..d151eb1317
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.001.ddl.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+ DROP DATAVERSE test IF EXISTS;
+ CREATE DATAVERSE test;
+
+ USE test;
+
+ CREATE DATASET ColumnDataset
+ PRIMARY KEY (uid:int) WITH {
+     "storage-format": {"format": "column"}
+ };
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.002.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.002.update.sqlpp
new file mode 100644
index 0000000000..7cd0e59329
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.002.update.sqlpp
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+UPSERT INTO ColumnDataset(
+{
+  "uid": 1,
+  "key_0": [
+    "array_value_58",
+    866,
+    [
+      939
+    ]
+  ]
+}
+);
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.003.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.003.query.sqlpp
new file mode 100644
index 0000000000..198869a5e7
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.003.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+ USE test;
+
+ SELECT VALUE c
+ FROM ColumnDataset c;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.001.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.001.ddl.sqlpp
new file mode 100644
index 0000000000..1f2f3baebb
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.001.ddl.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+CREATE DATASET ColumnDataset
+PRIMARY KEY (uid:int) WITH {
+ "storage-format": {"format": "column"}
+};
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.002.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.002.update.sqlpp
new file mode 100644
index 0000000000..b9443fd8e8
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.002.update.sqlpp
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+UPSERT INTO ColumnDataset(
+{
+  "uid": 1,
+  "k": [
+    { "i": [ 1, null, 3 ] },
+    { },             -- i missing
+    { "i": null },   -- i explicit null
+    null             -- null element in outer array
+  ]
+}
+);
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.003.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.003.query.sqlpp
new file mode 100644
index 0000000000..198869a5e7
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.003.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+ USE test;
+
+ SELECT VALUE c
+ FROM ColumnDataset c;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.001.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.001.ddl.sqlpp
new file mode 100644
index 0000000000..1f2f3baebb
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.001.ddl.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+CREATE DATASET ColumnDataset
+PRIMARY KEY (uid:int) WITH {
+ "storage-format": {"format": "column"}
+};
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.002.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.002.update.sqlpp
new file mode 100644
index 0000000000..8a8dd7b5a0
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.002.update.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+UPSERT INTO ColumnDataset({
+"uid": 1,
+"a": {}
+});
+
+UPSERT INTO ColumnDataset({
+"uid": 2,
+"a": {"x": null}
+});
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.003.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.003.query.sqlpp
new file mode 100644
index 0000000000..99f26c0fa1
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.003.query.sqlpp
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+SELECT VALUE c
+FROM ColumnDataset c
+ORDER BY c.uid;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.003.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.003.adm
new file mode 100644
index 0000000000..5ce24caaf4
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.003.adm
@@ -0,0 +1 @@
+{ "uid": 1, "key_0": [ "array_value_58", 866, [ 939 ] ] }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.003.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.003.adm
new file mode 100644
index 0000000000..0dac6ee7a8
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.003.adm
@@ -0,0 +1 @@
+{ "uid": 1, "k": [ { "i": [ 1, null, 3 ] }, {  }, { "i": null }, null ] }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.003.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.003.adm
new file mode 100644
index 0000000000..6b69025b94
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.003.adm
@@ -0,0 +1,2 @@
+{ "uid": 1, "a": {  } }
+{ "uid": 2, "a": { "x": null } }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml 
b/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml
index 35a27bae5e..374952c72e 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml
@@ -16605,6 +16605,21 @@
         <output-dir compare="Text">assembly/missing-inner-array</output-dir>
       </compilation-unit>
     </test-case>
+    <test-case FilePath="column">
+      <compilation-unit name="assembly/ASTERIXDB-3652-1">
+        <output-dir compare="Text">assembly/ASTERIXDB-3652-1</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="column">
+      <compilation-unit name="assembly/ASTERIXDB-3652-2">
+        <output-dir compare="Text">assembly/ASTERIXDB-3652-2</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="column">
+      <compilation-unit name="assembly/ASTERIXDB-3652-3">
+        <output-dir compare="Text">assembly/ASTERIXDB-3652-3</output-dir>
+      </compilation-unit>
+    </test-case>
     <test-case FilePath="column">
       <compilation-unit name="filter/ASTERIXDB-3499">
         <output-dir compare="Text">filter/ASTERIXDB-3499</output-dir>
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_single_partition_sqlpp.xml
 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_single_partition_sqlpp.xml
index f48b00d0e0..4935ccec94 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_single_partition_sqlpp.xml
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_single_partition_sqlpp.xml
@@ -84,6 +84,21 @@
         <output-dir compare="Text">assembly/ASTERIXDB-3539</output-dir>
       </compilation-unit>
     </test-case>
+    <test-case FilePath="column">
+      <compilation-unit name="assembly/ASTERIXDB-3652-1">
+        <output-dir compare="Text">assembly/ASTERIXDB-3652-1</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="column">
+      <compilation-unit name="assembly/ASTERIXDB-3652-2">
+        <output-dir compare="Text">assembly/ASTERIXDB-3652-2</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="column">
+      <compilation-unit name="assembly/ASTERIXDB-3652-3">
+        <output-dir compare="Text">assembly/ASTERIXDB-3652-3</output-dir>
+      </compilation-unit>
+    </test-case>
     <test-case FilePath="column">
       <compilation-unit name="assembly/missing-inner-array">
         <output-dir compare="Text">assembly/missing-inner-array</output-dir>
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractNestedValueAssembler.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractNestedValueAssembler.java
index 13820e0e2e..4e17246525 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractNestedValueAssembler.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractNestedValueAssembler.java
@@ -59,7 +59,7 @@ public abstract class AbstractNestedValueAssembler extends 
AbstractValueAssemble
     }
 
     @Override
-    final void addNullToAncestor(int nullLevel) throws HyracksDataException {
+    void addNullToAncestor(int nullLevel) throws HyracksDataException {
         AbstractNestedValueAssembler parent = getParent();
         if (nullLevel + 1 == level) {
             parent.start();
@@ -70,7 +70,7 @@ public abstract class AbstractNestedValueAssembler extends 
AbstractValueAssemble
     }
 
     @Override
-    final void addMissingToAncestor(int missingLevel) throws 
HyracksDataException {
+    void addMissingToAncestor(int missingLevel) throws HyracksDataException {
         AbstractNestedValueAssembler parent = getParent();
         if (missingLevel + 1 == level) {
             parent.start();
@@ -107,5 +107,11 @@ public abstract class AbstractNestedValueAssembler extends 
AbstractValueAssemble
         if (isDelegate()) {
             getParent().end();
         }
+
+        clearStateOnEnd();
+    }
+
+    public void clearStateOnEnd() {
+
     }
 }
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractPrimitiveValueAssembler.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractPrimitiveValueAssembler.java
index eb2ddf866e..c565ea8550 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractPrimitiveValueAssembler.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractPrimitiveValueAssembler.java
@@ -30,7 +30,7 @@ public abstract class AbstractPrimitiveValueAssembler extends 
AbstractValueAssem
      */
     public static final int NEXT_ASSEMBLER = -1;
     protected final IValueGetter primitiveValueGetter;
-    protected final IColumnValuesReader reader;
+    protected IColumnValuesReader reader;
 
     AbstractPrimitiveValueAssembler(int level, AssemblerInfo info, 
IColumnValuesReader reader,
             IValueGetter primitiveValueGetter) {
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/ArrayWithUnionValueAssembler.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/ArrayWithUnionValueAssembler.java
index c2d6c42891..6eadbd6126 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/ArrayWithUnionValueAssembler.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/ArrayWithUnionValueAssembler.java
@@ -22,10 +22,23 @@ import 
org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
 import org.apache.asterix.column.metadata.schema.UnionSchemaNode;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 
+/**
+ * Handles assembling array elements whose items are union types.
+ * Manages propagation of nulls/missings to parent assemblers and
+ * chunk completion logic per union branch.
+ */
 public class ArrayWithUnionValueAssembler extends ArrayValueAssembler {
+
     private final int numberOfUnionChildren;
+
     private int numberOfAddedValues;
     private boolean nonMissingValueAdded;
+    private boolean firstTime = true;
+
+    private boolean nullAncestor;
+    private int nullClampedLevel;
+    private int missingClampedLevel;
+    private boolean parentMissing = true;
 
     ArrayWithUnionValueAssembler(int level, AssemblerInfo info, int 
firstValueIndex, AbstractSchemaNode itemNode) {
         super(level, info, firstValueIndex);
@@ -33,46 +46,138 @@ public class ArrayWithUnionValueAssembler extends 
ArrayValueAssembler {
     }
 
     @Override
-    void reset() {
-        numberOfAddedValues = 0;
+    public void reset() {
+        // Reset called before reading new array item
+        if (firstTime) {
+            resetCounters();
+        }
         nonMissingValueAdded = false;
         super.reset();
     }
 
     @Override
     void addValue(AbstractValueAssembler value) throws HyracksDataException {
+        parentMissing = false;
         nonMissingValueAdded = true;
         numberOfAddedValues++;
+
         super.addValue(value);
-        if (numberOfAddedValues == numberOfUnionChildren) {
-            // Completed a chunk; since we saw a non-missing, just reset the 
counters
-            nonMissingValueAdded = false;
-            numberOfAddedValues = 0;
+
+        if (isChunkComplete()) {
+            completeChunk();
         }
     }
 
     @Override
     void addNull(AbstractValueAssembler value) throws HyracksDataException {
+        parentMissing = false;
         nonMissingValueAdded = true;
         numberOfAddedValues++;
+
         super.addNull(value);
-        if (numberOfAddedValues == numberOfUnionChildren) {
-            // Completed a chunk; since we saw a non-missing, just reset the 
counters
-            nonMissingValueAdded = false;
-            numberOfAddedValues = 0;
+
+        if (isChunkComplete()) {
+            completeChunk();
         }
     }
 
     @Override
     void addMissing() throws HyracksDataException {
+        parentMissing = false;
         numberOfAddedValues++;
-        if (numberOfAddedValues == numberOfUnionChildren) {
+
+        if (isChunkComplete()) {
+            // Only propagate missing if no non-missing values appeared in 
this chunk
             if (!nonMissingValueAdded) {
                 super.addMissing();
             }
-            // Reset for the next chunk
-            numberOfAddedValues = 0;
-            nonMissingValueAdded = false;
+            completeChunk();
+        }
+    }
+
+    @Override
+    void addNullToAncestor(int nullLevel) throws HyracksDataException {
+        numberOfAddedValues++;
+        firstTime = false;
+        nullAncestor = true;
+        nullClampedLevel = Math.max(nullLevel, nullClampedLevel);
+
+        if (isChunkComplete()) {
+            if (parentMissing) {
+                propagateNullOrMissingToParent(true);
+            }
+            completeChunk();
+        }
+    }
+
+    @Override
+    void addMissingToAncestor(int missingLevel) throws HyracksDataException {
+        numberOfAddedValues++;
+        firstTime = false;
+        missingClampedLevel = Math.max(missingLevel, missingClampedLevel);
+
+        if (isChunkComplete()) {
+            if (parentMissing) {
+                propagateNullOrMissingToParent(false);
+            }
+            completeChunk();
+        }
+    }
+
+    private void propagateNullOrMissingToParent(boolean isNull) throws 
HyracksDataException {
+        AbstractNestedValueAssembler parent = getParent();
+        if (parent == null) {
+            return;
+        }
+
+        // If any ancestor was null, always propagate null
+        if (isNull || nullAncestor) {
+            boolean atCurrentLevel = (nullClampedLevel + 1) == level;
+            if (atCurrentLevel) {
+                parent.start();
+                parent.addNull(this);
+            } else {
+                parent.addNullToAncestor(nullClampedLevel);
+            }
+        } else {
+            boolean atCurrentLevel = (missingClampedLevel + 1) == level;
+            if (atCurrentLevel) {
+                parent.start();
+                parent.addMissing();
+            } else {
+                parent.addMissingToAncestor(missingClampedLevel);
+            }
         }
     }
+
+    private boolean isChunkComplete() {
+        return numberOfAddedValues == numberOfUnionChildren;
+    }
+
+    private void completeChunk() {
+        // Reset chunk-local state
+        resetCounters();
+        firstTime = true;
+    }
+
+    private void resetCounters() {
+        numberOfAddedValues = 0;
+        nonMissingValueAdded = false;
+        parentMissing = true;
+        nullAncestor = false;
+        nullClampedLevel = 0;
+        missingClampedLevel = 0;
+    }
+
+    @Override
+    void addValueToParent() throws HyracksDataException {
+        super.addValueToParent();
+        clearStateOnEnd();
+    }
+
+    @Override
+    public void clearStateOnEnd() {
+        resetCounters();
+        firstTime = true;
+    }
 }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
index cb447c85e8..62eab35283 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
@@ -58,6 +58,7 @@ public class AssemblerBuilderVisitor implements 
ISchemaNodeVisitor<AbstractValue
     private final IValueGetterFactory valueGetterFactory;
     private final Map<Integer, IColumnValuesReader> primaryKeyReaders;
     private AbstractValueAssembler rootAssembler;
+    private List<IColumnValuesReader> endOfGroupPrimitiveReaders;
 
     //Recursion info
     private final IntList delimiters;
@@ -72,6 +73,7 @@ public class AssemblerBuilderVisitor implements 
ISchemaNodeVisitor<AbstractValue
         valueAssemblers = new ArrayList<>();
         delimiters = new IntArrayList();
         primaryKeyReaders = new HashMap<>();
+        this.endOfGroupPrimitiveReaders = new ArrayList<>();
         for (IColumnValuesReader reader : 
columnMetadata.getPrimaryKeyReaders()) {
             primaryKeyReaders.put(reader.getColumnIndex(), reader);
         }
@@ -92,11 +94,11 @@ public class AssemblerBuilderVisitor implements 
ISchemaNodeVisitor<AbstractValue
 
     @Override
     public AbstractValueAssembler visit(ObjectSchemaNode objectNode, 
AssemblerInfo info) throws HyracksDataException {
+        IntList childrenFieldNameIndexes = 
objectNode.getChildrenFieldNameIndexes();
+        int effectiveNumberOfChildren = childrenFieldNameIndexes.size();
         ObjectValueAssembler objectAssembler = new ObjectValueAssembler(level, 
info);
         level++;
-
         BitSet declaredFields = handleDeclaredFields(objectNode, info, 
objectAssembler);
-        IntList childrenFieldNameIndexes = 
objectNode.getChildrenFieldNameIndexes();
         int numberOfAddedChildren = declaredFields.cardinality();
         if (numberOfAddedChildren < childrenFieldNameIndexes.size()) {
             // Now handle any open fields
@@ -110,11 +112,17 @@ public class AssemblerBuilderVisitor implements 
ISchemaNodeVisitor<AbstractValue
                     //The last child should be a delegate
                     boolean delegate = numberOfAddedChildren == 
childrenFieldNameIndexes.size();
                     AssemblerInfo childInfo = new AssemblerInfo(childType, 
objectAssembler, delegate, fieldName);
+                    if (childNode instanceof UnionSchemaNode) {
+                        //UnionSchemaNode does not actually exist. We know the 
parent of the union could have items of multiple types.
+                        //Thus, the union's parent is the actual parent for 
all the union types
+                        effectiveNumberOfChildren += ((UnionSchemaNode) 
childNode).getChildren().size() - 1;
+                    }
                     childNode.accept(this, childInfo);
                 }
             }
         }
 
+        objectAssembler.setNumberOfEffectiveFields(effectiveNumberOfChildren);
         level--;
         return objectAssembler;
     }
@@ -163,6 +171,9 @@ public class AssemblerBuilderVisitor implements 
ISchemaNodeVisitor<AbstractValue
         RepeatedPrimitiveValueAssembler previousDelegate = delegateAssembler;
         delegateAssembler = null;
 
+        List<IColumnValuesReader> previousEndOfGroupPrimitiveReaders = 
endOfGroupPrimitiveReaders;
+        endOfGroupPrimitiveReaders = new ArrayList<>();
+
         IAType itemDeclaredType = getChildType(itemNode, 
declaredType.getItemType());
         AssemblerInfo itemInfo = new AssemblerInfo(itemDeclaredType, 
arrayAssembler, false);
         itemNode.accept(this, itemInfo);
@@ -170,12 +181,12 @@ public class AssemblerBuilderVisitor implements 
ISchemaNodeVisitor<AbstractValue
         // if delegateAssembler is null, that means no column will be accessed
         if (delegateAssembler != null) {
             // Set repeated assembler as a delegate (responsible for writing 
null values)
-            delegateAssembler.setAsDelegate(level - 1);
             IColumnValuesReader reader = delegateAssembler.getReader();
             int numberOfDelimiters = reader.getNumberOfDelimiters();
             // End of group assembler is responsible to finalize 
array/multiset builders
             EndOfRepeatedGroupAssembler endOfGroupAssembler =
-                    new EndOfRepeatedGroupAssembler(reader, arrayAssembler, 
numberOfDelimiters - delimiters.size());
+                    new 
EndOfRepeatedGroupAssembler(endOfGroupPrimitiveReaders, arrayAssembler, 
delimiters.size());
+            
previousEndOfGroupPrimitiveReaders.addAll(endOfGroupPrimitiveReaders);
             valueAssemblers.add(endOfGroupAssembler);
         }
 
@@ -185,6 +196,8 @@ public class AssemblerBuilderVisitor implements 
ISchemaNodeVisitor<AbstractValue
             // Return the delegate assembler to the previous one
             delegateAssembler = previousDelegate;
         }
+
+        endOfGroupPrimitiveReaders = previousEndOfGroupPrimitiveReaders;
         return arrayAssembler;
     }
 
@@ -216,6 +229,7 @@ public class AssemblerBuilderVisitor implements 
ISchemaNodeVisitor<AbstractValue
 
             assembler = new RepeatedPrimitiveValueAssembler(level, info, 
reader, valueGetter);
             setDelegate(reader, (RepeatedPrimitiveValueAssembler) assembler);
+            endOfGroupPrimitiveReaders.add(reader);
 
         } else {
             IColumnValuesReader reader;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/EndOfRepeatedGroupAssembler.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/EndOfRepeatedGroupAssembler.java
index 805f4934de..63204d71b7 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/EndOfRepeatedGroupAssembler.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/EndOfRepeatedGroupAssembler.java
@@ -18,21 +18,27 @@
  */
 package org.apache.asterix.column.assembler;
 
+import java.util.List;
+
 import org.apache.asterix.column.assembler.value.MissingValueGetter;
 import org.apache.asterix.column.bytes.stream.in.AbstractBytesInputStream;
 import org.apache.asterix.column.values.IColumnValuesReader;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 
 public class EndOfRepeatedGroupAssembler extends 
AbstractPrimitiveValueAssembler {
-    //    private final List<ArrayValueAssembler> arrays;
+    private final List<IColumnValuesReader> readers;
     private final ArrayValueAssembler arrayAssembler;
-    private final int delimiterIndex;
+    private final int numDelimiters;
+    private int delimiterIndex;
     private EndOfRepeatedGroupAssembler previousGroup;
 
-    EndOfRepeatedGroupAssembler(IColumnValuesReader reader, 
ArrayValueAssembler arrayAssembler, int delimiterIndex) {
-        super(reader.getLevel(), new AssemblerInfo(), reader, 
MissingValueGetter.INSTANCE);
+    EndOfRepeatedGroupAssembler(List<IColumnValuesReader> readers, 
ArrayValueAssembler arrayAssembler,
+            int numDelimiters) {
+        super(readers.get(0).getLevel(), new AssemblerInfo(), readers.get(0), 
MissingValueGetter.INSTANCE);
+        this.readers = readers;
         this.arrayAssembler = arrayAssembler;
-        this.delimiterIndex = delimiterIndex;
+        this.numDelimiters = numDelimiters;
+        this.delimiterIndex = readers.get(0).getNumberOfDelimiters() - 
numDelimiters;
         previousGroup = null;
     }
 
@@ -41,8 +47,25 @@ public class EndOfRepeatedGroupAssembler extends 
AbstractPrimitiveValueAssembler
         // NoOp
     }
 
+    private IColumnValuesReader getNonMissingReader() {
+        IColumnValuesReader nonMissingReader = null;
+        for (IColumnValuesReader r : readers) {
+            if (!r.areAllMissing()) {
+                nonMissingReader = r;
+            }
+        }
+        if (nonMissingReader == null) {
+            return readers.get(0); // all are missing, return the first one
+        }
+        return nonMissingReader;
+    }
+
     @Override
     public int next(AssemblerState state) throws HyracksDataException {
+        if (reader.areAllMissing()) {
+            reader = getNonMissingReader();
+            this.delimiterIndex = reader.getNumberOfDelimiters() - 
numDelimiters;
+        }
         // Get the current delimiter index from the reader
         int delimiterIndex = reader.getDelimiterIndex();
         /*
@@ -54,6 +77,9 @@ public class EndOfRepeatedGroupAssembler extends 
AbstractPrimitiveValueAssembler
             if (arrayAssembler.isDelegate()) {
                 // Yes it is a delegate, end the arrayAssembler to signal to 
the parent assembler to finalize
                 arrayAssembler.end();
+            } else {
+                // since the current assembled values are for a higher nesting 
level, we need to clear the state
+                arrayAssembler.clearStateOnEnd();
             }
             // Move ot the next assembler
             return NEXT_ASSEMBLER;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/ObjectValueAssembler.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/ObjectValueAssembler.java
index 536ce02005..8870258872 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/ObjectValueAssembler.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/ObjectValueAssembler.java
@@ -24,50 +24,160 @@ import 
org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.data.std.api.IValueReference;
 
 public class ObjectValueAssembler extends AbstractNestedValueAssembler {
+
     private final RecordBuilder recordBuilder;
     private final ARecordType recordType;
 
+    private int effectiveFieldCount;
+    private int visitedCount;
+    private boolean hasNonMissingValue;
+    private boolean firstTime;
+    private boolean hasNullAncestor;
+    // These are used to know the highest level of null/missing ancestor to 
propagate
+    // because some of the fields may be allMissing (as in column was not 
present)
+    private int nullClampedLevel;
+    private int missingClampedLevel;
+
     ObjectValueAssembler(int level, AssemblerInfo info) {
         super(level, info);
-        recordBuilder = new RecordBuilder();
-        recordType = (ARecordType) info.getDeclaredType();
+        this.recordBuilder = new RecordBuilder();
+        this.recordType = (ARecordType) info.getDeclaredType();
+        clearState();
     }
 
     @Override
-    void reset() {
+    public void reset() {
         recordBuilder.reset(recordType);
         storage.reset();
+        if (firstTime) {
+            clearState();
+        }
     }
 
     @Override
     void addValue(AbstractValueAssembler value) throws HyracksDataException {
-        int valueIndex = value.getFieldIndex();
-        if (valueIndex >= 0) {
-            recordBuilder.addField(valueIndex, value.getValue());
-        } else {
-            recordBuilder.addField(value.getFieldName(), value.getValue());
+        visitedCount++;
+        hasNonMissingValue = true;
+        addField(value, value.getValue());
+
+        if (isChunkComplete()) {
+            finalizeChunk();
         }
     }
 
     @Override
     void addNull(AbstractValueAssembler value) throws HyracksDataException {
-        int valueIndex = value.getFieldIndex();
-        if (valueIndex >= 0) {
-            recordBuilder.addField(valueIndex, NULL);
-        } else {
-            recordBuilder.addField(value.getFieldName(), NULL);
+        visitedCount++;
+        hasNonMissingValue = true;
+        addField(value, NULL);
+
+        if (isChunkComplete()) {
+            finalizeChunk();
+        }
+    }
+
+    @Override
+    void addMissing() throws HyracksDataException {
+        visitedCount++;
+        hasNonMissingValue = true;
+        if (isChunkComplete()) {
+            finalizeChunk();
         }
     }
 
+    @Override
+    void addNullToAncestor(int nullLevel) throws HyracksDataException {
+        handleAncestorAddition(nullLevel, true);
+    }
+
+    @Override
+    void addMissingToAncestor(int missingLevel) throws HyracksDataException {
+        handleAncestorAddition(missingLevel, false);
+    }
+
+    void setNumberOfEffectiveFields(int numberOfEffectiveFields) {
+        this.effectiveFieldCount = numberOfEffectiveFields;
+    }
+
     @Override
     void addValueToParent() throws HyracksDataException {
         storage.reset();
         recordBuilder.write(storage.getDataOutput(), true);
         getParent().addValue(this);
+        clearStateOnEnd();
     }
 
     @Override
     public IValueReference getValue() {
         return storage;
     }
-}
+
+    @Override
+    public void clearStateOnEnd() {
+        clearState();
+    }
+
+    // 
-------------------------------------------------------------------------
+    // Internal helpers
+    // 
-------------------------------------------------------------------------
+
+    private void addField(AbstractValueAssembler value, IValueReference 
fieldValue) throws HyracksDataException {
+        int fieldIndex = value.getFieldIndex();
+        if (fieldIndex >= 0) {
+            recordBuilder.addField(fieldIndex, fieldValue);
+        } else {
+            recordBuilder.addField(value.getFieldName(), fieldValue);
+        }
+    }
+
+    private void handleAncestorAddition(int level, boolean isNull) throws 
HyracksDataException {
+        firstTime = false;
+        visitedCount++;
+
+        if (isNull) {
+            hasNullAncestor = true;
+            nullClampedLevel = Math.max(nullClampedLevel, level);
+        } else {
+            missingClampedLevel = Math.max(missingClampedLevel, level);
+        }
+
+        if (isChunkComplete() && !hasNonMissingValue) {
+            propagateToAncestor(); // propagate only if no values were seen
+        }
+
+        if (isChunkComplete()) {
+            finalizeChunk();
+        }
+    }
+
+    private boolean isChunkComplete() {
+        return visitedCount == effectiveFieldCount;
+    }
+
+    private void finalizeChunk() {
+        clearChunkState();
+        firstTime = true; // ready for next chunk
+    }
+
+    private void propagateToAncestor() throws HyracksDataException {
+        // propagate nulls preferentially
+        if (hasNullAncestor) {
+            super.addNullToAncestor(nullClampedLevel);
+        } else {
+            super.addMissingToAncestor(missingClampedLevel);
+        }
+    }
+
+    private void clearChunkState() {
+        visitedCount = 0;
+        hasNullAncestor = false;
+        hasNonMissingValue = false;
+        nullClampedLevel = 0;
+        missingClampedLevel = 0;
+    }
+
+    private void clearState() {
+        clearChunkState();
+        firstTime = true;
+    }
+}
\ No newline at end of file
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/PrimitiveValueAssembler.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/PrimitiveValueAssembler.java
index 520f1d5e1c..f5edc3158e 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/PrimitiveValueAssembler.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/PrimitiveValueAssembler.java
@@ -48,9 +48,9 @@ final class PrimitiveValueAssembler extends 
AbstractPrimitiveValueAssembler {
         // Do not call next on PK readers as they are maintained by the cursor
         if (!primaryKey && !reader.next()) {
             throw createException();
-        } else if (reader.isNull() && (isDelegate() || reader.getLevel() + 1 
== level)) {
+        } else if (reader.isNull()) {
             addNullToAncestor(reader.getLevel());
-        } else if (reader.isMissing() && isDelegate() && reader.getLevel() < 
level) {
+        } else if (reader.isMissing()) {
             addMissingToAncestor(reader.getLevel());
         } else if (reader.isValue()) {
             addValueToParent();
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/RepeatedPrimitiveValueAssembler.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/RepeatedPrimitiveValueAssembler.java
index 61d0741b38..67379c0125 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/RepeatedPrimitiveValueAssembler.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/RepeatedPrimitiveValueAssembler.java
@@ -27,15 +27,10 @@ import 
org.apache.hyracks.storage.am.lsm.btree.column.error.ColumnarValueExcepti
 import com.fasterxml.jackson.databind.node.ObjectNode;
 
 final class RepeatedPrimitiveValueAssembler extends 
AbstractPrimitiveValueAssembler {
-    private int arrayDelegateLevels;
-    private boolean arrayDelegate;
 
     RepeatedPrimitiveValueAssembler(int level, AssemblerInfo info, 
IColumnValuesReader reader,
             IValueGetter primitiveValue) {
         super(level, info, reader, primitiveValue);
-        this.arrayDelegate = false;
-        this.arrayDelegateLevels = 0;
-
     }
 
     @Override
@@ -51,7 +46,7 @@ final class RepeatedPrimitiveValueAssembler extends 
AbstractPrimitiveValueAssemb
          * - We are in an array (i.e., the parent array assembler is active)
          * - The value is a delimiter (i.e., the last round)
          */
-        if (!state.isInGroup() || reader.isRepeatedValue() || 
reader.isDelimiter()) {
+        if (!state.isInGroup() || reader.isRepeatedValue() || 
reader.isDelimiter() || reader.areAllMissing()) {
             next();
         }
 
@@ -64,20 +59,15 @@ final class RepeatedPrimitiveValueAssembler extends 
AbstractPrimitiveValueAssemb
         return NEXT_ASSEMBLER;
     }
 
-    public IColumnValuesReader getReader() {
-        return reader;
-    }
-
-    public void setAsDelegate(int arrayLevel) {
-        // This assembler is responsible for adding null values
-        this.arrayDelegate = true;
-        this.arrayDelegateLevels |= (1 << arrayLevel);
-    }
-
     private void next() throws HyracksDataException {
+        if (reader.areAllMissing()) {
+            // If all values are missing, we add missing to the ancestor at 
the lowest missing level
+            addMissingToAncestor(reader.getLevel());
+            return;
+        }
         if (!reader.next()) {
             throw createException();
-        } else if (reader.isNull() && (arrayDelegate || reader.getLevel() + 1 
== level)) {
+        } else if (reader.isNull()) {
             /*
              * There are two cases here for where the null belongs to:
              * 1- If the null is an array item, then add it
@@ -85,7 +75,7 @@ final class RepeatedPrimitiveValueAssembler extends 
AbstractPrimitiveValueAssemb
              * (i.e., arrayDelegate is true)
              */
             addNullToAncestor(reader.getLevel());
-        } else if (reader.isMissing() && (isArrayDelegate(reader.getLevel()) 
|| reader.getLevel() + 1 == level)) {
+        } else if (reader.isMissing() && reader.getLevel() < level) {
             /*
              * Add a missing item in either
              * - the array item is MISSING
@@ -97,19 +87,14 @@ final class RepeatedPrimitiveValueAssembler extends 
AbstractPrimitiveValueAssemb
         }
     }
 
-    private boolean isArrayDelegate(int level) {
-        return (arrayDelegateLevels & (1 << level)) != 0;
-    }
-
     private ColumnarValueException createException() {
         ColumnarValueException e = new ColumnarValueException();
         ObjectNode assemblerNode = e.createNode(getClass().getSimpleName());
         assemblerNode.put("isDelegate", isDelegate());
-        assemblerNode.put("isArrayDelegate", arrayDelegate);
 
         ObjectNode readerNode = assemblerNode.putObject("assemblerReader");
         reader.appendReaderInformation(readerNode);
 
         return e;
     }
-}
\ No newline at end of file
+}
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
index 497654c451..59a3edde4c 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
@@ -81,9 +81,10 @@ public final class ObjectSchemaNode extends 
AbstractSchemaNestedNode {
             FlushColumnMetadata columnMetadata) throws HyracksDataException {
         int numberOfChildren = children.size();
         int fieldNameIndex = 
columnMetadata.getFieldNamesDictionary().getOrCreateFieldNameIndex(fieldName);
+        boolean previouslyMissing = isEmptyObject;
         int childIndex = 
fieldNameIndexToChildIndexMap.getOrDefault(fieldNameIndex, 
nextIndex.apply(fieldNameIndex));
         AbstractSchemaNode currentChild = childIndex == numberOfChildren ? 
null : children.get(childIndex);
-        AbstractSchemaNode newChild = 
columnMetadata.getOrCreateChild(currentChild, childTypeTag);
+        AbstractSchemaNode newChild = 
columnMetadata.getOrCreateChild(currentChild, childTypeTag, previouslyMissing);
         if (currentChild == null) {
             children.add(childIndex, newChild);
             fieldNameIndexToChildIndexMap.put(fieldNameIndex, childIndex);
@@ -111,7 +112,7 @@ public final class ObjectSchemaNode extends 
AbstractSchemaNestedNode {
             return null;
         }
         isEmptyObject = true;
-        AbstractSchemaNode emptyChild = columnMetadata.getOrCreateChild(null, 
ATypeTag.MISSING);
+        AbstractSchemaNode emptyChild = columnMetadata.getOrCreateChild(null, 
ATypeTag.MISSING, false);
         addChild(DUMMY_FIELD_NAME_INDEX, emptyChild);
         nextIndex = this::emptyColumnIndex;
         return emptyChild;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/UnionSchemaNode.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/UnionSchemaNode.java
index 8124780ee5..89f640fb36 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/UnionSchemaNode.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/UnionSchemaNode.java
@@ -92,7 +92,7 @@ public final class UnionSchemaNode extends 
AbstractSchemaNestedNode {
         ATypeTag normalizedTypeTag = getNormalizedTypeTag(childTypeTag);
         AbstractSchemaNode currentChild = children.get(normalizedTypeTag);
         //The parent of a union child should be the actual parent
-        AbstractSchemaNode newChild = 
columnMetadata.getOrCreateChild(currentChild, normalizedTypeTag);
+        AbstractSchemaNode newChild = 
columnMetadata.getOrCreateChild(currentChild, normalizedTypeTag, false);
         if (currentChild != newChild) {
             putChild(newChild, currentChild);
         } else {
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/collection/AbstractCollectionSchemaNode.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/collection/AbstractCollectionSchemaNode.java
index 1bcf1e9ed9..bfdc1304ac 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/collection/AbstractCollectionSchemaNode.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/collection/AbstractCollectionSchemaNode.java
@@ -51,7 +51,7 @@ public abstract class AbstractCollectionSchemaNode extends 
AbstractSchemaNestedN
 
     public final AbstractSchemaNode getOrCreateItem(ATypeTag childTypeTag, 
FlushColumnMetadata columnMetadata)
             throws HyracksDataException {
-        AbstractSchemaNode newItem = columnMetadata.getOrCreateChild(item, 
childTypeTag);
+        AbstractSchemaNode newItem = columnMetadata.getOrCreateChild(item, 
childTypeTag, false);
         if (item == null) {
             newItem.getDeltaColumnsChanged();
             numberOfColumns += newItem.getNumberOfColumns();
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/BatchFinalizerVisitor.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/BatchFinalizerVisitor.java
index bc8f1841bf..9beaf6fb8a 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/BatchFinalizerVisitor.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/BatchFinalizerVisitor.java
@@ -137,7 +137,6 @@ public final class BatchFinalizerVisitor implements 
ISchemaNodeVisitor<Void, Abs
                 primitiveNode.getColumnIndex(), needAllColumns)) {
             
orderedColumns.add(columnSchemaMetadata.getWriter(primitiveNode.getColumnIndex()));
         }
-
         //Prepare for the next batch
         primitiveNode.setCounter(0);
         primitiveNode.setNumberOfVisitedColumnsInBatch(0);
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
index 1c93ebb727..23032350e3 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
@@ -355,14 +355,20 @@ public class FlushColumnMetadata extends 
AbstractColumnMetadata {
         return columnWriters.size();
     }
 
-    public AbstractSchemaNode getOrCreateChild(AbstractSchemaNode child, 
ATypeTag childTypeTag)
-            throws HyracksDataException {
+    public AbstractSchemaNode getOrCreateChild(AbstractSchemaNode child, 
ATypeTag childTypeTag,
+            boolean replaceObjectDummyField) throws HyracksDataException {
         AbstractSchemaNode currentChild = child;
         ATypeTag normalizedTypeTag = getNormalizedTypeTag(childTypeTag);
         boolean newChild = currentChild == null;
-        if (currentChild == null || normalizedTypeTag != ATypeTag.MISSING && 
normalizedTypeTag != ATypeTag.NULL
-                && currentChild.getTypeTag() != ATypeTag.UNION
-                && getNormalizedTypeTag(currentChild.getTypeTag()) != 
normalizedTypeTag) {
+        if (currentChild == null
+                // Special case (ASTERIXDB-3652): initially the object 
had no child,
+                // hence an empty object was added (see ColumnTransformer),
+                // but in later documents the object got a child with value NULL.
+                // We need to record both the NULL and the dummy MISSING in the 
union to maintain the structure.
+                || (replaceObjectDummyField && normalizedTypeTag == 
ATypeTag.NULL)
+                || normalizedTypeTag != ATypeTag.MISSING && normalizedTypeTag 
!= ATypeTag.NULL
+                        && currentChild.getTypeTag() != ATypeTag.UNION
+                        && getNormalizedTypeTag(currentChild.getTypeTag()) != 
normalizedTypeTag) {
             //Create a new child or union type if required type is different 
from the current child type
             int visitedBatchVersion = newChild ? -1 : 
currentChild.getVisitedBatchVersion();
             currentChild = createChild(child, childTypeTag);
@@ -449,6 +455,8 @@ public class FlushColumnMetadata extends 
AbstractColumnMetadata {
         //Flush all definition levels from parent to the current node
         flushDefinitionLevels(level, parent, node, includeChildColumns);
         //Add null value (+2) to say that both the parent and the child are 
present
+        //A null is represented by the (childMask | (level which is not NULL))
+        //{ "x": { "a": NULL } } --> In here, the nullLevel stored will be 
nullMask(2) | 1 (level of x)
         definitionLevels.get(node).add(ColumnValuesUtil.getNullMask(level + 2) 
| level);
         node.incrementCounter();
     }
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/IColumnValuesReader.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/IColumnValuesReader.java
index 51ef2c1d67..22cbb9fdde 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/IColumnValuesReader.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/IColumnValuesReader.java
@@ -26,6 +26,12 @@ import org.apache.hyracks.data.std.api.IValueReference;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 
 public interface IColumnValuesReader extends Comparable<IColumnValuesReader> {
+    /**
+     * Indicates whether the column is missing in the leaf
+     * @return true if the column is missing in the leaf (all values are missing), false otherwise
+     */
+    boolean areAllMissing();
+
     /**
      * Reset the reader
      *
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java
index e8c6d3e597..3dead4aed1 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java
@@ -103,6 +103,11 @@ abstract class AbstractColumnValuesReader implements 
IColumnValuesReader {
         }
     }
 
+    @Override
+    public boolean areAllMissing() {
+        return allMissing;
+    }
+
     abstract void resetValues();
 
     @Override
@@ -161,7 +166,9 @@ abstract class AbstractColumnValuesReader implements 
IColumnValuesReader {
 
     @Override
     public final boolean isMissing() {
-        return !isDelimiter() && level < maxLevel;
+        // After clearing the nullBitMask, a level less than maxLevel may 
incorrectly be considered missing.
+        // This is because the nullBitMask can affect the interpretation of 
the level value.
+        return !isDelimiter() && !nullLevel && level < maxLevel;
     }
 
     @Override
diff --git 
a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/values/reader/AbstractDummyColumnValuesReader.java
 
b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/values/reader/AbstractDummyColumnValuesReader.java
index 78ca96bc7c..ed76cbdf53 100644
--- 
a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/values/reader/AbstractDummyColumnValuesReader.java
+++ 
b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/values/reader/AbstractDummyColumnValuesReader.java
@@ -85,6 +85,11 @@ public abstract class AbstractDummyColumnValuesReader 
implements IColumnValuesRe
         //noOp
     }
 
+    @Override
+    public boolean areAllMissing() {
+        return false;
+    }
+
     @Override
     public final ATypeTag getTypeTag() {
         return typeTag;
diff --git 
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/sweep/ColumnSweepPlanner.java
 
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/sweep/ColumnSweepPlanner.java
index 5b837a154a..9a200cc5a0 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/sweep/ColumnSweepPlanner.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/sweep/ColumnSweepPlanner.java
@@ -118,6 +118,7 @@ public final class ColumnSweepPlanner {
         long accessTime = clock.getCurrentTime();
         lastAccess = accessTime;
         int numberOfColumns = projectionInfo.getNumberOfProjectedColumns();
+        resizeStatsArrays(numberOfColumns);
         boolean requireCloudAccess = false;
         for (int i = 0; i < numberOfColumns; i++) {
             int columnIndex = projectionInfo.getColumnIndex(i);
@@ -185,6 +186,9 @@ public final class ColumnSweepPlanner {
     }
 
     private void resizeStatsArrays(int numberOfColumns) {
+        if (numberOfColumns <= sizes.length) {
+            return;
+        }
         sizes = IntArrays.ensureCapacity(sizes, numberOfColumns);
         lastAccesses = LongArrays.ensureCapacity(lastAccesses, 
numberOfColumns);
         this.numberOfColumns = numberOfColumns - numberOfPrimaryKeys;

Reply via email to