This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 69b31add738949df2fbb10f62956e7f6bb77530e Author: Ritik Raj <[email protected]> AuthorDate: Wed Oct 8 11:50:15 2025 +0530 [ASTERIXDB-3652][STO] Fixed column assembler issues - user model changes: no - storage format changes: no - interface changes: no Details: There were a few issues related to union + objects + array. In case of union in objects, introduced a logical barrier which, when values are missing, collects all the missing values and only propagates them once it has encountered all the children for the current level, preventing extra nulls or missings from being propagated above. There can be cases where a particular column is not present and the EndOfRepeatedGroupAssembler uses that column as a delegate; it is then unable to correctly determine the end of the array, so the value is not propagated to the parent. Ext-ref: MB-68881 Change-Id: I782d17f81d00210a4ca8673cc5fbcf556fc4758c Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20469 Integration-Tests: Jenkins <[email protected]> Reviewed-by: Peeyush Gupta <[email protected]> Tested-by: Jenkins <[email protected]> --- .../ASTERIXDB-3652-1/ASTERIXDB-3652.001.ddl.sqlpp | 28 +++++ .../ASTERIXDB-3652.002.update.sqlpp | 32 +++++ .../ASTERIXDB-3652.003.query.sqlpp | 23 ++++ .../ASTERIXDB-3652-2/ASTERIXDB-3652.001.ddl.sqlpp | 28 +++++ .../ASTERIXDB-3652.002.update.sqlpp | 31 +++++ .../ASTERIXDB-3652.003.query.sqlpp | 23 ++++ .../ASTERIXDB-3652-3/ASTERIXDB-3652.001.ddl.sqlpp | 28 +++++ .../ASTERIXDB-3652.002.update.sqlpp | 30 +++++ .../ASTERIXDB-3652.003.query.sqlpp | 24 ++++ .../ASTERIXDB-3652-1/ASTERIXDB-3652.003.adm | 1 + .../ASTERIXDB-3652-2/ASTERIXDB-3652.003.adm | 1 + .../ASTERIXDB-3652-3/ASTERIXDB-3652.003.adm | 2 + .../src/test/resources/runtimets/sqlpp_queries.xml | 15 +++ .../runtimets/testsuite_single_partition_sqlpp.xml | 15 +++ .../assembler/AbstractNestedValueAssembler.java | 10 +- .../assembler/AbstractPrimitiveValueAssembler.java | 2 +- .../assembler/ArrayWithUnionValueAssembler.java | 133 +++++++++++++++++--- 
.../column/assembler/AssemblerBuilderVisitor.java | 22 +++- .../assembler/EndOfRepeatedGroupAssembler.java | 36 +++++- .../column/assembler/ObjectValueAssembler.java | 138 ++++++++++++++++++--- .../column/assembler/PrimitiveValueAssembler.java | 4 +- .../assembler/RepeatedPrimitiveValueAssembler.java | 33 ++--- .../column/metadata/schema/ObjectSchemaNode.java | 5 +- .../column/metadata/schema/UnionSchemaNode.java | 2 +- .../collection/AbstractCollectionSchemaNode.java | 2 +- .../operation/lsm/flush/BatchFinalizerVisitor.java | 1 - .../operation/lsm/flush/FlushColumnMetadata.java | 18 ++- .../asterix/column/values/IColumnValuesReader.java | 6 + .../values/reader/AbstractColumnValuesReader.java | 9 +- .../reader/AbstractDummyColumnValuesReader.java | 5 + .../column/cloud/sweep/ColumnSweepPlanner.java | 4 + 31 files changed, 634 insertions(+), 77 deletions(-) diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.001.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.001.ddl.sqlpp new file mode 100644 index 0000000000..d151eb1317 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.001.ddl.sqlpp @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + DROP DATAVERSE test IF EXISTS; + CREATE DATAVERSE test; + + USE test; + + CREATE DATASET ColumnDataset + PRIMARY KEY (uid:int) WITH { + "storage-format": {"format": "column"} + }; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.002.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.002.update.sqlpp new file mode 100644 index 0000000000..7cd0e59329 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.002.update.sqlpp @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; +UPSERT INTO ColumnDataset( +{ + "uid": 1, + "key_0": [ + "array_value_58", + 866, + [ + 939 + ] + ] +} +); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.003.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.003.query.sqlpp new file mode 100644 index 0000000000..198869a5e7 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.003.query.sqlpp @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + + USE test; + + SELECT VALUE c + FROM ColumnDataset c; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.001.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.001.ddl.sqlpp new file mode 100644 index 0000000000..1f2f3baebb --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.001.ddl.sqlpp @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +DROP DATAVERSE test IF EXISTS; +CREATE DATAVERSE test; + +USE test; + +CREATE DATASET ColumnDataset +PRIMARY KEY (uid:int) WITH { + "storage-format": {"format": "column"} +}; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.002.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.002.update.sqlpp new file mode 100644 index 0000000000..b9443fd8e8 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.002.update.sqlpp @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; +UPSERT INTO ColumnDataset( +{ + "uid": 1, + "k": [ + { "i": [ 1, null, 3 ] }, + { }, -- i missing + { "i": null }, -- i explicit null + null -- null element in outer array + ] +} +); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.003.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.003.query.sqlpp new file mode 100644 index 0000000000..198869a5e7 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.003.query.sqlpp @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + + USE test; + + SELECT VALUE c + FROM ColumnDataset c; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.001.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.001.ddl.sqlpp new file mode 100644 index 0000000000..1f2f3baebb --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.001.ddl.sqlpp @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +DROP DATAVERSE test IF EXISTS; +CREATE DATAVERSE test; + +USE test; + +CREATE DATASET ColumnDataset +PRIMARY KEY (uid:int) WITH { + "storage-format": {"format": "column"} +}; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.002.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.002.update.sqlpp new file mode 100644 index 0000000000..8a8dd7b5a0 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.002.update.sqlpp @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; + +UPSERT INTO ColumnDataset({ +"uid": 1, +"a": {} +}); + +UPSERT INTO ColumnDataset({ +"uid": 2, +"a": {"x": null} +}); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.003.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.003.query.sqlpp new file mode 100644 index 0000000000..99f26c0fa1 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.003.query.sqlpp @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; + +SELECT VALUE c +FROM ColumnDataset c +ORDER BY c.uid; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.003.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.003.adm new file mode 100644 index 0000000000..5ce24caaf4 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-1/ASTERIXDB-3652.003.adm @@ -0,0 +1 @@ +{ "uid": 1, "key_0": [ "array_value_58", 866, [ 939 ] ] } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.003.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.003.adm new file mode 100644 index 0000000000..0dac6ee7a8 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-2/ASTERIXDB-3652.003.adm @@ -0,0 +1 @@ +{ "uid": 1, "k": [ { "i": [ 1, null, 3 ] }, { }, { "i": null }, null ] } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.003.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.003.adm new file mode 100644 index 0000000000..6b69025b94 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-3/ASTERIXDB-3652.003.adm @@ -0,0 +1,2 @@ +{ "uid": 1, "a": { } } +{ "uid": 2, "a": { "x": null } } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml b/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml index 35a27bae5e..374952c72e 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml +++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml @@ -16605,6 +16605,21 @@ <output-dir compare="Text">assembly/missing-inner-array</output-dir> </compilation-unit> </test-case> + <test-case FilePath="column"> + <compilation-unit name="assembly/ASTERIXDB-3652-1"> + <output-dir compare="Text">assembly/ASTERIXDB-3652-1</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="column"> + <compilation-unit name="assembly/ASTERIXDB-3652-2"> + <output-dir compare="Text">assembly/ASTERIXDB-3652-2</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="column"> + <compilation-unit name="assembly/ASTERIXDB-3652-3"> + <output-dir compare="Text">assembly/ASTERIXDB-3652-3</output-dir> + </compilation-unit> + </test-case> <test-case FilePath="column"> <compilation-unit name="filter/ASTERIXDB-3499"> <output-dir compare="Text">filter/ASTERIXDB-3499</output-dir> diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_single_partition_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_single_partition_sqlpp.xml index f48b00d0e0..4935ccec94 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_single_partition_sqlpp.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_single_partition_sqlpp.xml @@ -84,6 +84,21 @@ <output-dir compare="Text">assembly/ASTERIXDB-3539</output-dir> </compilation-unit> </test-case> + <test-case FilePath="column"> + <compilation-unit name="assembly/ASTERIXDB-3652-1"> + <output-dir compare="Text">assembly/ASTERIXDB-3652-1</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="column"> + <compilation-unit name="assembly/ASTERIXDB-3652-2"> + <output-dir compare="Text">assembly/ASTERIXDB-3652-2</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="column"> + <compilation-unit name="assembly/ASTERIXDB-3652-3"> + <output-dir compare="Text">assembly/ASTERIXDB-3652-3</output-dir> + </compilation-unit> 
+ </test-case> <test-case FilePath="column"> <compilation-unit name="assembly/missing-inner-array"> <output-dir compare="Text">assembly/missing-inner-array</output-dir> diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractNestedValueAssembler.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractNestedValueAssembler.java index 13820e0e2e..4e17246525 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractNestedValueAssembler.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractNestedValueAssembler.java @@ -59,7 +59,7 @@ public abstract class AbstractNestedValueAssembler extends AbstractValueAssemble } @Override - final void addNullToAncestor(int nullLevel) throws HyracksDataException { + void addNullToAncestor(int nullLevel) throws HyracksDataException { AbstractNestedValueAssembler parent = getParent(); if (nullLevel + 1 == level) { parent.start(); @@ -70,7 +70,7 @@ public abstract class AbstractNestedValueAssembler extends AbstractValueAssemble } @Override - final void addMissingToAncestor(int missingLevel) throws HyracksDataException { + void addMissingToAncestor(int missingLevel) throws HyracksDataException { AbstractNestedValueAssembler parent = getParent(); if (missingLevel + 1 == level) { parent.start(); @@ -107,5 +107,11 @@ public abstract class AbstractNestedValueAssembler extends AbstractValueAssemble if (isDelegate()) { getParent().end(); } + + clearStateOnEnd(); + } + + public void clearStateOnEnd() { + } } diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractPrimitiveValueAssembler.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractPrimitiveValueAssembler.java index eb2ddf866e..c565ea8550 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractPrimitiveValueAssembler.java +++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractPrimitiveValueAssembler.java @@ -30,7 +30,7 @@ public abstract class AbstractPrimitiveValueAssembler extends AbstractValueAssem */ public static final int NEXT_ASSEMBLER = -1; protected final IValueGetter primitiveValueGetter; - protected final IColumnValuesReader reader; + protected IColumnValuesReader reader; AbstractPrimitiveValueAssembler(int level, AssemblerInfo info, IColumnValuesReader reader, IValueGetter primitiveValueGetter) { diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/ArrayWithUnionValueAssembler.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/ArrayWithUnionValueAssembler.java index c2d6c42891..6eadbd6126 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/ArrayWithUnionValueAssembler.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/ArrayWithUnionValueAssembler.java @@ -22,10 +22,23 @@ import org.apache.asterix.column.metadata.schema.AbstractSchemaNode; import org.apache.asterix.column.metadata.schema.UnionSchemaNode; import org.apache.hyracks.api.exceptions.HyracksDataException; +/** + * Handles assembling array elements whose items are union types. + * Manages propagation of nulls/missings to parent assemblers and + * chunk completion logic per union branch. 
+ */ public class ArrayWithUnionValueAssembler extends ArrayValueAssembler { + private final int numberOfUnionChildren; + private int numberOfAddedValues; private boolean nonMissingValueAdded; + private boolean firstTime = true; + + private boolean nullAncestor; + private int nullClampedLevel; + private int missingClampedLevel; + private boolean parentMissing = true; ArrayWithUnionValueAssembler(int level, AssemblerInfo info, int firstValueIndex, AbstractSchemaNode itemNode) { super(level, info, firstValueIndex); @@ -33,46 +46,138 @@ public class ArrayWithUnionValueAssembler extends ArrayValueAssembler { } @Override - void reset() { - numberOfAddedValues = 0; + public void reset() { + // Reset called before reading new array item + if (firstTime) { + resetCounters(); + } nonMissingValueAdded = false; super.reset(); } @Override void addValue(AbstractValueAssembler value) throws HyracksDataException { + parentMissing = false; nonMissingValueAdded = true; numberOfAddedValues++; + super.addValue(value); - if (numberOfAddedValues == numberOfUnionChildren) { - // Completed a chunk; since we saw a non-missing, just reset the counters - nonMissingValueAdded = false; - numberOfAddedValues = 0; + + if (isChunkComplete()) { + completeChunk(); } } @Override void addNull(AbstractValueAssembler value) throws HyracksDataException { + parentMissing = false; nonMissingValueAdded = true; numberOfAddedValues++; + super.addNull(value); - if (numberOfAddedValues == numberOfUnionChildren) { - // Completed a chunk; since we saw a non-missing, just reset the counters - nonMissingValueAdded = false; - numberOfAddedValues = 0; + + if (isChunkComplete()) { + completeChunk(); } } @Override void addMissing() throws HyracksDataException { + parentMissing = false; numberOfAddedValues++; - if (numberOfAddedValues == numberOfUnionChildren) { + + if (isChunkComplete()) { + // Only propagate missing if no non-missing values appeared in this chunk if (!nonMissingValueAdded) { super.addMissing(); } - 
// Reset for the next chunk - numberOfAddedValues = 0; - nonMissingValueAdded = false; + completeChunk(); + } + } + + @Override + void addNullToAncestor(int nullLevel) throws HyracksDataException { + numberOfAddedValues++; + firstTime = false; + nullAncestor = true; + nullClampedLevel = Math.max(nullLevel, nullClampedLevel); + + if (isChunkComplete()) { + if (parentMissing) { + propagateNullOrMissingToParent(true); + } + completeChunk(); + } + } + + @Override + void addMissingToAncestor(int missingLevel) throws HyracksDataException { + numberOfAddedValues++; + firstTime = false; + missingClampedLevel = Math.max(missingLevel, missingClampedLevel); + + if (isChunkComplete()) { + if (parentMissing) { + propagateNullOrMissingToParent(false); + } + completeChunk(); + } + } + + private void propagateNullOrMissingToParent(boolean isNull) throws HyracksDataException { + AbstractNestedValueAssembler parent = getParent(); + if (parent == null) { + return; + } + + // If any ancestor was null, always propagate null + if (isNull || nullAncestor) { + boolean atCurrentLevel = (nullClampedLevel + 1) == level; + if (atCurrentLevel) { + parent.start(); + parent.addNull(this); + } else { + parent.addNullToAncestor(nullClampedLevel); + } + } else { + boolean atCurrentLevel = (missingClampedLevel + 1) == level; + if (atCurrentLevel) { + parent.start(); + parent.addMissing(); + } else { + parent.addMissingToAncestor(missingClampedLevel); + } } } + + private boolean isChunkComplete() { + return numberOfAddedValues == numberOfUnionChildren; + } + + private void completeChunk() { + // Reset chunk-local state + resetCounters(); + firstTime = true; + } + + private void resetCounters() { + numberOfAddedValues = 0; + nonMissingValueAdded = false; + parentMissing = true; + nullAncestor = false; + nullClampedLevel = 0; + missingClampedLevel = 0; + } + + @Override + void addValueToParent() throws HyracksDataException { + super.addValueToParent(); + clearStateOnEnd(); + } + + @Override + public 
void clearStateOnEnd() { + resetCounters(); + firstTime = true; + } } \ No newline at end of file diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java index cb447c85e8..62eab35283 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java @@ -58,6 +58,7 @@ public class AssemblerBuilderVisitor implements ISchemaNodeVisitor<AbstractValue private final IValueGetterFactory valueGetterFactory; private final Map<Integer, IColumnValuesReader> primaryKeyReaders; private AbstractValueAssembler rootAssembler; + private List<IColumnValuesReader> endOfGroupPrimitiveReaders; //Recursion info private final IntList delimiters; @@ -72,6 +73,7 @@ public class AssemblerBuilderVisitor implements ISchemaNodeVisitor<AbstractValue valueAssemblers = new ArrayList<>(); delimiters = new IntArrayList(); primaryKeyReaders = new HashMap<>(); + this.endOfGroupPrimitiveReaders = new ArrayList<>(); for (IColumnValuesReader reader : columnMetadata.getPrimaryKeyReaders()) { primaryKeyReaders.put(reader.getColumnIndex(), reader); } @@ -92,11 +94,11 @@ public class AssemblerBuilderVisitor implements ISchemaNodeVisitor<AbstractValue @Override public AbstractValueAssembler visit(ObjectSchemaNode objectNode, AssemblerInfo info) throws HyracksDataException { + IntList childrenFieldNameIndexes = objectNode.getChildrenFieldNameIndexes(); + int effectiveNumberOfChildren = childrenFieldNameIndexes.size(); ObjectValueAssembler objectAssembler = new ObjectValueAssembler(level, info); level++; - BitSet declaredFields = handleDeclaredFields(objectNode, info, objectAssembler); - IntList childrenFieldNameIndexes = objectNode.getChildrenFieldNameIndexes(); int numberOfAddedChildren 
= declaredFields.cardinality(); if (numberOfAddedChildren < childrenFieldNameIndexes.size()) { // Now handle any open fields @@ -110,11 +112,17 @@ public class AssemblerBuilderVisitor implements ISchemaNodeVisitor<AbstractValue //The last child should be a delegate boolean delegate = numberOfAddedChildren == childrenFieldNameIndexes.size(); AssemblerInfo childInfo = new AssemblerInfo(childType, objectAssembler, delegate, fieldName); + if (childNode instanceof UnionSchemaNode) { + //UnionSchemaNode does not actually exist. We know the parent of the union could have items of multiple types. + //Thus, the union's parent is the actual parent for all the union types + effectiveNumberOfChildren += ((UnionSchemaNode) childNode).getChildren().size() - 1; + } childNode.accept(this, childInfo); } } } + objectAssembler.setNumberOfEffectiveFields(effectiveNumberOfChildren); level--; return objectAssembler; } @@ -163,6 +171,9 @@ public class AssemblerBuilderVisitor implements ISchemaNodeVisitor<AbstractValue RepeatedPrimitiveValueAssembler previousDelegate = delegateAssembler; delegateAssembler = null; + List<IColumnValuesReader> previousEndOfGroupPrimitiveReaders = endOfGroupPrimitiveReaders; + endOfGroupPrimitiveReaders = new ArrayList<>(); + IAType itemDeclaredType = getChildType(itemNode, declaredType.getItemType()); AssemblerInfo itemInfo = new AssemblerInfo(itemDeclaredType, arrayAssembler, false); itemNode.accept(this, itemInfo); @@ -170,12 +181,12 @@ public class AssemblerBuilderVisitor implements ISchemaNodeVisitor<AbstractValue // if delegateAssembler is null, that means no column will be accessed if (delegateAssembler != null) { // Set repeated assembler as a delegate (responsible for writing null values) - delegateAssembler.setAsDelegate(level - 1); IColumnValuesReader reader = delegateAssembler.getReader(); int numberOfDelimiters = reader.getNumberOfDelimiters(); // End of group assembler is responsible to finalize array/multiset builders 
EndOfRepeatedGroupAssembler endOfGroupAssembler = - new EndOfRepeatedGroupAssembler(reader, arrayAssembler, numberOfDelimiters - delimiters.size()); + new EndOfRepeatedGroupAssembler(endOfGroupPrimitiveReaders, arrayAssembler, delimiters.size()); + previousEndOfGroupPrimitiveReaders.addAll(endOfGroupPrimitiveReaders); valueAssemblers.add(endOfGroupAssembler); } @@ -185,6 +196,8 @@ public class AssemblerBuilderVisitor implements ISchemaNodeVisitor<AbstractValue // Return the delegate assembler to the previous one delegateAssembler = previousDelegate; } + + endOfGroupPrimitiveReaders = previousEndOfGroupPrimitiveReaders; return arrayAssembler; } @@ -216,6 +229,7 @@ public class AssemblerBuilderVisitor implements ISchemaNodeVisitor<AbstractValue assembler = new RepeatedPrimitiveValueAssembler(level, info, reader, valueGetter); setDelegate(reader, (RepeatedPrimitiveValueAssembler) assembler); + endOfGroupPrimitiveReaders.add(reader); } else { IColumnValuesReader reader; diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/EndOfRepeatedGroupAssembler.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/EndOfRepeatedGroupAssembler.java index 805f4934de..63204d71b7 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/EndOfRepeatedGroupAssembler.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/EndOfRepeatedGroupAssembler.java @@ -18,21 +18,27 @@ */ package org.apache.asterix.column.assembler; +import java.util.List; + import org.apache.asterix.column.assembler.value.MissingValueGetter; import org.apache.asterix.column.bytes.stream.in.AbstractBytesInputStream; import org.apache.asterix.column.values.IColumnValuesReader; import org.apache.hyracks.api.exceptions.HyracksDataException; public class EndOfRepeatedGroupAssembler extends AbstractPrimitiveValueAssembler { - // private final List<ArrayValueAssembler> arrays; + private final 
List<IColumnValuesReader> readers; private final ArrayValueAssembler arrayAssembler; - private final int delimiterIndex; + private final int numDelimiters; + private int delimiterIndex; private EndOfRepeatedGroupAssembler previousGroup; - EndOfRepeatedGroupAssembler(IColumnValuesReader reader, ArrayValueAssembler arrayAssembler, int delimiterIndex) { - super(reader.getLevel(), new AssemblerInfo(), reader, MissingValueGetter.INSTANCE); + EndOfRepeatedGroupAssembler(List<IColumnValuesReader> readers, ArrayValueAssembler arrayAssembler, + int numDelimiters) { + super(readers.get(0).getLevel(), new AssemblerInfo(), readers.get(0), MissingValueGetter.INSTANCE); + this.readers = readers; this.arrayAssembler = arrayAssembler; - this.delimiterIndex = delimiterIndex; + this.numDelimiters = numDelimiters; + this.delimiterIndex = readers.get(0).getNumberOfDelimiters() - numDelimiters; previousGroup = null; } @@ -41,8 +47,25 @@ public class EndOfRepeatedGroupAssembler extends AbstractPrimitiveValueAssembler // NoOp } + private IColumnValuesReader getNonMissingReader() { + IColumnValuesReader nonMissingReader = null; + for (IColumnValuesReader r : readers) { + if (!r.areAllMissing()) { + nonMissingReader = r; + } + } + if (nonMissingReader == null) { + return readers.get(0); // all are missing, return the first one + } + return nonMissingReader; + } + @Override public int next(AssemblerState state) throws HyracksDataException { + if (reader.areAllMissing()) { + reader = getNonMissingReader(); + this.delimiterIndex = reader.getNumberOfDelimiters() - numDelimiters; + } // Get the current delimiter index from the reader int delimiterIndex = reader.getDelimiterIndex(); /* @@ -54,6 +77,9 @@ public class EndOfRepeatedGroupAssembler extends AbstractPrimitiveValueAssembler if (arrayAssembler.isDelegate()) { // Yes it is a delegate, end the arrayAssembler to signal to the parent assembler to finalize arrayAssembler.end(); + } else { + // since the current assembled values are for a 
higher nesting level, we need to clear the state + arrayAssembler.clearStateOnEnd(); } // Move ot the next assembler return NEXT_ASSEMBLER; diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/ObjectValueAssembler.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/ObjectValueAssembler.java index 536ce02005..8870258872 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/ObjectValueAssembler.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/ObjectValueAssembler.java @@ -24,50 +24,160 @@ import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.data.std.api.IValueReference; public class ObjectValueAssembler extends AbstractNestedValueAssembler { + private final RecordBuilder recordBuilder; private final ARecordType recordType; + private int effectiveFieldCount; + private int visitedCount; + private boolean hasNonMissingValue; + private boolean firstTime; + private boolean hasNullAncestor; + // These are used to know the highest level of null/missing ancestor to propagate + // because some of the fields may be allMissing (as in column was not present) + private int nullClampedLevel; + private int missingClampedLevel; + ObjectValueAssembler(int level, AssemblerInfo info) { super(level, info); - recordBuilder = new RecordBuilder(); - recordType = (ARecordType) info.getDeclaredType(); + this.recordBuilder = new RecordBuilder(); + this.recordType = (ARecordType) info.getDeclaredType(); + clearState(); } @Override - void reset() { + public void reset() { recordBuilder.reset(recordType); storage.reset(); + if (firstTime) { + clearState(); + } } @Override void addValue(AbstractValueAssembler value) throws HyracksDataException { - int valueIndex = value.getFieldIndex(); - if (valueIndex >= 0) { - recordBuilder.addField(valueIndex, value.getValue()); - } else { - recordBuilder.addField(value.getFieldName(), 
value.getValue()); + visitedCount++; + hasNonMissingValue = true; + addField(value, value.getValue()); + + if (isChunkComplete()) { + finalizeChunk(); } } @Override void addNull(AbstractValueAssembler value) throws HyracksDataException { - int valueIndex = value.getFieldIndex(); - if (valueIndex >= 0) { - recordBuilder.addField(valueIndex, NULL); - } else { - recordBuilder.addField(value.getFieldName(), NULL); + visitedCount++; + hasNonMissingValue = true; + addField(value, NULL); + + if (isChunkComplete()) { + finalizeChunk(); + } + } + + @Override + void addMissing() throws HyracksDataException { + visitedCount++; + hasNonMissingValue = true; + if (isChunkComplete()) { + finalizeChunk(); } } + @Override + void addNullToAncestor(int nullLevel) throws HyracksDataException { + handleAncestorAddition(nullLevel, true); + } + + @Override + void addMissingToAncestor(int missingLevel) throws HyracksDataException { + handleAncestorAddition(missingLevel, false); + } + + void setNumberOfEffectiveFields(int numberOfEffectiveFields) { + this.effectiveFieldCount = numberOfEffectiveFields; + } + @Override void addValueToParent() throws HyracksDataException { storage.reset(); recordBuilder.write(storage.getDataOutput(), true); getParent().addValue(this); + clearStateOnEnd(); } @Override public IValueReference getValue() { return storage; } -} + + @Override + public void clearStateOnEnd() { + clearState(); + } + + // ------------------------------------------------------------------------- + // Internal helpers + // ------------------------------------------------------------------------- + + private void addField(AbstractValueAssembler value, IValueReference fieldValue) throws HyracksDataException { + int fieldIndex = value.getFieldIndex(); + if (fieldIndex >= 0) { + recordBuilder.addField(fieldIndex, fieldValue); + } else { + recordBuilder.addField(value.getFieldName(), fieldValue); + } + } + + private void handleAncestorAddition(int level, boolean isNull) throws 
HyracksDataException { + firstTime = false; + visitedCount++; + + if (isNull) { + hasNullAncestor = true; + nullClampedLevel = Math.max(nullClampedLevel, level); + } else { + missingClampedLevel = Math.max(missingClampedLevel, level); + } + + if (isChunkComplete() && !hasNonMissingValue) { + propagateToAncestor(); // propagate only if no values were seen + } + + if (isChunkComplete()) { + finalizeChunk(); + } + } + + private boolean isChunkComplete() { + return visitedCount == effectiveFieldCount; + } + + private void finalizeChunk() { + clearChunkState(); + firstTime = true; // ready for next chunk + } + + private void propagateToAncestor() throws HyracksDataException { + // propagate nulls preferentially + if (hasNullAncestor) { + super.addNullToAncestor(nullClampedLevel); + } else { + super.addMissingToAncestor(missingClampedLevel); + } + } + + private void clearChunkState() { + visitedCount = 0; + hasNullAncestor = false; + hasNonMissingValue = false; + nullClampedLevel = 0; + missingClampedLevel = 0; + } + + private void clearState() { + clearChunkState(); + firstTime = true; + } +} \ No newline at end of file diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/PrimitiveValueAssembler.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/PrimitiveValueAssembler.java index 520f1d5e1c..f5edc3158e 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/PrimitiveValueAssembler.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/PrimitiveValueAssembler.java @@ -48,9 +48,9 @@ final class PrimitiveValueAssembler extends AbstractPrimitiveValueAssembler { // Do not call next on PK readers as they are maintained by the cursor if (!primaryKey && !reader.next()) { throw createException(); - } else if (reader.isNull() && (isDelegate() || reader.getLevel() + 1 == level)) { + } else if (reader.isNull()) { addNullToAncestor(reader.getLevel()); - } 
else if (reader.isMissing() && isDelegate() && reader.getLevel() < level) { + } else if (reader.isMissing()) { addMissingToAncestor(reader.getLevel()); } else if (reader.isValue()) { addValueToParent(); diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/RepeatedPrimitiveValueAssembler.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/RepeatedPrimitiveValueAssembler.java index 61d0741b38..67379c0125 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/RepeatedPrimitiveValueAssembler.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/RepeatedPrimitiveValueAssembler.java @@ -27,15 +27,10 @@ import org.apache.hyracks.storage.am.lsm.btree.column.error.ColumnarValueExcepti import com.fasterxml.jackson.databind.node.ObjectNode; final class RepeatedPrimitiveValueAssembler extends AbstractPrimitiveValueAssembler { - private int arrayDelegateLevels; - private boolean arrayDelegate; RepeatedPrimitiveValueAssembler(int level, AssemblerInfo info, IColumnValuesReader reader, IValueGetter primitiveValue) { super(level, info, reader, primitiveValue); - this.arrayDelegate = false; - this.arrayDelegateLevels = 0; - } @Override @@ -51,7 +46,7 @@ final class RepeatedPrimitiveValueAssembler extends AbstractPrimitiveValueAssemb * - We are in an array (i.e., the parent array assembler is active) * - The value is a delimiter (i.e., the last round) */ - if (!state.isInGroup() || reader.isRepeatedValue() || reader.isDelimiter()) { + if (!state.isInGroup() || reader.isRepeatedValue() || reader.isDelimiter() || reader.areAllMissing()) { next(); } @@ -64,20 +59,15 @@ final class RepeatedPrimitiveValueAssembler extends AbstractPrimitiveValueAssemb return NEXT_ASSEMBLER; } - public IColumnValuesReader getReader() { - return reader; - } - - public void setAsDelegate(int arrayLevel) { - // This assembler is responsible for adding null values - 
this.arrayDelegate = true; - this.arrayDelegateLevels |= (1 << arrayLevel); - } - private void next() throws HyracksDataException { + if (reader.areAllMissing()) { + // If all values are missing, we add missing to the ancestor at the lowest missing level + addMissingToAncestor(reader.getLevel()); + return; + } if (!reader.next()) { throw createException(); - } else if (reader.isNull() && (arrayDelegate || reader.getLevel() + 1 == level)) { + } else if (reader.isNull()) { /* * There are two cases here for where the null belongs to: * 1- If the null is an array item, then add it @@ -85,7 +75,7 @@ final class RepeatedPrimitiveValueAssembler extends AbstractPrimitiveValueAssemb * (i.e., arrayDelegate is true) */ addNullToAncestor(reader.getLevel()); - } else if (reader.isMissing() && (isArrayDelegate(reader.getLevel()) || reader.getLevel() + 1 == level)) { + } else if (reader.isMissing() && reader.getLevel() < level) { /* * Add a missing item in either * - the array item is MISSING @@ -97,19 +87,14 @@ final class RepeatedPrimitiveValueAssembler extends AbstractPrimitiveValueAssemb } } - private boolean isArrayDelegate(int level) { - return (arrayDelegateLevels & (1 << level)) != 0; - } - private ColumnarValueException createException() { ColumnarValueException e = new ColumnarValueException(); ObjectNode assemblerNode = e.createNode(getClass().getSimpleName()); assemblerNode.put("isDelegate", isDelegate()); - assemblerNode.put("isArrayDelegate", arrayDelegate); ObjectNode readerNode = assemblerNode.putObject("assemblerReader"); reader.appendReaderInformation(readerNode); return e; } -} \ No newline at end of file +} diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java index 497654c451..59a3edde4c 100644 --- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java @@ -81,9 +81,10 @@ public final class ObjectSchemaNode extends AbstractSchemaNestedNode { FlushColumnMetadata columnMetadata) throws HyracksDataException { int numberOfChildren = children.size(); int fieldNameIndex = columnMetadata.getFieldNamesDictionary().getOrCreateFieldNameIndex(fieldName); + boolean previouslyMissing = isEmptyObject; int childIndex = fieldNameIndexToChildIndexMap.getOrDefault(fieldNameIndex, nextIndex.apply(fieldNameIndex)); AbstractSchemaNode currentChild = childIndex == numberOfChildren ? null : children.get(childIndex); - AbstractSchemaNode newChild = columnMetadata.getOrCreateChild(currentChild, childTypeTag); + AbstractSchemaNode newChild = columnMetadata.getOrCreateChild(currentChild, childTypeTag, previouslyMissing); if (currentChild == null) { children.add(childIndex, newChild); fieldNameIndexToChildIndexMap.put(fieldNameIndex, childIndex); @@ -111,7 +112,7 @@ public final class ObjectSchemaNode extends AbstractSchemaNestedNode { return null; } isEmptyObject = true; - AbstractSchemaNode emptyChild = columnMetadata.getOrCreateChild(null, ATypeTag.MISSING); + AbstractSchemaNode emptyChild = columnMetadata.getOrCreateChild(null, ATypeTag.MISSING, false); addChild(DUMMY_FIELD_NAME_INDEX, emptyChild); nextIndex = this::emptyColumnIndex; return emptyChild; diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/UnionSchemaNode.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/UnionSchemaNode.java index 8124780ee5..89f640fb36 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/UnionSchemaNode.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/UnionSchemaNode.java @@ -92,7 
+92,7 @@ public final class UnionSchemaNode extends AbstractSchemaNestedNode { ATypeTag normalizedTypeTag = getNormalizedTypeTag(childTypeTag); AbstractSchemaNode currentChild = children.get(normalizedTypeTag); //The parent of a union child should be the actual parent - AbstractSchemaNode newChild = columnMetadata.getOrCreateChild(currentChild, normalizedTypeTag); + AbstractSchemaNode newChild = columnMetadata.getOrCreateChild(currentChild, normalizedTypeTag, false); if (currentChild != newChild) { putChild(newChild, currentChild); } else { diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/collection/AbstractCollectionSchemaNode.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/collection/AbstractCollectionSchemaNode.java index 1bcf1e9ed9..bfdc1304ac 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/collection/AbstractCollectionSchemaNode.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/collection/AbstractCollectionSchemaNode.java @@ -51,7 +51,7 @@ public abstract class AbstractCollectionSchemaNode extends AbstractSchemaNestedN public final AbstractSchemaNode getOrCreateItem(ATypeTag childTypeTag, FlushColumnMetadata columnMetadata) throws HyracksDataException { - AbstractSchemaNode newItem = columnMetadata.getOrCreateChild(item, childTypeTag); + AbstractSchemaNode newItem = columnMetadata.getOrCreateChild(item, childTypeTag, false); if (item == null) { newItem.getDeltaColumnsChanged(); numberOfColumns += newItem.getNumberOfColumns(); diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/BatchFinalizerVisitor.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/BatchFinalizerVisitor.java index bc8f1841bf..9beaf6fb8a 100644 --- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/BatchFinalizerVisitor.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/BatchFinalizerVisitor.java @@ -137,7 +137,6 @@ public final class BatchFinalizerVisitor implements ISchemaNodeVisitor<Void, Abs primitiveNode.getColumnIndex(), needAllColumns)) { orderedColumns.add(columnSchemaMetadata.getWriter(primitiveNode.getColumnIndex())); } - //Prepare for the next batch primitiveNode.setCounter(0); primitiveNode.setNumberOfVisitedColumnsInBatch(0); diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java index 1c93ebb727..23032350e3 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java @@ -355,14 +355,20 @@ public class FlushColumnMetadata extends AbstractColumnMetadata { return columnWriters.size(); } - public AbstractSchemaNode getOrCreateChild(AbstractSchemaNode child, ATypeTag childTypeTag) - throws HyracksDataException { + public AbstractSchemaNode getOrCreateChild(AbstractSchemaNode child, ATypeTag childTypeTag, + boolean replaceObjectDummyField) throws HyracksDataException { AbstractSchemaNode currentChild = child; ATypeTag normalizedTypeTag = getNormalizedTypeTag(childTypeTag); boolean newChild = currentChild == null; - if (currentChild == null || normalizedTypeTag != ATypeTag.MISSING && normalizedTypeTag != ATypeTag.NULL - && currentChild.getTypeTag() != ATypeTag.UNION - && getNormalizedTypeTag(currentChild.getTypeTag()) != normalizedTypeTag) { + if (currentChild == null + // special case(ASTERIXDB-3652): where initially the object has no child + // hence an empty object was 
added, see ColumnTransformer. + // but in later documents, object got a child with value NULL. + // we need to record both NULL with the dummy MISSING in the union to maintain the structure. + || (replaceObjectDummyField && normalizedTypeTag == ATypeTag.NULL) + || normalizedTypeTag != ATypeTag.MISSING && normalizedTypeTag != ATypeTag.NULL + && currentChild.getTypeTag() != ATypeTag.UNION + && getNormalizedTypeTag(currentChild.getTypeTag()) != normalizedTypeTag) { //Create a new child or union type if required type is different from the current child type int visitedBatchVersion = newChild ? -1 : currentChild.getVisitedBatchVersion(); currentChild = createChild(child, childTypeTag); @@ -449,6 +455,8 @@ public class FlushColumnMetadata extends AbstractColumnMetadata { //Flush all definition levels from parent to the current node flushDefinitionLevels(level, parent, node, includeChildColumns); //Add null value (+2) to say that both the parent and the child are present + //A null is represented by the (childMask | (level which is not NULL)) + //{ "x": { "a": NULL } } --> In here, the nullLevel stored will be nullMask(2) | 1 (level of x) definitionLevels.get(node).add(ColumnValuesUtil.getNullMask(level + 2) | level); node.incrementCounter(); } diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/IColumnValuesReader.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/IColumnValuesReader.java index 51ef2c1d67..22cbb9fdde 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/IColumnValuesReader.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/IColumnValuesReader.java @@ -26,6 +26,12 @@ import org.apache.hyracks.data.std.api.IValueReference; import com.fasterxml.jackson.databind.node.ObjectNode; public interface IColumnValuesReader extends Comparable<IColumnValuesReader> { + /** + * Indicates if the column is missing in the leaf + * @return + */ + 
boolean areAllMissing(); + /** * Reset the reader * diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java index e8c6d3e597..3dead4aed1 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java @@ -103,6 +103,11 @@ abstract class AbstractColumnValuesReader implements IColumnValuesReader { } } + @Override + public boolean areAllMissing() { + return allMissing; + } + abstract void resetValues(); @Override @@ -161,7 +166,9 @@ abstract class AbstractColumnValuesReader implements IColumnValuesReader { @Override public final boolean isMissing() { - return !isDelimiter() && level < maxLevel; + // After clearing the nullBitMask, a level less than maxLevel may incorrectly be considered missing. + // This is because the nullBitMask can affect the interpretation of the level value. 
+ return !isDelimiter() && !nullLevel && level < maxLevel; } @Override diff --git a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/values/reader/AbstractDummyColumnValuesReader.java b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/values/reader/AbstractDummyColumnValuesReader.java index 78ca96bc7c..ed76cbdf53 100644 --- a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/values/reader/AbstractDummyColumnValuesReader.java +++ b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/values/reader/AbstractDummyColumnValuesReader.java @@ -85,6 +85,11 @@ public abstract class AbstractDummyColumnValuesReader implements IColumnValuesRe //noOp } + @Override + public boolean areAllMissing() { + return false; + } + @Override public final ATypeTag getTypeTag() { return typeTag; diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/sweep/ColumnSweepPlanner.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/sweep/ColumnSweepPlanner.java index 5b837a154a..9a200cc5a0 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/sweep/ColumnSweepPlanner.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/sweep/ColumnSweepPlanner.java @@ -118,6 +118,7 @@ public final class ColumnSweepPlanner { long accessTime = clock.getCurrentTime(); lastAccess = accessTime; int numberOfColumns = projectionInfo.getNumberOfProjectedColumns(); + resizeStatsArrays(numberOfColumns); boolean requireCloudAccess = false; for (int i = 0; i < numberOfColumns; i++) { int columnIndex = projectionInfo.getColumnIndex(i); @@ -185,6 +186,9 @@ public final class ColumnSweepPlanner { } private void resizeStatsArrays(int 
numberOfColumns) { + if (numberOfColumns <= sizes.length) { + return; + } sizes = IntArrays.ensureCapacity(sizes, numberOfColumns); lastAccesses = LongArrays.ensureCapacity(lastAccesses, numberOfColumns); this.numberOfColumns = numberOfColumns - numberOfPrimaryKeys;
