This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit e9758db48da3276f0a88143a463eaef904381468 Author: Ritik Raj <[email protected]> AuthorDate: Tue Nov 18 02:46:41 2025 +0530 [ASTERIXDB-3652][STO] Consider isColumnMissingForCurrentTuple while assembling - user model changes: no - storage format changes: no - interface changes: yes Details: There can be two cases where a column is missing: 1. For a disk component, if a particular column was not in all the leaves of the disk component, then the column is treated as allMissing. 2. Across disk components, when Component1 has an array field with itemType Int, but Component2 has the same array field with itemType String. When Component1 and Component2 are merged, the new merged Component1_2 will have both columns as not "allMissing", which can be misleading for the EndOfRepeatedGroup Assembler, as it should only pick a non-missing column to end the array for the tuple. In this case, for tuple1 the Integer column should be the reader in EoGAssembler responsible for closing the array, not the missing String Column Assembler, and vice versa. 
Ext-ref:MB-69414 Change-Id: I34d490a2089598b55e7cd8981a20f98b841fd998 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20584 Reviewed-by: Peeyush Gupta <[email protected]> Integration-Tests: Jenkins <[email protected]> Tested-by: Ritik Raj <[email protected]> --- .../ASTERIXDB-3652-4/ASTERIXDB-3652.001.ddl.sqlpp | 28 +++++++++++++++++ .../ASTERIXDB-3652.002.update.sqlpp | 25 ++++++++++++++++ .../ASTERIXDB-3652.003.update.sqlpp | 25 ++++++++++++++++ .../ASTERIXDB-3652.004.query.sqlpp | 23 ++++++++++++++ .../ASTERIXDB-3652-5/ASTERIXDB-3652.001.ddl.sqlpp | 28 +++++++++++++++++ .../ASTERIXDB-3652.002.update.sqlpp | 25 ++++++++++++++++ .../ASTERIXDB-3652.003.update.sqlpp | 25 ++++++++++++++++ .../ASTERIXDB-3652.004.query.sqlpp | 23 ++++++++++++++ .../ASTERIXDB-3652-4/ASTERIXDB-3652.004.adm | 2 ++ .../ASTERIXDB-3652-5/ASTERIXDB-3652.004.adm | 2 ++ .../runtimets/testsuite_single_partition_sqlpp.xml | 10 +++++++ .../assembler/AbstractPrimitiveValueAssembler.java | 6 +++- .../assembler/EndOfRepeatedGroupAssembler.java | 11 +++---- .../column/assembler/PrimitiveValueAssembler.java | 2 +- .../assembler/RepeatedPrimitiveValueAssembler.java | 13 ++++---- .../column/operation/query/ColumnAssembler.java | 3 +- .../asterix/column/values/IColumnValuesReader.java | 23 ++++++++++---- .../values/reader/AbstractColumnValuesReader.java | 35 ++++++++++++++++++++-- .../reader/AbstractDummyColumnValuesReader.java | 15 ++++++---- 19 files changed, 297 insertions(+), 27 deletions(-) diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.001.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.001.ddl.sqlpp new file mode 100644 index 0000000000..b5514cfdf3 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.001.ddl.sqlpp @@ -0,0 +1,28 @@ +/* + * Licensed to 
the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +DROP DATAVERSE test IF EXISTS; +CREATE DATAVERSE test; + +USE test; + +CREATE DATASET ColumnDataset +PRIMARY KEY (name: String) WITH { + "storage-format": {"format": "column"} +}; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.002.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.002.update.sqlpp new file mode 100644 index 0000000000..462a581a02 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.002.update.sqlpp @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE test; + +UPSERT INTO ColumnDataset ({ + "name": "A", + "public_likes": [] +}); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.003.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.003.update.sqlpp new file mode 100644 index 0000000000..3722a07b2f --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.003.update.sqlpp @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; + +UPSERT INTO ColumnDataset ({ + "name": "B", + "public_likes": ["A", "B"] +}); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.004.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.004.query.sqlpp new file mode 100644 index 0000000000..c368f8ae80 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.004.query.sqlpp @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; + +SELECT VALUE c +FROM ColumnDataset c; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.001.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.001.ddl.sqlpp new file mode 100644 index 0000000000..b5514cfdf3 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.001.ddl.sqlpp @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +DROP DATAVERSE test IF EXISTS; +CREATE DATAVERSE test; + +USE test; + +CREATE DATASET ColumnDataset +PRIMARY KEY (name: String) WITH { + "storage-format": {"format": "column"} +}; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.002.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.002.update.sqlpp new file mode 100644 index 0000000000..e113524800 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.002.update.sqlpp @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; + +UPSERT INTO ColumnDataset ({ + "name": "A", + "public_likes": [1, 2, 3] +}); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.003.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.003.update.sqlpp new file mode 100644 index 0000000000..3722a07b2f --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.003.update.sqlpp @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; + +UPSERT INTO ColumnDataset ({ + "name": "B", + "public_likes": ["A", "B"] +}); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.004.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.004.query.sqlpp new file mode 100644 index 0000000000..c368f8ae80 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.004.query.sqlpp @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; + +SELECT VALUE c +FROM ColumnDataset c; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.004.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.004.adm new file mode 100644 index 0000000000..a26bc2b98d --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-4/ASTERIXDB-3652.004.adm @@ -0,0 +1,2 @@ +{ "name": "A", "public_likes": [ ] } +{ "name": "B", "public_likes": [ "A", "B" ] } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.004.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.004.adm new file mode 100644 index 0000000000..180d25fc13 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/assembly/ASTERIXDB-3652-5/ASTERIXDB-3652.004.adm @@ -0,0 +1,2 @@ +{ "name": "A", "public_likes": [ 1, 2, 3 ] } +{ "name": "B", "public_likes": [ "A", "B" ] } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_single_partition_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_single_partition_sqlpp.xml index 4935ccec94..b248475c14 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_single_partition_sqlpp.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_single_partition_sqlpp.xml @@ -99,6 +99,16 @@ <output-dir compare="Text">assembly/ASTERIXDB-3652-3</output-dir> </compilation-unit> </test-case> + <test-case FilePath="column"> + <compilation-unit name="assembly/ASTERIXDB-3652-4"> + <output-dir compare="Text">assembly/ASTERIXDB-3652-4</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="column"> + <compilation-unit 
name="assembly/ASTERIXDB-3652-5"> + <output-dir compare="Text">assembly/ASTERIXDB-3652-5</output-dir> + </compilation-unit> + </test-case> <test-case FilePath="column"> <compilation-unit name="assembly/missing-inner-array"> <output-dir compare="Text">assembly/missing-inner-array</output-dir> diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractPrimitiveValueAssembler.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractPrimitiveValueAssembler.java index c565ea8550..9bf8ebc573 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractPrimitiveValueAssembler.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AbstractPrimitiveValueAssembler.java @@ -98,5 +98,9 @@ public abstract class AbstractPrimitiveValueAssembler extends AbstractValueAssem * * @return the index of the next value */ - public abstract int next(AssemblerState state) throws HyracksDataException; + public abstract int next(int tupleIndex, AssemblerState state) throws HyracksDataException; + + public void notifyCurrentTuple(int tupleIndex) { + reader.registerCurrentTuple(tupleIndex); + } } diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/EndOfRepeatedGroupAssembler.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/EndOfRepeatedGroupAssembler.java index 63204d71b7..389176e3ec 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/EndOfRepeatedGroupAssembler.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/EndOfRepeatedGroupAssembler.java @@ -47,11 +47,12 @@ public class EndOfRepeatedGroupAssembler extends AbstractPrimitiveValueAssembler // NoOp } - private IColumnValuesReader getNonMissingReader() { + private IColumnValuesReader getNonMissingReader(int tupleIndex) { IColumnValuesReader nonMissingReader = null; for 
(IColumnValuesReader r : readers) { - if (!r.areAllMissing()) { + if (!r.isColumnMissingForCurrentTuple(tupleIndex)) { nonMissingReader = r; + break; } } if (nonMissingReader == null) { @@ -61,9 +62,9 @@ public class EndOfRepeatedGroupAssembler extends AbstractPrimitiveValueAssembler } @Override - public int next(AssemblerState state) throws HyracksDataException { - if (reader.areAllMissing()) { - reader = getNonMissingReader(); + public int next(int tupleIndex, AssemblerState state) throws HyracksDataException { + if (reader.isColumnMissingForCurrentTuple(tupleIndex)) { + reader = getNonMissingReader(tupleIndex); this.delimiterIndex = reader.getNumberOfDelimiters() - numDelimiters; } // Get the current delimiter index from the reader diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/PrimitiveValueAssembler.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/PrimitiveValueAssembler.java index f5edc3158e..d799439056 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/PrimitiveValueAssembler.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/PrimitiveValueAssembler.java @@ -44,7 +44,7 @@ final class PrimitiveValueAssembler extends AbstractPrimitiveValueAssembler { } @Override - public int next(AssemblerState state) throws HyracksDataException { + public int next(int tupleIndex, AssemblerState state) throws HyracksDataException { // Do not call next on PK readers as they are maintained by the cursor if (!primaryKey && !reader.next()) { throw createException(); diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/RepeatedPrimitiveValueAssembler.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/RepeatedPrimitiveValueAssembler.java index 67379c0125..e2eced8bae 100644 --- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/RepeatedPrimitiveValueAssembler.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/RepeatedPrimitiveValueAssembler.java @@ -39,15 +39,16 @@ final class RepeatedPrimitiveValueAssembler extends AbstractPrimitiveValueAssemb } @Override - public int next(AssemblerState state) throws HyracksDataException { + public int next(int tupleIndex, AssemblerState state) throws HyracksDataException { /* * Move to the next value if one of the following is true * - It is the first time we access this assembler (i.e., the first round) * - We are in an array (i.e., the parent array assembler is active) * - The value is a delimiter (i.e., the last round) */ - if (!state.isInGroup() || reader.isRepeatedValue() || reader.isDelimiter() || reader.areAllMissing()) { - next(); + if (!state.isInGroup() || reader.isRepeatedValue() || reader.isDelimiter() + || reader.isColumnMissingForCurrentTuple(tupleIndex)) { + next(tupleIndex); } if (isDelegate()) { @@ -59,8 +60,8 @@ final class RepeatedPrimitiveValueAssembler extends AbstractPrimitiveValueAssemb return NEXT_ASSEMBLER; } - private void next() throws HyracksDataException { - if (reader.areAllMissing()) { + private void next(int tupleIndex) throws HyracksDataException { + if (reader.isColumnMissingForCurrentTuple(tupleIndex)) { // If all values are missing, we add missing to the ancestor at the lowest missing level addMissingToAncestor(reader.getLevel()); return; @@ -75,7 +76,7 @@ final class RepeatedPrimitiveValueAssembler extends AbstractPrimitiveValueAssemb * (i.e., arrayDelegate is true) */ addNullToAncestor(reader.getLevel()); - } else if (reader.isMissing() && reader.getLevel() < level) { + } else if ((reader.isMissing() && reader.getLevel() < level)) { /* * Add a missing item in either * - the array item is MISSING diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/ColumnAssembler.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/ColumnAssembler.java index e6ff021168..cbfe14c4f5 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/ColumnAssembler.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/ColumnAssembler.java @@ -81,10 +81,11 @@ public final class ColumnAssembler { int index = 0; while (index < assemblers.length) { AbstractPrimitiveValueAssembler assembler = assemblers[index]; + assembler.notifyCurrentTuple(tupleIndex); int groupIndex; try { - groupIndex = assembler.next(state); + groupIndex = assembler.next(tupleIndex, state); } catch (ColumnarValueException e) { appendInformation(e); throw e; diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/IColumnValuesReader.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/IColumnValuesReader.java index 22cbb9fdde..76588e1a6f 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/IColumnValuesReader.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/IColumnValuesReader.java @@ -26,12 +26,6 @@ import org.apache.hyracks.data.std.api.IValueReference; import com.fasterxml.jackson.databind.node.ObjectNode; public interface IColumnValuesReader extends Comparable<IColumnValuesReader> { - /** - * Indicates if the column is missing in the leaf - * @return - */ - boolean areAllMissing(); - /** * Reset the reader * @@ -156,4 +150,21 @@ public interface IColumnValuesReader extends Comparable<IColumnValuesReader> { * @param node container for the reader's information */ void appendReaderInformation(ObjectNode node); + + /** + * There can be two cases when a column is missing for tuple: + * 1. column is not at all present in the leaf + * 2. 
column is present in the leaf, but for this tuple it was missing + * @param tupleIndex + * @return + */ + boolean isColumnMissingForCurrentTuple(int tupleIndex); + + /** + * Registers the current tuple index. This index is used to determine + * whether the value being read is the first value associated with + * the given tuple. + * @param tupleIndex + */ + void registerCurrentTuple(int tupleIndex); } diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java index 3dead4aed1..62d0aadea9 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java @@ -60,6 +60,10 @@ abstract class AbstractColumnValuesReader implements IColumnValuesReader { private boolean nullLevel; private boolean allMissing; + private int previousTupleIndex; + private int currentTupleIndex; + private boolean firstValueForCurrentTuple; + // For logging purposes only private int numberOfEncounteredMissing; private int numberOfEncounteredNull; @@ -74,6 +78,7 @@ abstract class AbstractColumnValuesReader implements IColumnValuesReader { currentDefinitionLevels = definitionLevels.get(maxLevel); valuesStream = primaryKey ? 
new ByteBufferInputStream() : new MultiByteBufferInputStream(); this.primaryKey = primaryKey; + this.previousTupleIndex = -1; } final void nextLevel() throws HyracksDataException { @@ -89,6 +94,13 @@ abstract class AbstractColumnValuesReader implements IColumnValuesReader { nullLevel = ColumnValuesUtil.isNull(nullBitMask, actualLevel); //Clear the null bit to allow repeated value readers determine the correct delimiter for null values level = ColumnValuesUtil.clearNullBit(nullBitMask, actualLevel); + if (currentTupleIndex != previousTupleIndex) { + // We are at the first value for the current tuple + firstValueForCurrentTuple = true; + previousTupleIndex = currentTupleIndex; + } else { + firstValueForCurrentTuple = false; + } // For logging purposes only numberOfEncounteredMissing += isMissing() ? 1 : 0; @@ -104,8 +116,24 @@ abstract class AbstractColumnValuesReader implements IColumnValuesReader { } @Override - public boolean areAllMissing() { - return allMissing; + public boolean isColumnMissingForCurrentTuple(int tupleIndex) { + return allMissing || missingColumnForCurrentTuple(tupleIndex); + } + + private boolean missingColumnForCurrentTuple(int tupleIndex) { + // A column is considered missing for the current tuple if: + // 1- The level is 0 (indicating that there are no values present for this column) + // 2- This is the first value for the current tuple (to avoid false positives in repeated structures) + // 3- The previous tuple index matches the current tuple + + // The last condition ensures that we are correctly checking for the same tuple index. + // and not using the "firstValueForCurrentTuple" from previous tuple. 
+ return (level == 0 && firstValueForCurrentTuple && previousTupleIndex == tupleIndex); + } + + @Override + public void registerCurrentTuple(int tupleIndex) { + currentTupleIndex = tupleIndex; } abstract void resetValues(); @@ -123,6 +151,9 @@ abstract class AbstractColumnValuesReader implements IColumnValuesReader { } allMissing = false; try { + previousTupleIndex = -1; + firstValueForCurrentTuple = false; + maxLevel = BytesUtils.readZigZagVarInt(in); nullBitMask = ColumnValuesUtil.getNullMask(maxLevel); diff --git a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/values/reader/AbstractDummyColumnValuesReader.java b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/values/reader/AbstractDummyColumnValuesReader.java index ed76cbdf53..8dbd63dd5c 100644 --- a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/values/reader/AbstractDummyColumnValuesReader.java +++ b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/values/reader/AbstractDummyColumnValuesReader.java @@ -85,11 +85,6 @@ public abstract class AbstractDummyColumnValuesReader implements IColumnValuesRe //noOp } - @Override - public boolean areAllMissing() { - return false; - } - @Override public final ATypeTag getTypeTag() { return typeTag; @@ -174,4 +169,14 @@ public abstract class AbstractDummyColumnValuesReader implements IColumnValuesRe node.put("level", level); node.put("maxLevel", maxLevel); } + + @Override + public void registerCurrentTuple(int tupleIndex) { + + } + + @Override + public boolean isColumnMissingForCurrentTuple(int tupleIndex) { + return false; + } }
