>From Wail Alkowaileet <[email protected]>: Wail Alkowaileet has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17500 )
Change subject: [ASTERIXDB-3171][OTH] Log schema ...................................................................... [ASTERIXDB-3171][OTH] Log schema - user model changes: no - storage format changes: no - interface changes: no Details: In order to debug future issues in columnar datasets, we need to know the schema information. Change-Id: If5d1b9af8f0579df5cb27a90d3456407263ed53f Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17500 Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Reviewed-by: Wail Alkowaileet <[email protected]> Reviewed-by: Murtadha Hubail <[email protected]> --- M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/PathExtractorVisitor.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java A asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java 6 files changed, 224 insertions(+), 1 deletion(-) Approvals: Murtadha Hubail: Looks good to me, approved Wail Alkowaileet: Looks good to me, but someone else must approve Jenkins: Verified; Verified Anon. E. Moose #1000171: diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/PathExtractorVisitor.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/PathExtractorVisitor.java index 2917074..2bd2954 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/PathExtractorVisitor.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/PathExtractorVisitor.java @@ -27,10 +27,13 @@ import org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode; import org.apache.hyracks.api.exceptions.HyracksDataException; +import it.unimi.dsi.fastutil.ints.IntList; + public class PathExtractorVisitor implements ISchemaNodeVisitor<AbstractSchemaNode, Void> { @Override public AbstractSchemaNode visit(ObjectSchemaNode objectNode, Void arg) throws HyracksDataException { - int fieldNameIndex = objectNode.getChildrenFieldNameIndexes().getInt(0); + IntList fieldNameIndexes = objectNode.getChildrenFieldNameIndexes(); + int fieldNameIndex = fieldNameIndexes.isEmpty() ? -1 : objectNode.getChildrenFieldNameIndexes().getInt(0); if (fieldNameIndex < 0) { return MissingFieldSchemaNode.INSTANCE; } diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java index 0771ccb..351dcdf 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java @@ -65,6 +65,10 @@ try { for (int i = 0; i < fieldNames.length; i++) { int fieldNameIndex = fieldNamesDictionary.getFieldNameIndex(fieldNames[i]); + if (fieldNameIndex == -1) { + // Missing child + continue; + } AbstractSchemaNode child = objectNode.getChild(fieldNameIndex); clippedObjectNode.addChild(fieldNameIndex, fieldTypes[i].accept(this, child)); } diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java index 8cd1e98..0d7404d 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java @@ -42,6 +42,7 @@ import org.apache.asterix.column.metadata.schema.visitor.SchemaBuilderFromIATypeVisitor; import org.apache.asterix.column.util.ColumnValuesUtil; import org.apache.asterix.column.util.RunLengthIntArray; +import org.apache.asterix.column.util.SchemaStringBuilderVisitor; import org.apache.asterix.column.values.IColumnValuesWriter; import org.apache.asterix.column.values.IColumnValuesWriterFactory; import org.apache.asterix.column.values.writer.AbstractColumnValuesWriter; @@ -54,6 +55,9 @@ import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; import org.apache.hyracks.storage.am.lsm.btree.column.api.IColumnWriteMultiPageOp; import org.apache.hyracks.storage.am.lsm.common.api.ILSMMemoryComponent; +import org.apache.hyracks.util.LogRedactionUtil; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import it.unimi.dsi.fastutil.ints.IntArrayList; @@ -62,6 +66,7 @@ * The schema here is mutable and can change according to the flushed records */ public final class FlushColumnMetadata extends AbstractColumnMetadata { + private static final Logger LOGGER = LogManager.getLogger(); private final Map<AbstractSchemaNestedNode, RunLengthIntArray> definitionLevels; private final Mutable<IColumnWriteMultiPageOp> multiPageOpRef; private final FieldNamesDictionary fieldNamesDictionary; @@ -160,6 +165,7 @@ if (changed) { try { serializeChanges(); + logSchema(root, metaRoot, fieldNamesDictionary); changed = false; } catch (IOException e) { throw HyracksDataException.create(e); @@ -253,6 +259,7 @@ ArrayBackedValueStorage schemaStorage = new ArrayBackedValueStorage(serializedMetadata.getLength()); schemaStorage.append(serializedMetadata); + logSchema(root, metaRoot, fieldNamesDictionary); return new FlushColumnMetadata(datasetType, metaType, primaryKeys, metaContainsKeys, columnWriterFactory, multiPageOpRef, writers, fieldNamesDictionary, root, metaRoot, definitionLevels, schemaStorage); } @@ -540,6 +547,22 @@ return nestedNode; } + private static void logSchema(ObjectSchemaNode root, ObjectSchemaNode metaRoot, + FieldNamesDictionary fieldNamesDictionary) throws HyracksDataException { + if (!LOGGER.isDebugEnabled()) { + return; + } + // This should be a low frequency object creation + SchemaStringBuilderVisitor schemaBuilder = new SchemaStringBuilderVisitor(fieldNamesDictionary); + String recordSchema = LogRedactionUtil.userData(schemaBuilder.build(root)); + LOGGER.debug("Schema for {} has changed: \n {}", SchemaStringBuilderVisitor.RECORD_SCHEMA, recordSchema); + if (metaRoot != null) { + String metaRecordSchema = LogRedactionUtil.userData(schemaBuilder.build(metaRoot)); + LOGGER.debug("Schema for {} has changed: \n {}", SchemaStringBuilderVisitor.META_RECORD_SCHEMA, + metaRecordSchema); + } + } + public static ATypeTag getNormalizedTypeTag(ATypeTag typeTag) { switch (typeTag) { case TINYINT: diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java index b9babf1..54ac60c 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java @@ -31,6 +31,7 @@ import org.apache.asterix.column.metadata.schema.AbstractSchemaNode; import org.apache.asterix.column.metadata.schema.ObjectSchemaNode; import org.apache.asterix.column.metadata.schema.visitor.SchemaClipperVisitor; +import org.apache.asterix.column.util.SchemaStringBuilderVisitor; import org.apache.asterix.column.values.IColumnValuesReaderFactory; import org.apache.asterix.column.values.reader.PrimitiveColumnValuesReader; import org.apache.asterix.column.values.reader.filter.FilterAccessorProvider; @@ -178,6 +179,9 @@ LOGGER.info("Filter: {}", filterString); } + // log requested schema + logSchema(clippedRoot, SchemaStringBuilderVisitor.RECORD_SCHEMA, fieldNamesDictionary); + return new QueryColumnMetadata(datasetType, null, primaryKeyReaders, serializedMetadata, fieldNamesDictionary, clippedRoot, readerFactory, valueGetterFactory, filterEvaluator, filterValueAccessors); } @@ -204,4 +208,13 @@ } return primaryKeyReaders; } + + protected static void logSchema(ObjectSchemaNode root, String schemaSource, + FieldNamesDictionary fieldNamesDictionary) throws HyracksDataException { + if (LOGGER.isDebugEnabled()) { + SchemaStringBuilderVisitor schemaBuilder = new SchemaStringBuilderVisitor(fieldNamesDictionary); + String schema = LogRedactionUtil.userData(schemaBuilder.build(root)); + LOGGER.debug("Queried {} schema: \n {}", schemaSource, schema); + } + } } diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java index 2de1948..2896231 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java @@ -30,6 +30,7 @@ import org.apache.asterix.column.metadata.schema.AbstractSchemaNode; import org.apache.asterix.column.metadata.schema.ObjectSchemaNode; import org.apache.asterix.column.metadata.schema.visitor.SchemaClipperVisitor; +import org.apache.asterix.column.util.SchemaStringBuilderVisitor; import org.apache.asterix.column.values.IColumnValuesReaderFactory; import org.apache.asterix.column.values.reader.PrimitiveColumnValuesReader; import org.apache.asterix.column.values.reader.filter.FilterAccessorProvider; @@ -144,6 +145,11 @@ PrimitiveColumnValuesReader[] primaryKeyReaders = createPrimaryKeyReaders(input, readerFactory, numberOfPrimaryKeys); + // log requested schema for record + logSchema(clippedRoot, SchemaStringBuilderVisitor.RECORD_SCHEMA, fieldNamesDictionary); + // log requested schema for meta-record + logSchema(metaClippedRoot, SchemaStringBuilderVisitor.META_RECORD_SCHEMA, fieldNamesDictionary); + return new QueryColumnWithMetaMetadata(datasetType, metaType, primaryKeyReaders, serializedMetadata, fieldNamesDictionary, clippedRoot, metaClippedRoot, readerFactory, valueGetterFactory, filterEvaluator, filterValueAccessors); diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java new file mode 100644 index 0000000..85aca8f --- /dev/null +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.asterix.column.util; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.asterix.column.metadata.FieldNamesDictionary; +import org.apache.asterix.column.metadata.schema.AbstractSchemaNode; +import org.apache.asterix.column.metadata.schema.ISchemaNodeVisitor; +import org.apache.asterix.column.metadata.schema.ObjectSchemaNode; +import org.apache.asterix.column.metadata.schema.UnionSchemaNode; +import org.apache.asterix.column.metadata.schema.collection.AbstractCollectionSchemaNode; +import org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode; +import org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer; +import org.apache.asterix.om.base.AString; +import org.apache.hyracks.api.exceptions.HyracksDataException; +import org.apache.hyracks.data.std.api.IValueReference; +import org.apache.hyracks.data.std.util.ByteArrayAccessibleDataInputStream; +import org.apache.hyracks.data.std.util.ByteArrayAccessibleInputStream; +import org.apache.hyracks.util.string.UTF8StringReader; +import org.apache.hyracks.util.string.UTF8StringWriter; + +import it.unimi.dsi.fastutil.ints.IntList; + +public class SchemaStringBuilderVisitor implements ISchemaNodeVisitor<Void, Void> { + public static String RECORD_SCHEMA = "record"; + public static String META_RECORD_SCHEMA = "meta-record"; + private final StringBuilder builder; + private final List<String> fieldNames; + + private int level; + private int indent; + + public SchemaStringBuilderVisitor(FieldNamesDictionary dictionary) throws HyracksDataException { + builder = new StringBuilder(); + this.fieldNames = new ArrayList<>(); + AStringSerializerDeserializer stringSerDer = + new AStringSerializerDeserializer(new UTF8StringWriter(), new UTF8StringReader()); + List<IValueReference> extractedFieldNames = dictionary.getFieldNames(); + + //Deserialize field names + ByteArrayAccessibleInputStream in = new ByteArrayAccessibleInputStream(new byte[0], 0, 0); + ByteArrayAccessibleDataInputStream dataIn = new ByteArrayAccessibleDataInputStream(in); + for (IValueReference serFieldName : extractedFieldNames) { + in.setContent(serFieldName.getByteArray(), 0, serFieldName.getLength()); + AString fieldName = stringSerDer.deserialize(dataIn); + this.fieldNames.add(fieldName.getStringValue()); + } + level = 0; + indent = 0; + } + + public String build(ObjectSchemaNode root) throws HyracksDataException { + builder.append("root\n"); + visit(root, null); + return builder.toString(); + } + + @Override + public Void visit(ObjectSchemaNode objectNode, Void arg) throws HyracksDataException { + List<AbstractSchemaNode> children = objectNode.getChildren(); + IntList fieldNameIndexes = objectNode.getChildrenFieldNameIndexes(); + level++; + indent++; + + for (int i = 0; i < children.size(); i++) { + int index = fieldNameIndexes.getInt(i); + String fieldName = fieldNames.get(index); + AbstractSchemaNode child = children.get(i); + append(fieldName, index, child); + child.accept(this, null); + } + + level--; + indent--; + return null; + } + + @Override + public Void visit(AbstractCollectionSchemaNode collectionNode, Void arg) throws HyracksDataException { + level++; + indent++; + AbstractSchemaNode itemNode = collectionNode.getItemNode(); + append("item", itemNode); + itemNode.accept(this, null); + level--; + indent--; + return null; + } + + @Override + public Void visit(UnionSchemaNode unionNode, Void arg) throws HyracksDataException { + indent++; + for (AbstractSchemaNode child : unionNode.getChildren().values()) { + append(child.getTypeTag().toString(), child); + child.accept(this, null); + } + indent--; + return null; + } + + @Override + public Void visit(PrimitiveSchemaNode primitiveNode, Void arg) throws HyracksDataException { + return null; + } + + private void appendDecor() { + builder.append("| ".repeat(Math.max(0, indent - 1))); + builder.append("|-- "); + } + + private void append(String key, AbstractSchemaNode node) { + append(key, -1, node); + } + + private void append(String key, int index, AbstractSchemaNode node) { + appendDecor(); + builder.append(key); + if (index >= 0) { + builder.append(" ("); + builder.append(index); + builder.append(')'); + } + builder.append(": "); + builder.append(node.getTypeTag().toString()); + builder.append(" <level: "); + builder.append(level); + if (!node.isNested()) { + final PrimitiveSchemaNode primitiveNode = (PrimitiveSchemaNode) node; + builder.append(", index: "); + builder.append(primitiveNode.getColumnIndex()); + } + builder.append(">\n"); + } +} -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17500 To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Change-Id: If5d1b9af8f0579df5cb27a90d3456407263ed53f Gerrit-Change-Number: 17500 Gerrit-PatchSet: 3 Gerrit-Owner: Wail Alkowaileet <[email protected]> Gerrit-Reviewer: Anon. E. Moose #1000171 Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Murtadha Hubail <[email protected]> Gerrit-Reviewer: Wail Alkowaileet <[email protected]> Gerrit-MessageType: merged
