This is an automated email from the ASF dual-hosted git repository.

wyk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 6c940ddac7 [ASTERIXDB-3171][OTH] Log schema
6c940ddac7 is described below

commit 6c940ddac7926e706e4de39efa38a385017752db
Author: Wail Alkowaileet <[email protected]>
AuthorDate: Mon May 1 10:19:59 2023 -0700

    [ASTERIXDB-3171][OTH] Log schema
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    In order to debug future issues in columnar datasets,
    we need to know the schema information.
    
    Change-Id: If5d1b9af8f0579df5cb27a90d3456407263ed53f
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17500
    Integration-Tests: Jenkins <[email protected]>
    Tested-by: Jenkins <[email protected]>
    Reviewed-by: Wail Alkowaileet <[email protected]>
    Reviewed-by: Murtadha Hubail <[email protected]>
---
 .../schema/visitor/PathExtractorVisitor.java       |   5 +-
 .../schema/visitor/SchemaClipperVisitor.java       |   4 +
 .../operation/lsm/flush/FlushColumnMetadata.java   |  23 ++++
 .../operation/query/QueryColumnMetadata.java       |  13 ++
 .../query/QueryColumnWithMetaMetadata.java         |   6 +
 .../column/util/SchemaStringBuilderVisitor.java    | 152 +++++++++++++++++++++
 6 files changed, 202 insertions(+), 1 deletion(-)

diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/PathExtractorVisitor.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/PathExtractorVisitor.java
index 2917074393..2bd2954cd3 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/PathExtractorVisitor.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/PathExtractorVisitor.java
@@ -27,10 +27,13 @@ import 
org.apache.asterix.column.metadata.schema.primitive.MissingFieldSchemaNod
 import org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 
+import it.unimi.dsi.fastutil.ints.IntList;
+
 public class PathExtractorVisitor implements 
ISchemaNodeVisitor<AbstractSchemaNode, Void> {
     @Override
     public AbstractSchemaNode visit(ObjectSchemaNode objectNode, Void arg) 
throws HyracksDataException {
-        int fieldNameIndex = 
objectNode.getChildrenFieldNameIndexes().getInt(0);
+        IntList fieldNameIndexes = objectNode.getChildrenFieldNameIndexes();
+        int fieldNameIndex = fieldNameIndexes.isEmpty() ? -1 : 
objectNode.getChildrenFieldNameIndexes().getInt(0);
         if (fieldNameIndex < 0) {
             return MissingFieldSchemaNode.INSTANCE;
         }
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
index 0771ccb51e..351dcdfd88 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
@@ -65,6 +65,10 @@ public class SchemaClipperVisitor implements 
IATypeVisitor<AbstractSchemaNode, A
         try {
             for (int i = 0; i < fieldNames.length; i++) {
                 int fieldNameIndex = 
fieldNamesDictionary.getFieldNameIndex(fieldNames[i]);
+                if (fieldNameIndex == -1) {
+                    // Missing child
+                    continue;
+                }
                 AbstractSchemaNode child = objectNode.getChild(fieldNameIndex);
                 clippedObjectNode.addChild(fieldNameIndex, 
fieldTypes[i].accept(this, child));
             }
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
index 8cd1e98e14..0d7404d980 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
@@ -42,6 +42,7 @@ import 
org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode;
 import 
org.apache.asterix.column.metadata.schema.visitor.SchemaBuilderFromIATypeVisitor;
 import org.apache.asterix.column.util.ColumnValuesUtil;
 import org.apache.asterix.column.util.RunLengthIntArray;
+import org.apache.asterix.column.util.SchemaStringBuilderVisitor;
 import org.apache.asterix.column.values.IColumnValuesWriter;
 import org.apache.asterix.column.values.IColumnValuesWriterFactory;
 import org.apache.asterix.column.values.writer.AbstractColumnValuesWriter;
@@ -54,6 +55,9 @@ import org.apache.hyracks.data.std.primitive.IntegerPointable;
 import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
 import 
org.apache.hyracks.storage.am.lsm.btree.column.api.IColumnWriteMultiPageOp;
 import org.apache.hyracks.storage.am.lsm.common.api.ILSMMemoryComponent;
+import org.apache.hyracks.util.LogRedactionUtil;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 
 import it.unimi.dsi.fastutil.ints.IntArrayList;
 
@@ -62,6 +66,7 @@ import it.unimi.dsi.fastutil.ints.IntArrayList;
  * The schema here is mutable and can change according to the flushed records
  */
 public final class FlushColumnMetadata extends AbstractColumnMetadata {
+    private static final Logger LOGGER = LogManager.getLogger();
     private final Map<AbstractSchemaNestedNode, RunLengthIntArray> 
definitionLevels;
     private final Mutable<IColumnWriteMultiPageOp> multiPageOpRef;
     private final FieldNamesDictionary fieldNamesDictionary;
@@ -160,6 +165,7 @@ public final class FlushColumnMetadata extends 
AbstractColumnMetadata {
         if (changed) {
             try {
                 serializeChanges();
+                logSchema(root, metaRoot, fieldNamesDictionary);
                 changed = false;
             } catch (IOException e) {
                 throw HyracksDataException.create(e);
@@ -253,6 +259,7 @@ public final class FlushColumnMetadata extends 
AbstractColumnMetadata {
 
         ArrayBackedValueStorage schemaStorage = new 
ArrayBackedValueStorage(serializedMetadata.getLength());
         schemaStorage.append(serializedMetadata);
+        logSchema(root, metaRoot, fieldNamesDictionary);
         return new FlushColumnMetadata(datasetType, metaType, primaryKeys, 
metaContainsKeys, columnWriterFactory,
                 multiPageOpRef, writers, fieldNamesDictionary, root, metaRoot, 
definitionLevels, schemaStorage);
     }
@@ -540,6 +547,22 @@ public final class FlushColumnMetadata extends 
AbstractColumnMetadata {
         return nestedNode;
     }
 
+    private static void logSchema(ObjectSchemaNode root, ObjectSchemaNode 
metaRoot,
+            FieldNamesDictionary fieldNamesDictionary) throws 
HyracksDataException {
+        if (!LOGGER.isDebugEnabled()) {
+            return;
+        }
+        // This should be a low frequency object creation
+        SchemaStringBuilderVisitor schemaBuilder = new 
SchemaStringBuilderVisitor(fieldNamesDictionary);
+        String recordSchema = 
LogRedactionUtil.userData(schemaBuilder.build(root));
+        LOGGER.debug("Schema for {} has changed: \n {}", 
SchemaStringBuilderVisitor.RECORD_SCHEMA, recordSchema);
+        if (metaRoot != null) {
+            String metaRecordSchema = 
LogRedactionUtil.userData(schemaBuilder.build(metaRoot));
+            LOGGER.debug("Schema for {} has changed: \n {}", 
SchemaStringBuilderVisitor.META_RECORD_SCHEMA,
+                    metaRecordSchema);
+        }
+    }
+
     public static ATypeTag getNormalizedTypeTag(ATypeTag typeTag) {
         switch (typeTag) {
             case TINYINT:
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
index b9babf1838..54ac60c73f 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
@@ -31,6 +31,7 @@ import 
org.apache.asterix.column.metadata.FieldNamesDictionary;
 import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
 import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
 import org.apache.asterix.column.metadata.schema.visitor.SchemaClipperVisitor;
+import org.apache.asterix.column.util.SchemaStringBuilderVisitor;
 import org.apache.asterix.column.values.IColumnValuesReaderFactory;
 import org.apache.asterix.column.values.reader.PrimitiveColumnValuesReader;
 import org.apache.asterix.column.values.reader.filter.FilterAccessorProvider;
@@ -178,6 +179,9 @@ public class QueryColumnMetadata extends 
AbstractColumnImmutableReadMetadata {
             LOGGER.info("Filter: {}", filterString);
         }
 
+        // log requested schema
+        logSchema(clippedRoot, SchemaStringBuilderVisitor.RECORD_SCHEMA, 
fieldNamesDictionary);
+
         return new QueryColumnMetadata(datasetType, null, primaryKeyReaders, 
serializedMetadata, fieldNamesDictionary,
                 clippedRoot, readerFactory, valueGetterFactory, 
filterEvaluator, filterValueAccessors);
     }
@@ -204,4 +208,13 @@ public class QueryColumnMetadata extends 
AbstractColumnImmutableReadMetadata {
         }
         return primaryKeyReaders;
     }
+
+    protected static void logSchema(ObjectSchemaNode root, String schemaSource,
+            FieldNamesDictionary fieldNamesDictionary) throws 
HyracksDataException {
+        if (LOGGER.isDebugEnabled()) {
+            SchemaStringBuilderVisitor schemaBuilder = new 
SchemaStringBuilderVisitor(fieldNamesDictionary);
+            String schema = 
LogRedactionUtil.userData(schemaBuilder.build(root));
+            LOGGER.debug("Queried {} schema: \n {}", schemaSource, schema);
+        }
+    }
 }
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
index 2de1948b81..289623101e 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
@@ -30,6 +30,7 @@ import 
org.apache.asterix.column.metadata.FieldNamesDictionary;
 import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
 import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
 import org.apache.asterix.column.metadata.schema.visitor.SchemaClipperVisitor;
+import org.apache.asterix.column.util.SchemaStringBuilderVisitor;
 import org.apache.asterix.column.values.IColumnValuesReaderFactory;
 import org.apache.asterix.column.values.reader.PrimitiveColumnValuesReader;
 import org.apache.asterix.column.values.reader.filter.FilterAccessorProvider;
@@ -144,6 +145,11 @@ public final class QueryColumnWithMetaMetadata extends 
QueryColumnMetadata {
         PrimitiveColumnValuesReader[] primaryKeyReaders =
                 createPrimaryKeyReaders(input, readerFactory, 
numberOfPrimaryKeys);
 
+        // log requested schema for record
+        logSchema(clippedRoot, SchemaStringBuilderVisitor.RECORD_SCHEMA, 
fieldNamesDictionary);
+        // log requested schema for meta-record
+        logSchema(metaClippedRoot, 
SchemaStringBuilderVisitor.META_RECORD_SCHEMA, fieldNamesDictionary);
+
         return new QueryColumnWithMetaMetadata(datasetType, metaType, 
primaryKeyReaders, serializedMetadata,
                 fieldNamesDictionary, clippedRoot, metaClippedRoot, 
readerFactory, valueGetterFactory, filterEvaluator,
                 filterValueAccessors);
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java
new file mode 100644
index 0000000000..85aca8f3ca
--- /dev/null
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.column.util;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.asterix.column.metadata.FieldNamesDictionary;
+import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
+import org.apache.asterix.column.metadata.schema.ISchemaNodeVisitor;
+import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
+import org.apache.asterix.column.metadata.schema.UnionSchemaNode;
+import 
org.apache.asterix.column.metadata.schema.collection.AbstractCollectionSchemaNode;
+import org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode;
+import 
org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
+import org.apache.asterix.om.base.AString;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.api.IValueReference;
+import org.apache.hyracks.data.std.util.ByteArrayAccessibleDataInputStream;
+import org.apache.hyracks.data.std.util.ByteArrayAccessibleInputStream;
+import org.apache.hyracks.util.string.UTF8StringReader;
+import org.apache.hyracks.util.string.UTF8StringWriter;
+
+import it.unimi.dsi.fastutil.ints.IntList;
+
+public class SchemaStringBuilderVisitor implements ISchemaNodeVisitor<Void, 
Void> {
+    public static String RECORD_SCHEMA = "record";
+    public static String META_RECORD_SCHEMA = "meta-record";
+    private final StringBuilder builder;
+    private final List<String> fieldNames;
+
+    private int level;
+    private int indent;
+
+    public SchemaStringBuilderVisitor(FieldNamesDictionary dictionary) throws 
HyracksDataException {
+        builder = new StringBuilder();
+        this.fieldNames = new ArrayList<>();
+        AStringSerializerDeserializer stringSerDer =
+                new AStringSerializerDeserializer(new UTF8StringWriter(), new 
UTF8StringReader());
+        List<IValueReference> extractedFieldNames = dictionary.getFieldNames();
+
+        //Deserialize field names
+        ByteArrayAccessibleInputStream in = new 
ByteArrayAccessibleInputStream(new byte[0], 0, 0);
+        ByteArrayAccessibleDataInputStream dataIn = new 
ByteArrayAccessibleDataInputStream(in);
+        for (IValueReference serFieldName : extractedFieldNames) {
+            in.setContent(serFieldName.getByteArray(), 0, 
serFieldName.getLength());
+            AString fieldName = stringSerDer.deserialize(dataIn);
+            this.fieldNames.add(fieldName.getStringValue());
+        }
+        level = 0;
+        indent = 0;
+    }
+
+    public String build(ObjectSchemaNode root) throws HyracksDataException {
+        builder.append("root\n");
+        visit(root, null);
+        return builder.toString();
+    }
+
+    @Override
+    public Void visit(ObjectSchemaNode objectNode, Void arg) throws 
HyracksDataException {
+        List<AbstractSchemaNode> children = objectNode.getChildren();
+        IntList fieldNameIndexes = objectNode.getChildrenFieldNameIndexes();
+        level++;
+        indent++;
+
+        for (int i = 0; i < children.size(); i++) {
+            int index = fieldNameIndexes.getInt(i);
+            String fieldName = fieldNames.get(index);
+            AbstractSchemaNode child = children.get(i);
+            append(fieldName, index, child);
+            child.accept(this, null);
+        }
+
+        level--;
+        indent--;
+        return null;
+    }
+
+    @Override
+    public Void visit(AbstractCollectionSchemaNode collectionNode, Void arg) 
throws HyracksDataException {
+        level++;
+        indent++;
+        AbstractSchemaNode itemNode = collectionNode.getItemNode();
+        append("item", itemNode);
+        itemNode.accept(this, null);
+        level--;
+        indent--;
+        return null;
+    }
+
+    @Override
+    public Void visit(UnionSchemaNode unionNode, Void arg) throws 
HyracksDataException {
+        indent++;
+        for (AbstractSchemaNode child : unionNode.getChildren().values()) {
+            append(child.getTypeTag().toString(), child);
+            child.accept(this, null);
+        }
+        indent--;
+        return null;
+    }
+
+    @Override
+    public Void visit(PrimitiveSchemaNode primitiveNode, Void arg) throws 
HyracksDataException {
+        return null;
+    }
+
+    private void appendDecor() {
+        builder.append("|    ".repeat(Math.max(0, indent - 1)));
+        builder.append("|-- ");
+    }
+
+    private void append(String key, AbstractSchemaNode node) {
+        append(key, -1, node);
+    }
+
+    private void append(String key, int index, AbstractSchemaNode node) {
+        appendDecor();
+        builder.append(key);
+        if (index >= 0) {
+            builder.append(" (");
+            builder.append(index);
+            builder.append(')');
+        }
+        builder.append(": ");
+        builder.append(node.getTypeTag().toString());
+        builder.append(" <level: ");
+        builder.append(level);
+        if (!node.isNested()) {
+            final PrimitiveSchemaNode primitiveNode = (PrimitiveSchemaNode) 
node;
+            builder.append(", index: ");
+            builder.append(primitiveNode.getColumnIndex());
+        }
+        builder.append(">\n");
+    }
+}

Reply via email to