steveniemitz commented on code in PR #22348: URL: https://github.com/apache/beam/pull/22348#discussion_r925560370
########## sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/avro/RowDeserializerCodeGenerator.java: ########## @@ -0,0 +1,1180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.beam.sdk.schemas.utils.avro; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.beam.sdk.schemas.utils.avro.utils.SerDesUtils.getClassName; +import static org.apache.beam.sdk.schemas.utils.avro.utils.SerDesUtils.getSchemaFingerprint; + +import com.sun.codemodel.JArray; +import com.sun.codemodel.JBlock; +import com.sun.codemodel.JCatchBlock; +import com.sun.codemodel.JClass; +import com.sun.codemodel.JConditional; +import com.sun.codemodel.JDefinedClass; +import com.sun.codemodel.JDoLoop; +import com.sun.codemodel.JExpr; +import com.sun.codemodel.JExpression; +import com.sun.codemodel.JFieldVar; +import com.sun.codemodel.JForLoop; +import com.sun.codemodel.JInvocation; +import com.sun.codemodel.JMethod; +import com.sun.codemodel.JMod; +import com.sun.codemodel.JPackage; +import com.sun.codemodel.JStatement; +import com.sun.codemodel.JTryBlock; +import com.sun.codemodel.JVar; +import org.apache.beam.sdk.schemas.utils.avro.exceptions.RowSerdesGeneratorException; +import 
org.apache.beam.sdk.schemas.utils.avro.utils.SerDesUtils; +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Field; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.ListIterator; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.function.BiConsumer; +import java.util.stream.Collectors; +import org.apache.avro.AvroRuntimeException; +import org.apache.avro.Schema; +import org.apache.avro.Schema.Type; +import org.apache.avro.io.Decoder; +import org.apache.avro.io.parsing.ResolvingGrammarGenerator; +import org.apache.avro.io.parsing.Symbol; +import org.apache.beam.sdk.schemas.utils.AvroUtils; +import org.apache.beam.sdk.values.Row; +import org.apache.commons.lang3.StringUtils; +import org.codehaus.jackson.JsonNode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class RowDeserializerCodeGenerator<T> extends SerDesBase { + + private static final Logger LOGGER = LoggerFactory.getLogger(RowDeserializerCodeGenerator.class); + + private static final String DECODER = "decoder"; + private static final String REUSE = "reuse"; + + private final Schema writer; + private final Schema reader; + private JDefinedClass deserializerClass; + private JFieldVar schemaMapField; + private JMethod generateBeamSchemasMethod; + private Map<Long, Schema> schemaMap = new HashMap<>(); + private Map<Long, JVar> schemaVarMap = new HashMap<>(); + private Map<String, JMethod> deserializeMethodMap = new HashMap<>(); + private Map<String, JMethod> skipMethodMap = new HashMap<>(); + private Map<JMethod, Set<Class<? extends Exception>>> exceptionFromMethodMap = new HashMap<>(); + private JFieldVar avroSchemaMapField; + + + /** + * Row Deserializer Code generator class. Based on given schemas it generates Java code. 
+ * + * @param writer the writer's Avro schema + * @param reader the reader's Avro schema + * @param destination Path for generated java codes. + * @param classLoader classLoader + * @param compileClassPath Path for compiled java classes. + */ + public RowDeserializerCodeGenerator(Schema writer, Schema reader, File destination, + ClassLoader classLoader, String compileClassPath) { + super("deserializer", destination, classLoader, compileClassPath); + this.writer = writer; + this.reader = reader; + LOGGER.warn("RowDeserializerCodeGenerator Constructed."); + } + + /** + * Row Deserializer Code generator. Based on given constructor's variable it generates Java Code + * and load it in classpath then return instance of the generated code. + * + * @return {@link RowDeserializer} + */ + public RowDeserializer<T> generateDeserializer() { + LOGGER.warn("Start generating code of deserializer"); + String className = getClassName(writer, reader, "RowDeserializer"); + JPackage classPackage = codeModel._package(this.generatedPackageName); + LOGGER.warn("Start generating code of deserializer: " + className); + + try { + deserializerClass = classPackage._class(className); + + JVar readerSchemaVar = deserializerClass + .field(JMod.PRIVATE | JMod.FINAL, Schema.class, "avroSchema"); + JMethod constructor = deserializerClass.constructor(JMod.PUBLIC); + JVar constructorParam = constructor.param(Schema.class, "avroSchema"); + constructor.body().assign(JExpr.refthis(readerSchemaVar.name()), constructorParam); + + Schema aliasedWriterSchema = Schema.applyAliases(writer, reader); + Symbol resolvingGrammar = new ResolvingGrammarGenerator() + .generate(aliasedWriterSchema, reader); Review Comment: I would use [Resolver.resolve](https://avro.apache.org/docs/current/api/java/org/apache/avro/Resolver.html#resolve-org.apache.avro.Schema-org.apache.avro.Schema-) here rather than attempting to consume the grammar directly. 
It was made for essentially this purpose: building your own decoders without having to understand the underlying complexities of the grammar. See also [refactoring-resolution](https://github.com/apache/avro/blob/master/doc-deprecated/src/content/mddocs/refactoring-resolution.md). IMO, using the resolver is significantly easier and less error-prone than trying to interpret the grammar directly. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
