Re: [PR] AVRO-3666 [Java] Use the new schema parser [avro]

via GitHub Wed, 20 Dec 2023 07:44:23 -0800


clesaec commented on code in PR #2642:
URL: https://github.com/apache/avro/pull/2642#discussion_r1432418183



##########
lang/java/avro/src/main/java/org/apache/avro/ParseContext.java:
##########
@@ -220,6 +232,77 @@ public void rollback() {
     newSchemas.clear();
   }
 
+  /**
+   * Resolve all (named) schemas that were parsed. This resolves all forward
+   * references, even if parsed from different files.
+   *
+   * @return all parsed schemas, in the order they were parsed
+   * @throws AvroTypeException if a reference cannot be resolved
+   */
+  public List<Schema> resolveAllTypes() {
+    if (hasNewSchemas()) {
+      throw new IllegalStateException("Types cannot be resolved unless the 
ParseContext is committed.");
+    }
+
+    if (!isResolved) {
+      NameValidator saved = Schema.getNameValidator();
+      try {
+        Schema.setNameValidator(nameValidator); // Ensure we use the same 
validation.
+        HashMap<String, Schema> result = new LinkedHashMap<>(oldSchemas);

Review Comment:
   Why LinkedHashMap here; where it needs to keep order ?
   `Map<String, Schema> result = new HashMap<>(oldSchemas);`
   is not enough ?
   



##########
lang/java/avro/src/main/java/org/apache/avro/Schema.java:
##########
@@ -1867,47 +1781,70 @@ private static boolean isValidValue(Schema schema, 
JsonNode value) {
     }
   }
 
-  /**
-   * Parse named schema in order to fill names map. This method does not parse
-   * field of record/error schema.
-   *
-   * @param schema           : json schema representation.
-   * @param names            : map of named schema.
-   * @param currentNameSpace : current working name space.
-   * @return schema.
-   */
-  static Schema parseNamesDeclared(JsonNode schema, Names names, String 
currentNameSpace) {
+  /** @see #parse(String) */
+  static Schema parse(JsonNode schema, ParseContext context, String 
currentNameSpace) {
     if (schema == null) {
-      return null;
+      throw new SchemaParseException("Cannot parse <null> schema");
     }
-    if (schema.isObject()) {
-
-      String type = Schema.getOptionalText(schema, "type");
+    if (schema.isTextual()) { // name
+      return context.find(schema.textValue(), currentNameSpace);
+    } else if (schema.isObject()) {
+      Schema result;
+      String type = getRequiredText(schema, "type", "No type");
       Name name = null;
-
+      String space = null;
       String doc = null;
-      Schema result = null;
       final boolean isTypeError = "error".equals(type);
       final boolean isTypeRecord = "record".equals(type);
       final boolean isTypeEnum = "enum".equals(type);
       final boolean isTypeFixed = "fixed".equals(type);
-
       if (isTypeRecord || isTypeError || isTypeEnum || isTypeFixed) {
-        String space = getOptionalText(schema, "namespace");
+        space = getOptionalText(schema, "namespace");
         doc = getOptionalText(schema, "doc");
         if (space == null)
           space = currentNameSpace;
         name = new Name(getRequiredText(schema, "name", "No name in schema"), 
space);
       }
-      if (isTypeRecord || isTypeError) { // record
+      if (PRIMITIVES.containsKey(type)) { // primitive
+        result = create(PRIMITIVES.get(type));
+      } else if (isTypeRecord || isTypeError) { // record
+        List<Field> fields = new ArrayList<>();
         result = new RecordSchema(name, doc, isTypeError);
-        names.add(result);
+        context.put(result);
         JsonNode fieldsNode = schema.get("fields");
-
         if (fieldsNode == null || !fieldsNode.isArray())
           throw new SchemaParseException("Record has no fields: " + schema);
-        exploreFields(fieldsNode, names, name != null ? name.space : null);
-
+        for (JsonNode field : fieldsNode) {

Review Comment:
   why not keep here a separate method for fields , and then, reduce complexity 
of this method ?



##########
lang/java/avro/src/main/java/org/apache/avro/Schema.java:
##########
@@ -1920,214 +1857,54 @@ static Schema parseNamesDeclared(JsonNode schema, 
Names names, String currentNam
         if (enumDefault != null)
           defaultSymbol = enumDefault.textValue();
         result = new EnumSchema(name, doc, symbols, defaultSymbol);
-        names.add(result);
+        context.put(result);
       } else if (type.equals("array")) { // array
         JsonNode itemsNode = schema.get("items");
         if (itemsNode == null)
           throw new SchemaParseException("Array has no items type: " + schema);
-        final Schema items = Schema.parseNamesDeclared(itemsNode, names, 
currentNameSpace);
-        result = Schema.createArray(items);
+        result = new ArraySchema(parse(itemsNode, context, currentNameSpace));
       } else if (type.equals("map")) { // map
         JsonNode valuesNode = schema.get("values");
         if (valuesNode == null)
           throw new SchemaParseException("Map has no values type: " + schema);
-        final Schema values = Schema.parseNamesDeclared(valuesNode, names, 
currentNameSpace);
-        result = Schema.createMap(values);
+        result = new MapSchema(parse(valuesNode, context, currentNameSpace));
       } else if (isTypeFixed) { // fixed
         JsonNode sizeNode = schema.get("size");
         if (sizeNode == null || !sizeNode.isInt())
           throw new SchemaParseException("Invalid or no size: " + schema);
         result = new FixedSchema(name, doc, sizeNode.intValue());
-        if (name != null)
-          names.add(result);
-      } else if (PRIMITIVES.containsKey(type)) {
-        result = Schema.create(PRIMITIVES.get(type));
-      }
-      if (result != null) {
-        Set<String> reserved = SCHEMA_RESERVED;
-        if (isTypeEnum) {
-          reserved = ENUM_RESERVED;
-        }
-        Schema.addProperties(schema, reserved, result);
-      }
-      return result;
-    } else if (schema.isArray()) {
-      List<Schema> subs = new ArrayList<>(schema.size());
-      schema.forEach((JsonNode item) -> {
-        Schema sub = Schema.parseNamesDeclared(item, names, currentNameSpace);
-        if (sub != null) {
-          subs.add(sub);
-        }
-      });
-      return Schema.createUnion(subs);
-    } else if (schema.isTextual()) {
-      String value = schema.asText();
-      return names.get(value);
-    }
-    return null;
-  }
-
-  private static void addProperties(JsonNode schema, Set<String> reserved, 
Schema avroSchema) {
-    Iterator<String> i = schema.fieldNames();
-    while (i.hasNext()) { // add properties
-      String prop = i.next();
-      if (!reserved.contains(prop)) // ignore reserved
-        avroSchema.addProp(prop, schema.get(prop));
-    }
-    // parse logical type if present
-    avroSchema.logicalType = LogicalTypes.fromSchemaIgnoreInvalid(avroSchema);
-    // names.space(savedSpace); // restore space
-    if (avroSchema instanceof NamedSchema) {
-      Set<String> aliases = parseAliases(schema);
-      if (aliases != null) // add aliases
-        for (String alias : aliases)
-          avroSchema.addAlias(alias);
-    }
-  }
-
-  /**
-   * Explore record fields in order to fill names map with inner defined named
-   * types.
-   *
-   * @param fieldsNode : json node for field.
-   * @param names      : names map.
-   * @param nameSpace  : current working namespace.
-   */
-  private static void exploreFields(JsonNode fieldsNode, Names names, String 
nameSpace) {
-    for (JsonNode field : fieldsNode) {
-      final JsonNode fieldType = field.get("type");
-      if (fieldType != null) {
-        if (fieldType.isObject()) {
-          parseNamesDeclared(fieldType, names, nameSpace);
-        } else if (fieldType.isArray()) {
-          exploreFields(fieldType, names, nameSpace);
-        } else if (fieldType.isTextual() && field.isObject()) {
-          parseNamesDeclared(field, names, nameSpace);
-        }
-      }
-    }
-  }
-
-  /**
-   * in complement of parseNamesDeclared, this method parse schema in details.
-   *
-   * @param schema       : json schema.
-   * @param names        : names map.
-   * @param currentSpace : current working name space.
-   * @return complete schema.
-   */
-  static Schema parseCompleteSchema(JsonNode schema, Names names, String 
currentSpace) {
-    if (schema == null) {
-      throw new SchemaParseException("Cannot parse <null> schema");
-    }
-    if (schema.isTextual()) {
-      String type = schema.asText();
-      Schema avroSchema = names.get(type);
-      if (avroSchema == null) {
-        avroSchema = names.get(currentSpace + "." + type);
-      }
-      return avroSchema;
-    }
-    if (schema.isArray()) {
-      List<Schema> schemas = StreamSupport.stream(schema.spliterator(), false)
-          .map((JsonNode sub) -> parseCompleteSchema(sub, names, 
currentSpace)).collect(Collectors.toList());
-      return Schema.createUnion(schemas);
-    }
-    if (schema.isObject()) {
-      Schema result = null;
-      String type = getRequiredText(schema, "type", "No type");
-      Name name = null;
-
-      final boolean isTypeError = "error".equals(type);
-      final boolean isTypeRecord = "record".equals(type);
-      final boolean isTypeArray = "array".equals(type);
-
-      if (isTypeRecord || isTypeError || "enum".equals(type) || 
"fixed".equals(type)) {
-        // named schema
-        String space = getOptionalText(schema, "namespace");
-
-        if (space == null)
-          space = currentSpace;
-        name = new Name(getRequiredText(schema, "name", "No name in schema"), 
space);
-
-        result = names.get(name);
-        if (result == null) {
-          throw new SchemaParseException("Unparsed field type " + name);
-        }
-      }
-      if (isTypeRecord || isTypeError) {
-        if (result != null && !result.hasFields()) {
-          final List<Field> fields = new ArrayList<>();
-          JsonNode fieldsNode = schema.get("fields");
-          if (fieldsNode == null || !fieldsNode.isArray())
-            throw new SchemaParseException("Record has no fields: " + schema);
-
-          for (JsonNode field : fieldsNode) {
-            Field f = Field.parse(field, names, name.space);
-
-            fields.add(f);
-            if (f.schema.getLogicalType() == null && getOptionalText(field, 
LOGICAL_TYPE_PROP) != null)
-              LOG.warn(
-                  "Ignored the {}.{}.logicalType property (\"{}\"). It should 
probably be nested inside the \"type\" for the field.",
-                  name, f.name, getOptionalText(field, "logicalType"));
-          }
-          result.setFields(fields);
-        }
-      } else if (isTypeArray) {
-        JsonNode items = schema.get("items");
-        Schema schemaItems = parseCompleteSchema(items, names, currentSpace);
-        result = Schema.createArray(schemaItems);
-      } else if ("map".equals(type)) {
-        JsonNode values = schema.get("values");
-        Schema mapItems = parseCompleteSchema(values, names, currentSpace);
-        result = Schema.createMap(mapItems);
-      } else if (result == null) {
-        result = names.get(currentSpace + "." + type);
-        if (result == null) {
-          result = names.get(type);
-        }
+        context.put(result);
+      } else { // For unions with self reference
+        return context.find(type, currentNameSpace);
       }
+      Iterator<String> i = schema.fieldNames();
 
       Set<String> reserved = SCHEMA_RESERVED;
-      if ("enum".equals(type)) {
+      if (isTypeEnum) {
         reserved = ENUM_RESERVED;
       }
-      Schema.addProperties(schema, reserved, result);
-      return result;
-    }
-    return null;
-  }
-
-  static Schema parse(JsonNode schema, Names names) {
-    if (schema == null) {
-      throw new SchemaParseException("Cannot parse <null> schema");
-    }
-
-    Schema result = Schema.parseNamesDeclared(schema, names, names.space);
-    Schema.parseCompleteSchema(schema, names, names.space);
-
-    return result;
-  }
-
-  static Schema resolveSchema(JsonNode schema, Names names, String 
currentNameSpace) {
-    String np = currentNameSpace;
-    String nodeName = getOptionalText(schema, "name");
-    if (nodeName != null) {
-      final JsonNode nameSpace = schema.get("namespace");
-      StringBuilder fullName = new StringBuilder();
-      if (nameSpace != null && nameSpace.isTextual()) {
-        fullName.append(nameSpace.asText()).append(".");
-        np = nameSpace.asText();
+      while (i.hasNext()) { // add properties

Review Comment:
   does iterator forEachRemaining method  would be simpler or not ?



##########
lang/java/avro/src/main/java/org/apache/avro/Schema.java:
##########
@@ -125,20 +125,20 @@ private Object readResolve() {
     FACTORY.setCodec(MAPPER);
   }
 
-  /** The type of a schema. */
+  /** The type of schema. */
   public enum Type {
     RECORD, ENUM, ARRAY, MAP, UNION, FIXED, STRING, BYTES, INT, LONG, FLOAT, 
DOUBLE, BOOLEAN, NULL;
 
     private final String name;
 
-    private Type() {
+    Type() {

Review Comment:
   why not keep it private



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] AVRO-3666 [Java] Use the new schema parser [avro]

Reply via email to