pabloem commented on code in PR #24271:
URL: https://github.com/apache/beam/pull/24271#discussion_r1038438271


##########
sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/JsonUtils.java:
##########
@@ -73,6 +123,93 @@ public String apply(Row input) {
     };
   }
 
+  public static Schema beamSchemaFromJsonSchema(String jsonSchemaStr) {
+    org.everit.json.schema.ObjectSchema jsonSchema = 
jsonSchemaFromString(jsonSchemaStr);
+    return beamSchemaFromJsonSchema(jsonSchema);
+  }
+
+  private static Schema 
beamSchemaFromJsonSchema(org.everit.json.schema.ObjectSchema jsonSchema) {
+    Schema.Builder beamSchemaBuilder = Schema.builder();
+    for (String propertyName : jsonSchema.getPropertySchemas().keySet()) {
+      org.everit.json.schema.Schema propertySchema =
+          jsonSchema.getPropertySchemas().get(propertyName);
+      if (propertySchema == null) {
+        throw new IllegalArgumentException("Unable to parse schema " + 
jsonSchema);
+      }
+      java.util.function.BiFunction<String, Schema.FieldType, Schema.Field> 
fieldConstructor =
+          jsonSchema.getRequiredProperties().contains(propertyName)
+              ? Schema.Field::of
+              : Schema.Field::nullable;
+      if (propertySchema instanceof org.everit.json.schema.ArraySchema) {
+        if (((ArraySchema) propertySchema).getAllItemSchema() == null) {
+          throw new IllegalArgumentException(
+              "Array schema is not properly formatted or unsupported ("
+                  + propertyName
+                  + "). Note that JSON-schema's tuple-like arrays are not 
supported by Beam.");
+        }
+        beamSchemaBuilder =
+            beamSchemaBuilder.addField(
+                fieldConstructor.apply(
+                    propertyName,
+                    Schema.FieldType.array(
+                        beamTypeFromJsonSchemaType(
+                            ((ArraySchema) 
propertySchema).getAllItemSchema()))));
+      } else {
+        try {
+          beamSchemaBuilder =
+              beamSchemaBuilder.addField(
+                  fieldConstructor.apply(propertyName, 
beamTypeFromJsonSchemaType(propertySchema)));
+        } catch (IllegalArgumentException e) {
+          throw new IllegalArgumentException(
+              "Unsupported field type " + propertySchema.getClass() + " in 
field " + propertyName,
+              e);
+        }
+      }
+    }
+    return beamSchemaBuilder.build();
+  }
+
+  private static Schema.FieldType beamTypeFromJsonSchemaType(
+      org.everit.json.schema.Schema propertySchema) {
+    if (propertySchema instanceof org.everit.json.schema.ObjectSchema) {
+      return Schema.FieldType.row(beamSchemaFromJsonSchema((ObjectSchema) 
propertySchema));
+    } else if (propertySchema instanceof org.everit.json.schema.BooleanSchema) 
{
+      return Schema.FieldType.BOOLEAN;
+    } else if (propertySchema instanceof org.everit.json.schema.NumberSchema) {
+      return ((NumberSchema) propertySchema).requiresInteger()
+          ? Schema.FieldType.INT64
+          : Schema.FieldType.DOUBLE;
+    }
+    if (propertySchema instanceof org.everit.json.schema.StringSchema) {
+      return Schema.FieldType.STRING;
+    } else if (propertySchema instanceof 
org.everit.json.schema.ReferenceSchema) {
+      org.everit.json.schema.Schema sch = ((ReferenceSchema) 
propertySchema).getReferredSchema();
+      return beamTypeFromJsonSchemaType(sch);
+    } else if (propertySchema instanceof org.everit.json.schema.ArraySchema) {
+      if (((ArraySchema) propertySchema).getAllItemSchema() == null) {
+        throw new IllegalArgumentException(
+            "Array schema is not properly formatted or unsupported ("
+                + propertySchema
+                + "). Note that JSON-schema's tuple-like arrays are not 
supported by Beam.");

Review Comment:
   I'll leave it as in the original, since it points to the `propertySchema` that we
use, and can at least help users identify it.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to