pabloem commented on code in PR #24271:
URL: https://github.com/apache/beam/pull/24271#discussion_r1027179002
##########
sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/JsonUtils.java:
##########
@@ -73,6 +78,77 @@ public String apply(Row input) {
};
}
+ public static Schema beamSchemaFromJsonSchema(String jsonSchemaStr) {
+ org.everit.json.schema.ObjectSchema jsonSchema =
jsonSchemaFromString(jsonSchemaStr);
+ return beamSchemaFromJsonSchema(jsonSchema);
+ }
+
+ private static Schema
beamSchemaFromJsonSchema(org.everit.json.schema.ObjectSchema jsonSchema) {
+ Schema.Builder beamSchemaBuilder = Schema.builder();
+ for (String propertyName : jsonSchema.getPropertySchemas().keySet()) {
+ org.everit.json.schema.Schema propertySchema =
+ jsonSchema.getPropertySchemas().get(propertyName);
+ if (propertySchema == null) {
+ throw new IllegalArgumentException("Unable to parse schema " +
jsonSchema.toString());
+ }
+ if
(propertySchema.getClass().equals(org.everit.json.schema.ObjectSchema.class)) {
+ beamSchemaBuilder =
+ beamSchemaBuilder.addField(
+ Schema.Field.of(propertyName,
beamTypeFromJsonSchemaType(propertySchema)));
+ } else if
(propertySchema.getClass().equals(org.everit.json.schema.ArraySchema.class)) {
+ beamSchemaBuilder =
+ beamSchemaBuilder.addField(
+ Schema.Field.of(
+ propertyName,
+ Schema.FieldType.array(
+ beamTypeFromJsonSchemaType(
+ ((ArraySchema)
propertySchema).getAllItemSchema()))));
Review Comment:
Hmm, good observation - these don't seem to translate well to Beam schemas.
Since the individual 'fields' in a tuple don't have a name, they can't be a
Beam Row.
I've just added validation that rejects them, along with a check on the error
message.
##########
sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/JsonUtils.java:
##########
@@ -73,6 +78,77 @@ public String apply(Row input) {
};
}
+ public static Schema beamSchemaFromJsonSchema(String jsonSchemaStr) {
+ org.everit.json.schema.ObjectSchema jsonSchema =
jsonSchemaFromString(jsonSchemaStr);
+ return beamSchemaFromJsonSchema(jsonSchema);
+ }
+
+ private static Schema
beamSchemaFromJsonSchema(org.everit.json.schema.ObjectSchema jsonSchema) {
+ Schema.Builder beamSchemaBuilder = Schema.builder();
+ for (String propertyName : jsonSchema.getPropertySchemas().keySet()) {
+ org.everit.json.schema.Schema propertySchema =
+ jsonSchema.getPropertySchemas().get(propertyName);
+ if (propertySchema == null) {
+ throw new IllegalArgumentException("Unable to parse schema " +
jsonSchema.toString());
+ }
+ if
(propertySchema.getClass().equals(org.everit.json.schema.ObjectSchema.class)) {
+ beamSchemaBuilder =
+ beamSchemaBuilder.addField(
+ Schema.Field.of(propertyName,
beamTypeFromJsonSchemaType(propertySchema)));
+ } else if
(propertySchema.getClass().equals(org.everit.json.schema.ArraySchema.class)) {
+ beamSchemaBuilder =
+ beamSchemaBuilder.addField(
+ Schema.Field.of(
+ propertyName,
+ Schema.FieldType.array(
+ beamTypeFromJsonSchemaType(
+ ((ArraySchema)
propertySchema).getAllItemSchema()))));
+ } else {
+ try {
+ beamSchemaBuilder =
+ beamSchemaBuilder.addField(
+ Schema.Field.of(propertyName,
beamTypeFromJsonSchemaType(propertySchema)));
+ } catch (IllegalArgumentException e) {
+ throw new IllegalArgumentException("Unsupported field type in field
" + propertyName, e);
+ }
+ }
+ }
+ return beamSchemaBuilder.build();
+ }
+
+ private static Schema.FieldType beamTypeFromJsonSchemaType(
+ org.everit.json.schema.Schema propertySchema) {
+ if
(propertySchema.getClass().equals(org.everit.json.schema.ObjectSchema.class)) {
+ return Schema.FieldType.row(beamSchemaFromJsonSchema((ObjectSchema)
propertySchema));
+ } else if
(propertySchema.getClass().equals(org.everit.json.schema.BooleanSchema.class)) {
+ return Schema.FieldType.BOOLEAN;
+ } else if
(propertySchema.getClass().equals(org.everit.json.schema.NumberSchema.class)) {
+ return ((NumberSchema) propertySchema).requiresInteger()
+ ? Schema.FieldType.INT64
+ : Schema.FieldType.DOUBLE;
+ }
+ if
(propertySchema.getClass().equals(org.everit.json.schema.StringSchema.class)) {
+ return Schema.FieldType.STRING;
+ } else if
(propertySchema.getClass().equals(org.everit.json.schema.ReferenceSchema.class))
{
+ org.everit.json.schema.Schema sch = ((ReferenceSchema)
propertySchema).getReferredSchema();
+ return beamTypeFromJsonSchemaType(sch);
+ } else {
+ throw new IllegalArgumentException(
+ "Unsupported schema type: " + propertySchema.getClass().toString());
Review Comment:
Done, thanks!
##########
sdks/java/core/src/test/resources/schemas/json/nested_arrays_objects_json_schema.json:
##########
@@ -0,0 +1,33 @@
+{
+ "$id": "https://example.com/arrays.schema.json",
+ "description": "A representation of a person, company, organization, or
place",
Review Comment:
I copied this from elsewhere. Thanks for the observation :)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]