ahmedabu98 commented on code in PR #29923:
URL: https://github.com/apache/beam/pull/29923#discussion_r1445294389
##########
sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java:
##########
@@ -3575,11 +3573,22 @@ private <DestinationT> WriteResult continueExpandTyped(
          !getPropagateSuccessfulStorageApiWrites(),
          "withPropagateSuccessfulStorageApiWrites only supported when using storage api writes.");
-      // Batch load jobs currently support JSON data insertion only with CSV files
+      // Beam does not yet support Batch load jobs with Avro files
Review Comment:
Remove this comment? I think it's supported for all types now
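For context, a minimal sketch of the configuration this check path applies to: a FILE_LOADS write whose schema declares a JSON-typed column. The table name, column name, and pipeline setup are illustrative assumptions, not taken from the PR:

```java
import com.google.api.services.bigquery.model.TableRow;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.TableRowJsonCoder;
import org.apache.beam.sdk.transforms.Create;

public class JsonColumnFileLoads {
  public static void main(String[] args) {
    // Hypothetical schema: a single JSON-typed column named "payload".
    String jsonSchema = "{\"fields\": [{\"name\": \"payload\", \"type\": \"JSON\"}]}";

    Pipeline p = Pipeline.create();
    p.apply(
            Create.of(new TableRow().set("payload", "{\"a\": 1}"))
                .withCoder(TableRowJsonCoder.of()))
        .apply(
            BigQueryIO.writeTableRows()
                .to("my-project:my_dataset.my_table") // hypothetical destination
                .withJsonSchema(jsonSchema)
                .withMethod(BigQueryIO.Write.Method.FILE_LOADS));
    p.run().waitUntilFinish();
  }
}
```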
##########
sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java:
##########
@@ -3725,28 +3734,27 @@ private <DestinationT> WriteResult continueExpandTyped(
     }
   }

-  private void validateNoJsonTypeInSchema(JsonElement schema) {
+  private boolean hasJsonTypeInSchema(JsonElement schema) {
     JsonElement fields = schema.getAsJsonObject().get("fields");
     if (!fields.isJsonArray() || fields.getAsJsonArray().isEmpty()) {
-      return;
+      return false;
     }

     JsonArray fieldArray = fields.getAsJsonArray();
     for (int i = 0; i < fieldArray.size(); i++) {
       JsonObject field = fieldArray.get(i).getAsJsonObject();
-      checkArgument(
-          !field.get("type").getAsString().equals("JSON"),
-          "Found JSON type in TableSchema. JSON data insertion is currently "
-              + "not supported with 'FILE_LOADS' write method. This is supported with the "
-              + "other write methods, however. For more information, visit: "
-              + "https://cloud.google.com/bigquery/docs/reference/standard-sql/"
-              + "json-data#ingest_json_data");
+      if (field.get("type").getAsString().equals("JSON")) {
+        return true;
+      }

       if (field.get("type").getAsString().equals("STRUCT")) {
-        validateNoJsonTypeInSchema(field);
+        if (hasJsonTypeInSchema(field)) {
+          return true;
+        }
Review Comment:
```suggestion
return hasJsonTypeInSchema(field);
```
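One subtlety with returning directly here: if a STRUCT field contains no JSON, the method would return false before the remaining sibling fields are checked. Below is a self-contained sketch (the class wrapper and Gson imports are added for illustration; in the PR this is a private method of BigQueryIO.java) that folds both checks into one condition while keeping the scan going:

```java
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;

class SchemaJsonCheck {
  // Returns true if any field, including fields nested inside STRUCTs,
  // is declared with type JSON.
  static boolean hasJsonTypeInSchema(JsonElement schema) {
    JsonElement fields = schema.getAsJsonObject().get("fields");
    if (!fields.isJsonArray() || fields.getAsJsonArray().isEmpty()) {
      return false;
    }
    JsonArray fieldArray = fields.getAsJsonArray();
    for (int i = 0; i < fieldArray.size(); i++) {
      JsonObject field = fieldArray.get(i).getAsJsonObject();
      String type = field.get("type").getAsString();
      // Recurse into STRUCTs, but keep scanning sibling fields when the
      // nested check comes back false.
      if (type.equals("JSON") || (type.equals("STRUCT") && hasJsonTypeInSchema(field))) {
        return true;
      }
    }
    return false;
  }
}
```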
##########
sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java:
##########
@@ -3575,11 +3573,22 @@ private <DestinationT> WriteResult continueExpandTyped(
          !getPropagateSuccessfulStorageApiWrites(),
          "withPropagateSuccessfulStorageApiWrites only supported when using storage api writes.");
-      // Batch load jobs currently support JSON data insertion only with CSV files
+      // Beam does not yet support Batch load jobs with Avro files
       if (getJsonSchema() != null && getJsonSchema().isAccessible()) {
         JsonElement schema = JsonParser.parseString(getJsonSchema().get());
-        if (!schema.getAsJsonObject().keySet().isEmpty()) {
-          validateNoJsonTypeInSchema(schema);
+        if (!schema.getAsJsonObject().keySet().isEmpty() && hasJsonTypeInSchema(schema)) {
+          if (rowWriterFactory.getOutputType() == OutputType.JsonTableRow) {
+            LOG.warn(
+                "Found JSON type in TableSchema for 'FILE_LOADS' write method. \n"
+                    + "Make sure the TableSchema field is a parsed JSON to ensure the read as a "
+                    + "JSON type. Otherwise it will read as a raw (escaped) string.");
Review Comment:
Does this mean the TableRow value should be a parsed JSON?
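If I'm reading the warning right, the distinction is in how the value is attached to the TableRow before the rows are serialized to the newline-delimited JSON load file. A hedged sketch of the two cases (the "payload" field name is a hypothetical, matching nothing in the PR):

```java
import com.google.api.services.bigquery.model.TableRow;
import java.util.Collections;

public class ParsedVsRawJsonValue {
  public static void main(String[] args) {
    // Case 1: a raw String. When the row is serialized for the load job,
    // this becomes an escaped JSON string, so the JSON column would hold
    // the string "{\"a\": 1}" rather than an object.
    TableRow raw = new TableRow().set("payload", "{\"a\": 1}");

    // Case 2: a parsed structure (here a Map). This serializes as a nested
    // JSON object, so the JSON column would hold the object {"a": 1}.
    TableRow parsed = new TableRow().set("payload", Collections.singletonMap("a", 1));
  }
}
```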