RustedBones commented on code in PR #32482:
URL: https://github.com/apache/beam/pull/32482#discussion_r1771044282

##########
sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryAvroUtils.java:
##########
@@ -64,35 +60,96 @@
  */
 class BigQueryAvroUtils {
+  // org.apache.avro.LogicalType
+  static class DateTimeLogicalType extends LogicalType {
+    public DateTimeLogicalType() {
+      super("datetime");
+    }
+  }
+
+  static final DateTimeLogicalType DATETIME_LOGICAL_TYPE = new DateTimeLogicalType();
+
   /**
    * Defines the valid mapping between BigQuery types and native Avro types.
    *
-   * <p>Some BigQuery types are duplicated here since slightly different Avro records are produced
-   * when exporting data in Avro format and when reading data directly using the read API.
+   * @see <a href=https://cloud.google.com/bigquery/docs/exporting-data#avro_export_details>BQ avro
+   *     export</a>
+   * @see <a href=https://cloud.google.com/bigquery/docs/reference/storage#avro_schema_details>BQ
+   *     avro storage</a>
    */
-  static final ImmutableMultimap<String, Type> BIG_QUERY_TO_AVRO_TYPES =
-      ImmutableMultimap.<String, Type>builder()
-          .put("STRING", Type.STRING)
-          .put("GEOGRAPHY", Type.STRING)
-          .put("BYTES", Type.BYTES)
-          .put("INTEGER", Type.LONG)
-          .put("INT64", Type.LONG)
-          .put("FLOAT", Type.DOUBLE)
-          .put("FLOAT64", Type.DOUBLE)
-          .put("NUMERIC", Type.BYTES)
-          .put("BIGNUMERIC", Type.BYTES)
-          .put("BOOLEAN", Type.BOOLEAN)
-          .put("BOOL", Type.BOOLEAN)
-          .put("TIMESTAMP", Type.LONG)
-          .put("RECORD", Type.RECORD)
-          .put("STRUCT", Type.RECORD)
-          .put("DATE", Type.STRING)
-          .put("DATE", Type.INT)
-          .put("DATETIME", Type.STRING)
-          .put("TIME", Type.STRING)
-          .put("TIME", Type.LONG)
-          .put("JSON", Type.STRING)
-          .build();
+  static Schema getPrimitiveType(TableFieldSchema schema, Boolean useAvroLogicalTypes) {
+    String bqType = schema.getType();
+    // see
+    // https://googleapis.dev/java/google-api-services-bigquery/latest/com/google/api/services/bigquery/model/TableFieldSchema.html#getType--
+    switch (bqType) {
+      case "STRING":
+        // string
+        return SchemaBuilder.builder().stringType();
+      case "BYTES":
+        // bytes
+        return SchemaBuilder.builder().bytesType();
+      case "INTEGER":
+      case "INT64":
+        // long
+        return SchemaBuilder.builder().longType();
+      case "FLOAT":
+      case "FLOAT64":
+        // double
+        return SchemaBuilder.builder().doubleType();
+      case "BOOLEAN":
+      case "BOOL":
+        // boolean
+        return SchemaBuilder.builder().booleanType();
+      case "TIMESTAMP":
+        // in Extract Jobs, it always uses the Avro logical type
+        // we may have to change this if we move to EXPORT DATA
+        return LogicalTypes.timestampMicros().addToSchema(SchemaBuilder.builder().longType());

Review Comment:
   `useAvroLogicalTypes` behaves differently for `TIMESTAMP` columns in an extract job vs `EXPORT DATA`. Currently, Beam's query implementation relies on a temp table + extract job, so we don't have to handle this discrepancy.
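
   For reference, a minimal, self-contained sketch (not Beam code; the class name, the `main` method, and the choice to attach the custom `datetime` logical type to a string schema are assumptions for illustration, since the hunk is truncated before the DATETIME case) of the Avro mechanics the new `getPrimitiveType` branches rely on: `LogicalType.addToSchema` stamps the `logicalType` property onto a primitive schema, which is what the TIMESTAMP case above does with the built-in `timestamp-micros` type and what the custom `DateTimeLogicalType` exists for, Avro having no built-in `datetime` logical type.

   ```java
   import org.apache.avro.LogicalType;
   import org.apache.avro.LogicalTypes;
   import org.apache.avro.Schema;
   import org.apache.avro.SchemaBuilder;

   // Standalone sketch, NOT Beam code: names here are illustrative only.
   public class AvroLogicalTypeSketch {

     // Mirrors DateTimeLogicalType from the diff: a custom LogicalType named "datetime".
     static class DateTimeLogicalType extends LogicalType {
       DateTimeLogicalType() {
         super("datetime");
       }
     }

     public static void main(String[] args) {
       // TIMESTAMP branch as shown in the hunk: a long annotated with timestamp-micros.
       Schema timestamp =
           LogicalTypes.timestampMicros().addToSchema(SchemaBuilder.builder().longType());
       System.out.println(timestamp);
       // -> {"type":"long","logicalType":"timestamp-micros"}

       // Assumption for illustration: the custom "datetime" logical type on a string schema
       // (the old multimap mapped DATETIME -> STRING); the real branch may differ.
       Schema datetime =
           new DateTimeLogicalType().addToSchema(SchemaBuilder.builder().stringType());
       System.out.println(datetime);
       // -> {"type":"string","logicalType":"datetime"}
     }
   }
   ```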