RustedBones commented on code in PR #32482:
URL: https://github.com/apache/beam/pull/32482#discussion_r1770946575


##########
sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryAvroUtils.java:
##########
@@ -273,145 +292,104 @@ private static List<Object> convertRepeatedField(
     @SuppressWarnings("unchecked")
     List<Object> elements = (List<Object>) v;
     ArrayList<Object> values = new ArrayList<>();
-    Type elementType = schema.getElementType().getType();
-    LogicalType elementLogicalType = schema.getElementType().getLogicalType();
     for (Object element : elements) {
-      values.add(convertRequiredField(elementType, elementLogicalType, fieldSchema, element));
+      values.add(convertRequiredField(name, elementType, element));
     }
     return values;
   }
 
-  private static Object convertRequiredField(
-      Type avroType, LogicalType avroLogicalType, TableFieldSchema fieldSchema, Object v) {
+  private static Object convertRequiredField(String name, Schema schema, Object v) {
     // REQUIRED fields are represented as the corresponding Avro types. For example, a BigQuery
     // INTEGER type maps to an Avro LONG type.
-    checkNotNull(v, "REQUIRED field %s should not be null", 
fieldSchema.getName());
-    // Per https://cloud.google.com/bigquery/docs/reference/v2/tables#schema, the type field
-    // is required, so it may not be null.
-    String bqType = fieldSchema.getType();
-    ImmutableCollection<Type> expectedAvroTypes = BIG_QUERY_TO_AVRO_TYPES.get(bqType);
-    verifyNotNull(expectedAvroTypes, "Unsupported BigQuery type: %s", bqType);
-    verify(
-        expectedAvroTypes.contains(avroType),
-        "Expected Avro schema types %s for BigQuery %s field %s, but received 
%s",
-        expectedAvroTypes,
-        bqType,
-        fieldSchema.getName(),
-        avroType);
+    checkNotNull(v, "REQUIRED field %s should not be null", name);
+
+    // For historical reasons, don't validate avroLogicalType except for with NUMERIC.
+    // BigQuery represents NUMERIC in Avro format as BYTES with a DECIMAL logical type.
-    switch (bqType) {
-      case "STRING":
-      case "DATETIME":
-      case "GEOGRAPHY":
-      case "JSON":
-        // Avro will use a CharSequence to represent String objects, but it may not always use
-        // java.lang.String; for example, it may prefer org.apache.avro.util.Utf8.
-        verify(v instanceof CharSequence, "Expected CharSequence (String), got %s", v.getClass());
-        return v.toString();
-      case "DATE":
-        if (avroType == Type.INT) {
-          verify(v instanceof Integer, "Expected Integer, got %s", v.getClass());
-          verifyNotNull(avroLogicalType, "Expected Date logical type");
-          verify(avroLogicalType instanceof LogicalTypes.Date, "Expected Date logical type");
+    Type type = schema.getType();
+    LogicalType logicalType = schema.getLogicalType();
+    switch (type) {
+      case BOOLEAN:
+        // SQL types BOOL, BOOLEAN
+        return v;
+      case INT:
+        if (logicalType instanceof LogicalTypes.Date) {
+          // SQL types DATE
           return formatDate((Integer) v);
         } else {
-          verify(v instanceof CharSequence, "Expected CharSequence (String), got %s", v.getClass());
-          return v.toString();
+          throw new UnsupportedOperationException(
+              String.format(
+                  "Unexpected BigQuery field schema type %s for field named 
%s", type, name));
         }
-      case "TIME":
-        if (avroType == Type.LONG) {
-          verify(v instanceof Long, "Expected Long, got %s", v.getClass());
-          verifyNotNull(avroLogicalType, "Expected TimeMicros logical type");
-          verify(
-              avroLogicalType instanceof LogicalTypes.TimeMicros,
-              "Expected TimeMicros logical type");
+      case LONG:
+        if (logicalType instanceof LogicalTypes.TimeMicros) {
+          // SQL types TIME
           return formatTime((Long) v);
+        } else if (logicalType instanceof LogicalTypes.TimestampMicros) {
+          // SQL types TIMESTAMP
+          return formatTimestamp((Long) v);
         } else {
-          verify(v instanceof CharSequence, "Expected CharSequence (String), got %s", v.getClass());
-          return v.toString();
+          // SQL types INT64 (INT, SMALLINT, INTEGER, BIGINT, TINYINT, BYTEINT)
+          return ((Long) v).toString();

Review Comment:
   Yes, tests are failing.
   
   I checked what the Java BQ client returns when querying a table, and it looks consistent: integer values come back as strings. See here a `TableRow` record from `bigquery-public-data.samples.shakespeare`:
   
![image](https://github.com/user-attachments/assets/d1119f99-964c-460b-ab67-3308cb468313)
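   
   Assuming that is the api-services-bigquery `TableRow` model (the one Beam's connector uses), here is a minimal, hypothetical sketch of parsing such a payload; the row values below are made up, not the actual row from the screenshot:
   
   ```java
   import com.google.api.client.json.gson.GsonFactory;
   import com.google.api.services.bigquery.model.TableRow;
   import java.io.IOException;
   
   public class TableRowIntegerRepr {
     public static void main(String[] args) throws IOException {
       // JSON shaped like a tabledata.list row for samples.shakespeare
       // (word, word_count, corpus, corpus_date); values are illustrative.
       String json =
           "{\"f\":[{\"v\":\"brave\"},{\"v\":\"6\"},{\"v\":\"hamlet\"},{\"v\":\"1600\"}]}";
       TableRow row = GsonFactory.getDefaultInstance().fromString(json, TableRow.class);
       Object wordCount = row.getF().get(1).getV();
       // Prints "String 6": the INT64 column arrives as a string, not a Long.
       System.out.println(wordCount.getClass().getSimpleName() + " " + wordCount);
     }
   }
   ```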
   
   The [doc](https://cloud.google.com/bigquery/docs/exporting-data#export_limitations) actually says
   
   >When you export data in [JSON](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#json_type) format, [INT64](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer-type) (integer) data types are encoded as JSON strings to preserve 64-bit precision when the data is read by other systems.
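   
   That string encoding is what makes `((Long) v).toString()` in the new LONG branch line up with the JSON export: a reader that coerces JSON numbers to doubles silently loses integers above 2^53, while a string round-trips exactly. A tiny standalone illustration (not Beam code):
   
   ```java
   public class Int64PrecisionDemo {
     public static void main(String[] args) {
       long big = 9007199254740993L;  // 2^53 + 1, not exactly representable as a double
       double asJsonNumber = big;     // what a naive JSON reader would keep for a numeric field
       System.out.println((long) asJsonNumber);                 // 9007199254740992 -> precision lost
       System.out.println(Long.parseLong("9007199254740993"));  // 9007199254740993 -> exact
     }
   }
   ```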
   
   
   


