[ 
https://issues.apache.org/jira/browse/BEAM-9179?focusedWorklogId=468797&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-468797
 ]

ASF GitHub Bot logged work on BEAM-9179:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 10/Aug/20 18:25
            Start Date: 10/Aug/20 18:25
    Worklog Time Spent: 10m 
      Work Description: robinyqiu commented on a change in pull request #12507:
URL: https://github.com/apache/beam/pull/12507#discussion_r468095301



##########
File path: 
sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlCalciteTranslationUtils.java
##########
@@ -17,174 +17,333 @@
  */
 package org.apache.beam.sdk.extensions.sql.zetasql;
 
-import static com.google.zetasql.ZetaSQLType.TypeKind.TYPE_BOOL;
-import static com.google.zetasql.ZetaSQLType.TypeKind.TYPE_BYTES;
-import static com.google.zetasql.ZetaSQLType.TypeKind.TYPE_DATE;
-import static com.google.zetasql.ZetaSQLType.TypeKind.TYPE_DATETIME;
-import static com.google.zetasql.ZetaSQLType.TypeKind.TYPE_DOUBLE;
-import static com.google.zetasql.ZetaSQLType.TypeKind.TYPE_INT64;
-import static com.google.zetasql.ZetaSQLType.TypeKind.TYPE_NUMERIC;
-import static com.google.zetasql.ZetaSQLType.TypeKind.TYPE_STRING;
-import static com.google.zetasql.ZetaSQLType.TypeKind.TYPE_TIME;
-import static com.google.zetasql.ZetaSQLType.TypeKind.TYPE_TIMESTAMP;
-import static java.util.stream.Collectors.toList;
-
-import com.google.zetasql.ArrayType;
+import com.google.zetasql.CivilTimeEncoder;
 import com.google.zetasql.StructType;
 import com.google.zetasql.StructType.StructField;
 import com.google.zetasql.Type;
 import com.google.zetasql.TypeFactory;
+import com.google.zetasql.Value;
 import com.google.zetasql.ZetaSQLType.TypeKind;
+import com.google.zetasql.functions.ZetaSQLDateTime.DateTimestampPart;
+import java.math.BigDecimal;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
 import java.util.List;
-import java.util.function.Function;
+import java.util.stream.Collectors;
 import org.apache.beam.sdk.annotations.Internal;
+import 
org.apache.beam.sdk.extensions.sql.meta.provider.bigquery.BeamBigQuerySqlDialect;
+import org.apache.beam.sdk.extensions.sql.zetasql.translation.SqlOperators;
+import 
org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.avatica.util.ByteString;
+import 
org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.avatica.util.TimeUnit;
+import 
org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.avatica.util.TimeUnitRange;
 import 
org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.type.RelDataType;
 import 
org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexBuilder;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexNode;
+import 
org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.fun.SqlStdOperatorTable;
 import 
org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.type.SqlTypeName;
+import 
org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.util.DateString;
+import 
org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.util.TimeString;
+import 
org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.util.TimestampString;
 import 
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
+import 
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;
 
 /**
  * Utility methods for ZetaSQL <=> Calcite translation.
  *
- * <p>Unsupported ZetaSQL types: INT32, UINT32, UINT64, FLOAT, ENUM, PROTO, 
GEOGRAPHY
+ * <p>Unsupported ZetaSQL types: INT32, UINT32, UINT64, FLOAT, ENUM 
(internal), PROTO, GEOGRAPHY
  */
 @Internal
 public final class ZetaSqlCalciteTranslationUtils {
 
   private ZetaSqlCalciteTranslationUtils() {}
 
+  // TODO[BEAM-9178]: support DateTimestampPart.WEEK and "WEEK with weekday"s
+  private static final ImmutableMap<Integer, TimeUnit> TIME_UNIT_CASTING_MAP =
+      ImmutableMap.<Integer, TimeUnit>builder()
+          .put(DateTimestampPart.YEAR.getNumber(), TimeUnit.YEAR)
+          .put(DateTimestampPart.MONTH.getNumber(), TimeUnit.MONTH)
+          .put(DateTimestampPart.DAY.getNumber(), TimeUnit.DAY)
+          .put(DateTimestampPart.DAYOFWEEK.getNumber(), TimeUnit.DOW)
+          .put(DateTimestampPart.DAYOFYEAR.getNumber(), TimeUnit.DOY)
+          .put(DateTimestampPart.QUARTER.getNumber(), TimeUnit.QUARTER)
+          .put(DateTimestampPart.HOUR.getNumber(), TimeUnit.HOUR)
+          .put(DateTimestampPart.MINUTE.getNumber(), TimeUnit.MINUTE)
+          .put(DateTimestampPart.SECOND.getNumber(), TimeUnit.SECOND)
+          .put(DateTimestampPart.MILLISECOND.getNumber(), TimeUnit.MILLISECOND)
+          .put(DateTimestampPart.MICROSECOND.getNumber(), TimeUnit.MICROSECOND)
+          .put(DateTimestampPart.ISOYEAR.getNumber(), TimeUnit.ISOYEAR)
+          .put(DateTimestampPart.ISOWEEK.getNumber(), TimeUnit.WEEK)
+          .build();
+
   // Type conversion: Calcite => ZetaSQL
-  public static Type toZetaType(RelDataType calciteType) {
+  public static Type toZetaSqlType(RelDataType calciteType) {
     switch (calciteType.getSqlTypeName()) {
       case BIGINT:
-        return TypeFactory.createSimpleType(TYPE_INT64);
+        return TypeFactory.createSimpleType(TypeKind.TYPE_INT64);
       case DOUBLE:
-        return TypeFactory.createSimpleType(TYPE_DOUBLE);
+        return TypeFactory.createSimpleType(TypeKind.TYPE_DOUBLE);
       case BOOLEAN:
-        return TypeFactory.createSimpleType(TYPE_BOOL);
+        return TypeFactory.createSimpleType(TypeKind.TYPE_BOOL);
       case VARCHAR:
-        return TypeFactory.createSimpleType(TYPE_STRING);
+        return TypeFactory.createSimpleType(TypeKind.TYPE_STRING);
       case VARBINARY:
-        return TypeFactory.createSimpleType(TYPE_BYTES);
+        return TypeFactory.createSimpleType(TypeKind.TYPE_BYTES);
       case DECIMAL:
-        return TypeFactory.createSimpleType(TYPE_NUMERIC);
+        return TypeFactory.createSimpleType(TypeKind.TYPE_NUMERIC);
       case DATE:
-        return TypeFactory.createSimpleType(TYPE_DATE);
+        return TypeFactory.createSimpleType(TypeKind.TYPE_DATE);
       case TIME:
-        return TypeFactory.createSimpleType(TYPE_TIME);
+        return TypeFactory.createSimpleType(TypeKind.TYPE_TIME);
       case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
-        return TypeFactory.createSimpleType(TYPE_DATETIME);
+        return TypeFactory.createSimpleType(TypeKind.TYPE_DATETIME);
       case TIMESTAMP:
-        return TypeFactory.createSimpleType(TYPE_TIMESTAMP);
+        return TypeFactory.createSimpleType(TypeKind.TYPE_TIMESTAMP);
       case ARRAY:
-        return 
TypeFactory.createArrayType(toZetaType(calciteType.getComponentType()));
+        return 
TypeFactory.createArrayType(toZetaSqlType(calciteType.getComponentType()));
       case ROW:
-        List<StructField> structFields =
+        return TypeFactory.createStructType(
             calciteType.getFieldList().stream()
-                .map(f -> new StructField(f.getName(), 
toZetaType(f.getType())))
-                .collect(toList());
-
-        return TypeFactory.createStructType(structFields);
+                .map(f -> new StructField(f.getName(), 
toZetaSqlType(f.getType())))
+                .collect(Collectors.toList()));
       default:
-        throw new UnsupportedOperationException("Unsupported RelDataType: " + 
calciteType);
+        throw new UnsupportedOperationException(
+            "Unknown Calcite type: " + calciteType.getSqlTypeName().getName());
     }
   }
 
   // Type conversion: ZetaSQL => Calcite
-  public static SqlTypeName toCalciteTypeName(TypeKind type) {
-    switch (type) {
+  public static RelDataType toCalciteType(Type type, boolean nullable, 
RexBuilder rexBuilder) {
+    RelDataType nonNullType;
+    switch (type.getKind()) {
       case TYPE_INT64:
-        return SqlTypeName.BIGINT;
+        nonNullType = 
rexBuilder.getTypeFactory().createSqlType(SqlTypeName.BIGINT);
+        break;
       case TYPE_DOUBLE:
-        return SqlTypeName.DOUBLE;
+        nonNullType = 
rexBuilder.getTypeFactory().createSqlType(SqlTypeName.DOUBLE);
+        break;
       case TYPE_BOOL:
-        return SqlTypeName.BOOLEAN;
+        nonNullType = 
rexBuilder.getTypeFactory().createSqlType(SqlTypeName.BOOLEAN);
+        break;
       case TYPE_STRING:
-        return SqlTypeName.VARCHAR;
+        nonNullType = 
rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR);
+        break;
       case TYPE_BYTES:
-        return SqlTypeName.VARBINARY;
+        nonNullType = 
rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARBINARY);
+        break;
       case TYPE_NUMERIC:
-        return SqlTypeName.DECIMAL;
+        nonNullType = 
rexBuilder.getTypeFactory().createSqlType(SqlTypeName.DECIMAL);
+        break;
       case TYPE_DATE:
-        return SqlTypeName.DATE;
+        nonNullType = 
rexBuilder.getTypeFactory().createSqlType(SqlTypeName.DATE);
+        break;
       case TYPE_TIME:
-        return SqlTypeName.TIME;
+        nonNullType = 
rexBuilder.getTypeFactory().createSqlType(SqlTypeName.TIME);
+        break;
       case TYPE_DATETIME:
-        return SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE;
+        nonNullType =
+            
rexBuilder.getTypeFactory().createSqlType(SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE);
+        break;
       case TYPE_TIMESTAMP:
-        // TODO: handle timestamp with time zone.
-        return SqlTypeName.TIMESTAMP;
-        // TODO[BEAM-9179] Add conversion code for ARRAY and ROW types
+        nonNullType = 
rexBuilder.getTypeFactory().createSqlType(SqlTypeName.TIMESTAMP);
+        break;
+      case TYPE_ARRAY:
+        // TODO: Should element type has the same nullability as the array 
type?
+        nonNullType = toCalciteArrayType(type.asArray().getElementType(), 
nullable, rexBuilder);
+        break;
+      case TYPE_STRUCT:
+        // TODO: Should field type has the same nullability as the struct type?
+        nonNullType = toCalciteStructType(type.asStruct(), nullable, 
rexBuilder);
+        break;
       default:
-        throw new UnsupportedOperationException("Unknown ZetaSQL type: " + 
type.name());
+        throw new UnsupportedOperationException("Unknown ZetaSQL type: " + 
type.getKind().name());
     }
+    return rexBuilder.getTypeFactory().createTypeWithNullability(nonNullType, 
nullable);
   }
 
-  public static RelDataType toRelDataType(RexBuilder rexBuilder, Type type, 
boolean isNullable) {
-    if (type.getKind().equals(TypeKind.TYPE_ARRAY)) {
-      return toArrayRelDataType(rexBuilder, type.asArray(), isNullable);
-    } else if (type.getKind().equals(TypeKind.TYPE_STRUCT)) {
-      return toStructRelDataType(rexBuilder, type.asStruct(), isNullable);
-    } else {
-      // TODO: Check type's nullability?
-      return toSimpleRelDataType(type.getKind(), rexBuilder, isNullable);
-    }
+  private static RelDataType toCalciteArrayType(
+      Type elementType, boolean nullable, RexBuilder rexBuilder) {
+    return rexBuilder
+        .getTypeFactory()
+        // -1 cardinality means unlimited array size
+        .createArrayType(toCalciteType(elementType, nullable, rexBuilder), -1);
   }
 
-  public static RelDataType toArrayRelDataType(
-      RexBuilder rexBuilder, ArrayType arrayType, boolean isNullable) {
-    // -1 cardinality means unlimited array size.
-    // TODO: is unlimited array size right for general case?
-    // TODO: whether isNullable should be ArrayType's nullablity (not its 
element type's?)
-    return nullable(
-        rexBuilder,
-        rexBuilder
-            .getTypeFactory()
-            .createArrayType(toRelDataType(rexBuilder, 
arrayType.getElementType(), isNullable), -1),
-        isNullable);
+  private static RelDataType toCalciteStructType(
+      StructType structType, boolean nullable, RexBuilder rexBuilder) {
+    List<StructField> fields = structType.getFieldList();
+    List<String> fieldNames = getFieldNameList(fields);
+    List<RelDataType> fieldTypes =
+        fields.stream()
+            .map(f -> toCalciteType(f.getType(), nullable, rexBuilder))
+            .collect(Collectors.toList());
+    return rexBuilder.getTypeFactory().createStructType(fieldTypes, 
fieldNames);
   }
 
-  private static List<String> toNameList(List<StructField> fields) {
+  private static List<String> getFieldNameList(List<StructField> fields) {
     ImmutableList.Builder<String> b = ImmutableList.builder();
     for (int i = 0; i < fields.size(); i++) {
       String name = fields.get(i).getName();
       if ("".equals(name)) {
-        name = "$col" + i;
+        name = "$col" + i; // empty field name is not allowed, generate an 
index-based name for it

Review comment:
       Yeah this comment is not accurate. The real problem is Beam does not 
allow multiple columns with the same name (empty in this case). Thanks for 
spotting this.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Issue Time Tracking
-------------------

    Worklog Id:     (was: 468797)
    Time Spent: 7h 40m  (was: 7.5h)

> Refactor Beam ZetaSQL type translation code
> -------------------------------------------
>
>                 Key: BEAM-9179
>                 URL: https://issues.apache.org/jira/browse/BEAM-9179
>             Project: Beam
>          Issue Type: Improvement
>          Components: dsl-sql-zetasql
>            Reporter: Robin Qiu
>            Assignee: Robin Qiu
>            Priority: P2
>          Time Spent: 7h 40m
>  Remaining Estimate: 0h
>
> Currently, how types work in Beam ZetaSQL is very confusing: code that does 
> type conversion between Calcite/Beam/ZetaSQL type systems are everywhere and 
> is likely to be not consistent. In addition, the recent migration to 
> BeamZetaSqlCalcRel makes the situation even worse: it adds another layer for 
> type conversion and makes a lot of code (for making ZetaSQL functions 
> manually) obsolete. This makes debugging and supporting new features very 
> hard. A few examples:
>  * Support new ZetaSQL types (e.g. DATE/TIME/DATETIME)
>  * Fix the long-standing precision & timezone issue of TIMESTMAP type
>  * Support BeamSQL query parameters
> Therefore, I propose we work on the following items to make the story of Beam 
> ZetaSQL type clearer:
>  * Define a clear mapping between Calcite/Beam/ZetaSQL types and values
>  * Refactor the codebase to move all type conversion code to a single source 
> of truth
>  * Add thorough testing for it
>  * Remove obsolete code that is added to make functions work before 
> BeamZetaSqlCalcRel



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to