hequn8128 commented on a change in pull request #9977: [FLINK-14497][python] Support primitive data types in Python user-defined functions URL: https://github.com/apache/flink/pull/9977#discussion_r338641332
########## File path: flink-python/src/main/java/org/apache/flink/table/runtime/typeutils/PythonTypeUtils.java ########## @@ -0,0 +1,473 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.table.runtime.typeutils; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.api.common.typeutils.TypeSerializer; +import org.apache.flink.api.common.typeutils.base.BooleanSerializer; +import org.apache.flink.api.common.typeutils.base.ByteSerializer; +import org.apache.flink.api.common.typeutils.base.DoubleSerializer; +import org.apache.flink.api.common.typeutils.base.FloatSerializer; +import org.apache.flink.api.common.typeutils.base.IntSerializer; +import org.apache.flink.api.common.typeutils.base.LongSerializer; +import org.apache.flink.api.common.typeutils.base.ShortSerializer; +import org.apache.flink.api.common.typeutils.base.array.BytePrimitiveArraySerializer; +import org.apache.flink.api.java.typeutils.runtime.RowSerializer; +import org.apache.flink.fnexecution.v1.FlinkFnApi; +import org.apache.flink.table.runtime.typeutils.serializers.BaseRowSerializer; +import org.apache.flink.table.runtime.typeutils.serializers.DateSerializer; +import 
org.apache.flink.table.runtime.typeutils.serializers.StringSerializer; +import org.apache.flink.table.types.logical.AnyType; +import org.apache.flink.table.types.logical.ArrayType; +import org.apache.flink.table.types.logical.BigIntType; +import org.apache.flink.table.types.logical.BinaryType; +import org.apache.flink.table.types.logical.BooleanType; +import org.apache.flink.table.types.logical.CharType; +import org.apache.flink.table.types.logical.DateType; +import org.apache.flink.table.types.logical.DayTimeIntervalType; +import org.apache.flink.table.types.logical.DecimalType; +import org.apache.flink.table.types.logical.DistinctType; +import org.apache.flink.table.types.logical.DoubleType; +import org.apache.flink.table.types.logical.FloatType; +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.LocalZonedTimestampType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.LogicalTypeVisitor; +import org.apache.flink.table.types.logical.MapType; +import org.apache.flink.table.types.logical.MultisetType; +import org.apache.flink.table.types.logical.NullType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.logical.SmallIntType; +import org.apache.flink.table.types.logical.StructuredType; +import org.apache.flink.table.types.logical.SymbolType; +import org.apache.flink.table.types.logical.TimeType; +import org.apache.flink.table.types.logical.TimestampType; +import org.apache.flink.table.types.logical.TinyIntType; +import org.apache.flink.table.types.logical.VarBinaryType; +import org.apache.flink.table.types.logical.VarCharType; +import org.apache.flink.table.types.logical.YearMonthIntervalType; +import org.apache.flink.table.types.logical.ZonedTimestampType; + +/** + * Utilities for serializing Flink data. 
+ */ +@Internal +public final class PythonTypeUtils { + + private static final String EMPTY_STRING = ""; + + public static TypeSerializer toFlinkTypeSerializer(LogicalType logicalType) { + return logicalType.accept(new LogicalTypeToTypeSerializerConverter()); + } + + public static TypeSerializer toBlinkTypeSerializer(LogicalType logicalType) { + return logicalType.accept(new LogicalTypeToBlinkTypeSerializerConverter()); + } + + public static FlinkFnApi.Schema.FieldType toProtoType(LogicalType logicalType) { + return logicalType.accept(new LogicalTypeToProtoTypeConverter()); + } + + private static class LogicalTypeToTypeSerializerConverter implements LogicalTypeVisitor<TypeSerializer> { + private TypeSerializer unSupportedDataType(LogicalType logicalType) { + throw new UnsupportedOperationException(String.format( + "Python UDF doesn't support data type %s currently.", logicalType.asSummaryString())); + } + + @Override + public TypeSerializer visit(CharType charType) { + return StringSerializer.INSTANCE; + } + + @Override + public TypeSerializer visit(VarCharType varCharType) { + return StringSerializer.INSTANCE; + } + + @Override + public TypeSerializer visit(BooleanType booleanType) { + return BooleanSerializer.INSTANCE; + } + + @Override + public TypeSerializer visit(BinaryType binaryType) { + return BytePrimitiveArraySerializer.INSTANCE; + } + + @Override + public TypeSerializer visit(VarBinaryType varBinaryType) { + return BytePrimitiveArraySerializer.INSTANCE; + } + + @Override + public TypeSerializer visit(DecimalType decimalType) { + return unSupportedDataType(decimalType); + } + + @Override + public TypeSerializer visit(TinyIntType tinyIntType) { + return ByteSerializer.INSTANCE; + } + + @Override + public TypeSerializer visit(SmallIntType smallIntType) { + return ShortSerializer.INSTANCE; + } + + @Override + public TypeSerializer visit(IntType intType) { + return IntSerializer.INSTANCE; + } + + @Override + public TypeSerializer visit(BigIntType bigIntType) 
{ + return LongSerializer.INSTANCE; + } + + @Override + public TypeSerializer visit(FloatType floatType) { + return FloatSerializer.INSTANCE; + } + + @Override + public TypeSerializer visit(DoubleType doubleType) { + return DoubleSerializer.INSTANCE; + } + + @Override + public TypeSerializer visit(DateType dateType) { + return DateSerializer.INSTANCE; + } + + @Override + public TypeSerializer visit(TimeType timeType) { + return unSupportedDataType(timeType); + } + + @Override + public TypeSerializer visit(TimestampType timestampType) { + return unSupportedDataType(timestampType); + } + + @Override + public TypeSerializer visit(ZonedTimestampType zonedTimestampType) { + return unSupportedDataType(zonedTimestampType); + } + + @Override + public TypeSerializer visit(LocalZonedTimestampType localZonedTimestampType) { + return unSupportedDataType(localZonedTimestampType); + } + + @Override + public TypeSerializer visit(YearMonthIntervalType yearMonthIntervalType) { + return unSupportedDataType(yearMonthIntervalType); + } + + @Override + public TypeSerializer visit(DayTimeIntervalType dayTimeIntervalType) { + return unSupportedDataType(dayTimeIntervalType); + } + + @Override + public TypeSerializer visit(ArrayType arrayType) { + return unSupportedDataType(arrayType); + } + + @Override + public TypeSerializer visit(MultisetType multisetType) { + return unSupportedDataType(multisetType); + } + + @Override + public TypeSerializer visit(MapType mapType) { + return unSupportedDataType(mapType); + } + + @Override + public TypeSerializer visit(RowType rowType) { + final TypeSerializer[] fieldTypeSerializers = rowType.getFields() + .stream() + .map(f -> f.getType().accept(this)) + .toArray(TypeSerializer[]::new); + return new RowSerializer(fieldTypeSerializers); + } + + @Override + public TypeSerializer visit(DistinctType distinctType) { + return unSupportedDataType(distinctType); + } + + @Override + public TypeSerializer visit(StructuredType structuredType) { + return 
unSupportedDataType(structuredType); + } + + @Override + public TypeSerializer visit(NullType nullType) { + return unSupportedDataType(nullType); + } + + @Override + public TypeSerializer visit(AnyType<?> anyType) { + return unSupportedDataType(anyType); + } + + @Override + public TypeSerializer visit(SymbolType<?> symbolType) { + return unSupportedDataType(symbolType); + } + + @Override + public TypeSerializer visit(LogicalType other) { + return unSupportedDataType(other); + } + } + + private static class LogicalTypeToBlinkTypeSerializerConverter extends LogicalTypeToTypeSerializerConverter { + + @Override + public TypeSerializer visit(CharType charType) { + return BinaryStringSerializer.INSTANCE; + } + + @Override + public TypeSerializer visit(VarCharType varCharType) { + return BinaryStringSerializer.INSTANCE; + } + + @Override + public TypeSerializer visit(DateType dateType) { + return IntSerializer.INSTANCE; + } + + @Override + public TypeSerializer visit(RowType rowType) { + final TypeSerializer[] fieldTypeSerializers = rowType.getFields() + .stream() + .map(f -> f.getType().accept(this)) + .toArray(TypeSerializer[]::new); + return new BaseRowSerializer(rowType.getChildren().toArray(new LogicalType[0]), fieldTypeSerializers); + } + } + + private static class LogicalTypeToProtoTypeConverter implements LogicalTypeVisitor<FlinkFnApi.Schema.FieldType> { Review comment: Consider extending LogicalTypeDefaultVisitor here instead of implementing LogicalTypeVisitor directly. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org With regards, Apache Git Services
