[
https://issues.apache.org/jira/browse/BEAM-7274?focusedWorklogId=351546&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-351546
]
ASF GitHub Bot logged work on BEAM-7274:
----------------------------------------
Author: ASF GitHub Bot
Created on: 30/Nov/19 18:52
Start Date: 30/Nov/19 18:52
Worklog Time Spent: 10m
Work Description: alexvanboxel commented on pull request #10247:
[BEAM-7274] In preparation for protocol-buffer schemas, add OneOf and
Enumeration logical types
URL: https://github.com/apache/beam/pull/10247#discussion_r352298426
##########
File path:
sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/LogicalTypes.java
##########
@@ -111,4 +134,247 @@ public FieldType getBaseType() {
}
}
}
+
+ /**
+ * This class represents a single enum value. It can be referenced as a
String or as an integer.
+ */
+ public static class EnumerationValue {
+ private final String stringValue;
+ private final int value;
+
+ EnumerationValue(String stringValue, int value) {
+ this.stringValue = stringValue;
+ this.value = value;
+ }
+
+ /** Return the integer enum value. */
+ int getValue() {
+ return value;
+ }
+
+ /** Return the String enum value. */
+ @Override
+ public String toString() {
+ return stringValue;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ EnumerationValue enumValue = (EnumerationValue) o;
+ return value == enumValue.value && Objects.equals(stringValue,
enumValue.stringValue);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(stringValue, value);
+ }
+ }
+
+ /** This {@link LogicalType} represent an enumeration over a fixed set of
values. */
+ public static class EnumerationType implements LogicalType<EnumerationValue,
Integer> {
+ public static final String IDENTIFIER = "Enum";
+ final BiMap<String, Integer> enumValues = HashBiMap.create();
+
+ private EnumerationType(Map<String, Integer> enumValues) {
+ this.enumValues.putAll(enumValues);
+ }
+
+ /** Create an enumeration type over a set of String->Integer values. */
+ public static EnumerationType create(Map<String, Integer> enumValues) {
+ return new EnumerationType(enumValues);
+ }
+
+ /**
+ * Create an enumeration type from a fixed set of String values; integer
values will be
+ * automatically chosen.
+ */
+ public static EnumerationType create(List<String> enumValues) {
+ return new EnumerationType(
+ IntStream.range(0, enumValues.size())
+ .boxed()
+ .collect(Collectors.toMap(i -> enumValues.get(i), i -> i)));
+ }
+
+ /** Return an {@link EnumerationValue} corresponding to one of the
enumeration strings. */
+ public EnumerationValue valueOf(String stringValue) {
+ return new EnumerationValue(stringValue, enumValues.get(stringValue));
+ }
+
+ /**
+ * Return an {@link EnumerationValue} corresponding to one of the
enumeration integer values.
+ */
+ public EnumerationValue valueOf(int value) {
+ return new EnumerationValue(enumValues.inverse().get(value), value);
+ }
+
+ @Override
+ public String getIdentifier() {
+ return IDENTIFIER;
+ }
+
+ @Override
+ public FieldType getArgumentType() {
+ return FieldType.map(FieldType.STRING, FieldType.INT32);
+ }
+
+ @Override
+ public Map<String, Integer> getArgument() {
+ return enumValues;
+ }
+
+ @Override
+ public FieldType getBaseType() {
+ return FieldType.INT32;
+ }
+
+ @Override
+ public Integer toBaseType(EnumerationValue input) {
+ return input.value;
+ }
+
+ @Override
+ public EnumerationValue toInputType(Integer base) {
+ return valueOf(base);
+ }
+ }
+
+ /**
+ * Represents a single OneOf value. Each object contains an {@link
EnumerationValue} specifying
+ * which field is set along with the value of that field.
+ */
+ public static class OneOfValue {
+ private EnumerationValue caseType;
+ private Object value;
+
+ OneOfValue(EnumerationValue caseType, Object value) {
+ this.caseType = caseType;
+ this.value = value;
+ }
+
+ /** Returns the enumeration that specified which OneOf field is set. */
+ public EnumerationValue getCaseType() {
+ return caseType;
+ }
+
+ /** Returns the current value of the OneOf. */
+ @SuppressWarnings("TypeParameterUnusedInFormals")
+ public <T> T getValue() {
+ return (T) value;
+ }
+ }
+ /**
+ * A logical type representing a union of fields. This logical type is
initialized with a set of
+ * field and represents a union of those fields. This logical type is backed
by a Row object
+ * containing one nullable field matching each input field, and one
additional {@link
+ * EnumerationType} logical type field that indicates which field is set.
+ */
+ public static class OneOfType implements LogicalType<OneOfValue, Row> {
+ public static final String IDENTIFIER = "OneOf";
+ public static final String DEFAULT_ENUM_FIELD_NAME = "OneOfCase";
+
+ private final Schema oneOfSchema;
+ private final int oneOfCaseFieldId;
+ private final byte[] schemaProtoRepresentation;
+
+ private OneOfType(List<Field> fields, String enumField) {
+ List<Field> nullableFields =
+ fields.stream()
+ .map(f -> Field.nullable(f.getName(), f.getType()))
+ .collect(Collectors.toList());
+ List<String> enumValues =
+
nullableFields.stream().map(Field::getName).collect(Collectors.toList());
+ nullableFields.add(
+ Field.of(enumField,
FieldType.logicalType(EnumerationType.create(enumValues))));
+
+ oneOfSchema = Schema.builder().addFields(nullableFields).build();
+ schemaProtoRepresentation =
SchemaTranslation.schemaToProto(oneOfSchema).toByteArray();
+ this.oneOfCaseFieldId = oneOfSchema.indexOf(enumField);
Review comment:
I'm wondering whether the oneOfCase needs to be stored in the row at all, since
it can be inferred from the rest of the fields:
- the non-null field is the selected oneOf value
- if all fields are null, the oneOf is NULL
This is how proto does it on the wire. It depends on what is more important:
data size (the extra field) or case-lookup performance.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
Issue Time Tracking
-------------------
Worklog Id: (was: 351546)
Time Spent: 11h 10m (was: 11h)
> Protobuf Beam Schema support
> ----------------------------
>
> Key: BEAM-7274
> URL: https://issues.apache.org/jira/browse/BEAM-7274
> Project: Beam
> Issue Type: Improvement
> Components: sdk-java-core
> Reporter: Alex Van Boxel
> Assignee: Alex Van Boxel
> Priority: Minor
> Time Spent: 11h 10m
> Remaining Estimate: 0h
>
> Add support for the new Beam Schema to the Protobuf extension.
--
This message was sent by Atlassian Jira
(v8.3.4#803005)