[
https://issues.apache.org/jira/browse/AVRO-1847?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15329647#comment-15329647
]
Sean Busbey commented on AVRO-1847:
-----------------------------------
{code}
+ public static Object convertToLogicalType(Object datum, Schema schema,
LogicalType type,
+ Conversion<?> conversion) {
+ if (datum == null || schema == null || type == null || conversion == null)
{
+ throw new AvroRuntimeException("Parameters cannot be null! Parameter
values:" +
+ Arrays.deepToString(new Object[]{datum, schema, type, conversion}));
+ }
...
+ public static <T> Object convertToRawType(Object datum, Schema schema,
LogicalType type,
+ Conversion<T> conversion) {
+ if (datum == null || schema == null || type == null || conversion == null)
{
+ throw new AvroRuntimeException("Parameters cannot be null! Parameter
values:" +
+ Arrays.deepToString(new Object[]{datum, schema, type, conversion}));
+ }
+
{code}
These both will need javadocs. IllegalArgumentException is a better fit for
both of those checks.
{code}
+ public static boolean isDateTimeType(LogicalType type) {
{code}
Needs javadoc
{code}
diff --git
a/lang/java/avro/src/main/java/org/apache/avro/data/RecordBuilderBase.java
b/lang/java/avro/src/main/java/org/apache/avro/data/RecordBuilderBase.java
index 8e34a36..de27d6c 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/data/RecordBuilderBase.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/data/RecordBuilderBase.java
...
@@ -135,6 +137,25 @@ public abstract class RecordBuilderBase<T extends
IndexedRecord>
return data.deepCopy(field.schema(), data.getDefaultValue(field));
}
+ /**
+ * Gets the default value of the given field, if any.
+ * @param field the field whose default value should be retrieved.
+ * @return the default value associated with the given field,
+ * or null if none is specified in the schema.
+ * @throws IOException
+ */
+ @SuppressWarnings({ "rawtypes", "unchecked" })
+ protected Object defaultValue(Field field, Conversion<?> conversion) throws
IOException {
+ Schema schema = field.schema();
+ Object rawDefaultValue = data.deepCopy(schema,
data.getDefaultValue(field));
+ if (conversion == null) {
+ return rawDefaultValue;
+ } else {
+ return Conversions.convertToLogicalType(rawDefaultValue, schema,
+ schema.getLogicalType(), conversion);
+ }
+ }
{code}
we're relying on an invariant here, that things with conversions passed in have
schema's with a logical type (sine the convertToLogicalType will throw if the
logical type is null). I think that's the same as before? Is it worth noting in
the javadoc?
{code}
diff --git
a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java
b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java
index e66726e..cf06d63 100644
---
a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java
+++
b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java
...
@@ -76,25 +78,25 @@ public class GenericDatumWriter<D> implements
DatumWriter<D> {
protected <T> Object convert(Schema schema, LogicalType logicalType,
Conversion<T> conversion, Object datum) {
- if (conversion == null) {
- return datum;
- }
- Class<T> fromClass = conversion.getConvertedType();
- switch (schema.getType()) {
- case RECORD: return conversion.toRecord(fromClass.cast(datum), schema,
logicalType);
- case ENUM: return conversion.toEnumSymbol(fromClass.cast(datum),
schema, logicalType);
- case ARRAY: return conversion.toArray(fromClass.cast(datum), schema,
logicalType);
- case MAP: return conversion.toMap(fromClass.cast(datum), schema,
logicalType);
- case FIXED: return conversion.toFixed(fromClass.cast(datum), schema,
logicalType);
- case STRING: return conversion.toCharSequence(fromClass.cast(datum),
schema, logicalType);
- case BYTES: return conversion.toBytes(fromClass.cast(datum), schema,
logicalType);
- case INT: return conversion.toInt(fromClass.cast(datum), schema,
logicalType);
- case LONG: return conversion.toLong(fromClass.cast(datum), schema,
logicalType);
- case FLOAT: return conversion.toFloat(fromClass.cast(datum), schema,
logicalType);
- case DOUBLE: return conversion.toDouble(fromClass.cast(datum), schema,
logicalType);
- case BOOLEAN: return conversion.toBoolean(fromClass.cast(datum), schema,
logicalType);
+ try {
+ if (conversion == null) {
+ return datum;
+ } else {
+ return Conversions.convertToRawType(datum, schema, logicalType,
conversion);
+ }
+ } catch (AvroRuntimeException e) {
+ Throwable cause = e.getCause();
+ if (cause != null && cause.getClass() == ClassCastException.class) {
+ // This is to keep backwards compatibility. The convert function here
used to
+ // throw CCE. When prompted to Conversions, it throws
AvroRuntimeException. So,
+ // if the cause is a CCE, rethrow it in case any child class checks
it. This
+ // behaviour can be changed later in future versions to make it
consistent with
+ // reading path, which throws AvroRuntimeException
+ throw (ClassCastException)cause;
+ } else {
+ throw e;
+ }
}
- return datum;
}
{code}
We should add javadocs here about which params may be null and which will cause
an exception (e.g. logicalType can't be null if conversion is non-null).
{code}
diff --git
a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumReader.java
b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumReader.java
index 6133d6b..a3161be 100644
---
a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumReader.java
+++
b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumReader.java
...
@@ -191,26 +192,7 @@ public class GenericDatumReader<D> implements
DatumReader<D> {
protected Object convert(Object datum, Schema schema, LogicalType type,
Conversion<?> conversion) {
- try {
- switch (schema.getType()) {
- case RECORD: return conversion.fromRecord((IndexedRecord) datum,
schema, type);
- case ENUM: return conversion.fromEnumSymbol((GenericEnumSymbol)
datum, schema, type);
- case ARRAY: return
conversion.fromArray(getData().getArrayAsCollection(datum), schema, type);
- case MAP: return conversion.fromMap((Map<?, ?>) datum, schema, type);
- case FIXED: return conversion.fromFixed((GenericFixed) datum, schema,
type);
- case STRING: return conversion.fromCharSequence((CharSequence) datum,
schema, type);
- case BYTES: return conversion.fromBytes((ByteBuffer) datum, schema,
type);
- case INT: return conversion.fromInt((Integer) datum, schema, type);
- case LONG: return conversion.fromLong((Long) datum, schema, type);
- case FLOAT: return conversion.fromFloat((Float) datum, schema, type);
- case DOUBLE: return conversion.fromDouble((Double) datum, schema, type);
- case BOOLEAN: return conversion.fromBoolean((Boolean) datum, schema,
type);
- }
- return datum;
- } catch (ClassCastException e) {
- throw new AvroRuntimeException("Cannot convert " + datum + ":" +
- datum.getClass().getSimpleName() + ": expected generic type", e);
- }
+ return Conversions.convertToLogicalType(datum, schema, type, conversion);
}
/** Called to read a record instance. May be overridden for alternate record
diff --git
a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java
b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java
index e66726e..cf06d63 100644
---
a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java
+++
b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java
...
@@ -76,25 +78,25 @@ public class GenericDatumWriter<D> implements
DatumWriter<D> {
protected <T> Object convert(Schema schema, LogicalType logicalType,
Conversion<T> conversion, Object datum) {
- if (conversion == null) {
- return datum;
- }
- Class<T> fromClass = conversion.getConvertedType();
- switch (schema.getType()) {
- case RECORD: return conversion.toRecord(fromClass.cast(datum), schema,
logicalType);
- case ENUM: return conversion.toEnumSymbol(fromClass.cast(datum),
schema, logicalType);
- case ARRAY: return conversion.toArray(fromClass.cast(datum), schema,
logicalType);
- case MAP: return conversion.toMap(fromClass.cast(datum), schema,
logicalType);
- case FIXED: return conversion.toFixed(fromClass.cast(datum), schema,
logicalType);
- case STRING: return conversion.toCharSequence(fromClass.cast(datum),
schema, logicalType);
- case BYTES: return conversion.toBytes(fromClass.cast(datum), schema,
logicalType);
- case INT: return conversion.toInt(fromClass.cast(datum), schema,
logicalType);
- case LONG: return conversion.toLong(fromClass.cast(datum), schema,
logicalType);
- case FLOAT: return conversion.toFloat(fromClass.cast(datum), schema,
logicalType);
- case DOUBLE: return conversion.toDouble(fromClass.cast(datum), schema,
logicalType);
- case BOOLEAN: return conversion.toBoolean(fromClass.cast(datum), schema,
logicalType);
+ try {
+ if (conversion == null) {
+ return datum;
+ } else {
+ return Conversions.convertToRawType(datum, schema, logicalType,
conversion);
+ }
+ } catch (AvroRuntimeException e) {
+ Throwable cause = e.getCause();
+ if (cause != null && cause.getClass() == ClassCastException.class) {
+ // This is to keep backwards compatibility. The convert function here
used to
+ // throw CCE. When prompted to Conversions, it throws
AvroRuntimeException. So,
+ // if the cause is a CCE, rethrow it in case any child class checks
it. This
+ // behaviour can be changed later in future versions to make it
consistent with
+ // reading path, which throws AvroRuntimeException
+ throw (ClassCastException)cause;
+ } else {
+ throw e;
+ }
}
- return datum;
}
{code}
What did GenericDatumReader/Writer do on {{null}} for {{Schema}} or
{{LogicalType}} prior to this patch? I think it depends on the specific
{{Conversion}} implementation and now they'll be throwing either
{{AvroRuntimeException}} or {{IllegalArgumentException}} based on the feedback
above? Similarly, {{GenericDatumReader.convert}} used to throw NPE for a null
conversion (but only for some schema types), but now it will throw one of ARE
or IAE all the time, I think? this sounds breaking. Maybe we do the null checks
only in the 1.9 branch?
{code}
+ private boolean useLogicalTypesForNonDateTimeTypes = false;
/*
{code}
The problem with this approach is we also have another non-date-time logical
type without an implementation, UUID. so we wouldn't be able to add a UUID
implementation without breaking things (since right now setting this to true
should get you logical overlay for decimal and not for uuid). What if we just
make a flag for using logical type with decimal specifically? This would also
let us remove the {{LogicalTypes.isDateTimeType}} method, since we could check
specifically for the decimal type.
{code}
public String conversionInstance(Schema schema) {
+ if (schema == null || schema.getLogicalType() == null) {
+ return "null";
+ }
+
if (LogicalTypes.date().equals(schema.getLogicalType())) {
return "DATE_CONVERSION";
} else if (LogicalTypes.timeMillis().equals(schema.getLogicalType())) {
return "TIME_CONVERSION";
} else if (LogicalTypes.timestampMillis().equals(schema.getLogicalType()))
{
return "TIMESTAMP_CONVERSION";
+ } else if
(LogicalTypes.Decimal.class.equals(schema.getLogicalType().getClass())) {
+ return "DECIMAL_CONVERSION";
}
- return "null";
+
+ return null;
}
{code}
We should add a javadoc here. :). This also still changes behavior, from
returning the string "null" to returning an actual null. Is there a reason not
to just return the string version like before?
{code}
diff --git
a/lang/java/tools/src/main/java/org/apache/avro/tool/SpecificCompilerTool.java
b/lang/java/tools/src/main/java/org/apache/avro/tool/SpecificCompilerTool.java
index b038b4e..e2f9b8a 100644
---
a/lang/java/tools/src/main/java/org/apache/avro/tool/SpecificCompilerTool.java
+++
b/lang/java/tools/src/main/java/org/apache/avro/tool/SpecificCompilerTool.java
@@ -19,6 +19,7 @@ package org.apache.avro.tool;
import java.io.File;
import java.io.FilenameFilter;
+import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.util.ArrayList;
@@ -42,7 +43,7 @@ public class SpecificCompilerTool implements Tool {
List<String> args) throws Exception {
if (args.size() < 3) {
System.err
- .println("Usage: [-encoding <outputencoding>] [-string]
(schema|protocol) input... outputdir");
+ .println("Usage: [-encoding <outputencoding>] [-string]
[-enableLogicalTypes] (schema|protocol) input... outputdir");
System.err
.println(" input - input files or directories");
System.err
{code}
This help message is wrong now.
{code}
diff --git
a/lang/java/compiler/src/test/java/org/apache/avro/compiler/specific/TestSpecificCompiler.java
b/lang/java/compiler/src/test/java/org/apache/avro/compiler/specific/TestSpecificCompiler.java
index 3668d2f..b831c73 100644
---
a/lang/java/compiler/src/test/java/org/apache/avro/compiler/specific/TestSpecificCompiler.java
+++
b/lang/java/compiler/src/test/java/org/apache/avro/compiler/specific/TestSpecificCompiler.java
...
@Test
- public void testLogicalTypes() throws Exception {
+ public void testLogicalTypesUsingJavaClasses() throws Exception {
SpecificCompiler compiler = createCompiler();
+ compiler.setUseLogicalTypesForNonDateTimeTypes(true);
...
+ @Test
+ public void testLogicalTypesNotUsingJavaClasses() throws Exception {
+ SpecificCompiler compiler = createCompiler();
+ compiler.setUseLogicalTypesForNonDateTimeTypes(false);
{code}
please update these test method names to reflect that only Decimal support is
being toggled.
> IDL compiler uses ByteBuffer for decimal type even if logical type is
> supported
> --------------------------------------------------------------------------------
>
> Key: AVRO-1847
> URL: https://issues.apache.org/jira/browse/AVRO-1847
> Project: Avro
> Issue Type: Bug
> Components: java
> Affects Versions: 1.8.0
> Reporter: Yibing Shi
> Assignee: Yibing Shi
> Attachments: AVRO-1847.1.patch, AVRO-1847.2.patch, AVRO-1847.3.patch,
> AVRO-1847.4.patch, AVRO-1847.5.patch
>
>
> Version 1.8.0 has added the support of logical types. A conversion class
> (Conversions.DecimalConversion) has also been added for decimal type.
> However, the IDL compiler still uses ByteBuffer for decimal types, which is
> not the same behaviour as data, time or timestamp type (added in AVRO-1684).
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)