[
https://issues.apache.org/jira/browse/AVRO-2090?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16633196#comment-16633196
]
ASF GitHub Bot commented on AVRO-2090:
--------------------------------------
rstata commented on a change in pull request #256: AVRO-2090: Improve
encode/decode time for SpecificRecord using code generation
URL: https://github.com/apache/avro/pull/256#discussion_r221445803
##########
File path:
lang/java/compiler/src/main/velocity/org/apache/avro/compiler/specific/templates/java/classic/record.vm
##########
@@ -473,4 +475,282 @@ public class ${this.mangle($schema.getName())}#if
($schema.isError()) extends or
READER$.read(this, SpecificData.getDecoder(in));
}
+#if ($this.isCustomCodable($schema))
+ @Override public boolean hasCustomCoders() { return true; }
+
+ @Override public void encode(org.apache.avro.io.Encoder out)
+ throws java.io.IOException
+ {
+#set ($nv = 0)## Counter to ensure unique var-names
+#set ($maxnv = 0)## Holds high-water mark during recursion
+#foreach ($field in $schema.getFields())
+#set ($n = $this.mangle($field.name(), $schema.isError()))
+#set ($s = $field.schema())
+#encodeVar(0 "this.${n}" $s)
+
+#set ($nv = $maxnv)
+#end
+ }
+
+ @Override public void decode(org.apache.avro.io.Decoder in)
+ throws java.io.IOException
+ {
+#set ($nv = 0)## Counter to ensure unique var-names
+#set ($maxnv = 0)## Holds high-water mark during recursion
+#foreach ($field in $schema.getFields())
+#set ($n = $this.mangle($field.name(), $schema.isError()))
+#set ($s = $field.schema())
+#set ($rs = "SCHEMA$.getField(""${n}"").schema()")
+#decodeVar(0 "this.${n}" $s $rs)
+
+#set ($nv = $maxnv)
+#end
+ }
+#end
}
+
+#macro( encodeVar $indent $var $s )
+#set ($I = $this.indent($indent))
+##### Compound types (array, map, and union) require calls
+##### that will recurse back into this encodeVar macro:
+#if ($s.Type.Name.equals("array"))
+#encodeArray($indent $var $s)
+#elseif ($s.Type.Name.equals("map"))
+#encodeMap($indent $var $s)
+#elseif ($s.Type.Name.equals("union"))
+#encodeUnion($indent $var $s)
+##### Use the generated "encode" method as fast way to write
+##### (specific) record types:
+#elseif ($s.Type.Name.equals("record"))
+$I ${var}.encode(out);
+##### For rest of cases, generate calls out.writeXYZ:
+#elseif ($s.Type.Name.equals("null"))
+$I out.writeNull();
+#elseif ($s.Type.Name.equals("boolean"))
+$I out.writeBoolean(${var});
+#elseif ($s.Type.Name.equals("int"))
+$I out.writeInt(${var});
+#elseif ($s.Type.Name.equals("long"))
+$I out.writeLong(${var});
+#elseif ($s.Type.Name.equals("float"))
+$I out.writeFloat(${var});
+#elseif ($s.Type.Name.equals("double"))
+$I out.writeDouble(${var});
+#elseif ($s.Type.Name.equals("string"))
+#if ($this.isStringable($s))
+$I out.writeString(${var}.toString());
+#else
+$I out.writeString(${var});
+#end
+#elseif ($s.Type.Name.equals("bytes"))
+$I out.writeBytes(${var});
+#elseif ($s.Type.Name.equals("fixed"))
+$I out.writeFixed(${var}.bytes(), 0, ${s.FixedSize});
+#elseif ($s.Type.Name.equals("enum"))
+$I out.writeEnum(${var}.ordinal());
+#else
+## TODO -- signal a code-gen-time error
+#end
+#end
+
+#macro( encodeArray $indent $var $s )
+#set ($I = $this.indent($indent))
+#set ($et = $this.javaType($s.ElementType))
+$I long size${nv} = ${var}.size();
+$I out.writeArrayStart();
+$I out.setItemCount(size${nv});
+$I long actualSize${nv} = 0;
+$I for ($et e${nv}: ${var}) {
+$I actualSize${nv}++;
+$I out.startItem();
+#set ($var = "e${nv}")
+#set ($nv = $nv + 1)
+#set ($maxnv = $nv)
+#set ($indent = $indent + 2)
+#encodeVar($indent $var $s.ElementType)
+#set ($nv = $nv - 1)
+#set ($indent = $indent - 2)
+#set ($I = $this.indent($indent))
+$I }
+$I out.writeArrayEnd();
+$I if (actualSize${nv} != size${nv})
+$I throw new java.util.ConcurrentModificationException("Array-size written was " + size${nv} + ", but element count was " + actualSize${nv} + ".");
+#end
+
+#macro( encodeMap $indent $var $s )
+#set ($I = $this.indent($indent))
+#set ($kt = $this.getStringType($s))
+#set ($vt = $this.javaType($s.ValueType))
+$I long size${nv} = ${var}.size();
+$I out.writeMapStart();
+$I out.setItemCount(size${nv});
+$I long actualSize${nv} = 0;
+$I for (java.util.Map.Entry<$kt, $vt> e${nv}: ${var}.entrySet()) {
+$I actualSize${nv}++;
+$I out.startItem();
+#if ($this.isStringable($s))
+$I out.writeString(e${nv}.getKey().toString());
+#else
+$I out.writeString(e${nv}.getKey());
+#end
+$I $vt v${nv} = e${nv}.getValue();
+#set ($var = "v${nv}")
+#set ($nv = $nv + 1)
+#set ($maxnv = $nv)
+#set ($indent = $indent + 2)
+#encodeVar($indent $var $s.ValueType)
+#set ($nv = $nv - 1)
+#set ($indent = $indent - 2)
+#set ($I = $this.indent($indent))
+$I }
+$I out.writeMapEnd();
+$I if (actualSize${nv} != size${nv})
+ throw new java.util.ConcurrentModificationException("Map-size written was " + size${nv} + ", but element count was " + actualSize${nv} + ".");
+#end
+
+#macro( encodeUnion $indent $var $s )
+#set ($I = $this.indent($indent))
+#set ($et = $this.javaType($s.Types.get($this.getNonNullIndex($s))))
+$I if (${var} == null) {
+$I out.writeIndex(#if($this.getNonNullIndex($s)==0)1#{else}0#end);
+$I out.writeNull();
+$I } else {
+$I out.writeIndex(${this.getNonNullIndex($s)});
+#set ($indent = $indent + 2)
+#encodeVar($indent $var $s.Types.get($this.getNonNullIndex($s)))
+#set ($indent = $indent - 2)
+#set ($I = $this.indent($indent))
+$I }
+#end
+
+
+#macro( decodeVar $indent $var $s $rs )
+#set ($I = $this.indent($indent))
+##### Compound types (array, map, and union) require calls
+##### that will recurse back into this decodeVar macro:
+#if ($s.Type.Name.equals("array"))
+#decodeArray($indent $var $s $rs)
+#elseif ($s.Type.Name.equals("map"))
+#decodeMap($indent $var $s $rs)
+#elseif ($s.Type.Name.equals("union"))
+#decodeUnion($indent $var $s $rs)
+##### Use the generated "decode" method as fast way to read
+##### (specific) record types:
+#elseif ($s.Type.Name.equals("record"))
+$I if (${var} == null) {
+$I ${var} = new ${this.javaType($s)}();
+$I }
+$I ${var}.decode(in);
+##### For rest of cases, generate calls in.readXYZ:
+#elseif ($s.Type.Name.equals("null"))
+$I in.readNull();
+#elseif ($s.Type.Name.equals("boolean"))
+$I $var = in.readBoolean();
+#elseif ($s.Type.Name.equals("int"))
+$I $var = in.readInt();
+#elseif ($s.Type.Name.equals("long"))
+$I $var = in.readLong();
+#elseif ($s.Type.Name.equals("float"))
+$I $var = in.readFloat();
+#elseif ($s.Type.Name.equals("double"))
+$I $var = in.readDouble();
+#elseif ($s.Type.Name.equals("string"))
+#decodeString( "$I" $var $s )
+#elseif ($s.Type.Name.equals("bytes"))
+$I $var = in.readBytes(${var});
+#elseif ($s.Type.Name.equals("fixed"))
+$I if (${var} == null) {
+$I ${var} = new ${this.javaType($s)}();
+$I }
+$I in.readFixed(${var}.bytes(), 0, ${s.FixedSize});
+#elseif ($s.Type.Name.equals("enum"))
+$I $var = ${this.javaType($s)}.values()[in.readEnum()];
+#else
+## TODO -- signal a code-gen-time error
+#end
+#end
+
+#macro( decodeString $II $var $s )
+#set ($st = ${this.getStringType($s)})
+#if ($this.isStringable($s))
+$II ${var} = new ${st}(in.readString());
+#elseif ($st.equals("java.lang.String"))
+$II $var = in.readString();
+#elseif ($st.equals("org.apache.avro.util.Utf8"))
+$II $var = in.readString(${var});
+#else
+$II $var = in.readString(${var} instanceof Utf8 ? (Utf8)${var} : null);
+#end
+#end
+
+#macro( decodeArray $indent $var $s $rs )
+#set ($I = $this.indent($indent))
+#set ($t = $this.javaType($s))
+#set ($et = $this.javaType($s.ElementType))
+#set ($gat = "SpecificData.Array<${et}>")
+$I long size${nv} = in.readArrayStart();
+$I $t a${nv} = ${var}; // Need fresh name due to limitation of macro system
+$I if (a${nv} == null) {
Review comment:
changed to a velocity comment
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
> Improve encode/decode time for SpecificRecord using code generation
> -------------------------------------------------------------------
>
> Key: AVRO-2090
> URL: https://issues.apache.org/jira/browse/AVRO-2090
> Project: Avro
> Issue Type: Improvement
> Components: java
> Reporter: Raymie Stata
> Assignee: Raymie Stata
> Priority: Major
> Attachments: customcoders.md
>
>
> Compared to GenericRecords, SpecificRecords offer type-safety plus the
> performance of traditional getters/setters/instance variables. But these are
> only beneficial to Java code accessing those records. SpecificRecords
> inherit serialization and deserialization code from GenericRecords, which is
> dynamic and thus slow (in fact, benchmarks show that serialization and
> deserialization are _slower_ for SpecificRecord than for GenericRecord).
> This patch extends record.vm to generate custom, higher-performance encoder
> and decoder functions for SpecificRecords. We've run a public benchmark
> showing that the new code reduces serialization time by 2/3 and
> deserialization time by close to 50%.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)