[ 
https://issues.apache.org/jira/browse/AVRO-2090?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16419597#comment-16419597
 ] 

ASF GitHub Bot commented on AVRO-2090:
--------------------------------------

cutting commented on a change in pull request #256: AVRO-2090: Improve 
encode/decode time for SpecificRecord using code generation
URL: https://github.com/apache/avro/pull/256#discussion_r178152178
 
 

 ##########
 File path: 
lang/java/compiler/src/main/velocity/org/apache/avro/compiler/specific/templates/java/classic/record.vm
 ##########
 @@ -473,4 +475,282 @@ public class ${this.mangle($schema.getName())}#if 
($schema.isError()) extends or
     READER$.read(this, SpecificData.getDecoder(in));
   }
 
+#if ($this.isCustomCodable($schema))
+  @Override public boolean hasCustomCoders() { return true; }
+
+  @Override public void encode(org.apache.avro.io.Encoder out)
+    throws java.io.IOException
+  {
+#set ($nv = 0)## Counter to ensure unique var-names
+#set ($maxnv = 0)## Holds high-water mark during recursion
+#foreach ($field in $schema.getFields())
+#set ($n = $this.mangle($field.name(), $schema.isError()))
+#set ($s = $field.schema())
+#encodeVar(0 "this.${n}" $s)
+
+#set ($nv = $maxnv)
+#end
+  }
+
+  @Override public void decode(org.apache.avro.io.Decoder in)
+    throws java.io.IOException
+  {
+#set ($nv = 0)## Counter to ensure unique var-names
+#set ($maxnv = 0)## Holds high-water mark during recursion
+#foreach ($field in $schema.getFields())
+#set ($n = $this.mangle($field.name(), $schema.isError()))
+#set ($s = $field.schema())
+#set ($rs = "SCHEMA$.getField(""${n}"").schema()")
+#decodeVar(0 "this.${n}" $s $rs)
+
+#set ($nv = $maxnv)
+#end
+  }
+#end
 }
+
+#macro( encodeVar $indent $var $s )
+#set ($I = $this.indent($indent))
+##### Compound types (array, map, and union) require calls
+##### that will recurse back into this encodeVar macro:
+#if ($s.Type.Name.equals("array"))
+#encodeArray($indent $var $s)
+#elseif ($s.Type.Name.equals("map"))
+#encodeMap($indent $var $s)
+#elseif ($s.Type.Name.equals("union"))
+#encodeUnion($indent $var $s)
+##### Use the generated "encode" method as fast way to write
+##### (specific) record types:
+#elseif ($s.Type.Name.equals("record"))
+$I    ${var}.encode(out);
+##### For rest of cases, generate calls out.writeXYZ:
+#elseif ($s.Type.Name.equals("null"))
+$I    out.writeNull();
+#elseif ($s.Type.Name.equals("boolean"))
+$I    out.writeBoolean(${var});
+#elseif ($s.Type.Name.equals("int"))
+$I    out.writeInt(${var});
+#elseif ($s.Type.Name.equals("long"))
+$I    out.writeLong(${var});
+#elseif ($s.Type.Name.equals("float"))
+$I    out.writeFloat(${var});
+#elseif ($s.Type.Name.equals("double"))
+$I    out.writeDouble(${var});
+#elseif ($s.Type.Name.equals("string"))
+#if ($this.isStringable($s))
+$I    out.writeString(${var}.toString());
+#else
+$I    out.writeString(${var});
+#end
+#elseif ($s.Type.Name.equals("bytes"))
+$I    out.writeBytes(${var});
+#elseif ($s.Type.Name.equals("fixed"))
+$I    out.writeFixed(${var}.bytes(), 0, ${s.FixedSize});
+#elseif ($s.Type.Name.equals("enum"))
+$I    out.writeEnum(${var}.ordinal());
+#else
+## TODO -- signal a code-gen-time error
+#end
+#end
+
+#macro( encodeArray $indent $var $s )
+#set ($I = $this.indent($indent))
+#set ($et = $this.javaType($s.ElementType))
+$I    long size${nv} = ${var}.size();
+$I    out.writeArrayStart();
+$I    out.setItemCount(size${nv});
+$I    long actualSize${nv} = 0;
+$I    for ($et e${nv}: ${var}) {
+$I      actualSize${nv}++;
+$I      out.startItem();
+#set ($var = "e${nv}")
+#set ($nv = $nv + 1)
+#set ($maxnv = $nv)
+#set ($indent = $indent + 2)
+#encodeVar($indent $var $s.ElementType)
+#set ($nv = $nv - 1)
+#set ($indent = $indent - 2)
+#set ($I = $this.indent($indent))
+$I    }
+$I    out.writeArrayEnd();
+$I    if (actualSize${nv} != size${nv})
+$I      throw new java.util.ConcurrentModificationException("Array-size written was " + size${nv} + ", but element count was " + actualSize${nv} + ".");
+#end
+
+#macro( encodeMap $indent $var $s )
+#set ($I = $this.indent($indent))
+#set ($kt = $this.getStringType($s))
+#set ($vt = $this.javaType($s.ValueType))
+$I    long size${nv} = ${var}.size();
+$I    out.writeMapStart();
+$I    out.setItemCount(size${nv});
+$I    long actualSize${nv} = 0;
+$I    for (java.util.Map.Entry<$kt, $vt> e${nv}: ${var}.entrySet()) {
+$I      actualSize${nv}++;
+$I      out.startItem();
+#if ($this.isStringable($s))
+$I      out.writeString(e${nv}.getKey().toString());
+#else
+$I      out.writeString(e${nv}.getKey());
+#end
+$I      $vt v${nv} = e${nv}.getValue();
+#set ($var = "v${nv}")
+#set ($nv = $nv + 1)
+#set ($maxnv = $nv)
+#set ($indent = $indent + 2)
+#encodeVar($indent $var $s.ValueType)
+#set ($nv = $nv - 1)
+#set ($indent = $indent - 2)
+#set ($I = $this.indent($indent))
+$I    }
+$I    out.writeMapEnd();
+$I    if (actualSize${nv} != size${nv})
+      throw new java.util.ConcurrentModificationException("Map-size written was " + size${nv} + ", but element count was " + actualSize${nv} + ".");
+#end
+
+#macro( encodeUnion $indent $var $s )
+#set ($I = $this.indent($indent))
+#set ($et = $this.javaType($s.Types.get($this.getNonNullIndex($s))))
+$I    if (${var} == null) {
+$I      out.writeIndex(#if($this.getNonNullIndex($s)==0)1#{else}0#end);
+$I      out.writeNull();
+$I    } else {
+$I      out.writeIndex(${this.getNonNullIndex($s)});
+#set ($indent = $indent + 2)
+#encodeVar($indent $var $s.Types.get($this.getNonNullIndex($s)))
+#set ($indent = $indent - 2)
+#set ($I = $this.indent($indent))
+$I    }
+#end
+
+
+#macro( decodeVar $indent $var $s $rs )
+#set ($I = $this.indent($indent))
+##### Compound types (array, map, and union) require calls
+##### that will recurse back into this decodeVar macro:
+#if ($s.Type.Name.equals("array"))
+#decodeArray($indent $var $s $rs)
+#elseif ($s.Type.Name.equals("map"))
+#decodeMap($indent $var $s $rs)
+#elseif ($s.Type.Name.equals("union"))
+#decodeUnion($indent $var $s $rs)
+##### Use the generated "decode" method as fast way to write
+##### (specific) record types:
+#elseif ($s.Type.Name.equals("record"))
+$I    if (${var} == null) {
+$I      ${var} = new ${this.javaType($s)}();
+$I    }
+$I    ${var}.decode(in);
+##### For rest of cases, generate calls in.readXYZ:
+#elseif ($s.Type.Name.equals("null"))
+$I    in.readNull();
+#elseif ($s.Type.Name.equals("boolean"))
+$I    $var = in.readBoolean();
+#elseif ($s.Type.Name.equals("int"))
+$I    $var = in.readInt();
+#elseif ($s.Type.Name.equals("long"))
+$I    $var = in.readLong();
+#elseif ($s.Type.Name.equals("float"))
+$I    $var = in.readFloat();
+#elseif ($s.Type.Name.equals("double"))
+$I    $var = in.readDouble();
+#elseif ($s.Type.Name.equals("string"))
+#decodeString( "$I" $var $s )
+#elseif ($s.Type.Name.equals("bytes"))
+$I    $var = in.readBytes(${var});
+#elseif ($s.Type.Name.equals("fixed"))
+$I    if (${var} == null) {
+$I      ${var} = new ${this.javaType($s)}();
+$I    }
+$I    in.readFixed(${var}.bytes(), 0, ${s.FixedSize});
+#elseif ($s.Type.Name.equals("enum"))
+$I    $var = ${this.javaType($s)}.values()[in.readEnum()];
+#else
+## TODO -- signal a code-gen-time error
+#end
+#end
+
+#macro( decodeString $II $var $s )
+#set ($st = ${this.getStringType($s)})
+#if ($this.isStringable($s))
+$II    ${var} = new ${st}(in.readString());
+#elseif ($st.equals("java.lang.String"))
+$II    $var = in.readString();
+#elseif ($st.equals("org.apache.avro.util.Utf8"))
+$II    $var = in.readString(${var});
+#else
+$II    $var = in.readString(${var} instanceof Utf8 ? (Utf8)${var} : null);
+#end
+#end
+
+#macro( decodeArray $indent $var $s $rs )
+#set ($I = $this.indent($indent))
+#set ($t = $this.javaType($s))
+#set ($et = $this.javaType($s.ElementType))
+#set ($gat = "SpecificData.Array<${et}>")
+$I    long size${nv} = in.readArrayStart();
+$I    $t a${nv} = ${var}; // Need fresh name due to limitation of macro system
+$I    if (a${nv} == null) {
 
 Review comment:
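   but we probably don't need this comment in every generated file [i.e. the "// Need fresh name due to limitation of macro system" Java comment in the quoted template, which is plain template output and is therefore emitted into the generated code]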

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Improve encode/decode time for SpecificRecord using code generation
> -------------------------------------------------------------------
>
>                 Key: AVRO-2090
>                 URL: https://issues.apache.org/jira/browse/AVRO-2090
>             Project: Avro
>          Issue Type: Improvement
>          Components: java
>            Reporter: Raymie Stata
>            Assignee: Raymie Stata
>            Priority: Major
>         Attachments: customcoders.md
>
>
> Compared to GenericRecords, SpecificRecords offer type-safety plus the 
> performance of traditional getters/setters/instance variables.  But these are 
> only beneficial to Java code accessing those records.  SpecificRecords 
> inherit serialization and deserialization code from GenericRecords, which is 
> dynamic and thus slow (in fact, benchmarks show that serialization and 
> deserialization is _slower_ for SpecificRecord than for GenericRecord).
> This patch extends record.vm to generate custom, higher-performance encoder 
> and decoder functions for SpecificRecords.  We've run a public benchmark 
> showing that the new code reduces serialization time by 2/3 and 
> deserialization time by close to 50%.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to