Github user paul-rogers commented on a diff in the pull request: https://github.com/apache/drill/pull/1058#discussion_r154475785 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/cache/VectorSerializer.java --- @@ -62,27 +72,65 @@ public Writer write(VectorAccessible va) throws IOException { @SuppressWarnings("resource") public Writer write(VectorAccessible va, SelectionVector2 sv2) throws IOException { + checkNotNull(va); WritableBatch batch = WritableBatch.getBatchNoHVWrap( va.getRecordCount(), va, sv2 != null); return write(batch, sv2); } public Writer write(WritableBatch batch, SelectionVector2 sv2) throws IOException { - VectorAccessibleSerializable vas; - if (sv2 == null) { - vas = new VectorAccessibleSerializable(batch, allocator); - } else { - vas = new VectorAccessibleSerializable(batch, sv2, allocator); - } - if (retain) { - vas.writeToStreamAndRetain(stream); - } else { - vas.writeToStream(stream); + return write(batch, sv2, false); + } + + public Writer write(WritableBatch batch, SelectionVector2 sv2, boolean retain) throws IOException { + checkNotNull(batch); + checkNotNull(channel); + final Timer.Context timerContext = metrics.timer(WRITER_TIMER).time(); + + final DrillBuf[] incomingBuffers = batch.getBuffers(); + final UserBitShared.RecordBatchDef batchDef = batch.getDef(); + + try { + /* Write the metadata to the file */ + batchDef.writeDelimitedTo(output); + + + /* If we have a selection vector, dump it to file first */ --- End diff -- In the master branch, the `VectorSerializer` classes were slapped together as a simpler API on top of the existing `VectorAccessibleSerializable` classes. (That's why the code is so clunky.) Here, we turn the `VectorSerializer` into the means to do the writing, which seems like a good improvement. Does this mean we now have two implementations? The one here and the old one in `VectorAccessibleSerializable`? Do we need to clean up that split somehow? Else, should we leave comments to point people between the two copies so they know to make changes in both places? Looking at the other changes, it seems we leave the old "unmanaged" external sort to use the old implementation.
---