Github user paul-rogers commented on a diff in the pull request: https://github.com/apache/drill/pull/984#discussion_r145292056 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/TopN/TopNBatch.java --- @@ -335,20 +336,42 @@ private void purge() throws SchemaChangeException { logger.debug("Took {} us to purge", watch.elapsed(TimeUnit.MICROSECONDS)); } - public PriorityQueue createNewPriorityQueue(FragmentContext context, List<Ordering> orderings, - VectorAccessible batch, MappingSet mainMapping, MappingSet leftMapping, MappingSet rightMapping) - throws ClassTransformationException, IOException, SchemaChangeException{ - CodeGenerator<PriorityQueue> cg = CodeGenerator.get(PriorityQueue.TEMPLATE_DEFINITION, context.getFunctionRegistry(), context.getOptions()); + private PriorityQueue createNewPriorityQueue(VectorAccessible batch, int limit) + throws SchemaChangeException, ClassTransformationException, IOException { + return createNewPriorityQueue( + mainMapping, leftMapping, rightMapping, context.getOptionSet(), context.getFunctionRegistry(), context.getDrillbitContext().getCompiler(), + config.getOrderings(), batch, unionTypeEnabled, codegenDump, limit, oContext.getAllocator(), schema.getSelectionVectorMode()); + } + + public static MappingSet createMainMappingSet() { + return new MappingSet((String) null, null, ClassGenerator.DEFAULT_SCALAR_MAP, ClassGenerator.DEFAULT_SCALAR_MAP); + } + + public static MappingSet createLeftMappingSet() { + return new MappingSet("leftIndex", null, ClassGenerator.DEFAULT_SCALAR_MAP, ClassGenerator.DEFAULT_SCALAR_MAP); + } + + public static MappingSet createRightMappingSet() { + return new MappingSet("rightIndex", null, ClassGenerator.DEFAULT_SCALAR_MAP, ClassGenerator.DEFAULT_SCALAR_MAP); + } + + public static PriorityQueue createNewPriorityQueue( + MappingSet mainMapping, MappingSet leftMapping, MappingSet rightMapping, --- End diff -- One thought worth sharing here. Today, our code cache uses source code as the key to look for an existing compiled version of a class. But, this is inefficient: we have to generate the source code, transform it, and store two copies to act as keys. Better would be to have a "class definition" object that holds all parameters needed to generate the code. Store these in the cache. Likely, these will be much smaller than the generated code. Using these avoids the need to generate the code to see if we already have a copy of that code. The class definition idea works as long as the definition is a closed set: everything needed to generate the code is in that one definition object. Your function here is close: it passes in a closed set of items. The open bits are the expression and system/session options. For options, we can copy the few values we need into our definition. The expression might require a bit more thought to capture. So, if we can evolve your idea further, we can get a potential large per-schema performance improvement by avoiding code generation when doing a repeat query. Too much to fix here; but something to think about moving forward.
---