Github user rvesse commented on a diff in the pull request: https://github.com/apache/jena/pull/306#discussion_r151387036 --- Diff: jena-core/src/main/java/org/apache/jena/graph/GraphUtil.java --- @@ -223,6 +258,116 @@ public static void deleteFrom(Graph dstGraph, Graph srcGraph) { dstGraph.getEventManager().notifyDeleteGraph(dstGraph, srcGraph) ; } + private static final int CMP_GREATER = 1; + private static final int CMP_EQUAL = 0; + private static final int CMP_LESS = -1; + private static final int CMP_UNKNOWN = -2; + + // Decide whether to loop on dstGraph or srcGraph. + private static boolean decideHowtoExecute(Graph dstGraph, Graph srcGraph) { + if ( false ) { + // Use dstGraph.size() and srcGraph.size() + // Assumes dstGraph.size() (and to some extent srcGraph.size() are efficient. + int dstSize = dstGraph.size(); + int srcSize = srcGraph.size(); + + // Loop on src if: + // * srcGraph is below the threshold MIN_SRC_SIZE (a "Just Do it" number) + // * dstGraph is "much" larger than src where "much" is given by DST_SRC_RATIO + // Assumes dstGraph is efficient. + boolean loopOnSrc = (srcSize < MIN_SRC_SIZE || dstSize > DST_SRC_RATIO*srcSize) ; + return loopOnSrc; + } + + // Avoid dstGraph.size() + // do |find()| > DST_SRC_RATIO*srcSize + if ( false ) { + int srcSize = srcGraph.size(); + boolean loopOnSrc = (srcSize < MIN_SRC_SIZE || compareSizeTo(dstGraph, DST_SRC_RATIO*srcSize) == CMP_GREATER) ; + return loopOnSrc; + } + + // Avoid dstGraph.size() and srcGraph.size() + // do |find1()| > DST_SRC_RATIO*|find2()| + // limit on |find2()| + int cmp = compareSizeByFind(dstGraph, srcGraph, DST_SRC_RATIO, 1000*MIN_SRC_SIZE); + boolean loopOnSrc = (cmp==CMP_GREATER || cmp==CMP_UNKNOWN) ; + return loopOnSrc; + } + + private static int compareSizeTo(Graph g, int size) { + int counter = 0; + ExtendedIterator<Triple> it = g.find(); + try { + while (it.hasNext()) { + it.next(); + counter += 1; + if (counter > size) { + return CMP_GREATER ; + } + } + return counter == size ? CMP_EQUAL : CMP_LESS; + } finally { + it.close(); + } + } + + /** Compare the ration of graph sizes by co-iteration, up to some limit on the srcGraph size. */ + private static int compareSizeByFind(Graph dstGraph, Graph srcGraph, int ratio, int maxCompare) { + ExtendedIterator<Triple> dstIter = dstGraph.find(); + ExtendedIterator<Triple> srcIter = srcGraph.find(); + try { + // Step dstIter by "ratio" steps each time. + return compareByLength(dstIter, srcIter, ratio, 1, maxCompare); + } finally { + dstIter.close(); + srcIter.close(); + } + } + + /** Compare the length of two iterators. + * <b>This operation is destructive on the iterators<b>. + */ + private static int compareByLength(Iterator<?> iter1, Iterator<?> iter2) { + return compareByLength(iter1, iter2, 1, 1, Integer.MAX_VALUE); + } + + /** Compare the length of two iterators. By using the "step" argument, the app can compare rations. + * <b>This operation is destructive on the iterators<b>. + */ + private static int compareByLength(Iterator<?> iter1, Iterator<?> iter2, int step1, int step2, int maxSteps) { + if ( step1 <= 0 ) + throw new IllegalArgumentException("step size 'step1' is not positve: "+step1); + if ( step2 <= 0 ) + throw new IllegalArgumentException("step size 'step2' is not positve: "+step2); + + int x = 0; + for ( ;; ) { + x++ ; + boolean ended1 = iter1.hasNext(); + boolean ended2 = iter2.hasNext(); + + if ( ended1 ) --- End diff -- Shouldn't these be `! ended1` and `! ended2`?
---