[ 
https://issues.apache.org/jira/browse/JENA-1414?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16255148#comment-16255148
 ] 

ASF GitHub Bot commented on JENA-1414:
--------------------------------------

Github user rvesse commented on a diff in the pull request:

    https://github.com/apache/jena/pull/306#discussion_r151387036
  
    --- Diff: jena-core/src/main/java/org/apache/jena/graph/GraphUtil.java ---
    @@ -223,6 +258,116 @@ public static void deleteFrom(Graph dstGraph, Graph 
srcGraph) {
             dstGraph.getEventManager().notifyDeleteGraph(dstGraph, srcGraph) ;
         }
     
    +    private static final int CMP_GREATER = 1;
    +    private static final int CMP_EQUAL   = 0;
    +    private static final int CMP_LESS    = -1;
    +    private static final int CMP_UNKNOWN = -2;
    +
    +    // Decide whether to loop on dstGraph or srcGraph.
    +    private static boolean decideHowtoExecute(Graph dstGraph, Graph 
srcGraph) {
    +        if ( false ) {
    +            // Use dstGraph.size() and srcGraph.size()
    +            // Assumes dstGraph.size() (and to some extent srcGraph.size() 
are efficient.
    +            int dstSize = dstGraph.size();
    +            int srcSize = srcGraph.size();
    +
    +            // Loop on src if:
    +            // * srcGraph is below the threshold MIN_SRC_SIZE (a "Just Do 
it" number)
    +            // * dstGraph is "much" larger than src where "much" is given 
by DST_SRC_RATIO
    +            //     Assumes dstGraph is efficient.
    +            boolean loopOnSrc = (srcSize < MIN_SRC_SIZE || dstSize > 
DST_SRC_RATIO*srcSize) ;
    +            return loopOnSrc;
    +        }
    +        
    +        // Avoid dstGraph.size()
    +        // do |find()| > DST_SRC_RATIO*srcSize
    +        if ( false ) {
    +            int srcSize = srcGraph.size();
    +            boolean loopOnSrc = (srcSize < MIN_SRC_SIZE || 
compareSizeTo(dstGraph, DST_SRC_RATIO*srcSize) == CMP_GREATER) ;
    +            return loopOnSrc;
    +        }
    +        
    +        // Avoid dstGraph.size() and srcGraph.size()
    +        // do |find1()| > DST_SRC_RATIO*|find2()|
    +        // limit on |find2()|
    +        int cmp = compareSizeByFind(dstGraph, srcGraph, DST_SRC_RATIO, 
1000*MIN_SRC_SIZE);
    +        boolean loopOnSrc = (cmp==CMP_GREATER || cmp==CMP_UNKNOWN) ; 
    +        return loopOnSrc;
    +    }
    +    
    +    private static int compareSizeTo(Graph g, int size) {
    +        int counter = 0;
    +        ExtendedIterator<Triple> it = g.find();
    +        try {
    +            while (it.hasNext()) {
    +                it.next();
    +                counter += 1;
    +                if (counter > size) {
    +                    return CMP_GREATER ;
    +                }
    +            }
    +            return counter == size ? CMP_EQUAL : CMP_LESS;
    +        } finally {
    +            it.close();
    +        }
    +    }
    +
    +    /** Compare the ration of graph sizes by co-iteration, up to some 
limit on the srcGraph size. */ 
    +    private static int compareSizeByFind(Graph dstGraph, Graph srcGraph, 
int ratio, int maxCompare) {
    +        ExtendedIterator<Triple> dstIter = dstGraph.find();
    +        ExtendedIterator<Triple> srcIter = srcGraph.find();
    +        try {
    +            // Step dstIter by "ratio" steps each time.
    +            return compareByLength(dstIter, srcIter, ratio, 1, maxCompare);
    +        } finally {
    +            dstIter.close();
    +            srcIter.close();
    +        }
    +    }
    +    
    +    /** Compare the length of two iterators.
    +     *  <b>This operation is destructive on the iterators<b>.
    +     */
    +    private static int compareByLength(Iterator<?> iter1, Iterator<?> 
iter2) {
    +        return compareByLength(iter1, iter2, 1, 1, Integer.MAX_VALUE);
    +    }
    +
    +    /** Compare the length of two iterators. By using the "step" argument, 
the app can compare rations.
    +     *  <b>This operation is destructive on the iterators<b>.
    +     */
    +    private static int compareByLength(Iterator<?> iter1, Iterator<?> 
iter2, int step1, int step2, int maxSteps) {
    +        if ( step1 <= 0 )
    +            throw new IllegalArgumentException("step size 'step1' is not 
positve: "+step1);
    +        if ( step2 <= 0 )
    +            throw new IllegalArgumentException("step size 'step2' is not 
positve: "+step2);
    +        
    +        int x = 0; 
    +        for ( ;; ) {
    +            x++ ;
    +            boolean ended1 = iter1.hasNext();
    +            boolean ended2 = iter2.hasNext();
    +            
    +            if ( ended1 )
    --- End diff --
    
    Shouldn't these be `! ended1` and `! ended2`?


> Performance regression in Model.remove(Model m) method
> ------------------------------------------------------
>
>                 Key: JENA-1414
>                 URL: https://issues.apache.org/jira/browse/JENA-1414
>             Project: Apache Jena
>          Issue Type: Improvement
>          Components: Core
>    Affects Versions: Jena 3.3.0, Jena 3.4.0
>            Reporter: Michał Woźniak
>            Assignee: Andy Seaborne
>         Attachments: graph_util_improve.patch
>
>
> The Model.remove(Model) works very slow on large models, as it propagates to 
> GraphUtil.deleteFrom(Graph, Graph), which computes size of the target graph 
> by iterating over all triples. This computation takes nearly 100% of the time 
> of the Model.remove(Model) operation.
> It seems this commit introduced the issue: 
> https://github.com/apache/jena/commit/781895ce64e062c7f2268a78189a777c39b92844#diff-fbb4d11dc804464f94c27e33e11b18e8
> Due to this bug deletion of a concept scheme on a large ontology may take 
> several minutes. 



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to