Hi everyone,

I believe I've found a bug in countQuery::doEvaluate which is producing 
incorrect query results.  We first noticed that the following queries 
incorrectly returned identical results when run through ibis::table::select, 
but distinct and seemingly correct results when run through 
ibis::query::getHitRows:

(behaviorSegments contains '63' OR behaviorSegments contains '662') AND 
(nielsenDma contains '503')
(behaviorSegments contains '63' OR behaviorSegments contains '662') AND 
(nielsenDma contains '501')
(behaviorSegments contains '63' OR behaviorSegments contains '662') AND 
(nielsenDma contains 'not a keyword!!')

I enabled verbosity level 4 using the ibis executable and found that the value 
of ierr after evaluating the left hand side of the AND clause was 0 (indicating 
0 hit rows), causing evaluation to stop without evaluating the right hand side.

May I suggest the following fix for countQuery.cpp?  We are still using 
version 1.3.5.  Is there a formal channel for filing bugs, such as JIRA?

898

    case ibis::qExpr::LOGICAL_OR: {


899

        ierr = doEvaluate(term->getLeft(), mask, ht);


900

        if (ierr >= 0 && ht.cnt() < mask.cnt()) {


901

            int leftIerr = ierr;


902

            ibis::bitvector b1;


903

            if (ht.cnt() > mask.bytes() + ht.bytes()) {


904

                ibis::bitvector* newmask = mask - ht;


905

                ierr = doEvaluate(term->getRight(), *newmask, b1);


906

                delete newmask;


907

            }


908

            else {


909

                ierr = doEvaluate(term->getRight(), mask, b1);


910

            }


911

            if (ierr > 0) {


912

                ht |= b1;


913

                ierr = ht.sloppyCount();


914

            } else {


915

                ierr = leftIerr;


916

            }


917

        }


918

        break;}



ibis > where (behaviorSegments contains '63' OR behaviorSegments contains 
'662') AND (nielsenDma contains '503')
doQuery -- processing " FROM 8r7nhJy0RL WHERE (behaviorSegments contains '63' 
OR behaviorSegments contains '662') AND (nielsenDma contains '503')"
Constructing selectClause @ 0x7fff15ffa9c8
newToken -- generated new token "s315Pp5XgCB----2" for user saenns
query[s315Pp5XgCB----2]::setWhereClause -- add a new where clause 
"(behaviorSegments contains '63' OR behaviorSegments contains '662') AND 
(nielsenDma contains '503')".
query[s315Pp5XgCB----2]::setWhereClause -- where "(behaviorSegments contains 
'63' OR behaviorSegments contains '662') AND (nielsenDma contains '503')"
Translated the WHERE clause into: ((behaviorSegments CONTAINS '63' OR 
behaviorSegments CONTAINS '662') AND nielsenDma CONTAINS '503')
query[s315Pp5XgCB----2]::evaluate -- starting to evaluate the query for user 
"saenns"
query[s315Pp5XgCB----2]::doEvaluate(0x43d0f00: behaviorSegments CONTAINS '63', 
mask.cnt()=7795176) --> 209101, ierr = 209101
query[s315Pp5XgCB----2]::doEvaluate(0x43d7320: behaviorSegments CONTAINS '662', 
mask.cnt()=7795176) --> 0, ierr = 0
query[s315Pp5XgCB----2]::doEvaluate(0x43c8b20: (0x43d0f00 OR 0x43d7320), 
mask.cnt()=7795176) --> 209101, ierr = 209101
query[s315Pp5XgCB----2]::doEvaluate(0x43d11a0: nielsenDma CONTAINS '503', 
mask.cnt()=209101) --> 126, ierr = 2
query[s315Pp5XgCB----2]::doEvaluate(0x43cc620: (0x43c8b20 AND 0x43d11a0), 
mask.cnt()=7795176) --> 126, ierr = 2
query[s315Pp5XgCB----2]::evaluate -- the hit contains xxx bits with 126 bits 
set(=1) taking up xxx bytes; the estimated clustering factor is 1; had the bits 
been randomly spread out, the expected size would be xxx bytes; estimated 
number of bytes to be read in order to access 4-byte values is xxx
query[s315Pp5XgCB----2]::evaluate -- time to compute the 126 hits: 0 sec(CPU), 
0.19129 sec(elapsed).
query[s315Pp5XgCB----2]::evaluate -- user saenns FROM 8r7nhJy0RL WHERE 
(behaviorSegments contains '63' OR behaviorSegments contains '662') AND 
(nielsenDma contains '503') ==> 126 hits.
doQuery:: evaluate( FROM 8r7nhJy0RL WHERE (behaviorSegments contains '63' OR 
behaviorSegments contains '662') AND (nielsenDma contains '503')) produced 126 
hits, took 0 CPU seconds, 0.340331 elapsed seconds
countQuery::setWhereClause -- add a new where clause "(0x43d71e0 AND 0x43d7180)"
countQuery::evaluate -- start timer ...
countQuery::doEvaluate(0x43d7210: behaviorSegments CONTAINS '63', 
mask.cnt()=7795176) --> 209101, ierr = 209101
countQuery::doEvaluate(0x43d7290: behaviorSegments CONTAINS '662', 
mask.cnt()=7795176) --> 0, ierr = 0
countQuery::doEvaluate(0x43d71e0: (0x43d7210 OR 0x43d7290), mask.cnt()=7795176) 
--> 209101, ierr = 0
countQuery::doEvaluate(0x43d91f0: (0x43d71e0 AND 0x43d7180), 
mask.cnt()=7795176) --> 209101, ierr = 0
countQuery::evaluate -- Select count(*) From 8r7nhJy0RL Where (0x43d71e0 AND 
0x43d7180) --> 209101
countQuery::evaluate -- duration: 0 sec(CPU), 0.002358 sec(elapsed)
Warning -- countQuery.getNumHits returned 209101, while query.getNumHits 
returned 126
Freeing selectClause @ 0x7fff15ffa9c8



_______________________________________________
FastBit-users mailing list
[email protected]
https://hpcrdm.lbl.gov/cgi-bin/mailman/listinfo/fastbit-users

Reply via email to