Hi everyone,
I believe I've found a bug in countQuery::doEvaluate that produces
incorrect query results. We first noticed that the following queries
incorrectly returned the same results when run through ibis::table::select, but
returned distinct and seemingly correct values when run through ibis::query::getHitRows:
(behaviorSegments contains '63' OR behaviorSegments contains '662') AND
(nielsenDma contains '503')
(behaviorSegments contains '63' OR behaviorSegments contains '662') AND
(nielsenDma contains '501')
(behaviorSegments contains '63' OR behaviorSegments contains '662') AND
(nielsenDma contains 'not a keyword!!')
I enabled verbosity level 4 using the ibis executable and found that the value of
ierr after evaluating the left hand side of the AND clause was 0 (indicating 0
hit rows), causing evaluation to stop without evaluating the right hand side.
May I suggest the following fix for countQuery.cpp? We are still using version 1.3.5.
Is there a formal channel for filing bugs, such as JIRA?
898
case ibis::qExpr::LOGICAL_OR: {
899
ierr = doEvaluate(term->getLeft(), mask, ht);
900
if (ierr >= 0 && ht.cnt() < mask.cnt()) {
901
int leftIerr = ierr;
902
ibis::bitvector b1;
903
if (ht.cnt() > mask.bytes() + ht.bytes()) {
904
ibis::bitvector* newmask = mask - ht;
905
ierr = doEvaluate(term->getRight(), *newmask, b1);
906
delete newmask;
907
}
908
else {
909
ierr = doEvaluate(term->getRight(), mask, b1);
910
}
911
if (ierr > 0) {
912
ht |= b1;
913
ierr = ht.sloppyCount();
914
} else {
915
ierr = leftIerr;
916
}
917
}
918
break;}
ibis > where (behaviorSegments contains '63' OR behaviorSegments contains
'662') AND (nielsenDma contains '503')
doQuery -- processing " FROM 8r7nhJy0RL WHERE (behaviorSegments contains '63'
OR behaviorSegments contains '662') AND (nielsenDma contains '503')"
Constructing selectClause @ 0x7fff15ffa9c8
newToken -- generated new token "s315Pp5XgCB----2" for user saenns
query[s315Pp5XgCB----2]::setWhereClause -- add a new where clause
"(behaviorSegments contains '63' OR behaviorSegments contains '662') AND
(nielsenDma contains '503')".
query[s315Pp5XgCB----2]::setWhereClause -- where "(behaviorSegments contains
'63' OR behaviorSegments contains '662') AND (nielsenDma contains '503')"
Translated the WHERE clause into: ((behaviorSegments CONTAINS '63' OR
behaviorSegments CONTAINS '662') AND nielsenDma CONTAINS '503')
query[s315Pp5XgCB----2]::evaluate -- starting to evaluate the query for user
"saenns"
query[s315Pp5XgCB----2]::doEvaluate(0x43d0f00: behaviorSegments CONTAINS '63',
mask.cnt()=7795176) --> 209101, ierr = 209101
query[s315Pp5XgCB----2]::doEvaluate(0x43d7320: behaviorSegments CONTAINS '662',
mask.cnt()=7795176) --> 0, ierr = 0
query[s315Pp5XgCB----2]::doEvaluate(0x43c8b20: (0x43d0f00 OR 0x43d7320),
mask.cnt()=7795176) --> 209101, ierr = 209101
query[s315Pp5XgCB----2]::doEvaluate(0x43d11a0: nielsenDma CONTAINS '503',
mask.cnt()=209101) --> 126, ierr = 2
query[s315Pp5XgCB----2]::doEvaluate(0x43cc620: (0x43c8b20 AND 0x43d11a0),
mask.cnt()=7795176) --> 126, ierr = 2
query[s315Pp5XgCB----2]::evaluate -- the hit contains xxx bits with 126 bits
set(=1) taking up xxx bytes; the estimated clustering factor is 1; had the bits
been randomly spread out, the expected size would be xxx bytes; estimated
number of bytes to be read in order to access 4-byte values is xxx
query[s315Pp5XgCB----2]::evaluate -- time to compute the 126 hits: 0 sec(CPU),
0.19129 sec(elapsed).
query[s315Pp5XgCB----2]::evaluate -- user saenns FROM 8r7nhJy0RL WHERE
(behaviorSegments contains '63' OR behaviorSegments contains '662') AND
(nielsenDma contains '503') ==> 126 hits.
doQuery:: evaluate( FROM 8r7nhJy0RL WHERE (behaviorSegments contains '63' OR
behaviorSegments contains '662') AND (nielsenDma contains '503')) produced 126
hits, took 0 CPU seconds, 0.340331 elapsed seconds
countQuery::setWhereClause -- add a new where clause "(0x43d71e0 AND 0x43d7180)"
countQuery::evaluate -- start timer ...
countQuery::doEvaluate(0x43d7210: behaviorSegments CONTAINS '63',
mask.cnt()=7795176) --> 209101, ierr = 209101
countQuery::doEvaluate(0x43d7290: behaviorSegments CONTAINS '662',
mask.cnt()=7795176) --> 0, ierr = 0
countQuery::doEvaluate(0x43d71e0: (0x43d7210 OR 0x43d7290), mask.cnt()=7795176)
--> 209101, ierr = 0
countQuery::doEvaluate(0x43d91f0: (0x43d71e0 AND 0x43d7180),
mask.cnt()=7795176) --> 209101, ierr = 0
countQuery::evaluate -- Select count(*) From 8r7nhJy0RL Where (0x43d71e0 AND
0x43d7180) --> 209101
countQuery::evaluate -- duration: 0 sec(CPU), 0.002358 sec(elapsed)
Warning -- countQuery.getNumHits returned 209101, while query.getNumHits
returned 126
Freeing selectClause @ 0x7fff15ffa9c8
_______________________________________________
FastBit-users mailing list
[email protected]
https://hpcrdm.lbl.gov/cgi-bin/mailman/listinfo/fastbit-users