The attached patch implements the WHERE clause "OR to UNION" 
optimization as described in this post:

 http://www.mail-archive.com/sqlite-users@sqlite.org/msg09004.html

If the computed cost of the rewritten WHERE clause is lower than 
the original query when indexes are taken into account, then it 
will perform the optimization. If the cost is estimated to be 
higher then the query will not be rewritten.

Given the database formed by running these statements:

  create table stuff(a,b,c,d);
  insert into stuff values(1,2,3,4);
  create temp view v1 as select random()%100,
    random()%100, random()%1000, random()%10000
     from stuff x, stuff y;
  insert into stuff select * from v1;
  insert into stuff select * from v1;
  insert into stuff select * from v1;
  insert into stuff select * from v1;
  insert into stuff select * from v1;
  create index stuff_b on stuff(b);
  create index stuff_c on stuff(c);
  create index stuff_d on stuff(d);
  analyze;

The patched version of sqlite 3.5.4 will run the following query 
many times faster than an unpatched sqlite 3.5.4:

  select b, count(*) from stuff 
  where c=2 or b=23 or c=17 or c=493 or d=7 or c=111 and a=14 
  group by 1 order by 2 DESC, 1 limit 10;

On my machine, the patched version produces these query timings:

  CPU Time: user 0.724045 sys 0.092005

with the EXPLAIN QUERY PLAN:

  0|0|TABLE stuff USING PRIMARY KEY
  0|0|TABLE stuff WITH INDEX stuff_c
  0|0|TABLE stuff WITH INDEX stuff_d
  0|0|TABLE stuff WITH INDEX stuff_b
  0|0|TABLE stuff WITH INDEX stuff_c

For the same query the unpatched sqlite 3.5.4 produces:

  CPU Time: user 20.869304 sys 8.912557

  0|0|TABLE stuff WITH INDEX stuff_b ORDER BY

Only single table queries are supported by this OR optimization. 
For this optimization to be considered, the WHERE clause may only 
consist of column equality comparisons to constants, ORs and ANDs.

The optimization only looks at the top-level WHERE clause ORs. It 
will not work with "IN" expressions. Nor will it will not expand 
expressions like "a=1 AND (b=2 or c=3)" into "a=1 AND b=2 OR a=1 
AND c=3" - although if manually expanded, the latter form could 
potentially be optimized.

It passes "make test" without regressions, but more testing is needed.




      
____________________________________________________________________________________
Never miss a thing.  Make Yahoo your home page. 
http://www.yahoo.com/r/hs
Index: src/select.c
===================================================================
RCS file: /sqlite/sqlite/src/select.c,v
retrieving revision 1.372
diff -u -3 -p -r1.372 select.c
--- src/select.c        14 Dec 2007 17:24:40 -0000      1.372
+++ src/select.c        22 Dec 2007 02:39:00 -0000
@@ -12,7 +12,7 @@
 ** This file contains C code routines that are called by the parser
 ** to handle SELECT statements in SQLite.
 **
-** $Id: select.c,v 1.372 2007/12/14 17:24:40 drh Exp $
+** $Id: select.c,v 1.370 2007/12/13 21:54:11 drh Exp $
 */
 #include "sqliteInt.h"
 
@@ -2961,6 +2961,440 @@ static void updateAccumulator(Parse *pPa
   pAggInfo->directMode = 0;
 }
 
+#ifndef SQLITE_OMIT_OR_UNION_TRANSFORM
+
+/* The function prefix "o2u" stands for "OR to UNION TRANSFORM" */
+static double o2uAndCost(Expr *p, int iTable, Bitmask *bm);
+static double o2uEqCost(Expr *p, int iTable, Bitmask *bm);
+
+/*
+** Count the number of bits in Bitmask. Each bit represents the existance
+** of a column in an expression. The zeroth bit represents the use of the
+** rowid column in the WHERE clause, which is different from non-o2u
+** uses of Bitmask in the code.
+*/
+static int o2uBitCount(Bitmask x){
+  int n = 0;
+  while( x ){
+    x &= x-1;
+    ++n;
+  }
+  return n;
+}
+
+/*
+** Full table scan cost is simply the average number rows for each index
+** for the specified table.
+*/
+static double o2uFullTableScanCost(Table* pTab){
+  if( pTab ){
+    double sum = 0;
+    int n = 0;
+    Index *pIndex;
+    for( pIndex = pTab->pIndex; pIndex; pIndex = pIndex->pNext, n++ ){
+      if( pIndex->nColumn>0 ){
+        sum += pIndex->aiRowEst[0];
+      }
+    }
+    if( n && sum>n ){
+      return sum/n;
+    }
+  }
+  return 1000000;
+}
+
+/*
+** Estimate a WHERE clause column's cost taking indexes into account.
+** Record any columns encountered in Bitmask.
+*/
+static double o2uColumnCost(Expr *p, int iTable, Bitmask *bm){
+  if( p && p->op==TK_COLUMN && p->pSelect==0 && p->iTable==iTable && p->pTab ){
+    int iColumn = p->iColumn;
+    Index* pIndex;
+    if( iColumn>=-1 && iColumn<=(int)(sizeof(Bitmask)*8-2) ){
+      *bm |= 1<<(iColumn+1); /* rowid column -1 is the 0th bit */
+    }else{
+      *bm |= 0x3; /* iTable beyond range: disqualify single column OR opt */
+    }
+    if( iColumn==-1 ){
+      return 10;                          /* Match: rowid */
+    }
+    for( pIndex = p->pTab->pIndex; pIndex; pIndex = pIndex->pNext ){
+      if( pIndex->nColumn>0 && pIndex->aiColumn[0]==iColumn ){
+        return pIndex->aiRowEst[1];       /* Match: first column of index */
+      }
+    }
+    return o2uFullTableScanCost(p->pTab); /* No index on column */
+  }
+  return 0;                               /* Not a valid column */
+}
+
+/*
+** Estimate the cost of this WHERE clause TK_OR node.
+*/
+static double o2uOrCost(Expr *p, int iTable, Bitmask *bm){
+  if( p && p->op==TK_OR ){
+    Expr *pL = p->pLeft;
+    Expr *pR = p->pRight;
+    double costL = 0;
+    double costR = 0;
+    if(
+      (
+        /* Only one of the following will be valid (non-zero) */
+           (costL = o2uEqCost(pL, iTable, bm))
+        || (costL = o2uAndCost(pL, iTable, bm))
+        || (costL = o2uOrCost(pL, iTable, bm))
+      ) && (
+        /* Only one of the following will be valid (non-zero) */
+           (costR = o2uEqCost(pR, iTable, bm))
+        || (costR = o2uAndCost(pR, iTable, bm))
+        || (costR = o2uOrCost(pR, iTable, bm))
+      )
+    ){
+      /* Add the costs of the left and right expressions if both are valid,
+      ** otherwise disqualify the optimization by returning zero.
+      */
+      if( costL&&costR ){
+        return (costL+costR);
+      }
+    }
+  }
+  return 0;
+}
+
+/*
+** Estimate the cost of this WHERE clause TK_EQ node.
+*/
+static double o2uEqCost(Expr *p, int iTable, Bitmask *bm){
+  if( p && p->op==TK_EQ ){
+    double cost;
+    Expr *pL = p->pLeft;
+    Expr *pR = p->pRight;
+    cost = o2uColumnCost(pL, iTable, bm);
+    if( cost && sqlite3ExprIsConstant(pR) ){
+      return cost;
+    }
+    cost = o2uColumnCost(pR, iTable, bm);
+    if( cost && sqlite3ExprIsConstant(pL) ){
+      return cost;
+    }
+  }
+  return 0;
+}
+
+/*
+** Estimate the cost of this WHERE clause AND expression.
+*/
+static double o2uAndCost(Expr *p, int iTable, Bitmask *bm){
+  if( p && p->op==TK_AND ){
+    Expr *pL = p->pLeft;
+    double L;
+    *bm |= 0x3; /* Disqualify single OR column optimization */
+    if( pL->op==TK_EQ ){
+      L = o2uEqCost(pL, iTable, bm);
+    }else{
+      L = o2uAndCost(pL, iTable, bm);
+    }
+    if( L ){
+      Expr *pR = p->pRight;
+      double R;
+      if( pR->op==TK_EQ ){
+        R = o2uEqCost(pR, iTable, bm);
+      }else{
+        R = o2uAndCost(pR, iTable, bm);
+      }
+      /* If both the left and right AND expressions have a valid cost,
+      ** then return the lower cost of the two sides.
+      */
+      if( R ){
+        return L<R ? L : R;
+      }
+    }
+  }
+  return 0;
+}
+
+static double o2uEstLog(double N){
+  double logN = 1;
+  double x = 2;
+  while( N>x ){
+    logN += 1;
+    x *= 2;
+  }
+  return logN;
+}
+
+/*
+** Change all TK_COLUMN iTable values matching iTableFrom to iTableTo
+** in expression `p'.
+*/
+static void o2uChangeCursorOfColumn(Expr *p, int iTableFrom, int iTableTo){
+  assert(p);
+  assert(iTableFrom>=0);
+  assert(iTableTo>=0);
+  assert(iTableFrom!=iTableTo);
+  switch( p->op ){
+    case TK_COLUMN: {
+      if( p->iTable==iTableFrom ){
+        p->iTable = iTableTo;
+      }
+      break;
+    }
+    case TK_EQ:
+    case TK_OR:
+    case TK_AND: {
+      assert(p->pLeft);
+      assert(p->pRight);
+      o2uChangeCursorOfColumn(p->pLeft, iTableFrom, iTableTo);
+      o2uChangeCursorOfColumn(p->pRight, iTableFrom, iTableTo);
+      break;
+    }
+  }
+}
+
+/*
+** Create a raw subselect of the form:
+**
+**   SELECT rowid from TABLE
+*/
+static Select *o2uCreateSubselect(
+  Parse *pParse,
+  SrcList *pSrcParent,
+  Expr *rowid
+){
+  Select *p = sqlite3SelectNew(pParse,
+    sqlite3ExprListAppend(pParse,0,sqlite3ExprDup(pParse->db,rowid),0),
+    sqlite3SrcListDup(pParse->db,pSrcParent), 0,0,0,0,0,0,0);
+  if( p ){
+    int iCursor = pParse->nTab++;
+    p->pSrc->a[0].iCursor = iCursor;
+    p->pEList->a[0].pExpr->iTable = iCursor;
+  }
+  return p;
+}
+
+/*
+** Determine the iColumn value of the expression, or return -2 if 
+** not applicable. -1 is the rowid column, as usual.
+*/
+static int o2uExprColumnNo(Expr *p){
+  assert(p);
+  switch( p->op ){
+    case TK_EQ: {
+      if( sqlite3ExprIsConstant(p->pRight) && p->pLeft->op==TK_COLUMN ){
+        return p->pLeft->iColumn;
+      }
+      if( sqlite3ExprIsConstant(p->pLeft) && p->pRight->op==TK_COLUMN ){
+        return p->pRight->iColumn;
+      }
+    }
+    case TK_OR: {
+      int iColumnL = o2uExprColumnNo(p->pLeft);
+      int iColumnR = o2uExprColumnNo(p->pRight);
+      if( iColumnL>=-1 && iColumnL==iColumnR ){
+        return iColumnL;
+      }
+    }
+  }
+  return -2;   /* Unknown or no single iTable value. */
+}
+
+/*
+** Can this expression be combined with this compound select's WHERE clause?
+** Return true if their iColumns are valid and match.
+*/
+static int o2uIsWhereExprCompatible(Expr *pWhere, Expr *pExpr){
+  int iColumnWhere;
+  assert(pWhere);
+  assert(pExpr);
+  iColumnWhere = o2uExprColumnNo(pWhere);
+  if( iColumnWhere>=-1 ){
+    int iColumnExpr = o2uExprColumnNo(pExpr);
+    if( iColumnWhere==iColumnExpr ){
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/*
+** Traverse the original WHERE clause and transform it into the new
+** WHERE clause with a compound SELECT.
+**
+** Extract each WHERE clause OR term and create subselects for
+** the new WHERE clause's "rowid IN (compound select)" TK_IN node.
+** Expr pE's ownership is transferred to the newly created compound select
+** statements, and any unused OR nodes are deleted. Try to combine OR
+** expressions related to the same column into the same compound subselect.
+*/
+static int o2uTransferOrTerms(
+  Parse *pParse,
+  Select **ppSelectIn,
+  Expr *pE,
+  SrcList *pSrcParent,
+  Expr *rowid
+){
+  int rc = SQLITE_OK;
+  Select *p;
+  assert(pE);
+  assert(ppSelectIn);
+  assert(pSrcParent);
+  assert(rowid);
+  assert(rowid->op==TK_COLUMN);
+  if( pE->op==TK_OR ){
+    rc = o2uTransferOrTerms(pParse,ppSelectIn,pE->pLeft,pSrcParent,rowid);
+    if( rc==SQLITE_OK ){
+      rc = o2uTransferOrTerms(pParse,ppSelectIn,pE->pRight,pSrcParent,rowid);
+    }
+    pE->pLeft = pE->pRight = 0;
+    sqlite3ExprDelete(pE);
+  }else{
+    /* TK_COLUMN, TK_AND, TK_EQ expressions will make it here.
+    */
+    if( *ppSelectIn==0 ){
+      /* Create the SELECT of the "rowid IN (SELECT ...)" */
+      p = *ppSelectIn = o2uCreateSubselect(pParse, pSrcParent, rowid);
+    }else{
+      /* Find a matching UNION SELECT statement or create one.
+      */
+      Select *pSelectFound = 0;
+      p = *ppSelectIn;
+      assert(p);
+      if( o2uIsWhereExprCompatible(p->pWhere, pE) ){
+        pSelectFound = p;
+      }
+      p->op = TK_UNION;
+      while( p->pPrior ){
+        p = p->pPrior;
+        if( !pSelectFound && o2uIsWhereExprCompatible(p->pWhere, pE) ){
+          pSelectFound = p;
+        }
+        p->op = TK_UNION;
+      }
+      p->op = TK_SELECT;
+      if( pSelectFound ){
+        /* A compatible UNION SELECT was found. */
+        p = pSelectFound;
+      }else{
+        /* No compatible UNION SELECT found, so we must create one
+        ** and append to the the compound SELECT chain
+        */
+        p->op = TK_UNION;
+        p = p->pPrior = o2uCreateSubselect(pParse, pSrcParent, rowid);
+        p->op = TK_SELECT;
+      }
+    }
+    if( p==0 ){
+      sqlite3ExprDelete(pE);
+      return SQLITE_ERROR;
+    }
+    assert(p->pEList);
+    assert(p->pEList->nExpr==1);
+    assert(p->pEList->a);
+    assert(p->pSrc);
+    assert(p->pSrc->nSrc==1);
+    assert(p->pSrc->a);
+    if( p->pWhere ){
+      /* Append expression into existing WHERE clause using OR. */
+      Expr *pWhere = p->pWhere;
+      assert(o2uIsWhereExprCompatible(p->pWhere, pE));
+      p->pWhere = sqlite3PExpr(pParse,TK_OR,0,0,0);
+      if( p->pWhere==0 ){
+        p->pWhere = pWhere;
+        sqlite3ExprDelete(pE);
+        return SQLITE_ERROR;
+      }
+      p->pWhere->pLeft = pWhere;
+      p->pWhere->pRight = pE;
+    }else{
+      /* There is no existing WHERE clause, so use expression for WHERE. */
+      p->pWhere = pE;
+    }
+    p->isResolved = 2; /* Differentiate SELECT to not optimize again. */
+
+    /* The iTable of each TK_COLUMN node of the expression must match its
+    ** SELECT's iCursor.
+    */
+    o2uChangeCursorOfColumn(pE, rowid->iTable, p->pSrc->a[0].iCursor);
+  }
+  return rc;
+}
+
+/*
+** Perform the OR to UNION transformation.
+**
+** This routine is only called once you've already determined that
+** the OR to UNION transform is of lower cost than the original WHERE
+** clause.
+**
+** This routine will try to put like OR terms into the same SELECT
+** to take advantage of the already existing WHERE OR optimization.
+**
+** For example, this statement:
+**
+**   SELECT c, count(*) AS total FROM foo
+**   WHERE a=1 OR b=2 OR 3=a OR c=2 AND b=5
+**   GROUP BY c
+**   ORDER BY total, c;
+**
+** will be converted to:
+**
+**   SELECT c, count(*) AS total FROM foo
+**   WHERE rowid IN (
+**     SELECT rowid FROM foo WHERE a=1 OR 3=a  UNION ALL
+**     SELECT rowid FROM foo WHERE b=2         UNION ALL
+**     SELECT rowid FROM foo WHERE c=2 AND b=5
+**   )
+**   GROUP BY c
+**   ORDER BY total, c;
+*/
+static int o2uUnionize(Parse *pParse, Select *p){
+  Expr *pWhere = p->pWhere;
+  struct SrcList_item *pFrom = p->pSrc->a;
+  Expr *pIn = p->pWhere = sqlite3PExpr(pParse,TK_IN,0,0,0);
+  Expr *rowid = pIn->pLeft = sqlite3PExpr(pParse,TK_COLUMN,0,0,0);
+  rowid->token.z = "rowid";
+  rowid->token.n = 5;
+  rowid->token.dyn = 0;
+  rowid->affinity = SQLITE_AFF_NONE;
+  rowid->iColumn = -1;
+  rowid->iTable = p->pSrc->a[0].iCursor;
+  rowid->pTab = sqlite3LocateTable(pParse,pFrom->zName,pFrom->zDatabase);
+  return o2uTransferOrTerms(pParse, &pIn->pSelect, pWhere, p->pSrc, rowid);
+}
+
+/*
+** Determine whether the OR to UNION transform is of lower cost than a
+** full table scan once individual column indexes are taken into account.
+** If it is determined that it is likely a lower cost, then perform the
+** tranform.
+**
+** Note: This routine will not perform the tranform if ANY WHERE clause
+** column name has a '+' prepended to it, as in "+a=3 OR b=3".
+** Nor will this routine perform the transform if the WHERE contains only
+** like columns, as in "a=1 OR a=3 OR a=7", as this is more efficiently
+** performed by the existing OR optimization.
+*/
+static int o2uTransform(Parse *pParse, Select *p){
+  int rc = SQLITE_OK;
+  if( p && p->pWhere && p->isResolved==1 && p->pSrc
+  && p->pSrc->nSrc==1 && p->pSrc->a[0].pTab && p->pSrc->a[0].pSelect==0
+  && p->pSrc->a[0].iCursor>=0 ){
+    Bitmask bm = 0;
+    double orOptRowEst = o2uOrCost(p->pWhere, p->pSrc->a[0].iCursor, &bm);
+    int columnsUsed = o2uBitCount(bm);
+    if( orOptRowEst>0 && columnsUsed>1 ){
+      double orOptCost = orOptRowEst * o2uEstLog(orOptRowEst);
+      double fullScanCost = o2uFullTableScanCost(p->pSrc->a[0].pTab);
+      if( orOptCost<fullScanCost ){
+        rc = o2uUnionize(pParse, p);
+      }
+    }
+  }
+  return rc;
+}
+
+#endif /* SQLITE_OMIT_OR_UNION_TRANSFORM */
 
 /*
 ** Generate code for the given SELECT statement.
@@ -3111,6 +3545,16 @@ int sqlite3Select(
     pOrderBy = 0;
   }
 
+#ifndef SQLITE_OMIT_OR_UNION_TRANSFORM
+  /*
+  ** Perform the WHERE clause OR to UNION transformation if there is a
+  ** cost advantage.
+  */
+  rc = o2uTransform(pParse, p);
+  pWhere = p->pWhere;
+  if( rc ) goto select_end; /* An error occurred - likely a malloc failure. */
+#endif
+
   /* Begin generating code.
   */
   v = sqlite3GetVdbe(pParse);

-----------------------------------------------------------------------------
To unsubscribe, send email to [EMAIL PROTECTED]
-----------------------------------------------------------------------------

Reply via email to