Update of /cvsroot/monetdb/pathfinder/compiler/sql
In directory 23jxhf1.ch3.sourceforge.com:/tmp/cvs-serv11514/sql

Modified Files:
        lalg2sql.brg 
Log Message:
-- Replaced aggregate operators count, min, max, avg, sum, prod, seqty1,
   and all in the algebra by a single aggregate operator ``aggr'' 
   that can handle multiple aggregates. The aggregate entries 
   are of kind count, min, max, avg, sum, prod, seqty1, all, and dist. 

-- Added new aggregate kind ``dist'' that allows group-by columns
   that functionally depend on the partitioning criterion to be
   represented in the result of the grouping aggregate.

-- Added rewrite that merges aggregates.

-- Added rewrite that removes superfluous aggregates.

-- Added rewrite that pushes a rank operator through an aggregate.

-- Extended the XML import to cope with the old
   as well as the new representation of aggregates.


U lalg2sql.brg
Index: lalg2sql.brg
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/sql/lalg2sql.brg,v
retrieving revision 1.159
retrieving revision 1.160
diff -u -d -r1.159 -r1.160
--- lalg2sql.brg        7 May 2009 14:29:22 -0000       1.159
+++ lalg2sql.brg        12 Jun 2009 13:06:17 -0000      1.160
@@ -46,6 +46,7 @@
 
 #include "lalg2sql.h"
 #include "alg_dag.h"
+#include "string_utils.h"
 #include "mem.h"
 #include "oops.h"             /* PFoops() */
 
@@ -117,11 +118,7 @@
 %term bool_or         = 29 /**< boolean OR operator */
 %term bool_not        = 30 /**< boolean NOT operator */
 %term to              = 31 /**< op:to operator */
-%term avg             = 32 /**< operator for (partitioned) avg of a column */
-%term max_            = 33 /**< operator for (partitioned) max of a column */
-%term min_            = 34 /**< operator for (partitioned) min of a column */
-%term sum             = 35 /**< operator for (partitioned) sum of a column */
-%term count           = 36 /**< (partitioned) row counting operator */
+%term aggr            = 32 /**< operator for (partitioned) aggregate */
 %term rownum          = 37 /**< consecutive number generation (DENSE_RANK) */
 %term rowrank         = 38 /**< consecutive number generation (ROW_NUMBER) */
 %term rank            = 39 /**< arbitrary but ordered number generation */
@@ -130,9 +127,6 @@
                                 certain type */
 %term type_assert     = 42 /**< restricts the type of a relation */
 %term cast            = 43 /**< type cast of an attribute */
-%term seqty1          = 44 /**< test for exactly one type occurrence in one
-                                iteration (Pathfinder extension) */
-%term all             = 45 /**< test if all items in an iteration are true */
 %term step            = 50 /**< XPath location step */
 %term step_join       = 51 /**< duplicate generating path step */
 %term guide_step      = 52 /**< XPath location step
@@ -243,19 +237,13 @@
 Rel:    type_assert (Rel)                            =  32 (10);
 Rel:    cast (Rel)                                   =  33 (10);
 
-Rel:    avg (Rel)                                    =  35 (10);
-Rel:    max_ (Rel)                                   =  36 (10);
-Rel:    min_ (Rel)                                   =  37 (10);
-Rel:    sum (Rel)                                    =  38 (10);
-Rel:    count (Rel)                                  =  39 (10);
+Rel:    aggr (Rel)                                   =  35 (10);
+Rel:    aggr (rank (Rel))                            =  36 (10);
 Rel:    disjunion (
-            count (Rel),
+            aggr (Rel),
             attach (difference (
                         Rel,
-                        project (count (Rel)))))     =  40  (5);
-
-Rel:    seqty1 (Rel)                                 =  41 (10);
-Rel:    all (Rel)                                    =  42 (10);
+                        project (aggr (Rel)))))      =  40  (5);
 
 Rel:    string_join (Rel, Rel)                       =  43 (10);
 
@@ -1745,12 +1733,7 @@
             return true;
         case la_to:
             return false;
-        case la_avg:
-        case la_max:
-        case la_min:
-        case la_sum:
-        case la_count:
-        case la_all:
+        case la_aggr:
             return (!p->sem.aggr.part);
         case la_rownum:
         case la_rowrank:
@@ -1759,7 +1742,6 @@
             return false;
         case la_type:
         case la_type_assert:
-        case la_seqty1:
             /* FIXME */
             assert (!"not yet implemented");
             break;
@@ -3628,6 +3610,10 @@
                     res_expr = str_upper (expr[0]); break;
                 case alg_fun_fn_lower_case:
                     res_expr = str_lower (expr[0]); break;
+                case alg_fun_fn_doc_available:
+                    PFoops (OOPS_FATAL,
+                            "Document availability check not implemented.");
+
                 case alg_fun_fn_translate:
                 case alg_fun_fn_substring_before:
                 case alg_fun_fn_substring_after:
@@ -3649,17 +3635,10 @@
                  * a comment a text node, or a namespace binding) the functions
                  * returns the zero length string.
                  */
-                {
-                    
-                } break;
                 case alg_fun_fn_local_name:
-                {
-                    
-                } break;
                 case alg_fun_fn_namespace_uri:
 
                 case alg_fun_fn_qname:
-                case alg_fun_fn_doc_available:
                 case alg_fun_pf_fragment:
                 case alg_fun_pf_supernode:
 
@@ -3890,43 +3869,64 @@
             }
         }   break;
 
-        /* Rel:    avg (Rel) */
-        case 35:
-        /* Rel:    max_ (Rel) */
-        case 36:
-        /* Rel:    min_ (Rel) */
-        case 37:
-        /* Rel:    sum (Rel) */
-        case 38:
-        /* Rel:    count (Rel) */
-        case 39:
-        /* Rel:    all (Rel) */
-        case 42:
+        /* Rel:    aggr (rank (Rel)) */
+        case 36: /* reference to this rule also appears below */
+        {
+            PFord_ordering_t sortby = L(p)->sem.sort.sortby;
+
+            if (!p->sem.aggr.part ||
+                p->sem.aggr.part != L(p)->sem.sort.res ||
+                PFprop_icol (p->prop, p->sem.aggr.part))
+                PFoops (OOPS_FATAL,
+                        "missing implementation for `aggr (rank (Rel))' "
+                        "pattern -- partition and result columns are "
+                        "unrelated");
+
+            for (unsigned int i = 0; i < PFord_count (sortby); i++) {
+                PFalg_col_t  col  = PFord_order_col_at (sortby, i);
+                PFsql_kind_t kind = col_env_lookup (
+                                        COLMAP(LL(p)),
+                                        col,
+                                        type_of (L(p), col))->kind;
+                if (kind != sql_column_name &&
+                    kind != sql_ref_column_name) {
+#ifndef NDEBUG
+                    if (!BOUND(LL(p)))
+                        execute (comment ("bind as a column reference "
+                                          "is needed in the following "
+                                          "aggregate"));
+#endif
+                    bind_operator (LL(p), false);
+                    break;
+                }
+            }
+        }
+            /* fall through */
+
+        /* Rel:    aggr (Rel) */
+        case 35: /* reference to this rule also appears below */
+            
             /* Bind the input of the aggregate if the group by
                criterion is not a column reference. */
             if (p->sem.aggr.part &&
-                (col_env_lookup (
-                     COLMAP(L(p)),
-                     p->sem.aggr.part,
-                     type_of (p, p->sem.aggr.part)))->kind != sql_column_name) 
{
+                rule == 35) {
+                PFsql_kind_t kind = col_env_lookup (
+                                        COLMAP(L(p)),
+                                        p->sem.aggr.part,
+                                        type_of (L(p), 
p->sem.aggr.part))->kind;
+                if (kind != sql_column_name &&
+                    kind != sql_ref_column_name) {
 #ifndef NDEBUG
-                if (!BOUND(L(p)))
-                    execute (comment ("bind as a column reference "
-                                      "is needed in the following aggregate"));
+                    if (!BOUND(L(p)))
+                        execute (comment ("bind as a column reference "
+                                          "is needed in the following "
+                                          "aggregate"));
 #endif
-                bind_operator (L(p), false);
+                    bind_operator (L(p), false);
+                }
             }
         {
-            PFalg_col_t col = p->sem.aggr.col,
-                        part = p->sem.aggr.part,
-                        res = p->sem.aggr.res;
-            PFalg_simple_type_t part_ty,
-                                ty = 0 /* dummy type */,
-                                res_ty = type_of (p, res);
-            PFsql_t *expr = NULL,
-                    *part_expr;
-            PFsql_col_t *part_col,
-                        *res_col = new_col (res, res_ty);
+            PFla_op_t *in_op = (rule == 35) ? L(p) : LL(p);
 
             PFsql_t *selectlist = NULL;
             PFsql_t *columnlist = NULL;
@@ -3941,73 +3941,112 @@
                            table_name (newtable),
                            alias (newalias));
 
-            if (col) /* cope with missing col column in case of count */ {
-                ty = type_of (L(p), col);
-                expr = col_env_lookup (COLMAP(L(p)), col, ty);
-            }
-
             /* add the partition criterion to all lists and environments */
-            if (part) {
-                part_ty = type_of (L(p), part);
-                part_col = new_col (part, part_ty);
-                part_expr = col_env_lookup (COLMAP(L(p)), part, part_ty);
-
+            if (p->sem.aggr.part && rule == 35) {
+                PFalg_col_t         part      = p->sem.aggr.part;
+                PFalg_simple_type_t part_ty   = type_of (L(p), part);
+                PFsql_col_t        *part_col  = new_col (part, part_ty);
+                PFsql_t            *part_expr = col_env_lookup (COLMAP(L(p)),
+                                                                part,
+                                                                part_ty);
+                /* provide the column information for the result
+                   (after binding) */
                 col_env_add (COLMAP(p), part, part_ty,
                              ext_column_name (newalias, part_col));
 
-                /* create columnlist for the table name */
+                /* create columnlist for the table name
+                   (used during binding) */
                 columnlist = column_list (columnlist,
                                           column_name (part_col));
 
-                /* create selectlist for the table name */
+                /* create selectlist for group by query */
                 selectlist = select_list (selectlist,
                                           transform_expression (
                                               part_expr,
                                               column_name (part_col)));
 
+                /* extend group by list */
                 groupbylist = column_list (groupbylist, part_expr);
             }
+            else if (p->sem.aggr.part && rule == 36) {
+                PFord_ordering_t    sortby = L(p)->sem.sort.sortby;
+                PFalg_col_t         ord;
+                PFalg_simple_type_t ord_ty;
 
-            /* generate the aggregation */
-            switch (p->kind) {
-                case la_avg:   expr = avg (expr);      break;
-                case la_max:   expr = max_ (expr);     break;
-                case la_min:   expr = min_ (expr);     break;
-                /* with la_all we have to ensure that each
-                 * column col is true for all rows within
-                 * a group, since we express boolean values
-                 * as literal integers, MIN does the job */ 
-                case la_all:   assert (ty == aat_bln);
-                               expr = min_ (((expr)->kind == sql_column_name)?
-                                           expr:
-                                           case_(
-                                              when (expr,
-                                                    lit_int(1)),
-                                              else_ (lit_int (0))));
-                             break;
-                case la_sum:   expr = sum (expr);      break;
-                case la_count: expr = count (star ()); break;
-                default: assert (0); break;
+                for (unsigned int i = 0; i < PFord_count (sortby); i++) {
+                    ord    = PFord_order_col_at (sortby, i);
+                    ord_ty = type_of (L(p), ord);
+
+                    /* extend group by list */
+                    groupbylist = column_list (groupbylist,
+                                               col_env_lookup (COLMAP(LL(p)),
+                                                               ord,
+                                                               ord_ty));
+                }
             }
 
-            col_env_add (COLMAP(p), res, res_ty,
-                         ext_column_name (newalias, res_col));
+            /* add the aggregates to all lists and environments */
+            for (unsigned int i = 0; i < p->sem.aggr.count; i++) {
+                PFalg_col_t         col     = p->sem.aggr.aggr[i].col,
+                                    res     = p->sem.aggr.aggr[i].res;
+                PFalg_simple_type_t res_ty  = type_of (p, res);
+                PFsql_t            *expr    = NULL;
+                PFsql_col_t        *res_col = new_col (res, res_ty);
 
-            columnlist = column_list (columnlist, column_name (res_col));
+                /* provide the column information for the result
+                   (after binding) */
+                col_env_add (COLMAP(p), res, res_ty,
+                             ext_column_name (newalias, res_col));
 
-            /* add the aggregation to the selection list */
-            selectlist = select_list (selectlist,
-                                      transform_expression (
-                                          expr,
-                                          column_name (res_col)));
+                /* create columnlist for the table name
+                   (used during binding) */
+                columnlist = column_list (columnlist, column_name (res_col));
+
+                /* cope with missing col column in case of count */
+                if (col)
+                    expr = col_env_lookup (COLMAP(in_op),
+                                           col,
+                                           type_of (in_op, col));
+                
+                /* generate the aggregation */
+                switch (p->sem.aggr.aggr[i].kind) {
+                    case alg_aggr_dist:
+                        /* extend group by list */
+                        groupbylist = column_list (groupbylist, expr);
+                        break;
+                    case alg_aggr_min:   expr = min_ (expr);     break;
+                    case alg_aggr_max:   expr = max_ (expr);     break;
+                    case alg_aggr_all: 
+                        expr = min_ (((expr)->kind == sql_column_name)?
+                                    expr:
+                                    case_(
+                                       when (expr,
+                                             lit_int(1)),
+                                       else_ (lit_int (0))));
+                        break;
+                    case alg_aggr_count: expr = count (star ()); break;
+                    case alg_aggr_avg:   expr = avg (expr);      break;
+                    case alg_aggr_sum:   expr = sum (expr);      break;
+                    case alg_aggr_seqty1:
+                    case alg_aggr_prod: 
+                        PFoops (OOPS_FATAL, "Unsupported aggregate.");
+                        break;
+                }
+
+                /* add the aggregation to the selection list */
+                selectlist = select_list (selectlist,
+                                          transform_expression (
+                                              expr,
+                                              column_name (res_col)));
+            }
 
             /* dump the operator binding */
             execute (comment ("binding due to aggregate"));
             execute (bind (table_def (newtable, columnlist),
                            PFsql_select (false,
                                          selectlist,
-                                         transform_frommap (L(p)),
-                                         transform_wheremap (L(p)),
+                                         transform_frommap (in_op),
+                                         transform_wheremap (in_op),
                                          NULL,
                                          groupbylist)));
 
@@ -4016,10 +4055,10 @@
         }   break;
 
         /* Rel:    disjunion (
-                       count (Rel),
+                       aggr (Rel),
                        attach (difference (
                                    Rel,
-                                   project (count (Rel))))) */
+                                   project (aggr (Rel))))) */
         case 40:
             /**
              * ensure the following pattern:
@@ -4040,7 +4079,9 @@
              *      count
              */
             if (!(L(p) == RLRL(p) &&
-                  R(p)->sem.attach.res == L(p)->sem.aggr.res &&
+                  L(p)->sem.aggr.count == 1 &&
+                  L(p)->sem.aggr.aggr[0].kind == alg_aggr_count &&
+                  R(p)->sem.attach.res == L(p)->sem.aggr.aggr[0].res &&
                   R(p)->sem.attach.value.type == aat_int &&
                   R(p)->sem.attach.value.val.int_ == 0 &&
                   RLL(p)->schema.count == 1 &&
@@ -4063,7 +4104,7 @@
             PFsql_aident_t newalias;
 
             part      = L(p)->sem.aggr.part;
-            res       = L(p)->sem.aggr.res;
+            res       = L(p)->sem.aggr.aggr[0].res;
             part_ty   = type_of (p, part);
             res_ty    = type_of (p, res);
 
@@ -4159,12 +4200,6 @@
             BOUND(p) = true;
         }   break;
 
-        /* Rel:    seqty1 (Rel) */
-        case 41:
-            /* FIXME: implementation is missing */
-            assert (!"missing");
-            break;
-
         /* Rel:    string_join (Rel, Rel) */
         case 43:
         {
@@ -5328,14 +5363,17 @@
             /* copy all existing side effect information */
             from_list_copy (FROMLIST(p), FROMLIST(L(p)));
 
-            reduce (kids[1], nts[1]);
-
             if (PFprop_const (p->prop, p->sem.err.col)) {
                 msg = (PFprop_const_val (p->prop, p->sem.err.col)).val.str;
+                if (PFstrUtils_beginsWith(msg, "err:FODC0002"))
+                    /* discard document availability check */
+                    break;
             }
             else
                 msg = "error in query evaluation";
 
+            reduce (kids[1], nts[1]);
+
             err = case_ (when (gt (count (star ()), lit_int (0)),
                                raise_error (lit_str (ERR_SQLSTATE),
                                             lit_str (msg))),


------------------------------------------------------------------------------
Crystal Reports - New Free Runtime and 30 Day Trial
Check out the new simplified licensing option that enables unlimited
royalty-free distribution of the report engine for externally facing 
server and web deployment.
http://p.sf.net/sfu/businessobjects
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins

Reply via email to