Update of /cvsroot/monetdb/pathfinder/compiler/mil
In directory sc8-pr-cvs16:/tmp/cvs-serv26652/mil

Modified Files:
        mil.c milgen.brg milprint.c 
Log Message:
Thetajoin operator:

    Introduced a thetajoin operator in the logical algebra (and the physical
    algebra and the MIL generation). This new thetajoin can handle a list of
    conjunctive predicates where each predicate represents an (in-)equality
    condition.

Thetajoin introduction:

    A thetajoin operator is introduced either by a small pattern
    (select-comparison-cross) in opt_general.brg or by a new introduction
    phase in intro_thetajoin.c. The latter introduction phase uses a
    selection as a basis to find the *correct* equi-join and transforms
    this equi-join into a thetajoin (for more details please look into
    the code).

Thetajoin optimization:

    Similar to the MVD optimization phase that pushes cross product
    operators up in the DAG structure, this checkin provides a new
    optimization phase that pushes thetajoin operators up in the DAG
    (opt_thetajoin.c). These rewrites result in thetajoin operators
    that contain multiple predicates. (E.g. the transformed equi-joins
    eventually hit the selection that triggered the rewrite and thus form
    a new scope-dependent value-based join -- see XMark Q3 and Q4.)

Additional changes:

    opt_general.brg
     * Extended thetajoin pattern to cope with inequality comparisons
     * Rewrite the pattern: semijoin (distinct (Rel), Rel) into
       distinct (semijoin (Rel, Rel)), as the semijoin hopefully reduces
       the cardinality more than the distinct operator. (This speeds
       up XMark Q12 by a factor of 2.5.)

    intro_proxy.c
     * Added a preceding phase that removes all semijoin operators.
       (Using only simple checks, more proxies are now detected.)
     * Removed unnest optimization phase (thetajoin optimization
       does the same in a more general scenario).

    physical.c
     * Toyed around with the cost model
     * Fixed 'PFord_order_dir_at' bug

    planner.c
     * Generated more ordered alternatives (even if an ordering is
       not required) after an operator is mapped to physical algebra.
       In some situations this avoids planning the same sort operator
       multiple times in a later step. (We only introduce orderings for
       iter-like columns and sort at most two columns. Otherwise the
       plan space explodes for some sample queries.)
     * Extended list of possible orderings for the rownum operator



Index: milprint.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/mil/milprint.c,v
retrieving revision 1.52
retrieving revision 1.53
diff -u -d -r1.52 -r1.53
--- milprint.c  22 Mar 2007 09:39:05 -0000      1.52
+++ milprint.c  7 May 2007 10:17:40 -0000       1.53
@@ -40,6 +40,14 @@
                  | expression '.cross (' expression ')'     <m_cross>
                  | expression '.join (' expression ')'      <m_join>
                  | expression '.leftjoin (' expression ')'  <m_leftjoin>
+                 | 'thetajoin ('exp','exp','exp','exp')'    <m_thetajoin>
+                 | 'htordered_unique_thetajoin ('
+                      exp ',' exp ',' exp ', nil, nil)'     <m_unq2_tjoin>
+                 | 'll_htordered_unique_thetajoin ('
+                      exp ',' exp ',' exp ','
+                      exp ',' exp ', nil, nil)'             <m_unq1_tjoin>
+                 | 'combine_node_info(' exp ',' exp ','
+                      exp ',' exp ',' exp ',' exp ')'       <m_zip_nodes>
                  | expression '.kunion (' expression ')'    <m_kunion>
                  | expression '.kdiff (' expression ')'     <m_kdiff>
                  | expression '.kintersect (' expression ')'<m_kintersect>
@@ -90,8 +98,12 @@
                  | '[floor](' expression ')'                <m_mfloor>
                  | '[round_up](' expression ')'             <m_mround_up>
                  | '>(' expression ',' expression ')'       <m_gt>
-                 | '[>](' expression ',' expression ')'     <m_mgt>
                  | '[=](' expression ',' expression ')'     <m_meq>
+                 | '[>](' expression ',' expression ')'     <m_mgt>
+                 | '[>=](' expression ',' expression ')'    <m_mge>
+                 | '[<](' expression ',' expression ')'     <m_mlt>
+                 | '[<=](' expression ',' expression ')'    <m_mle>
+                 | '[!=](' expression ',' expression ')'    <m_mne>
                  | 'not(' expression ')'                    <m_not>
                  | '[not](' expression ')'                  <m_mnot>
                  | '[-](' expression ')'                    <m_mneg>
@@ -206,6 +218,10 @@
     , [m_cross]        = "cross"
     , [m_join]         = "join"
     , [m_leftjoin]     = "leftjoin"
+    , [m_thetajoin]    = "thetajoin"
+    , [m_unq2_tjoin]   = "htordered_unique_thetajoin"
+    , [m_unq1_tjoin]   = "ll_htordered_unique_thetajoin"
+    , [m_zip_nodes]    = "combine_node_info"
     , [m_reverse]      = "reverse"
     , [m_mirror]       = "mirror"
     , [m_copy]         = "copy"
@@ -241,8 +257,12 @@
     , [m_mfloor]       = "[floor]"
     , [m_mround_up]    = "[round_up]"
     , [m_gt]           = ">"
-    , [m_mgt]          = "[>]"
     , [m_meq]          = "[=]"
+    , [m_mgt]          = "[>]"
+    , [m_mge]          = "[>=]"
+    , [m_mlt]          = "[<]"
+    , [m_mle]          = "[<=]"
+    , [m_mne]          = "[!=]"
     , [m_not]          = "not"
     , [m_mnot]         = "[not]"
     , [m_mneg]         = "[-]"
@@ -831,10 +851,18 @@
         case m_mmod:
         /* expression : '>(' expression ',' expression ')' */
         case m_gt:
-        /* expression : '[>](' expression ',' expression ')' */
-        case m_mgt:
         /* expression : '[=](' expression ',' expression ')' */
         case m_meq:
+        /* expression : '[>](' expression ',' expression ')' */
+        case m_mgt:
+        /* expression : '[>=](' expression ',' expression ')' */
+        case m_mge:
+        /* expression : '[<](' expression ',' expression ')' */
+        case m_mlt:
+        /* expression : '[<=](' expression ',' expression ')' */
+        case m_mle:
+        /* expression : '[!=](' expression ',' expression ')' */
+        case m_mne:
         /* expression : '[and](' expression ',' expression ')' */
         case m_mand:
         /* expression : '[or](' expression ',' expression ')' */
@@ -954,6 +982,8 @@
         case m_add_qnames:
         /* expression : 'sc_desc (' expr ',' expr ',' expr ',' expr ')' */
         case m_sc_desc:
+        /* expression : 'thetajoin ('exp','exp','exp','exp')' */
+        case m_thetajoin:
             milprintf ("%s (", ID[n->kind]);
             print_expression (n->child[0]);
             milprintf (", ");
@@ -987,6 +1017,52 @@
             milprintf (")");
             break;
 
+        /* expression : 'htordered_unique_thetajoin (' exp ',' exp ','
+                                                       exp ', nil, nil)' */
+        case m_unq2_tjoin:
+            milprintf ("%s (", ID[n->kind]);
+            print_expression (n->child[2]); /* mode */
+            milprintf (", ");
+            print_expression (n->child[0]);
+            milprintf (", ");
+            print_expression (n->child[1]);
+            milprintf (", nil, nil)");
+            break;
+            
+        /* expression : 'll_htordered_unique_thetajoin ('
+                            exp ',' exp ',' exp ',' exp ',' exp ', nil, nil)' */
+        case m_unq1_tjoin:
+            milprintf ("%s (", ID[n->kind]);
+            print_expression (n->child[4]); /* mode */
+            milprintf (", ");
+            print_expression (n->child[0]);
+            milprintf (", ");
+            print_expression (n->child[1]);
+            milprintf (", ");
+            print_expression (n->child[2]);
+            milprintf (", ");
+            print_expression (n->child[3]);
+            milprintf (", nil, nil)");
+            break;
+
+        /* expression : combine_node_info(' exp ',' exp ',' exp ','
+                                            exp ',' exp ',' exp ')' */
+        case m_zip_nodes:
+            milprintf ("%s (", ID[n->kind]);
+            print_expression (n->child[0]);
+            milprintf (", ");
+            print_expression (n->child[1]);
+            milprintf (", ");
+            print_expression (n->child[2]);
+            milprintf (", ");
+            print_expression (n->child[3]);
+            milprintf (", ");
+            print_expression (n->child[4]);
+            milprintf (", ");
+            print_expression (n->child[5]);
+            milprintf (")");
+            break;
+
         case m_merged_union:
         case m_mc_intersect:
             milprintf ("%s (", ID[n->kind]);

Index: mil.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/mil/mil.c,v
retrieving revision 1.38
retrieving revision 1.39
diff -u -d -r1.38 -r1.39
--- mil.c       15 Mar 2007 14:12:57 -0000      1.38
+++ mil.c       7 May 2007 10:17:38 -0000       1.39
@@ -310,6 +310,11 @@
         case PF_MIL_VAR_PROP_TEXT:   return "PROP_TEXT";
         case PF_MIL_VAR_PROP_COM:    return "PROP_COM"; 
         case PF_MIL_VAR_PROP_INS:    return "PROP_INS";    
+        case PF_MIL_VAR_LE:          return "LE";
+        case PF_MIL_VAR_LT:          return "LT";
+        case PF_MIL_VAR_EQ:          return "EQ";
+        case PF_MIL_VAR_GT:          return "GT";
+        case PF_MIL_VAR_GE:          return "GE";
         case PF_MIL_VAR_TRACE_OUTER: return "trace_outer";
         case PF_MIL_VAR_TRACE_INNER: return "trace_inner";
         case PF_MIL_VAR_TRACE_ITER:  return "trace_iter";
@@ -613,6 +618,45 @@
 }
 
 /**
+ * Monet thetajoin() operator
+ */
+PFmil_t *
+PFmil_thetajoin (const PFmil_t *a, const PFmil_t *b,
+                 const PFmil_t *comp, const PFmil_t *size)
+{
+    return wire4 (m_thetajoin, a, b, comp, size);
+}
+
+/**
+ * Monet htordered_unique_thetajoin PROC
+ */
+PFmil_t * PFmil_unq2_thetajoin (const PFmil_t *a, const PFmil_t *b,
+                                const PFmil_t *comp)
+{
+    return wire3 (m_unq2_tjoin, a, b, comp);
+}
+
+/**
+ * MIL ll_htordered_unique_thetajoin PROC
+ */
+PFmil_t * PFmil_unq1_thetajoin (const PFmil_t *a, const PFmil_t *b,
+                                const PFmil_t *c, const PFmil_t *d,
+                                const PFmil_t *comp)
+{
+    return wire5 (m_unq1_tjoin, a, b, c, d, comp);
+}
+
+/**
+ * MIL combine_node_info PROC
+ */
+PFmil_t * PFmil_zip_nodes (const PFmil_t *a, const PFmil_t *b,
+                           const PFmil_t *c, const PFmil_t *d,
+                           const PFmil_t *e, const PFmil_t *f)
+{
+    return wire6 (m_zip_nodes, a, b, c, d, e, f);
+}
+
+/**
  * Monet reverse operator, swap head/tail
  */
 PFmil_t *
@@ -1014,6 +1058,15 @@
 }
 
 /**
+ * Multiplexed comparison operator (equality)
+ */
+PFmil_t *
+PFmil_meq (const PFmil_t *a, const PFmil_t *b)
+{
+    return wire2 (m_meq, a, b);
+}
+
+/**
  * Multiplexed comparison operator (greater than)
  */
 PFmil_t *
@@ -1023,12 +1076,39 @@
 }
 
 /**
- * Multiplexed comparison operator (equality)
+ * Multiplexed comparison operator (greater equal)
  */
 PFmil_t *
-PFmil_meq (const PFmil_t *a, const PFmil_t *b)
+PFmil_mge (const PFmil_t *a, const PFmil_t *b)
 {
-    return wire2 (m_meq, a, b);
+    return wire2 (m_mge, a, b);
+}
+
+/**
+ * Multiplexed comparison operator (less than)
+ */
+PFmil_t *
+PFmil_mlt (const PFmil_t *a, const PFmil_t *b)
+{
+    return wire2 (m_mlt, a, b);
+}
+
+/**
+ * Multiplexed comparison operator (less equal)
+ */
+PFmil_t *
+PFmil_mle (const PFmil_t *a, const PFmil_t *b)
+{
+    return wire2 (m_mle, a, b);
+}
+
+/**
+ * Multiplexed comparison operator (inequality)
+ */
+PFmil_t *
+PFmil_mne (const PFmil_t *a, const PFmil_t *b)
+{
+    return wire2 (m_mne, a, b);
 }
 
 /**

Index: milgen.brg
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/mil/milgen.brg,v
retrieving revision 1.73
retrieving revision 1.74
diff -u -d -r1.73 -r1.74
--- milgen.brg  15 Mar 2007 14:12:57 -0000      1.73
+++ milgen.brg  7 May 2007 10:17:39 -0000       1.74
@@ -86,15 +86,18 @@
 %term leftjoin       =  11
 %term eqjoin         =  14
 %term semijoin       =  15
-%term project        =  16
-%term select         =  17
-%term append_union   =  20
-%term merge_union    =  21
-%term intersect      =  22
-%term difference     =  23
-%term sort_distinct  =  24
-%term std_sort       =  25
-%term refine_sort    =  26
+%term thetajoin      =  16
+%term unq2_thetajoin =  17
+%term unq1_thetajoin =  18
+%term project        =  19
+%term select         =  20
+%term append_union   =  23
+%term merge_union    =  24
+%term intersect      =  25
+%term difference     =  26
+%term sort_distinct  =  27
+%term std_sort       =  28
+%term refine_sort    =  29
 %term fun_1to1       =  30
 %term eq             =  40
 %term eq_atom        =  41
@@ -163,6 +166,9 @@
 Rel:      leftjoin (Rel, Rel)                                      =  14 (10);
 Rel:      eqjoin (Rel, Rel)                                        =  15 (10);
 Rel:      semijoin (Rel, Rel)                                      =  16 (10);
+Rel:      thetajoin (Rel, Rel)                                     =  17 (10);
+Rel:      unq2_thetajoin (Rel, Rel)                                =  18 (10);
+Rel:      unq1_thetajoin (Rel, Rel)                                =  19 (10);
 Rel:      project (Rel)                                            =  20 (10);
 Rel:      select (Rel)                                             =  21 (10);
 Rel:      append_union (Rel, Rel)                                  =  22 (10);
@@ -1973,6 +1979,442 @@
             unpin (res, 1);
         } break;
 
+        /* Rel:      thetajoin (Rel, Rel) */
+        case 17:
+        {
+            unsigned int shared_cols = 0;
+            bool match_found;
+            bool initialized = false;
+
+            PFalg_simple_type_t ltype = 0;
+            PFmil_t *args = NULL;
+            mvar_t  *res = new_var (1);
+            mvar_t  *lmap = new_var (1);
+            mvar_t  *rmap = new_var (1);
+
+            /* first try to match all equality predicates in one go 
+               (using a special multi-column MIL intersect proc) */
+            for (unsigned int i = 0; i < p->sem.thetajoin.count; i++)
+                if (p->sem.thetajoin.pred[i].comp == alg_comp_eq) {
+                    initialized = true;
+                    match_found = false;
+                    ltype = type_of (L(p), p->sem.thetajoin.pred[i].left);
+                    for (PFalg_simple_type_t t = 1; t; t <<= 1)
+                        if (t & ltype) {
+                            mvar_t *l = env_mvar (
+                                            L(p)->env,
+                                            p->sem.thetajoin.pred[i].left,
+                                            t);
+                            mvar_t *r = env_mvar_unsafe (
+                                            R(p)->env,
+                                            p->sem.thetajoin.pred[i].right,
+                                            t);
+
+                            if (r) {
+                                if (!shared_cols)
+                                    args = arg (var (l->name), var (r->name));
+                                else
+                                    args = arg (args, arg (var (l->name), 
+                                                           var (r->name)));
+                                shared_cols++;
+                                match_found = true;
+                            }
+                        }
+                    if (!match_found) {
+                        /* If one join argument has no match we
+                           can return an empty result */
+                        shared_cols = 0;
+                        execute (assgn (var (res->name),
+                                        seqbase (new (type (mty_void), 
+                                                      type (mty_oid)),
+                                                 lit_oid (0))));
+                        break;
+                    }
+                }
+
+            /* apply the multi-column equi-join */
+            if (shared_cols) {
+                execute (assgn (var (res->name), 
+                                mc_intersect (args)),
+                         assgn (var (lmap->name),
+                                reverse (mark (var (res->name), lit_oid (0)))),
+                         assgn (var (rmap->name),
+                                reverse (mark (reverse (var (res->name)),
+                                               lit_oid (0)))));
+            }
+            
+            /* If there was no equality predicate apply a normal inequality
+               thetajoin to the first predicate.
+               For all remaining predicates (also the ones that appear in 
+               combination with equi-join predicates we apply a selection
+               (post) filter. */
+            for (unsigned int i = 0; i < p->sem.thetajoin.count; i++)
+                if (p->sem.thetajoin.pred[i].comp != alg_comp_eq) {
+                    mvar_t *l, *r;
+                    bool type_match = false;
+                    
+                    ltype = type_of (L(p), p->sem.thetajoin.pred[i].left);
+                    assert (monomorphic (ltype));
+                    
+                    if (ltype & aat_node) {
+                        /* FIXME */
+                        assert (!(ltype & aat_anode) &&
+                                "cope with attribute node inequality");
+
+                        mvar_t *lpre, *rpre, *lfrag, *rfrag;
+                        
+                        l = new_var (1);
+                        r = new_var (1);
+                        
+                        lpre = env_mvar (
+                                   L(p)->env,
+                                   p->sem.thetajoin.pred[i].left,
+                                   aat_pre);
+                        lfrag = env_mvar (
+                                    L(p)->env,
+                                    p->sem.thetajoin.pred[i].left,
+                                    aat_pfrag);
+                        
+                        rpre = env_mvar_unsafe (
+                                   R(p)->env,
+                                   p->sem.thetajoin.pred[i].right,
+                                   aat_pre);
+                        rfrag = env_mvar_unsafe (
+                                    R(p)->env,
+                                    p->sem.thetajoin.pred[i].right,
+                                    aat_pfrag);
+
+                        if (rpre && rfrag) {
+                            mvar_t *tmp_res = new_var (1);
+                            execute (assgn (var (tmp_res->name),
+                                            zip_nodes (
+                                                var (lfrag->name),
+                                                var (lpre->name),
+                                                nil (),
+                                                var (rfrag->name),
+                                                var (rpre->name),
+                                                nil ())),
+                                     assgn (var (l->name),
+                                            fetch (var (tmp_res->name),
+                                                   lit_int (0))),
+                                     assgn (var (r->name),
+                                            fetch (var (tmp_res->name),
+                                                   lit_int (1))));
+                            unpin (tmp_res, 1);
+                            type_match = true;
+                        }
+                    } else {
+                        l = env_mvar (
+                                L(p)->env,
+                                p->sem.thetajoin.pred[i].left,
+                                ltype);
+                        r = env_mvar_unsafe (
+                                R(p)->env,
+                                p->sem.thetajoin.pred[i].right,
+                                ltype);
+                        if (r) type_match = true;
+                    }
+
+                    if (type_match) {
+                        if (!initialized) {
+                            /* As there was no equi-join condition and 
+                               this is the first inequality predicate
+                               we evaluate a thetajoin */
+
+                            PFmil_t *comp;
+                            /* find the correct comparison */
+                            switch (p->sem.thetajoin.pred[i].comp) {
+                                case alg_comp_eq:
+                                    comp = var (PF_MIL_VAR_EQ); break; 
+                                case alg_comp_gt:
+                                    comp = var (PF_MIL_VAR_GT); break; 
+                                case alg_comp_ge:
+                                    comp = var (PF_MIL_VAR_GE); break; 
+                                case alg_comp_lt:
+                                    comp = var (PF_MIL_VAR_LT); break; 
+                                case alg_comp_le:
+                                    comp = var (PF_MIL_VAR_LE); break; 
+                                default:
+                                    PFoops (OOPS_FATAL, "incorrect comparison");
+                            }
+
+                            execute (assgn (var (res->name),
+                                            tjoin (
+                                                var (l->name),
+                                                reverse (var (r->name)),
+                                                comp,
+                                                PFmil_mmult (
+                                                    count (var (r->name)),
+                                                    lit_lng (64)))),
+                                     assgn (var (lmap->name),
+                                            reverse (mark (var (res->name),
+                                                           lit_oid (0)))),
+                                     assgn (var (rmap->name),
+                                            reverse (
+                                                mark (reverse (var (res->name)),
+                                                      lit_oid (0)))));
+                        } else {
+                            /* apply a (post) filter */
+                            
+                            PFmil_t * (*op) (const PFmil_t *, const PFmil_t *);
+                            /* find the correct comparison */
+                            switch (p->sem.thetajoin.pred[i].comp) {
+                                case alg_comp_eq:
+                                    op = PFmil_meq; break; 
+                                case alg_comp_gt:
+                                    op = PFmil_mgt; break; 
+                                case alg_comp_ge:
+                                    op = PFmil_mge; break; 
+                                case alg_comp_lt:
+                                    op = PFmil_mlt; break; 
+                                case alg_comp_le:
+                                    op = PFmil_mle; break; 
+                                case alg_comp_ne:
+                                    op = PFmil_mne; break; 
+                                default:
+                                    assert(0);
+                                    op = NULL;
+                            }
+
+                            /* apply filter and update the mapping
+                               relations lmap and rmap */
+                            execute (assgn (var (res->name), 
+                                            op (leftjoin (
+                                                    var (lmap->name),
+                                                    var (l->name)),
+                                                leftjoin (
+                                                    var (rmap->name),
+                                                    var (r->name)))),
+                                     assgn (var (res->name),
+                                            reverse (
+                                                mark (select_ (
+                                                          var (res->name),
+                                                          lit_bit (true)),
+                                                      lit_oid (0)))),
+                                     assgn (var (lmap->name),
+                                            leftjoin (
+                                                var (res->name),
+                                                var (lmap->name))),
+                                     assgn (var (rmap->name),
+                                            leftjoin (
+                                                var (res->name),
+                                                var (rmap->name))));
+                        }
+
+                        initialized = true;
+                    } else {
+                        /* If one join argument has no match we
+                           can return an empty result */
+                        execute (assgn (var (lmap->name),
+                                        seqbase (new (type (mty_void), 
+                                                      type (mty_oid)),
+                                                 lit_oid (0))),
+                                 assgn (var (rmap->name),
+                                        seqbase (new (type (mty_void), 
+                                                      type (mty_oid)),
+                                                 lit_oid (0))));
+                        break;
+                    }
+                    if (ltype & aat_node) {
+                        unpin (l, 1);
+                        unpin (r, 1);
+                    }
+                }
+            /* release temporary variable res */
+            unpin (res, 1);
+
+            /* map all matching tuples (of the left input relation) */
+            for (unsigned int col = 0; col < env_count (L(p)->env); col++) {
+                 mvar_t *a = new_var (p->refctr);
+
+                 execute (
+                      assgn (var (a->name),
+                             leftjoin (
+                                 var (lmap->name),
+                                 var (env_at (L(p)->env, col).mvar->name))));
+                      
+                 env_add (p->env,
+                          env_at (L(p)->env, col).att,
+                          env_at (L(p)->env, col).ty,
+                          a);
+            }
+            unpin (lmap, 1);
+
+            /* map all matching tuples (of the right input relation) */
+            for (unsigned int col = 0; col < env_count (R(p)->env); col++) {
+                 mvar_t *a = new_var (p->refctr);
+
+                 execute (
+                      assgn (var (a->name),
+                             leftjoin (
+                                 var (rmap->name),
+                                 var (env_at (R(p)->env, col).mvar->name))));
+                      
+                 env_add (p->env,
+                          env_at (R(p)->env, col).att,
+                          env_at (R(p)->env, col).ty,
+                          a);
+            }
+            unpin (rmap, 1);
+            
+        } break;
+
+        /* Rel:      unq2_thetajoin (Rel, Rel) */
+        case 18:
+        {
+            PFalg_simple_type_t lty = type_of (L(p),
+                                               p->sem.unq_thetajoin.left);
+            PFalg_simple_type_t rty = type_of (R(p),
+                                               p->sem.unq_thetajoin.right);
+            PFalg_simple_type_t t = 0;
+            mvar_t *l = NULL;
+            mvar_t *r = NULL;
+            mvar_t *ldist = NULL;
+            mvar_t *rdist = NULL;
+            mvar_t *res = new_var (1);
+            mvar_t *left, *right;
+            PFmil_t *comp;
+
+            assert (lty == rty);
+            assert (type_of (L(p), p->sem.unq_thetajoin.ldist) == aat_nat);
+            assert (type_of (R(p), p->sem.unq_thetajoin.rdist) == aat_nat);
+
+            /* find the variable names for the distinct attributes */
+            ldist = env_mvar_unsafe (L(p)->env,
+                                     p->sem.unq_thetajoin.ldist, aat_nat);
+            rdist = env_mvar_unsafe (R(p)->env,
+                                     p->sem.unq_thetajoin.rdist, aat_nat);
+
+            /* find the variable names for the join attributes */
+            for (t = 1; t; t <<= 1)
+                if (t & lty && t & rty) {
+                    l = env_mvar_unsafe (L(p)->env,
+                                         p->sem.unq_thetajoin.left, t);
+                    r = env_mvar_unsafe (R(p)->env,
+                                         p->sem.unq_thetajoin.right, t);
+                    break;
+                }
+            
+            /* check consistency */
+            if (t != lty || t != rty || !ldist || !rdist)
+                PFoops (OOPS_FATAL, 
+                        "multi-predicate thetajoins (with duplicate remvoval)"
+                        " are not supported");
+
+            if (!l || !r)
+                PFoops (OOPS_FATAL, "incompatible types in thetajoin");
+
+            /* find the correct comparison */
+            switch (p->sem.unq_thetajoin.comp) {
+                case alg_comp_eq: comp = var (PF_MIL_VAR_EQ); break; 
+                case alg_comp_gt: comp = var (PF_MIL_VAR_GT); break; 
+                case alg_comp_ge: comp = var (PF_MIL_VAR_GE); break; 
+                case alg_comp_lt: comp = var (PF_MIL_VAR_LT); break; 
+                case alg_comp_le: comp = var (PF_MIL_VAR_LE); break; 
+                default:
+                    PFoops (OOPS_FATAL, "incorrect comparison");
+            }
+
+            /* add the two result columns to the environment */
+            left = new_var (p->refctr);
+            right = new_var (p->refctr);
+            env_add (p->env, p->sem.unq_thetajoin.ldist, aat_nat, left);
+            env_add (p->env, p->sem.unq_thetajoin.rdist, aat_nat, right);
+
+            /* apply the duplicate removing thetajoin */
+            execute (
+                assgn (var (res->name),
+                       unq2_tjoin (leftjoin (reverse (var (ldist->name)),
+                                             var (l->name)),
+                                   leftjoin (reverse (var (rdist->name)),
+                                             var (r->name)),
+                                   comp)),
+                assgn (var (left->name),
+                       reverse (mark (var (res->name),
+                                      lit_oid (0)))),
+                assgn (var (left->name),
+                       assert_order (var (left->name))),
+                assgn (var (right->name),
+                       reverse (mark (reverse (var (res->name)),
+                                      lit_oid (0)))));
+
+            unpin (res, 1);
+        } break;
+
+        /* Rel:      unq1_thetajoin (Rel, Rel) */
+        case 19:
+        {
+            PFalg_simple_type_t lty = type_of (L(p),
+                                               p->sem.unq_thetajoin.left);
+            PFalg_simple_type_t rty = type_of (R(p),
+                                               p->sem.unq_thetajoin.right);
+            PFalg_simple_type_t t = 0;
+            mvar_t *l = NULL;
+            mvar_t *r = NULL;
+            mvar_t *ldist = NULL;
+            mvar_t *rdist = NULL;
+            mvar_t *res = new_var (p->refctr);
+            PFmil_t *comp;
+
+            assert (lty == rty);
+            assert (type_of (L(p), p->sem.unq_thetajoin.ldist) == aat_nat);
+            assert (type_of (R(p), p->sem.unq_thetajoin.rdist) == aat_nat);
+
+            /* find the variable names for the distinct attributes */
+            ldist = env_mvar_unsafe (L(p)->env,
+                                     p->sem.unq_thetajoin.ldist, aat_nat);
+            rdist = env_mvar_unsafe (R(p)->env,
+                                     p->sem.unq_thetajoin.rdist, aat_nat);
+
+            /* find the variable names for the join attributes */
+            for (t = 1; t; t <<= 1)
+                if (t & lty && t & rty) {
+                    l = env_mvar_unsafe (L(p)->env,
+                                         p->sem.unq_thetajoin.left, t);
+                    r = env_mvar_unsafe (R(p)->env,
+                                         p->sem.unq_thetajoin.right, t);
+                    break;
+                }
+            
+            /* check consistency */
+            if (t != lty || t != rty || !ldist || !rdist)
+                PFoops (OOPS_FATAL, 
+                        "multi-predicate thetajoins (with duplicate remvoval)"
+                        " are not supported");
+
+            if (!l || !r)
+                PFoops (OOPS_FATAL, "incompatible types in thetajoin");
+
+            /* find the correct comparison */
+            switch (p->sem.unq_thetajoin.comp) {
+                case alg_comp_eq: comp = var (PF_MIL_VAR_EQ); break; 
+                case alg_comp_gt: comp = var (PF_MIL_VAR_GT); break; 
+                case alg_comp_ge: comp = var (PF_MIL_VAR_GE); break; 
+                case alg_comp_lt: comp = var (PF_MIL_VAR_LT); break; 
+                case alg_comp_le: comp = var (PF_MIL_VAR_LE); break; 
+                default:
+                    PFoops (OOPS_FATAL, "incorrect comparison");
+            }
+
+            /* add the two result columns to the environment */
+            env_add (p->env, p->sem.unq_thetajoin.ldist, aat_nat, res);
+
+            /* apply the duplicate removing thetajoin */
+            execute (
+                assgn (var (res->name),
+                       reverse(
+                           mark (
+                               unq1_tjoin (
+                                   leftjoin (reverse (var (ldist->name)),
+                                             var (l->name)),
+                                   leftjoin (reverse (var (rdist->name)),
+                                             var (r->name)),
+                                   var (ldist->name),
+                                   var (rdist->name),
+                                   comp),
+                               lit_oid (0)))));
+        } break;
+
         /* Rel:      project (Rel) */
         case 20:
             /*
@@ -3216,6 +3658,17 @@
                                                 type_of (p, p->sem.number.part))
                                            )),
                                    lit_oid (1)))));
+            else if (env_at (L(p)->env, 0).ty == aat_pfrag)
+                /* As in many cases two adjacent path steps are separated
+                   by a number operator and the fragment is constant we
+                   try to avoid an additional dependency on the fragment
+                   to ease 'materialize' operator removal. */
+                execute (
+                    assgn (var (res->name),
+                           mark (VAR (L(p)->env,
+                                      env_at (L(p)->env, 0).att,
+                                      aat_pre),
+                                 lit_oid (1))));
             else
                 execute (
                     assgn (var (res->name),


-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins

Reply via email to