Update of /cvsroot/monetdb/pathfinder/compiler/algebra/prop
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv5984/algebra/prop

Modified Files:
        prop_trace_names.c 
Log Message:
-- Changed logic of the name tracing.

   Up til now we ignored that a column name may conflict in the DAG
   and for each operator overwrote the current column name by the
   column name steming from the last parent.

   Now we have 3 different states for an original column name:
   * CURRENT: We have a current column name.
   * UNKNOWN: We don't know anything about the column so far.
   * INVALID: The column name is either created elsewhere or
              naming conflicts did arise during merging.

   For nodes with multiple parents we now correctly merge the
   current names of an input column names.


Index: prop_trace_names.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/prop/prop_trace_names.c,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -d -r1.11 -r1.12
--- prop_trace_names.c  10 Dec 2007 15:10:02 -0000      1.11
+++ prop_trace_names.c  13 Dec 2007 09:20:32 -0000      1.12
@@ -60,21 +60,6 @@
 #define ORI_AT(n,i) (((name_pair_t *) PFarray_at ((n), (i)))->ori)
 
 /**
- * lookup the current column name (based on the original column name)
- */
-static PFalg_att_t
-find_cur_name (PFarray_t *np_list, PFalg_att_t ori)
-{
-    assert (np_list);
-
-    for (unsigned int i = 0; i < PFarray_last (np_list); i++)
-        if (ori == ORI_AT(np_list, i))
-            return CUR_AT(np_list, i);
-
-    return 0;
-}
-
-/**
  * Add a new original name/current name pair to the list of name pairs @a np
  */
 static void
@@ -87,33 +72,30 @@
 }
 
 /**
- * diff_np removes the name pair that is associated with
- * the current attribute name @a cur.
+ * diff_np marks the name pair invalid that is associated
+ * with the current attribute name @a cur.
  */
 static void
 diff_np (PFarray_t *np_list, PFalg_att_t cur)
 {
     for (unsigned int i = 0; i < PFarray_last (np_list); i++)
         if (CUR_AT(np_list, i) == cur) {
-            *(name_pair_t *) PFarray_at (np_list, i)
-                = *(name_pair_t *) PFarray_top (np_list);
-            PFarray_del (np_list);
+            /* mark the name pair as invalid */
+            CUR_AT(np_list, i) = att_NULL;
             break;
         }
 }
 
-static PFalg_att_t twig_iter;
-
 /**
  * Worker for PFprop_trace_names(). It recursively propagates
  * the name pair list.
  */
-static PFarray_t *
-map_names (PFla_op_t *n, PFla_op_t *goal, PFarray_t *par_np_list)
+static void
+map_names (PFla_op_t *n, PFla_op_t *goal, PFarray_t *par_np_list,
+           PFalg_att_t twig_iter)
 {
     PFalg_att_t cur, ori;
-    PFarray_t  *np_list = n->prop->name_pairs,
-               *res_list = NULL;
+    PFarray_t  *np_list = n->prop->name_pairs;
 
     assert (n);
     assert (np_list);
@@ -121,25 +103,46 @@
 
     /* collect all name pair lists of the parent operators and
        include (possibly) new matching columns in the name pairs list */
-    for (unsigned int i = 0; i < PFarray_last (par_np_list); i++) {
-        ori = ORI_AT(par_np_list, i);
-        cur = CUR_AT(par_np_list, i);
-
-        assert (ori && cur);
+    if (!PFarray_last (np_list))
+        /* Copy the complete name pair list of the parent
+           if we have no name pair list so far. */
+        for (unsigned int i = 0; i < PFarray_last (par_np_list); i++)
+            add_name_pair (np_list,
+                           ORI_AT(par_np_list, i),
+                           CUR_AT(par_np_list, i));
+    else
+        /* Otherwise adjust the name pair list. */
+        for (unsigned int i = 0; i < PFarray_last (par_np_list); i++) {
+            ori = ORI_AT(par_np_list, i);
+            cur = CUR_AT(par_np_list, i);
 
-        if (!find_cur_name (np_list, ori) && PFprop_ocol (n, cur))
-            add_name_pair (np_list, ori, cur);
-    }
+            /* Use the name pair entry of the parent if the name pair
+               is unknown (original name is 0). Name pairs marked
+               as unknown are introduced by a projection that prunes
+               the column. */
+            if (!ORI_AT(np_list,i)) {
+                ORI_AT(np_list, i) = ori;
+                CUR_AT(np_list, i) = cur;
+            }
+            /* Mark the name pair as invalid if the name pair of the
+               parent is known (original name is not 0) and we have
+               conflicting current names. */
+            else if (ori && cur != CUR_AT(np_list, i)) {
+                CUR_AT(np_list, i) = att_NULL;
+            }
+        }
 
     /* nothing to do if we haven't collected
        all incoming name pair lists of that node */
     EDGE(n)++;
     if (EDGE(n) < PFprop_refctr (n))
-        return NULL;
+        return;
 
-    /* if we reached our goal we can return the current name pair list */
+    /* If we reached our goal we can return.
+       (The resulting name pair list is accessible
+        using the reference to operator goal.) */
     if (n == goal)
-        return np_list;
+        return;
 
     /* Remove all the name pairs from the list whose current column name
        is generated by this operator and map the current name in case
@@ -150,10 +153,6 @@
         case la_lit_tbl:
         case la_empty_tbl:
         case la_ref_tbl:
-        case la_cross:
-        case la_eqjoin:
-        case la_semijoin:
-        case la_thetajoin:
         case la_select:
         case la_pos_select:
         case la_error:
@@ -175,26 +174,78 @@
 
         case la_project:
         {
-            /* get an empty name pair list */
-            PFarray_t *tmp = n->prop->l_name_pairs;
-            if (tmp) PFarray_last (tmp) = 0;
-            else tmp = PFarray (sizeof (name_pair_t));
-
-            /* Infer only columns that are used in the projection. */
-            for (unsigned int i = 0; i < PFarray_last (np_list); i++)
-                for (unsigned int j = 0; j < n->sem.proj.count; j++)
+            unsigned int j;
+            for (unsigned int i = 0; i < PFarray_last (np_list); i++) { 
+                /* Adjust all current column names for the columns
+                   in the projection list. */
+                for (j = 0; j < n->sem.proj.count; j++)
                     if (n->sem.proj.items[j].new == CUR_AT(np_list, i)) {
-                        add_name_pair (tmp,
-                                       ORI_AT(np_list, i),
-                                       n->sem.proj.items[j].old);
+                        CUR_AT(np_list, i) = n->sem.proj.items[j].old;
                         break;
                     }
-
-            /* update the name pair list */
-            n->prop->l_name_pairs = np_list;
-            np_list = tmp;
+                /* If the name pair is not referenced in the projection
+                   list and it is not marked as invalid we mark the
+                   name pair as unknown. (Throwing away both column
+                   names does not cause trouble as all name pair lists
+                   are aligned.) */
+                if (j == n->sem.proj.count &&
+                    CUR_AT(np_list, i) != att_NULL) {
+                    CUR_AT(np_list, i) = att_NULL;
+                    ORI_AT(np_list, i) = att_NULL;
+                }
+            }
         }   break;
 
+        case la_cross:
+        case la_eqjoin:
+        case la_semijoin:
+        case la_thetajoin:
+        {
+            unsigned int j;
+            /* if we have no additional name pair list then create one */
+            if (!n->prop->l_name_pairs)
+               n->prop->l_name_pairs = PFarray (sizeof (name_pair_t));
+
+            /* mark all columns that we do not see in the left child
+               as unknown */
+            for (unsigned int i = 0; i < PFarray_last (np_list); i++) {
+                for (j = 0; j < L(n)->schema.count; j++)
+                    if (L(n)->schema.items[j].name == CUR_AT(np_list, i)) {
+                        add_name_pair (n->prop->l_name_pairs,
+                                       ORI_AT(np_list, i),
+                                       CUR_AT(np_list, i));
+                        break;
+                    }
+                if (j == L(n)->schema.count)
+                    add_name_pair (n->prop->l_name_pairs, att_NULL, att_NULL);
+            }
+            map_names (L(n), goal, n->prop->l_name_pairs, att_NULL);
+            PFarray_last (n->prop->l_name_pairs) = 0;
+            
+            if (n->kind == la_semijoin)
+                /* mark all columns in the right child as unknown */
+                for (unsigned int i = 0; i < PFarray_last (np_list); i++)
+                    add_name_pair (n->prop->l_name_pairs, att_NULL, att_NULL);
+            else
+                /* mark all columns that we do not see in the right child
+                   as unknown */
+                for (unsigned int i = 0; i < PFarray_last (np_list); i++) {
+                    for (j = 0; j < R(n)->schema.count; j++)
+                        if (R(n)->schema.items[j].name == CUR_AT(np_list, i)) {
+                            add_name_pair (n->prop->l_name_pairs,
+                                           ORI_AT(np_list, i),
+                                           CUR_AT(np_list, i));
+                            break;
+                        }
+                    if (j == R(n)->schema.count)
+                        add_name_pair (n->prop->l_name_pairs,
+                                       att_NULL,
+                                       att_NULL);
+                }
+            map_names (R(n), goal, n->prop->l_name_pairs, att_NULL);
+            PFarray_last (n->prop->l_name_pairs) = 0;
+        }   return;
+        
         case la_fun_1to1:
             diff_np (np_list, n->sem.fun_1to1.res);
             break;
@@ -261,116 +312,76 @@
             diff_np (np_list, n->sem.iter_item.item);
             /* make sure the underlying constructors
                propagate the correct information */
-            if (PFarray_last (np_list)) {
-                assert (CUR_AT(np_list, 0) == n->sem.iter_item.iter);
-                twig_iter = ORI_AT(np_list, 0);
-            } else
-                twig_iter = att_NULL;
-
-            /* empty the name pair list */
-            PFarray_last (np_list) = 0;
-            res_list = map_names (L(n), goal, np_list);
-            return res_list;
-            break;
+            map_names (L(n), goal, np_list, n->sem.iter_item.iter);
+            return;
 
         case la_fcns:
+            map_names (L(n), goal, np_list, twig_iter);
+            map_names (R(n), goal, np_list, twig_iter);
             break;
 
         case la_docnode:
-        {
-            PFalg_att_t twig_iter_old = twig_iter;
-
-            if (twig_iter)
-                add_name_pair (np_list, twig_iter, n->sem.docnode.iter);
-
-            twig_iter = att_NULL;
+            for (unsigned int i = 0; i < PFarray_last (np_list); i++)
+                /* Adjust all current column names for the loop columns. */
+                if (twig_iter == CUR_AT(np_list, i))
+                    CUR_AT(np_list, i) = n->sem.docnode.iter;
+                
             /* infer properties for children and
                return the resulting mapping */
-            res_list = map_names (L(n), goal, np_list);
-            twig_iter = twig_iter_old;
-            if (res_list) return res_list;
-
-            /* empty the name pair list */
-            PFarray_last (np_list) = 0;
-            res_list = map_names (R(n), goal, np_list);
-            return res_list;
-        }   break;
+            map_names (L(n), goal, np_list, att_NULL);
+            map_names (R(n), goal, np_list, n->sem.docnode.iter);
+            return;
 
         case la_element:
-        {
-            PFalg_att_t twig_iter_old = twig_iter;
-
-            if (twig_iter)
-                add_name_pair (np_list, twig_iter, n->sem.iter_item.iter);
-
-            twig_iter = att_NULL;
+            for (unsigned int i = 0; i < PFarray_last (np_list); i++)
+                /* Adjust all current column names for the loop columns. */
+                if (twig_iter == CUR_AT(np_list, i))
+                    CUR_AT(np_list, i) = n->sem.iter_item.iter;
+                
             /* infer properties for children and
                return the resulting mapping */
-            res_list = map_names (L(n), goal, np_list);
-            twig_iter = twig_iter_old;
-            if (res_list) return res_list;
-
-            /* empty the name pair list */
-            PFarray_last (np_list) = 0;
-            res_list = map_names (R(n), goal, np_list);
-            return res_list;
-        }   break;
+            map_names (L(n), goal, np_list, att_NULL);
+            map_names (R(n), goal, np_list, n->sem.iter_item.iter);
+            return;
 
         case la_textnode:
         case la_comment:
-        {
-            PFalg_att_t twig_iter_old = twig_iter;
-
-            if (twig_iter)
-                add_name_pair (np_list, twig_iter, n->sem.iter_item.iter);
-
-            twig_iter = att_NULL;
+            for (unsigned int i = 0; i < PFarray_last (np_list); i++)
+                /* Adjust all current column names for the loop columns. */
+                if (twig_iter == CUR_AT(np_list, i))
+                    CUR_AT(np_list, i) = n->sem.iter_item.iter;
+                
             /* infer properties for children and
                return the resulting mapping */
-            res_list = map_names (L(n), goal, np_list);
-            twig_iter = twig_iter_old;
-            if (res_list) return res_list;
-        }   break;
+            map_names (L(n), goal, np_list, att_NULL);
+            return;
 
         case la_attribute:
         case la_processi:
-        {
-            PFalg_att_t twig_iter_old = twig_iter;
-
-            if (twig_iter)
-                add_name_pair (np_list,
-                               twig_iter,
-                               n->sem.iter_item1_item2.iter);
-
-            twig_iter = att_NULL;
+            for (unsigned int i = 0; i < PFarray_last (np_list); i++)
+                /* Adjust all current column names for the loop columns. */
+                if (twig_iter == CUR_AT(np_list, i))
+                    CUR_AT(np_list, i) = n->sem.iter_item1_item2.iter;
+                
             /* infer properties for children and
                return the resulting mapping */
-            res_list = map_names (L(n), goal, np_list);
-            twig_iter = twig_iter_old;
-            if (res_list) return res_list;
-        }   break;
+            map_names (L(n), goal, np_list, att_NULL);
+            return;
 
         case la_content:
-        {
-            PFalg_att_t twig_iter_old = twig_iter;
-
-            if (twig_iter)
-                add_name_pair (np_list,
-                               twig_iter,
-                               n->sem.iter_pos_item.iter);
-
-            twig_iter = att_NULL;
+            for (unsigned int i = 0; i < PFarray_last (np_list); i++)
+                /* Adjust all current column names for the loop columns. */
+                if (twig_iter == CUR_AT(np_list, i))
+                    CUR_AT(np_list, i) = n->sem.iter_pos_item.iter;
+                
             /* infer properties for children and
                return the resulting mapping */
-            res_list = map_names (R(n), goal, np_list);
-            if (res_list) return res_list;
+            map_names (R(n), goal, np_list, att_NULL);
 
             /* empty the name pair list */
             PFarray_last (np_list) = 0;
-            res_list = map_names (L(n), goal, np_list);
-            twig_iter = twig_iter_old;
-            return res_list;
-        }   break;
+            map_names (L(n), goal, np_list, att_NULL);
+            return;
 
         case la_merge_adjacent:
             assert (n->sem.merge_adjacent.iter_res ==
@@ -382,6 +393,9 @@
         case la_frag_union:
         case la_empty_frag:
             /* do not infer name pairs to the children */
+
+            /* empty the name pair list */
+            PFarray_last (np_list) = 0;
             break;
 
         case la_cond_err:
@@ -389,14 +403,11 @@
         case la_trace_msg:
         case la_trace_map:
             /* do the recursive calls by hand */
-            res_list = map_names (L(n), goal, np_list);
-            if (res_list) return res_list;
+            map_names (L(n), goal, np_list, att_NULL);
             /* empty the name pair list */
             PFarray_last (np_list) = 0;
-            res_list = map_names (R(n), goal, np_list);
-            return res_list;
-
-            break;
+            map_names (R(n), goal, np_list, att_NULL);
+            return;
 
         case la_nil:
             /* we do not have properties */
@@ -410,7 +421,7 @@
                     "The column name tracing cannot "
                     "handle recursion operator.");
             break;
-
+            
         case la_string_join:
             assert (n->sem.string_join.iter == n->sem.string_join.iter_res &&
                     n->sem.string_join.iter_sep == 
n->sem.string_join.iter_res);
@@ -433,15 +444,8 @@
 
     /* infer properties for children and
        return the resulting mapping */
-    if (L(n))
-        res_list = map_names (L(n), goal, np_list);
-
-    if (res_list) return res_list;
-
-    if (R(n))
-        res_list = map_names (R(n), goal, np_list);
-
-    return res_list;
+    if (L(n)) map_names (L(n), goal, np_list, att_NULL);
+    if (R(n)) map_names (R(n), goal, np_list, att_NULL);
 }
 
 /* check for the goal operator */
@@ -474,6 +478,8 @@
     /* reset the name mapping structure */
     if (n->prop->name_pairs) PFarray_last (n->prop->name_pairs) = 0;
     else n->prop->name_pairs = PFarray (sizeof (name_pair_t));
+    
+    if (n->prop->l_name_pairs) PFarray_last (n->prop->l_name_pairs) = 0;
 }
 
 /**
@@ -505,8 +511,10 @@
         add_name_pair (map_list, list.atts[i], list.atts[i]);
 
     /* collect the mapped names */
-    new_map_list = map_names (start, goal, map_list);
+    map_names (start, goal, map_list, att_NULL);
+    new_map_list = goal->prop->name_pairs;
     assert (new_map_list);
+    assert (PFarray_last (new_map_list) == list.count);
 
     /* create new list */
     new_list.count = list.count;


-------------------------------------------------------------------------
SF.Net email is sponsored by:
Check out the new SourceForge.net Marketplace.
It's the best place to buy or sell services
for just about anything Open Source.
http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins

Reply via email to