Update of /cvsroot/monetdb/MonetDB/src/gdk
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv30578

Added Files:
        gdk_rangejoin.mx 
Log Message:
adding missing file



--- NEW FILE: gdk_rangejoin.mx ---
@' The contents of this file are subject to the MonetDB Public License
@' Version 1.1 (the "License"); you may not use this file except in
@' compliance with the License. You may obtain a copy of the License at
@' http://monetdb.cwi.nl/Legal/MonetDBLicense-1.1.html
@'
@' Software distributed under the License is distributed on an "AS IS"
@' basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
@' License for the specific language governing rights and limitations
@' under the License.
@'
@' The Original Code is the MonetDB Database System.
@'
@' The Initial Developer of the Original Code is CWI.
@' Portions created by CWI are Copyright (C) 1997-2007 CWI.
@' All Rights Reserved.

@f gdk_rangejoin
@a N. J. Nes

@* Range Join Operators
The sql statement b.x <= a.z <= b.y, could be implemented using too thetajoins.
But that results in very large intermediates.

@h
#ifndef GDK_RANGEJOIN_H
#define GDK_RANGEJOIN_H

#include "gdk.h"

gdk_export BAT *BATrangejoin(BAT *l, BAT *rl, BAT *rh, bit li, bit hi);
/* Join all BUNs of the BATs that have tail values: {rl <= l <= rh}.  */

#endif /* GDK_RANGEJOIN_H */
@c

#include "monetdb_config.h"
#include "gdk_rangejoin.h"

BAT *BATrangejoin(BAT *l, BAT *rl, BAT *rh, bit li, bit hi)
{
        BAT *bn;

        ERRORcheck(l == NULL, "BATrangejoin: invalid left operand");
        ERRORcheck(rl == NULL, "BATrangejoin: invalid right low operand");
        ERRORcheck(rh == NULL, "BATrangejoin: invalid right high operand");
        ERRORcheck(TYPEerror(l->ttype, rl->ttype), "BATrangejoin: type 
conflict\n");
        ERRORcheck(TYPEerror(l->ttype, rh->ttype), "BATrangejoin: type 
conflict\n");
        /* TODO check that rl and rh are aligned */

        bn = BATnew(BAThtype(l), BAThtype(rl), MIN(BATcount(l), BATcount(rl)));
        if (bn == NULL) 
                return bn;
        switch (ATOMstorage(rl->ttype)) {
        case TYPE_chr:
                @:rangejoin(chr)@
        case TYPE_bte:
                @:rangejoin(bte)@
        case TYPE_sht:
                @:rangejoin(sht)@
        case TYPE_int:
                @:rangejoin(int)@
        case TYPE_flt:
                @:rangejoin(flt)@
        case TYPE_dbl:
                @:rangejoin(dbl)@
        case TYPE_lng:
                @:rangejoin(lng)@
        default:
                GDKerror("BATrangejoin: type not implemented\n");
                return NULL;
        }
        /* set sorted flags by hand, because we used BUNfastins() */
        bn->hsorted = BAThordered(l);
        bn->tsorted = FALSE;

        ESTIDEBUG THRprintf(GDKout, "#BATrangejoin: actual resultsize: " SZFMT 
"\n", BATcount(bn));

        return bn;
}

@= rangejoin
if (li && hi)
        @:rangejoin_(@1,>=,<=)
else if (li && !hi)
        @:rangejoin_(@1,>=,<)
else if (!li && hi)
        @:rangejoin_(@1,>,<=)
else
        @:rangejoin_(@1,>,<)
break;
@
@= rangejoin_
{
        BATiter li = bat_iterator(l);
        BATiter rli = bat_iterator(rl);
        BATiter rhi = bat_iterator(rh);
        BUN p, q;
        BUN v, w;

        BATloop(l, p, q) {
                @1 x1 = *(@1 *) BUNtloc(li, p);
                BATloop(rl, v, w) {
                        if ((x1 @2 *(@1 *) BUNtloc(rli, v))  &&
                            (x1 @3 *(@1 *) BUNtloc(rhi, v))) { 
                                if (BUNfastins(bn, BUNhead(li, p), BUNhead(rli, 
v)) == NULL) {
                                        BBPreclaim(bn);
                                        return NULL;
                                }
                        }
                }
        }
}
@+ Bandjoin
A non-equi join of two relations R and S is called a Band-join if
the join predicate requires the values of R to fall within a given range.
This kind of joins is encountered in real world domains, such as those
involved with time and distance.

The boundary conditions for the bandjoin are constants or a NULL value.
The latter enables encoding of arbitrary theta joins using the more
general bandjoin.
Incidentally note that c1 = c2 = 0 leads to an equi-join.

The straight forward implementation uses a nested loop.
The current implementation does not optimize processing, because
the impact of the choices is not yet clear.

The hash indexing routines have been extended with a Band argument.
@{
@c
BAT *
BATbandjoin(BAT *l, BAT *r, ptr c1, ptr c2)
{
        BAT *bn;

        ERRORcheck(l == NULL, "BATbandjoin: invalid left operand");
        ERRORcheck(r == NULL, "BATbandjoin: invalid right operand");
        ERRORcheck(TYPEerror(l->ttype, r->htype), "BATbandjoin: type 
conflict\n");
        bn = BATnew(BAThtype(l), BAThtype(r), MIN(BATcount(l), BATcount(r)));
        if (bn == NULL) 
                return bn;
        switch (ATOMstorage(r->htype)) {
        case TYPE_chr:
                @:bandjoin(chr)@
        case TYPE_bte:
                @:bandjoin(bte)@
        case TYPE_sht:
                @:bandjoin(sht)@
        case TYPE_int:
                @:bandjoin(int)@
        case TYPE_flt:
                @:bandjoin(flt)@
        case TYPE_dbl:
                @:bandjoin(dbl)@
        case TYPE_lng:
                @:bandjoin(lng)@
        default:
                GDKerror("BATbandjoin: type not implemented\n");
                return NULL;
        }
        /* set sorted flags by hand, because we used BUNfastins() */
        bn->hsorted = BAThordered(l);
        bn->tsorted = FALSE;

        ESTIDEBUG THRprintf(GDKout, "#BATbandjoin: actual resultsize: " SZFMT 
"\n", BATcount(bn));

        return bn;
}

@
@}
@-
The easiest case is to implement a nested loop for band operations.
Choice point is to determine the status of the NULL values in the final
result.
@{
@= bandjoin
{
        BATiter li = bat_iterator(l);
        BATiter ri = bat_iterator(r);
        @1 *x1;
        @1 *x2;
        BUN p, q;
        BUN v, w;

        BATloop(l, p, q) {
                x1 = (@1 *) BUNtloc(li, p);
                BATloop(r, v, w) {
                        x2 = (@1 *) BUNhloc(ri, v);
                        if ((*x1 >= *x2 -  *(@1 *) c1) &&
                            (*x1 <= *x2 + *(@1 *) c2)) {
                                if (BUNfastins(bn, BUNhead(li, p), BUNtail(ri, 
v)) == NULL) {
                                        BBPreclaim(bn);
                                        return NULL;
                                }
                        }
                }
        }
        break;
}
@
@}


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Monetdb-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-checkins

Reply via email to