Changeset: d2134f8d5b50 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d2134f8d5b50
Modified Files:
monetdb5/modules/mal/Makefile.ag
monetdb5/modules/mal/cbp.h
monetdb5/modules/mal/txtsim.c
monetdb5/modules/mal/txtsim.h
monetdb5/modules/mal/txtsim.mal
monetdb5/optimizer/Makefile.ag
monetdb5/optimizer/opt_dictionary.c
monetdb5/optimizer/opt_joinselect.c
monetdb5/optimizer/opt_joinselect.h
monetdb5/optimizer/opt_macro.c
monetdb5/optimizer/opt_mapreduce.h
monetdb5/optimizer/opt_octopus.c
monetdb5/optimizer/opt_prejoin.c
monetdb5/optimizer/opt_reduce.c
monetdb5/optimizer/opt_reduce.h
monetdb5/optimizer/opt_remoteQueries.c
monetdb5/optimizer/opt_replication.h
monetdb5/optimizer/opt_replications.h
monetdb5/optimizer/opt_statistics.c
monetdb5/optimizer/opt_support.c
Branch: headless
Log Message:
More minor changes.
diffs (truncated from 1147 to 300 lines):
diff --git a/monetdb5/modules/mal/Makefile.ag b/monetdb5/modules/mal/Makefile.ag
--- a/monetdb5/modules/mal/Makefile.ag
+++ b/monetdb5/modules/mal/Makefile.ag
@@ -47,7 +47,7 @@
# pqueue.mx \
profiler.c \
# recycle.c \
- remote.c \
+# remote.c \
replication.c \
sabaoth.c \
tablet.c \
diff --git a/monetdb5/modules/mal/cbp.h b/monetdb5/modules/mal/cbp.h
--- a/monetdb5/modules/mal/cbp.h
+++ b/monetdb5/modules/mal/cbp.h
@@ -28,7 +28,6 @@
#include "mal_client.h"
#include "mal_interpreter.h"
#include "mal_profiler.h"
-#include "column.h"
#ifdef WIN32
#if !defined(LIBMAL) && !defined(LIBATOMS) && !defined(LIBKERNEL) &&
!defined(LIBMAL) && !defined(LIBOPTIMIZER) && !defined(LIBSCHEDULER) &&
!defined(LIBMONETDB5)
diff --git a/monetdb5/modules/mal/txtsim.c b/monetdb5/modules/mal/txtsim.c
--- a/monetdb5/modules/mal/txtsim.c
+++ b/monetdb5/modules/mal/txtsim.c
@@ -865,39 +865,39 @@
}
-/* ============ Q-GRAM SELF JOIN ============== */
+/* ============ Q-GRAM SELF JOIN ==============
+ UNCLEAR WHERE THIS IS USED
str
-CMDqgramselfjoin(BAT **res, BAT *qgram, BAT *id, BAT *pos, BAT *len, flt *c,
int *k)
+CMDqgramselfjoin(COL **res , COL **res1, COL *qgram, COL *id, COL *pos, COL
*len, flt *c, int *k)
{
- BUN n = BATcount(qgram);
- BUN i, j;
- BAT *bn;
+ oid n = BATcount(qgram);
+ oid i, j;
+ COL *bn, *b;
- oid *qbuf = (oid *) Tloc(qgram, BUNfirst(qgram));
- int *ibuf = (int *) Tloc(id, BUNfirst(id));
- int *pbuf = (int *) Tloc(pos, BUNfirst(pos));
- int *lbuf = (int *) Tloc(len, BUNfirst(len));
+ oid *qbuf = (oid *) COLloc(qgram, BUNfirst(qgram));
+ int *ibuf = (int *) COLloc(id, BUNfirst(id));
+ int *pbuf = (int *) COLloc(pos, BUNfirst(pos));
+ int *lbuf = (int *) COLloc(len, BUNfirst(len));
- ERRORcheck((qgram->ttype != TYPE_oid), "CMDqgramselfjoin: tail of BAT
qgram must be oid.\n");
- ERRORcheck((id->ttype != TYPE_int), "CMDqgramselfjoin: tail of BAT id
must be int.\n");
- ERRORcheck((pos->ttype != TYPE_int), "CMDqgramselfjoin: tail of BAT pos
must be int.\n");
- ERRORcheck((len->ttype != TYPE_int), "CMDqgramselfjoin: tail of BAT len
must be int.\n");
-
- /* ERRORcheck( (BATcount(qgram)>1 && !(BATtordered(qgram)&1)),
"CMDqgramselfjoin: tail of qgram must be sorted.\n"); */
+ ERRORcheck((COLtype(qgram) != TYPE_oid), "CMDqgramselfjoin: tail of BAT
qgram must be oid.\n");
+ ERRORcheck((COLtype(id-) != TYPE_int), "CMDqgramselfjoin: tail of BAT
id must be int.\n");
+ ERRORcheck((COLtype(pos) != TYPE_int), "CMDqgramselfjoin: tail of BAT
pos must be int.\n");
+ ERRORcheck((COLtype(len) != TYPE_int), "CMDqgramselfjoin: tail of BAT
len must be int.\n");
ERRORcheck((ALIGNsynced(qgram, id) == 0), "CMDqgramselfjoin: qgram and
id are not synced");
ERRORcheck((ALIGNsynced(qgram, pos) == 0), "CMDqgramselfjoin: qgram and
pos are not synced");
ERRORcheck((ALIGNsynced(qgram, len) == 0), "CMDqgramselfjoin: qgram and
len are not synced");
- ERRORcheck((Tsize(qgram) != ATOMsize(qgram->ttype)), "CMDqgramselfjoin:
qgram is not a true void bat");
- ERRORcheck((Tsize(id) != ATOMsize(id->ttype)), "CMDqgramselfjoin: id is
not a true void bat");
+ ERRORcheck((Tsize(qgram) != ATOMsize(COLtype(qgram))),
"CMDqgramselfjoin: qgram is not a true void bat");
+ ERRORcheck((Tsize(id) != ATOMsize(COLtype(id))), "CMDqgramselfjoin: id
is not a true void bat");
- ERRORcheck((Tsize(pos) != ATOMsize(pos->ttype)), "CMDqgramselfjoin: pos
is not a true void bat");
- ERRORcheck((Tsize(len) != ATOMsize(len->ttype)), "CMDqgramselfjoin: len
is not a true void bat");
+ ERRORcheck((Tsize(pos) != ATOMsize(COLtype(pos))), "CMDqgramselfjoin:
pos is not a true void bat");
+ ERRORcheck((Tsize(len) != ATOMsize(COLtype(len))), "CMDqgramselfjoin:
len is not a true void bat");
- *res = bn = BATnew(TYPE_int, TYPE_int, n);
+ *res = b = COLnew(TYPE_int, n);
+ *res1 = bn = COLnew(TYPE_int, n);
for (i = 0; i < n - 1; i++) {
for (j = i + 1; (j < n && qbuf[j] == qbuf[i] && pbuf[j] <=
(pbuf[i] + (*k + *c * MYMIN(lbuf[i], lbuf[j])))); j++) {
@@ -915,3 +915,4 @@
BBPreclaim(bn);
throw(MAL, "txtsim.qgramselfjoin", MAL_MALLOC_FAIL);
}
+*/
diff --git a/monetdb5/modules/mal/txtsim.h b/monetdb5/modules/mal/txtsim.h
--- a/monetdb5/modules/mal/txtsim.h
+++ b/monetdb5/modules/mal/txtsim.h
@@ -46,6 +46,5 @@
txtsim_export str soundex_impl(str *res, str *Name);
txtsim_export str stringdiff_impl(int *res, str *s1, str*s2);
txtsim_export str CMDqgramnormalize(str *res, str *input);
-txtsim_export str CMDqgramselfjoin(BAT **res, BAT *qgram, BAT *id, BAT *pos,
BAT *len, flt *c, int *k);
#endif /*_TXTSIM_H*/
diff --git a/monetdb5/modules/mal/txtsim.mal b/monetdb5/modules/mal/txtsim.mal
--- a/monetdb5/modules/mal/txtsim.mal
+++ b/monetdb5/modules/mal/txtsim.mal
@@ -55,7 +55,3 @@
address CMDqgramnormalize
comment "'Normalizes' strings (eg. toUpper and replaces non-alphanumerics with
one space";
-command qgramselfjoin(qgram:bat[:oid, :oid], id:bat[:oid, :oid], pos:bat[:oid,
:int], len:bat[:oid, :int], c:flt, k:int) :bat[:int, :int]
-address CMDqgramselfjoin
-comment "QGram self-join on ordered(!) qgram tables and sub-ordered q-gram
positions";
-
diff --git a/monetdb5/optimizer/Makefile.ag b/monetdb5/optimizer/Makefile.ag
--- a/monetdb5/optimizer/Makefile.ag
+++ b/monetdb5/optimizer/Makefile.ag
@@ -40,22 +40,22 @@
opt_datacyclotron.c \
opt_dataflow.c \
opt_deadcode.c \
- opt_derivepath.c \
+# opt_derivepath.c \
opt_dictionary.c \
opt_emptySet.c \
opt_evaluate.c \
opt_factorize.c \
- opt_garbageCollector.c \
+ opt_garbagecollector.c \
opt_history.c \
opt_inline.c \
# opt_joinpath.c \
opt_joinselect.c \
opt_macro.c \
- opt_mapreduce.c \
+# opt_mapreduce.c \
opt_mergetable.c \
opt_mitosis.c \
opt_multiplex.c \
- opt_octopus.c \
+# opt_octopus.c \
opt_pipes.c \
opt_prejoin.c \
opt_prelude.c \
diff --git a/monetdb5/optimizer/opt_dictionary.c
b/monetdb5/optimizer/opt_dictionary.c
--- a/monetdb5/optimizer/opt_dictionary.c
+++ b/monetdb5/optimizer/opt_dictionary.c
@@ -138,8 +138,8 @@
p = BUNfnd(dictIndex, (ptr) nme);
if ( p != BUN_NONE ){
*idx = *(int*) BUNhead(bidx, p);
- *val = *(int*) BUNhead(bval, BUNfnd(BATmirror(dictValue), (ptr)
nme ) );
- *base = *(int*) BUNhead(bbase, BUNfnd(BATmirror(dictBase),
(ptr) nme ) );
+ *val = *(int*) BUNhead(bval, BUNfnd(dictValue, (ptr) nme ) );
+ *base = *(int*) BUNhead(bbase, BUNfnd(dictBase, (ptr) nme ) );
return 0;
}
return -1;
@@ -190,7 +190,7 @@
pushInstruction(mb, q);
continue;
}
- /* replace the BAT if a dictionary exists */
+ /* replace the column if a dictionary exists */
buf[0]= 0;
if ( getModuleId(q) == sqlRef && getFunctionId(q) == bindRef)
snprintf(buf,BUFSIZ,"%s/%s/%s/%d",
@@ -205,10 +205,10 @@
COLiter bidx = col_iterator(dictIndex);
COLiter bval = col_iterator(dictValue);
- p = BUNfnd(BATmirror(dictIndex), (ptr) buf);
+ p = BUNfnd(dictIndex, (ptr) buf);
if ( p != BUN_NONE ){
x = *(int*) BUNhead(bidx, p);
- v = *(int*) BUNhead(bval,
BUNfnd(BATmirror(dictValue), (ptr) buf ) );
+ v = *(int*) BUNhead(bval, BUNfnd(dictValue,
(ptr) buf ) );
OPTDEBUGdictionary
mnstr_printf(GDKout, "#Located a
dictionary %s ? %s %d %d\n",buf, (p?"ok":"no"), x,v);
/* replace the sql.bind */
@@ -227,9 +227,9 @@
}
/* construct: (bi:col[:type], bv:col[:any2]):=
dictionary.bind(name); */
qq = newStmt(mb,"dictionary",bindRef);
- qq = pushReturn(mb,qq,
newTmpVariable(mb,newColType(bv->ttype)));
+ qq = pushReturn(mb,qq,
newTmpVariable(mb,newColType(COLtype(bv))));
qq = pushStr(mb,qq,buf);
- setVarType(mb, getArg(qq,0),
newColType(bi->ttype));
+ setVarType(mb, getArg(qq,0),
newColType(COLtype(bi)));
setVarFixed(mb, getArg(qq,0));
setVarFixed(mb, getArg(qq,0));
setVarFixed(mb, getArg(qq,1));
@@ -620,18 +620,18 @@
if ( DICTinit() == 0)
return MAL_SUCCEED;
mal_set_lock(mal_contextLock, "dictionary");
- b = BATnew(TYPE_int,TYPE_str, 255);
+ b = COLnew(TYPE_str, 255);
if (b == NULL) {
mal_unset_lock(mal_contextLock, "dictionary");
throw(MAL,"dictionary.initialize",RUNTIME_OBJECT_MISSING);
}
- bn = BATnew(TYPE_int, TYPE_str, 255);
+ bn = COLnew(TYPE_str, 255);
if (bn == NULL) {
CBPreleaseref(b);
mal_unset_lock(mal_contextLock, "dictionary");
throw(MAL,"dictionary.initialize",RUNTIME_OBJECT_MISSING);
}
- bs = BATnew(TYPE_int, TYPE_str, 255);
+ bs = COLnew(TYPE_str, 255);
if (bs == NULL) {
CBPreleaseref(b);
CBPreleaseref(bn);
@@ -648,7 +648,7 @@
COLkey(bn, TRUE);
CBPrename(bn->batCacheid, "dictValue");
COLmode(bn, PERSISTENT);
- CBPkeeref(bn);
+ CBPkeepref(bn);
dictValue = bn;
COLkey(bs, TRUE);
@@ -733,7 +733,7 @@
if ( bs )
bh= COLhistogram(bs);
if ( bs && bh && COLcount(bs) > 0)
- ratio = (double) COLcount(bh) / (double) BATcount(bs);
+ ratio = (double) COLcount(bh) / (double) COLcount(bs);
#ifdef DEBUG_OPT_DICTIONARY
if ( bs && bh )
mnstr_printf(GDKout,"#dictionary.compress sample " SZFMT " "
SZFMT" %f %f\n", COLcount(bs), COLcount(bh),ratio, ratio * COLcount(b));
@@ -750,11 +750,10 @@
CBPreleaseref(b);
throw(MAL,"dict.new","Can not access unique list");
}
- bo = BATmirror(bo);
cnt = COLcount(bo);
typ= DICTtype(cnt);
- if( typ == TYPE_lng || typ == b->ttype ){
+ if( typ == TYPE_lng || typ == COLtype(b) ){
/* don't create a new dictionary */
#ifdef DEBUG_OPT_DICTIONARY
mnstr_printf(GDKout,"#dictionary.new %s not compressed\n",*nme);
@@ -763,39 +762,37 @@
CBPreleaseref(bo);
return MAL_SUCCEED;
}
- mnstr_printf(GDKout,"#dictionary.new %s compressed from type %s to %s "
SZFMT" elm\n", *nme, getTypeName(b->ttype), getTypeName(typ), cnt);
- bv = BATnew(typ, b->ttype, COLcount(b));
+ mnstr_printf(GDKout,"#dictionary.new %s compressed from type %s to %s "
SZFMT" elm\n", *nme, getTypeName(COLtype(b)), getTypeName(typ), cnt);
+ bv = COLnew(COLtype(b), COLcount(b));
/* create the dictionary representation */
-#define mkindex(A1,A2,A3) \
- { A1 o; \
+#define mkindex { \
/* complete encoding table */ \
- bi = col_iterator(A2); \
- o = (A1) A3; \
- COLloop(A2,p,q){ \
- BUNins(bv, &o, BUNtail(bi,p),FALSE); \
- o++; \
- } \
- } break;
+ bi = col_iterator(bo); \
+ COLloop(bo,p,q){ \
+ BUNappend(bv, BUNhead(bi,p),FALSE); \
+ } }
switch(typ){
- case TYPE_bte: mkindex(bte,bo,bte_nil+1)
- case TYPE_sht: mkindex(sht,bo,sht_nil+1)
- case TYPE_int: mkindex(int,bo,int_nil+1)
+ case TYPE_bte: mkindex; break;
+ case TYPE_sht: mkindex; break;
+ case TYPE_int: mkindex;
}
- bv->hsorted = GDK_SORTED;
- if (!(bv->dirty&2)) bv = COLsetaccess(bv, BAT_READ);
+ bv->sorted = GDK_SORTED;
+ COLsetreadonly(bv);
COLpropcheck(bv, COLPROPS_QUICK);
- bx = COLjoin(b,BATmirror(bv), BUN_NONE);
+ bx = COLjoin(b,bv, BUN_NONE);
COLpropcheck(bx, COLPROPS_QUICK);
#ifdef DEBUG_OPT_DICTIONARY
mnstr_printf(GDKout,"#dictionary.new values table " SZFMT " \n",
COLcount(bv));
#endif
- if (!(bx->dirty&2)) bx = COLsetaccess(bx, BAT_READ);
+ COLsetreadonly(bx);
+/* no, can not use recursive bats
BUNins(dictIndex,&bx->batCacheid, *nme, FALSE);
BUNins(dictValue,&bv->batCacheid, *nme, FALSE);
BUNins(dictBase,&b->batCacheid, *nme, FALSE);
+*/
COLmode(bx, PERSISTENT);
COLmode(bv, PERSISTENT);
CBPkeepref(bx);
@@ -804,11 +801,11 @@
_______________________________________________
Checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list