Changeset: f5c801d2bbf9 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f5c801d2bbf9
Modified Files:
MonetDB5/conf/monetdb5.conf.in
MonetDB5/src/modules/mal/mal_init.mx
MonetDB5/src/optimizer/Makefile.ag
MonetDB5/src/optimizer/opt_prelude.mx
MonetDB5/src/optimizer/opt_support.mx
MonetDB5/src/optimizer/opt_tarantula.mx
MonetDB5/src/scheduler/Makefile.ag
MonetDB5/src/scheduler/run_tarantula.mx
Branch: Oct2010
Log Message:
Removed tarantula optimizer from Oct2010 branch.
diffs (truncated from 1607 to 300 lines):
diff -r be79866c5b58 -r f5c801d2bbf9 MonetDB5/conf/monetdb5.conf.in
--- a/MonetDB5/conf/monetdb5.conf.in Fri Sep 03 13:32:15 2010 +0200
+++ b/MonetDB5/conf/monetdb5.conf.in Fri Sep 03 13:57:31 2010 +0200
@@ -240,7 +240,6 @@
# The Octopus pipeline for distributed processing (Merovingian enabled
# platforms only)
octopus_pipe=inline,remap,evaluate,costModel,coercions,emptySet,aliases,mitosis,mergetable,deadcode,constants,commonTerms,joinPath,octopus,deadcode,reduce,dataflow,history,multiplex,garbageCollector
-tarantula_pipe=inline,remap,evaluate,costModel,coercions,emptySet,aliases,mitosis,mergetable,deadcode,constants,commonTerms,joinPath,tarantula,deadcode,reduce,dataflow,history,multiplex,garbageCollector
#
# The MapReduce pipeline for a different flavour of distributed
# processing on Merovingian enabled platforms
@@ -264,4 +263,3 @@
# The opt_debug bitvector controls their output. It can be set to a pipeline or a comma separated
# list of optimizers you would like to trace. It is a server wide property and can not be set
# dynamically, as it is intended for internal use.
-#opt_debug=tarantula
diff -r be79866c5b58 -r f5c801d2bbf9 MonetDB5/src/modules/mal/mal_init.mx
--- a/MonetDB5/src/modules/mal/mal_init.mx Fri Sep 03 13:32:15 2010 +0200
+++ b/MonetDB5/src/modules/mal/mal_init.mx Fri Sep 03 13:57:31 2010 +0200
@@ -226,7 +226,6 @@
include opt_strengthReduction;
include opt_statistics;
include opt_trace;
-include opt_tarantula;
include opt_mapreduce;
include chopper;
@@ -249,7 +248,6 @@
include run_isolate;
include run_memo;
include run_octopus;
-include run_tarantula;
@-
Cracker schemes
diff -r be79866c5b58 -r f5c801d2bbf9 MonetDB5/src/optimizer/Makefile.ag
--- a/MonetDB5/src/optimizer/Makefile.ag Fri Sep 03 13:32:15 2010 +0200
+++ b/MonetDB5/src/optimizer/Makefile.ag Fri Sep 03 13:57:31 2010 +0200
@@ -37,7 +37,7 @@
opt_accessmode.mx opt_joinpath.mx opt_heuristics.mx
opt_remap.mx \
opt_statistics.mx opt_trace.mx opt_recycler.mx opt_dataflow.mx
\
opt_cluster.mx opt_replication.mx opt_dictionary.mx
opt_mapreduce.mx \
- opt_mitosis.mx opt_octopus.mx opt_tarantula.mx opt_history.mx
opt_selcrack.mx opt_sidcrack.mx
+ opt_mitosis.mx opt_octopus.mx opt_history.mx opt_selcrack.mx
opt_sidcrack.mx
#SCRIPTS = mal
LIBS = ../modules/atoms/lib_mtime ../mal/libmonetdb5 \
@@ -63,7 +63,7 @@
opt_accessmode.mx opt_joinpath.mx opt_heuristics.mx
opt_remap.mx \
opt_statistics.mx opt_trace.mx opt_recycler.mx opt_dataflow.mx
\
opt_cluster.mx opt_replication.mx opt_dictionary.mx
opt_mapreduce.mx \
- opt_mitosis.mx opt_octopus.mx opt_tarantula.mx opt_history.mx
opt_selcrack.mx opt_sidcrack.mx
+ opt_mitosis.mx opt_octopus.mx opt_history.mx opt_selcrack.mx
opt_sidcrack.mx
HEADERS = h
}
@@ -84,7 +84,7 @@
opt_accessmode.mx opt_joinpath.mx opt_heuristics.mx
opt_remap.mx \
opt_statistics.mx opt_trace.mx opt_recycler.mx opt_dataflow.mx
\
opt_cluster.mx opt_replication.mx opt_dictionary.mx
opt_mapreduce.mx \
- opt_mitosis.mx opt_octopus.mx opt_tarantula.mx opt_history.mx
opt_selcrack.mx opt_sidcrack.mx
+ opt_mitosis.mx opt_octopus.mx opt_history.mx opt_selcrack.mx
opt_sidcrack.mx
}
EXTRA_DIST_DIR = Tests
diff -r be79866c5b58 -r f5c801d2bbf9 MonetDB5/src/optimizer/opt_prelude.mx
--- a/MonetDB5/src/optimizer/opt_prelude.mx Fri Sep 03 13:32:15 2010 +0200
+++ b/MonetDB5/src/optimizer/opt_prelude.mx Fri Sep 03 13:57:31 2010 +0200
@@ -187,7 +187,6 @@
opt_export str topn_minRef;
opt_export str topn_maxRef;
opt_export str tuniqueRef;
-opt_export str tarantulaRef;
opt_export str not_uniqueRef;
opt_export str unionRef;
opt_export str unpackRef;
@@ -389,7 +388,6 @@
str sumRef;
str sunionRef;
str takeRef;
-str tarantulaRef;
str topn_minRef;
str topn_maxRef;
str tuniqueRef;
@@ -590,7 +588,6 @@
sumRef = putName("sum",3);
sunionRef= putName("sunion",6);
takeRef= putName("take",5);
- tarantulaRef = putName("tarantula",9);
topn_minRef= putName("topn_min",8);
topn_maxRef= putName("topn_max",8);
tuniqueRef = putName("tunique",7);
diff -r be79866c5b58 -r f5c801d2bbf9 MonetDB5/src/optimizer/opt_support.mx
--- a/MonetDB5/src/optimizer/opt_support.mx Fri Sep 03 13:32:15 2010 +0200
+++ b/MonetDB5/src/optimizer/opt_support.mx Fri Sep 03 13:57:31 2010 +0200
@@ -568,7 +568,6 @@
#define DEBUG_OPT_SIDCRACK 47
#define DEBUG_OPT_TRACE 48
#define DEBUG_OPT_HEURISTIC 49
-#define DEBUG_OPT_TARANTULA 50
#define DEBUG_OPT(X) ((lng) 1 << (X))
opt_export lng optDebug;
@@ -732,7 +731,6 @@
{"singleton", 0, 0, 0, DEBUG_OPT_SINGLETON},
{"strengthreduction", 0, 0, 0, DEBUG_OPT_STRENGTHREDUCTION},
{"trace", 0, 0, 0, DEBUG_OPT_TRACE},
-{"tarantula", 0, 0, 0, DEBUG_OPT_TARANTULA},
{ 0, 0, 0, 0, 0}
};
diff -r be79866c5b58 -r f5c801d2bbf9 MonetDB5/src/optimizer/opt_tarantula.mx
--- a/MonetDB5/src/optimizer/opt_tarantula.mx Fri Sep 03 13:32:15 2010 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1127 +0,0 @@
-@/
-The contents of this file are subject to the MonetDB Public License
-Version 1.1 (the "License"); you may not use this file except in
-compliance with the License. You may obtain a copy of the License at
-http://monetdb.cwi.nl/Legal/MonetDBLicense-1.1.html
-
-Software distributed under the License is distributed on an "AS IS"
-basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
-License for the specific language governing rights and limitations
-under the License.
-
-The Original Code is the MonetDB Database System.
-
-The Initial Developer of the Original Code is CWI.
-Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
-Copyright August 2008-2010 MonetDB B.V.
-All Rights Reserved.
-@
-
-@f opt_tarantula
-@a M. Kersten
-@- Multileg creatures
-Query execution can be improved significantly using distributed processing.
-Traditionally, this encompasses fragmentation and allocation of the base
-tables over multiple sites and query plans that include on the fly transport
-of intermediate results.
-
-Breaking the database into pieces itself is a well-studied area.
-Most approaches consider the workload and search for a good split
-of the base tables, such that the workload performance improves.
-
-The Tarantula optimizer, like the Octopus optimizer, use the
-output of the mitosis+mergetable optimizer and produces the
-actual plans for parallel execution.
-The tarantula untangles the query plan into a controlling head
-function and a series of subplans, one for each leg to execute concurrently.
-
-The target for breaking the plan are the blocking operations,
-in particular mat.pack(). The flow graph leading to the
-the pack arguments are extracted from the query plan and
-each subgraph is cast into an independent plan.
-Since the query plan is a DAG, it is perfectly possible that
-a portion being extracted is shared amongst all legs.
-The naive extraction then leads to a re-calculation of
-shared intermediates in each leg.
-
-The subplan produces the argument to the blocking operator, whose
-result will be assembled in the head. It is also perfectly possible
-that variables assigned a value are used later on in the query graph.
-These variables are identified and one leg becomes responsible to
-return it also to the head to be used later on.
-
-The orginal pack operation is replaced by a call to a function
-to orchestrate the distributed processing and return the final
-result. Then the next pack operation is searched and its
-subgraph is derived. Again, it may share portions produced
-in the first pack subgraph.
-
-A potential more optimal scheme would be to detect each such case and
-turn it into a splitting point as well. This can be detected by
-looking for the last assignment and multiple use cases. [VARIANT TODO]
-
-The allocation of a subplan to leg depends on a bidding scheme.
-Bidding can not depend on BAT arguments, because that would cause
-significant communication overhead. Scalar values could be used and
-would function well in terms of using the recycler to get involved into
-precise bidding.
-
-A snippet of an tarantula plan with two legs is shown.
-The main part of the query becomes a three step procedure of
-1) remote registration of subplans, 2) obtaining bids and schedule design
-and 3) execute the subplans.
-Each plan does not contain duplicate node ids.
-@verbatim
-function reg_qry_0():int;
- tarantula.register(0,"qry_0","qry_1");
- tarantula.register(1,"qry_0","qry_1");
- return 0;
-end reg_qry;
-
-function bid_qry_0(hdl:int)(leg0:int,leg1:int,leg2:int,leg3:int);
- (_4,_5) := tarantula.getBid(0,"qry_0","qry_1");
- (_6,_7) := tarantula.getBid(1,"qry_0","qry_1");
- return (leg0,leg1,leg2,leg3) := scheduler.schedule_0(_4,_5,_6,_7);
-end bid_qry;
-
-function run_qry(node_0:int,node_1:int):bat[:oid,:int]
- _87 := tarantula.execute(node_0,"leg0");
- _88 := tarantula.execute(node_1,"leg1");
- _15 := mat.pack(_87,_88);
- return run_qry := _15;
-end run_qry;
-
-function user.qry():void;
- _3 := reg_qry_0();
- (_4,_5,_6,_7):= bid_qry_0(_3);
- _15 := tarantula.run_qry(_4,_5,_6,_7);
- _16 := sql.resultSet(1,1,_15);
- sql.rsColumn(_16,"sys.squida","bid","int",32,0,_15);
- _21 := io.stdout();
- sql.exportResult(_21,_16);
-end qry;
-@end verbatim
-The nodes addressed by the tarentula are indices into a global catalog.
-The number of subplans is derived by the Mitosis as the number of pieces to consider.
-The tarantula may register the subplans to more nodes than pieces.
-
-@verbatim
-function tarantula.exec_qry_0(node:str,fcn:str):bat[:oid,:int];
- conn:= remote.connect(node,"monetdb","monetdb");
- r:= remote.exec(conn,"tarantula",fcn);
- b:bat[:oid,:int]:= remote.get(conn,r);
- return b;
-end exec_qry;
-@end verbatim
-
-@verbatim
-function tarantula.qry_0(version:int):bat[:oid,:int];
- _53:bat[:oid,:int] := attach.bind("file://export/scratch/mk/dbfarm/demo/bat/01/001");
- _54 := algebra.slice(_53,0@0,2@0);
- _63:bat[:oid,:int] := attach.bind("file://export/scratch/mk/dbfarm/demo/bat/01/002");
- _64 := algebra.slice(_63,0@0,2@0);
- _72 := algebra.kdifference(_54,_64);
- _78 := algebra.kunion(_72,_64);
- _13:bat[:oid,:oid] := attach.bind("file://export/scratch/mk/dbfarm/demo/bat/01/003");
- _14 := bat.reverse(_13);
- _85 := algebra.kdifference(_78,_14);
- return qry_0 := _85;
-end tarantula.qry_0;
-
-function tarantula.qry_1(version:int):bat[:oid,:int];
- ... use second slice ...
-end tarantula.qry_1;
-
-@end verbatim
-
-[Stability]
-We assume that during a session, workers once started will remain alive.
-No fault tolerance techniques are included.
-
-[Recycling]
-The legs contain registered functions and possible partial results in its recycler pool.
-In the first implementation we assume a read-only database, where all workers are
-restarted when changes to the underlying database affect the recycler pool.
-Alternative, the head can clear the recycler pool explicitly upon such state change.
-
-[Shared disk]
-For a leg to work it needs access to its storage layer, which is a NAS.
-It is encapsulated in the operation attach.bind("path",tuplecount).
-
-The code generation for the legs currently relies on a conceptual full replication of
-the database over the servers. The next version should use the attach() functionality
-or use the datacylcotron to access portions.
-
-These approaches are different from the Octopus, where the head is
-the sole control over the persistent data.
-
-[Naming]
-The legs received from the tarantula should be ensured
-not to clash with those already known. Therefore, we simply
-tag them by orginating site.
-
-[Caveats]
-Any update invalidates the request to distributed processing.
-In the same line, multi-statement SQL transactions and
-updates to global variables are ignored.
-
-Global variables are tricky, because they are part of the
-session context. To make it work, we need to be able to perform
-an upcall to that context (=dangerous).
-The solution is that any variable context should be
-passed through a relation.
-@{
-@mal
-module tarantula;
-pattern optimizer.tarantula():str
-address OPTtarantula;
-pattern optimizer.tarantula(mod:str, fcn:str):str
-address OPTtarantula
-comment "Map-execute-reduce parallelism optimizer";
-
-@h
-#ifndef _TAR_OCTOPUS_
-#define _TAR_OCTOPUS_
-#include "opt_prelude.h"
_______________________________________________
Checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list