Changeset: f5c801d2bbf9 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f5c801d2bbf9
Modified Files:
        MonetDB5/conf/monetdb5.conf.in
        MonetDB5/src/modules/mal/mal_init.mx
        MonetDB5/src/optimizer/Makefile.ag
        MonetDB5/src/optimizer/opt_prelude.mx
        MonetDB5/src/optimizer/opt_support.mx
        MonetDB5/src/optimizer/opt_tarantula.mx
        MonetDB5/src/scheduler/Makefile.ag
        MonetDB5/src/scheduler/run_tarantula.mx
Branch: Oct2010
Log Message:

Removed tarantula optimizer from Oct2010 branch.


diffs (truncated from 1607 to 300 lines):

diff -r be79866c5b58 -r f5c801d2bbf9 MonetDB5/conf/monetdb5.conf.in
--- a/MonetDB5/conf/monetdb5.conf.in    Fri Sep 03 13:32:15 2010 +0200
+++ b/MonetDB5/conf/monetdb5.conf.in    Fri Sep 03 13:57:31 2010 +0200
@@ -240,7 +240,6 @@
 # The Octopus pipeline for distributed processing (Merovingian enabled
 # platforms only)
 
octopus_pipe=inline,remap,evaluate,costModel,coercions,emptySet,aliases,mitosis,mergetable,deadcode,constants,commonTerms,joinPath,octopus,deadcode,reduce,dataflow,history,multiplex,garbageCollector
-tarantula_pipe=inline,remap,evaluate,costModel,coercions,emptySet,aliases,mitosis,mergetable,deadcode,constants,commonTerms,joinPath,tarantula,deadcode,reduce,dataflow,history,multiplex,garbageCollector
 #
 # The MapReduce pipeline for a different flavour of distributed
 # processing on Merovingian enabled platforms
@@ -264,4 +263,3 @@
 # The opt_debug bitvector controls their output. It can be set to a pipeline or a comma separated
 # list of optimizers you would like to trace. It is a server wide property and can not be set 
 # dynamically, as it is intended for internal use.
-#opt_debug=tarantula
diff -r be79866c5b58 -r f5c801d2bbf9 MonetDB5/src/modules/mal/mal_init.mx
--- a/MonetDB5/src/modules/mal/mal_init.mx      Fri Sep 03 13:32:15 2010 +0200
+++ b/MonetDB5/src/modules/mal/mal_init.mx      Fri Sep 03 13:57:31 2010 +0200
@@ -226,7 +226,6 @@
 include opt_strengthReduction;
 include opt_statistics;
 include opt_trace;
-include opt_tarantula;
 include opt_mapreduce;
 
 include chopper;
@@ -249,7 +248,6 @@
 include run_isolate;
 include run_memo;
 include run_octopus;
-include run_tarantula;
 
 @-
 Cracker schemes
diff -r be79866c5b58 -r f5c801d2bbf9 MonetDB5/src/optimizer/Makefile.ag
--- a/MonetDB5/src/optimizer/Makefile.ag        Fri Sep 03 13:32:15 2010 +0200
+++ b/MonetDB5/src/optimizer/Makefile.ag        Fri Sep 03 13:57:31 2010 +0200
@@ -37,7 +37,7 @@
                opt_accessmode.mx opt_joinpath.mx opt_heuristics.mx opt_remap.mx \
                opt_statistics.mx opt_trace.mx  opt_recycler.mx opt_dataflow.mx \
                opt_cluster.mx  opt_replication.mx opt_dictionary.mx opt_mapreduce.mx \
-               opt_mitosis.mx opt_octopus.mx opt_tarantula.mx  opt_history.mx opt_selcrack.mx opt_sidcrack.mx
+               opt_mitosis.mx opt_octopus.mx opt_history.mx opt_selcrack.mx opt_sidcrack.mx
 
        #SCRIPTS = mal
        LIBS = ../modules/atoms/lib_mtime ../mal/libmonetdb5 \
@@ -63,7 +63,7 @@
                opt_accessmode.mx opt_joinpath.mx opt_heuristics.mx opt_remap.mx \
                opt_statistics.mx opt_trace.mx  opt_recycler.mx opt_dataflow.mx \
                opt_cluster.mx  opt_replication.mx opt_dictionary.mx opt_mapreduce.mx \
-               opt_mitosis.mx opt_octopus.mx opt_tarantula.mx  opt_history.mx opt_selcrack.mx opt_sidcrack.mx
+               opt_mitosis.mx opt_octopus.mx opt_history.mx opt_selcrack.mx opt_sidcrack.mx
 
        HEADERS = h 
 }
@@ -84,7 +84,7 @@
                opt_accessmode.mx opt_joinpath.mx opt_heuristics.mx opt_remap.mx \
                opt_statistics.mx opt_trace.mx  opt_recycler.mx opt_dataflow.mx \
                opt_cluster.mx  opt_replication.mx opt_dictionary.mx opt_mapreduce.mx \
-               opt_mitosis.mx opt_octopus.mx opt_tarantula.mx  opt_history.mx opt_selcrack.mx opt_sidcrack.mx
+               opt_mitosis.mx opt_octopus.mx opt_history.mx opt_selcrack.mx opt_sidcrack.mx
 }
 
 EXTRA_DIST_DIR = Tests
diff -r be79866c5b58 -r f5c801d2bbf9 MonetDB5/src/optimizer/opt_prelude.mx
--- a/MonetDB5/src/optimizer/opt_prelude.mx     Fri Sep 03 13:32:15 2010 +0200
+++ b/MonetDB5/src/optimizer/opt_prelude.mx     Fri Sep 03 13:57:31 2010 +0200
@@ -187,7 +187,6 @@
 opt_export  str topn_minRef;
 opt_export  str topn_maxRef;
 opt_export  str tuniqueRef;
-opt_export  str tarantulaRef;
 opt_export  str not_uniqueRef;
 opt_export  str unionRef;
 opt_export  str unpackRef;
@@ -389,7 +388,6 @@
 str sumRef;
 str sunionRef;
 str takeRef;
-str tarantulaRef;
 str topn_minRef;
 str topn_maxRef;
 str tuniqueRef;
@@ -590,7 +588,6 @@
                sumRef = putName("sum",3);
                sunionRef= putName("sunion",6);
                takeRef= putName("take",5);
-               tarantulaRef = putName("tarantula",9);
                topn_minRef= putName("topn_min",8);
                topn_maxRef= putName("topn_max",8);
                tuniqueRef = putName("tunique",7);
diff -r be79866c5b58 -r f5c801d2bbf9 MonetDB5/src/optimizer/opt_support.mx
--- a/MonetDB5/src/optimizer/opt_support.mx     Fri Sep 03 13:32:15 2010 +0200
+++ b/MonetDB5/src/optimizer/opt_support.mx     Fri Sep 03 13:57:31 2010 +0200
@@ -568,7 +568,6 @@
 #define DEBUG_OPT_SIDCRACK                     47
 #define DEBUG_OPT_TRACE                                48
 #define DEBUG_OPT_HEURISTIC                    49
-#define DEBUG_OPT_TARANTULA                    50
 
 #define DEBUG_OPT(X) ((lng) 1 << (X))
 opt_export lng optDebug;
@@ -732,7 +731,6 @@
 {"singleton",  0,      0,      0,      DEBUG_OPT_SINGLETON},
 {"strengthreduction",  0,      0,      0,      DEBUG_OPT_STRENGTHREDUCTION},
 {"trace",              0,      0,      0,      DEBUG_OPT_TRACE},
-{"tarantula",  0,      0,      0,      DEBUG_OPT_TARANTULA},
 { 0,   0,      0,      0,      0}
 };
 
diff -r be79866c5b58 -r f5c801d2bbf9 MonetDB5/src/optimizer/opt_tarantula.mx
--- a/MonetDB5/src/optimizer/opt_tarantula.mx   Fri Sep 03 13:32:15 2010 +0200
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,1127 +0,0 @@
-@/
-The contents of this file are subject to the MonetDB Public License
-Version 1.1 (the "License"); you may not use this file except in
-compliance with the License. You may obtain a copy of the License at
-http://monetdb.cwi.nl/Legal/MonetDBLicense-1.1.html
-
-Software distributed under the License is distributed on an "AS IS"
-basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
-License for the specific language governing rights and limitations
-under the License.
-
-The Original Code is the MonetDB Database System.
-
-The Initial Developer of the Original Code is CWI.
-Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
-Copyright August 2008-2010 MonetDB B.V.
-All Rights Reserved.
-@
-
-@f opt_tarantula
-@a M. Kersten
-@- Multileg creatures
-Query execution can be improved significantly using distributed processing.
-Traditionally, this encompasses fragmentation and allocation of the base
-tables over multiple sites and query plans that include on the fly transport
-of intermediate results.
-
-Breaking the database into pieces itself is a well-studied area.
-Most approaches consider the workload and search for a good split
-of the base tables, such that the workload performance improves.
-
-The Tarantula optimizer, like the Octopus optimizer, use the
-output of the mitosis+mergetable optimizer and produces the
-actual plans for parallel execution.
-The tarantula untangles the query plan into a controlling head
-function and a series of subplans, one for each leg to execute concurrently.
-
-The target for breaking the plan are the blocking operations,
-in particular mat.pack(). The flow graph leading to the
-the pack arguments are extracted from the query plan and
-each subgraph is cast into an independent plan. 
-Since the query plan is a DAG, it is perfectly possible that
-a portion being extracted is shared amongst all legs.
-The naive extraction then leads to a re-calculation of 
-shared intermediates in each leg.
-
-The subplan produces the argument to the blocking operator, whose
-result will be assembled in the head. It is also perfectly possible
-that variables assigned a value are used later on in the query graph.
-These variables are identified and one leg becomes responsible to
-return it also to the head to be used later on.
-
-The orginal pack operation is replaced by a call to a function
-to orchestrate the distributed processing and return the final
-result. Then the next pack operation is searched and its
-subgraph is derived. Again, it may share portions produced
-in the first pack subgraph.
-
-A potential more optimal scheme would be to detect each such case and
-turn it into a splitting point as well. This can be detected by
-looking for the last assignment and multiple use cases. [VARIANT TODO]
-
-The allocation of a subplan to leg depends on a bidding scheme. 
-Bidding can not depend on BAT arguments, because that would cause 
-significant communication overhead. Scalar values could be used and
-would function well in terms of using the recycler to get involved into
-precise bidding. 
-
-A snippet of an tarantula plan with two legs is shown.
-The main part of the query becomes a three step procedure of
-1) remote registration of subplans, 2) obtaining bids and schedule design
-and 3) execute the subplans.
-Each plan does not contain duplicate node ids.
-@verbatim
-function reg_qry_0():int;
-       tarantula.register(0,"qry_0","qry_1");
-       tarantula.register(1,"qry_0","qry_1");
-       return 0;
-end reg_qry;
-
-function bid_qry_0(hdl:int)(leg0:int,leg1:int,leg2:int,leg3:int);
-       (_4,_5) := tarantula.getBid(0,"qry_0","qry_1");
-       (_6,_7) := tarantula.getBid(1,"qry_0","qry_1");
-    return (leg0,leg1,leg2,leg3) := scheduler.schedule_0(_4,_5,_6,_7);
-end bid_qry;
-
-function run_qry(node_0:int,node_1:int):bat[:oid,:int]
-    _87 := tarantula.execute(node_0,"leg0");
-    _88 := tarantula.execute(node_1,"leg1");
-    _15 := mat.pack(_87,_88);
-       return run_qry := _15;
-end run_qry;
-
-function user.qry():void;
-       _3 := reg_qry_0();
-       (_4,_5,_6,_7):= bid_qry_0(_3);
-       _15 := tarantula.run_qry(_4,_5,_6,_7);
-    _16 := sql.resultSet(1,1,_15);
-    sql.rsColumn(_16,"sys.squida","bid","int",32,0,_15);
-    _21 := io.stdout();
-    sql.exportResult(_21,_16);
-end qry;
-@end verbatim
-The nodes addressed by the tarentula are indices into a global catalog.
-The number of subplans is derived by the Mitosis as the number of pieces to consider.
-The tarantula may register the subplans to more nodes than pieces.
-
-@verbatim
-function tarantula.exec_qry_0(node:str,fcn:str):bat[:oid,:int];
-       conn:= remote.connect(node,"monetdb","monetdb");
-       r:= remote.exec(conn,"tarantula",fcn);
-       b:bat[:oid,:int]:= remote.get(conn,r);
-       return b;
-end exec_qry;
-@end verbatim
-
-@verbatim
-function tarantula.qry_0(version:int):bat[:oid,:int];
-    _53:bat[:oid,:int] := attach.bind("file://export/scratch/mk/dbfarm/demo/bat/01/001");
-       _54 := algebra.slice(_53,0@0,2@0);
-    _63:bat[:oid,:int] := attach.bind("file://export/scratch/mk/dbfarm/demo/bat/01/002");
-       _64 := algebra.slice(_63,0@0,2@0);
-    _72 := algebra.kdifference(_54,_64);
-    _78 := algebra.kunion(_72,_64);
-    _13:bat[:oid,:oid]  := attach.bind("file://export/scratch/mk/dbfarm/demo/bat/01/003");
-    _14 := bat.reverse(_13);
-    _85 := algebra.kdifference(_78,_14);
-       return qry_0 := _85;
-end tarantula.qry_0;
-
-function tarantula.qry_1(version:int):bat[:oid,:int];
-       ... use second slice ...
-end tarantula.qry_1;
-
-@end verbatim
-
-[Stability]
-We assume that during a session, workers once started will remain alive.
-No fault tolerance techniques are included.
-
-[Recycling]
-The legs contain registered functions and possible partial results in its recycler pool.
-In the first implementation we assume a read-only database, where all workers are
-restarted when changes to the underlying database affect the recycler pool.
-Alternative, the head can clear the recycler pool explicitly upon such state change.
-
-[Shared disk]
-For a leg to work it needs access to its storage layer, which is a NAS.
-It is encapsulated in the operation attach.bind("path",tuplecount).
-
-The code generation for the legs currently relies on a conceptual full replication of
-the database over the servers. The next version should use the attach() functionality
-or use the datacylcotron to access portions.
-
-These approaches are different from the Octopus, where the head is
-the sole control over the persistent data. 
-
-[Naming]
-The legs received from the tarantula should be ensured
-not to clash with those already known. Therefore, we simply
-tag them by orginating site.
-
-[Caveats]
-Any update invalidates the request to distributed processing.
-In the same line, multi-statement SQL transactions and
-updates to global variables are ignored. 
-
-Global variables are tricky, because they are part of the
-session context. To make it work, we need to be able to perform
-an upcall to that context (=dangerous).
-The solution is that any variable context should be
-passed through a relation.
-@{
-@mal
-module tarantula; 
-pattern optimizer.tarantula():str
-address OPTtarantula;
-pattern optimizer.tarantula(mod:str, fcn:str):str
-address OPTtarantula
-comment "Map-execute-reduce parallelism optimizer";
-
-@h
-#ifndef _TAR_OCTOPUS_
-#define _TAR_OCTOPUS_
-#include "opt_prelude.h"
_______________________________________________
Checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to