Hello, my partner and me are working with the goal of improve the GEQO's performance, we tried with Ant Colony Optimization, but it does not improve, actually we are trying with a new variant of Genetic Algorithm, specifically Micro-GA. This algorithm finds a better solution than GEQO in less time, however the total query execution time is higher. The fitness is calculated by geqo_eval function. Does anybody know why this happens?

We attach the patch made with the changes in postgresql-9.2.0.

Regards.

diff --git a/contrib/Makefile b/contrib/Makefile
index d230451..df9ccef 100755
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -26,6 +26,7 @@ SUBDIRS = \
 		isn		\
 		lo		\
 		ltree		\
+		microg		\
 		oid2name	\
 		pageinspect	\
 		passwordcheck	\
diff --git a/contrib/microg/Makefile b/contrib/microg/Makefile
new file mode 100644
index 0000000..7597ede
--- /dev/null
+++ b/contrib/microg/Makefile
@@ -0,0 +1,25 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+#    Makefile for the genetic query optimizer module
+#
+# Copyright (c) 2015, Universidad Central Marta Abreu de Las Villas
+#
+# contrib/microg/Makefile
+#
+#-------------------------------------------------------------------------
+
+MODULE_big = microg
+OBJS =	microg_main.o
+
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/microg
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/microg/microg_main.c b/contrib/microg/microg_main.c
new file mode 100644
index 0000000..0bd896d
--- /dev/null
+++ b/contrib/microg/microg_main.c
@@ -0,0 +1,451 @@
+/*------------------------------------------------------------------------
+ *
+ * microg_main.c
+ *	  solution to the query optimization problem
+ *	  by means of a Micro Genetic Algorithm (micro-GA)
+ *
+ * Portions Copyright (c) 2014-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 2015, Universidad Central de Las Villas
+ *
+ * contrib/microg_main.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* contributed by:
+ =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
+ *  Martin Utesch				 * Institute of Automatic Control	   *
+ =							 = University of Mining and Technology =
+ *  ute...@aut.tu-freiberg.de  * Freiberg, Germany				   *
+ =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
+*   Laura Perez Triana			
+*   Centro de Estudios Informaticos
+*== Universidad Central de Las Villas =* ltri...@uclv.cu *Villa Clara, Cuba
+ =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
+*   Alejandro G. Gomez Boix			
+*   Centro de Estudios Informaticos
+*== Universidad Central de Las Villas =* b...@uclv.cu *Villa Clara, Cuba
+ */
+
+/* -- parts of this are adapted from D. Whitley's Genitor algorithm -- */
+
+#include "postgres.h"
+
+#include <math.h>
+
+#include "optimizer/geqo_misc.h"
+#include "optimizer/geqo_mutation.h"
+#include "optimizer/geqo_pool.h"
+#include "optimizer/geqo_random.h"
+#include "optimizer/geqo_selection.h"
+#include "optimizer/geqo_gene.h"
+#include "optimizer/paths.h"
+#include <sys/timeb.h>
+#include "fmgr.h"
+
+PG_MODULE_MAGIC
+;
+
+/*
+ * Configuration options
+ */
+int Geqo_effort;
+int Geqo_pool_size;
+int Geqo_generations;
+double Geqo_selection_bias;
+double Geqo_seed;
+
+join_search_hook_type join_search_hook = NULL;
+
+void _PG_init(void);
+void _PG_fini(void);
+
+
+/* new functions of microg */
+void random_init_poolMG(PlannerInfo *root, Pool *pool);
+int meanCommonEdgeInPool(Chromosome *pool, int pool_size, int number_of_rels);
+int existEdge(Gene a, Gene b, Gene* s2, int lenght);
+int commonEdge(Gene* s1, Gene* s2, int number_of_rels);
+Chromosome * localSerch2_opt(PlannerInfo *root, Gene* solution, int sizeProblem);
+RelOptInfo *microg(PlannerInfo *root, int number_of_rels, List *initial_rels);
+
+
+/* define edge recombination crossover [ERX] per default */
+#if !defined(ERX) && \
+	!defined(PMX) && \
+	!defined(CX)  && \
+	!defined(PX)  && \
+	!defined(OX1) && \
+	!defined(OX2)
+#define ERX
+#endif
+
+/*
+ * microg
+ *	  solution of the query optimization problem
+ *	  similar to a constrained Traveling Salesman Problem (TSP)
+ */
+
+RelOptInfo *
+microg(PlannerInfo *root, int number_of_rels, List *initial_rels) {
+	GeqoPrivateData
+private;
+	int generation;
+	Chromosome *momma;
+	Chromosome *daddy;
+	Chromosome *kid, *result;
+	Pool *pool;
+	int pool_size, number_generations; 
+
+       struct timeb seed;
+
+#ifdef GEQO_DEBUG
+	int status_interval;
+#endif
+
+	RelOptInfo *best_rel;
+
+#if defined(ERX)
+	Edge *edge_table; /* list of edges */
+	int edge_failures = 0;
+#endif
+#if defined(CX) || defined(PX) || defined(OX1) || defined(OX2)
+	City *city_table; /* list of cities */
+#endif
+#if defined(CX)
+	int cycle_diffs = 0;
+	int mutations = 0;
+#endif
+
+	/* set up private information */
+	root->join_search_private = (void *) &private;
+private.initial_rels = initial_rels;
+
+	/*No using Geqo_seed, the new seed is initialized with ftime function*/
+	ftime(&seed);
+	geqo_set_seed(root, seed.millitm);
+
+	/* set GA parameters */
+	pool_size = 5;
+	number_generations = 250;
+#ifdef GEQO_DEBUG
+	status_interval = 10;
+#endif
+
+	/* allocate genetic pool memory */
+	pool = alloc_pool(root, pool_size, number_of_rels);
+
+	//inicializando aleatorio solo esta primera vez al primer elemento de la pobleacion
+#ifdef GEQO_DEBUG
+	elog(DEBUG1, "GEQO selected %d pool entries, best %.2f, worst %.2f",
+			pool_size,
+#endif
+
+	/* allocate chromosome momma and daddy memory */
+	momma = alloc_chromo(root, pool->string_length);
+	daddy = alloc_chromo(root, pool->string_length);
+
+#if defined (ERX)
+#ifdef GEQO_DEBUG
+	elog(DEBUG2, "using edge recombination crossover [ERX]");
+#endif
+	/* allocate edge table memory */
+	edge_table = alloc_edge_table(root, pool->string_length);
+#elif defined(PMX)
+#ifdef GEQO_DEBUG
+	elog(DEBUG2, "using partially matched crossover [PMX]");
+#endif
+	/* allocate chromosome kid memory */
+	kid = alloc_chromo(root, pool->string_length);
+#elif defined(CX)
+#ifdef GEQO_DEBUG
+	elog(DEBUG2, "using cycle crossover [CX]");
+#endif
+	/* allocate city table memory */
+	kid = alloc_chromo(root, pool->string_length);
+	city_table = alloc_city_table(root, pool->string_length);
+#elif defined(PX)
+#ifdef GEQO_DEBUG
+	elog(DEBUG2, "using position crossover [PX]");
+#endif
+	/* allocate city table memory */
+	kid = alloc_chromo(root, pool->string_length);
+	city_table = alloc_city_table(root, pool->string_length);
+#elif defined(OX1)
+#ifdef GEQO_DEBUG
+	elog(DEBUG2, "using order crossover [OX1]");
+#endif
+	/* allocate city table memory */
+	kid = alloc_chromo(root, pool->string_length);
+	city_table = alloc_city_table(root, pool->string_length);
+#elif defined(OX2)
+#ifdef GEQO_DEBUG
+	elog(DEBUG2, "using order crossover [OX2]");
+#endif
+	/* allocate city table memory */
+	kid = alloc_chromo(root, pool->string_length);
+	city_table = alloc_city_table(root, pool->string_length);
+#endif
+
+	init_tour(root, pool->data[0].string, pool->string_length);
+	pool->data[0].worth = geqo_eval(root, pool->data[0].string,
+			pool->string_length);
+	for (generation = 0; generation < number_generations; generation++) {
+
+		ftime(&seed);
+		geqo_set_seed(root, seed.millitm);
+		/* random initialization of the pool */
+		random_init_poolMG(root, pool);
+		/* sort the pool according to cheapest path as fitness */
+		sort_pool(root, pool); /* we have to do it only one time, since all
+		 * kids replace the worst individuals in
+		 * future (-> geqo_pool.c:spread_chromo ) */
+
+		do {
+			geqo_selection(root, momma, daddy, pool, Geqo_selection_bias);
+
+#if defined (ERX)
+			/* EDGE RECOMBINATION CROSSOVER */
+			gimme_edge_table(root, momma->string, daddy->string,
+					pool->string_length, edge_table);
+
+			kid = momma;
+
+			/* are there any edge failures ? */
+			edge_failures += gimme_tour(root, edge_table, kid->string,
+					pool->string_length);
+#elif defined(PMX)
+			/* PARTIALLY MATCHED CROSSOVER */
+			pmx(root, momma->string, daddy->string, kid->string, pool->string_length);
+#elif defined(CX)
+			/* CYCLE CROSSOVER */
+			cycle_diffs = cx(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
+			/* mutate the child */
+			if (cycle_diffs == 0)
+			{
+				mutations++;
+				geqo_mutation(root, kid->string, pool->string_length);
+			}
+#elif defined(PX)
+			/* POSITION CROSSOVER */
+			px(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
+#elif defined(OX1)
+			/* ORDER CROSSOVER */
+			ox1(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
+#elif defined(OX2)
+			/* ORDER CROSSOVER */
+			ox2(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
+#endif
+
+			/* EVALUATE FITNESS */
+			kid->worth = geqo_eval(root, kid->string, pool->string_length);
+
+			/* push the kid into the wilderness of life according to its worth */
+			spread_chromo(root, kid, pool);
+
+#ifdef GEQO_DEBUG
+			if (status_interval && !(generation % status_interval))
+			print_gen(stdout, pool, generation);
+#endif
+
+		} while (meanCommonEdgeInPool(pool->data, pool_size,
+				number_of_rels) < (number_of_rels - 1) / 4);
+
+	}
+
+#if defined(ERX) && defined(GEQO_DEBUG)
+	if (edge_failures != 0)
+	elog(LOG, "[GEQO] failures: %d, average: %d",
+			edge_failures, (int) number_generations / edge_failures);
+	else
+	elog(LOG, "[GEQO] no edge failures detected");
+#endif
+
+#if defined(CX) && defined(GEQO_DEBUG)
+	if (mutations != 0)
+	elog(LOG, "[GEQO] mutations: %d, generations: %d",
+			mutations, number_generations);
+	else
+	elog(LOG, "[GEQO] no mutations processed");
+#endif
+
+#ifdef GEQO_DEBUG
+	print_pool(stdout, pool, 0, pool_size - 1);
+#endif
+
+#ifdef GEQO_DEBUG
+	elog(DEBUG1, "GEQO best is %.2f after %d generations",
+			pool->data[0].worth, number_generations);
+#endif
+
+	/*
+	 * got the cheapest query tree processed by geqo; first element of the
+	 * population indicates the best query tree
+	 */
+
+	result = localSerch2_opt(root, (Gene *) pool->data[0].string,
+			number_of_rels);
+	
+	best_rel = gimme_tree(root, result->string, pool->string_length);
+
+	/* DBG: show the query plan */
+#ifdef NOT_USED
+	print_plan(best_plan, root);
+#endif
+
+	/* ... free memory stuff */
+	free_chromo(root, momma);
+	free_chromo(root, daddy);
+
+#if defined (ERX)
+	free_edge_table(root, edge_table);
+#elif defined(PMX)
+	free_chromo(root, kid);
+#elif defined(CX)
+	free_chromo(root, kid);
+	free_city_table(root, city_table);
+#elif defined(PX)
+	free_chromo(root, kid);
+	free_city_table(root, city_table);
+#elif defined(OX1)
+	free_chromo(root, kid);
+	free_city_table(root, city_table);
+#elif defined(OX2)
+	free_chromo(root, kid);
+	free_city_table(root, city_table);
+#endif
+
+	free_pool(root, pool);
+
+	/* ... clear root pointer to our private storage */
+	root->join_search_private = NULL;
+
+	return best_rel;
+}
+
+
+void _PG_init(void) {
+
+	join_search_hook = microg;
+}
+void _PG_fini(void) {
+
+	join_search_hook = NULL;
+}
+
+void random_init_poolMG(PlannerInfo *root, Pool *pool) {
+	Chromosome *chromo = (Chromosome *) pool->data;
+	int i;
+
+	/* new micro-GA pool with best individual 
+	 * from last generation */
+	for (i = 1; i < pool->size; i++) {
+		init_tour(root, chromo[i].string, pool->string_length);
+		pool->data[i].worth = geqo_eval(root, chromo[i].string,
+				pool->string_length);
+	}
+
+}
+
+/* computing mean of common edge between all individuals */
+int meanCommonEdgeInPool(Chromosome *pool, int pool_size,
+		int number_of_rels) {
+
+	int result = 0;
+	int sum = 0, count = 0;
+	int i, j;
+
+	for (i = pool_size - 1; i >= 0; i--) {
+		for (j = i - 1; j >= 0; j--) {
+			sum += commonEdge((Gene *) pool[i].string,
+					(Gene *) pool[j].string, number_of_rels);
+			count++;
+		}
+	}
+	result = sum / count;
+	return result;
+}
+
+/* computing amount of common edges between two individuals */
+int commonEdge(Gene* s1, Gene* s2, int number_of_rels) {
+
+	int countEdge = 0;
+	int i;
+	if (existEdge(s1[0], s1[1], s2, number_of_rels) == 1)
+		countEdge++;
+	for (i = 1; i < number_of_rels - 1; i++) {
+		if (existEdge(s1[i], s1[i + 1], s2, number_of_rels) == 1)
+			countEdge++;
+	}
+	return countEdge;
+}
+
+int existEdge(Gene a, Gene b, Gene* s2, int lenght) {
+	int i;
+	if (s2[0] == a || s2[lenght - 1] == a) {
+		if ((s2[0] == a && s2[1] == b)
+				|| (s2[lenght - 1] == a && s2[lenght - 2] == b))
+			return 1;
+		else
+			return 0;
+	} else {
+		for (i = 1; i < lenght - 1; i++) {
+			if ((s2[i] == a && s2[i + 1] == b)
+					|| (s2[i] == a && s2[i - 1] == b)) {
+				return 1;
+			}
+		}
+		return 0;
+	}
+}
+
+
+/* local search algorithm to try to improve the quality of the solution */
+Chromosome * localSerch2_opt(PlannerInfo *root, Gene* solution,
+		int sizeProblem) {
+
+	Chromosome *result;
+	Gene *aux, *aux1;
+	double costAux, costAux1;
+	int flag = 1, i, j, h;
+	Gene temp;
+
+	result = alloc_chromo(root, sizeProblem);
+
+	aux = malloc(sizeof(Gene) * sizeProblem);
+	aux1 = malloc(sizeof(Gene) * sizeProblem);
+
+	for (i = 0; i < sizeProblem; i++) {
+		aux1[i] = solution[i];
+		aux[i] = solution[i];
+	}
+	costAux = geqo_eval(root, aux, sizeProblem);
+	costAux1 = costAux;
+	while (flag == 1) {
+		flag = 0;
+		for (i = 0; i < sizeProblem; i++) {
+			j = i + 2;
+			while (j != i) {
+				for (h = 0; h < sizeProblem; h++) {
+					aux1[h] = aux[h];
+				}
+				temp = aux1[(i + 1) % sizeProblem];
+				aux1[(i + 1) % sizeProblem] = aux1[j % sizeProblem];
+				aux1[j % sizeProblem] = temp;
+
+				costAux1 = geqo_eval(root, aux1, sizeProblem);
+				if (costAux > costAux1) {
+					aux = aux1;
+					costAux = costAux1;
+					flag = 1;
+				}
+				j = (j + 1) % sizeProblem;
+			}
+		}
+	}
+
+	result->string = aux;
+	result->worth = costAux;
+	return result;
+}
+
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to