Hi everybody. First of all I have to thank you for your wonderful job! PostgreSQL rocks!
I am writing you because I am interested in understanding some specifics related to PostgreSQL internals. More precisely, I am investigating the running time of the different function implementation approaches, which is part of my BSc thesis. Here is the thing: I have implemented as a proof of concept three functions, which are a) text[] arraypoc(text, int); this returns an array of the form 'text,int' where the text is the copy of the text passed as parameter and int is a simple counter. The number of tuples is specified by the integer parameter. I have estimated the running time in this manner: SELECT * FROM unnest(arraypoc('abcdefghilmnopqrstuvz',1000000)); The estimated running time is after 10 executions is: (791.571 + 797.163 + 677.331 + 686.674 + 686.691 + 686.438 + 797.910 + 795.955 + 793.459 + 794.110)/10 = 750.7302 b) TABLE(text,int) srfpoc(text, int); is similar as the previous one but this is a set returning function, which returns a table of a similar shape as in the previous case. Instead of a string, I return a text and an integer. Again text is just the copy of the parameter and int is a counter. I have estimated the running time in this manner: SELECT * FROM srfpoc('abcdefghilmnopqrstuvz',1000000); The estimated running time is after 10 executions is: (665.016 + 778.100 + 640.605 + 787.102 + 785.501 + 791.307 + 784.780 + 793.222 + 794.624 + 790.357)/10 = 761.0614 c) TABLE(text,int) srfmatpoc(text, int); this does the same as the previous one, but in this case I wrote a SRF_Materialized using the SPI interface. I have estimated the running time in this manner: SELECT * FROM srfmatpoc('abcdefghilmnopqrstuvz',1000000); The estimated running time is after 10 executions is: (747.095 + 703.894 + 762.310 + 763.299 + 764.582 + 760.991 + 763.427 + 764.033 + 731.292 + 770.895)/10 = 753.1818 I have executed all the tests on the same server. The functions are compiled using the -O3 compilation parameter. I am using PostgreSQL 9.1.3. I would have expected the version a) to be slower than b) and c) but it turns out that it is actually the fastest (the difference is not so big anyway). What am I doing wrong? What can I do to improve the functions? Have I misunderstood something? Attached you find the code of all three functions. Thanks a lot! Armando
#include "postgres.h" #include <string.h> #include "utils/array.h" #include "utils/builtins.h" /* text_to_cstring */ #include "catalog/pg_type.h" #include "utils/lsyscache.h" #include "fmgr.h" #ifdef PG_MODULE_MAGIC PG_MODULE_MAGIC; #endif PG_FUNCTION_INFO_V1(arraypoc); Datum arraypoc(PG_FUNCTION_ARGS) { ArrayType *result; Datum *result_data = NULL; int16 o_typlen; bool o_typbyval; char o_typalign; text *t, *cpyt; uint32 c; int i, t_len; t = (text *)PG_GETARG_TEXT_PP(0); c = (uint32)PG_GETARG_INT32(1); if(c>0) result_data = (Datum *)palloc(sizeof(Datum) * c); for(i=0; i<c; ++i) { t_len = VARSIZE_ANY_EXHDR(t); cpyt = (text *)palloc(VARHDRSZ + t_len + 1 + 16); SET_VARSIZE(cpyt, VARHDRSZ + t_len + 1 + 16); memcpy((void *)VARDATA(cpyt), (void *)VARDATA(t), t_len); memcpy((void *)VARDATA(cpyt) + t_len, ",", 1); snprintf((void *)VARDATA(cpyt) + t_len + 1, 16, "%d", i); result_data[i] = PointerGetDatum(cpyt); } if(result_data == NULL || c <= 0) { PG_RETURN_NULL(); } else { get_typlenbyvalalign(TEXTOID, &o_typlen, &o_typbyval, &o_typalign); result = construct_array(result_data, c, TEXTOID, o_typlen, o_typbyval, o_typalign); PG_RETURN_ARRAYTYPE_P(result); } }
#include "postgres.h" #include <stdio.h> /* snprintf */ #include <stdlib.h> /* malloc, free */ #include <string.h> /* strncpy, strlen */ #include <sys/types.h> /* for uint32 and size_t*/ #include "catalog/pg_type.h" #include "utils/builtins.h" /* text_to_cstring */ #include "funcapi.h" #include "fmgr.h" #ifdef PG_MODULE_MAGIC PG_MODULE_MAGIC; #endif PG_FUNCTION_INFO_V1(srfpoc); Datum srfpoc(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; TupleDesc tupdesc; text *t; int max_calls, call_cntr; t = (text *)PG_GETARG_TEXT_PP(0); if (SRF_IS_FIRSTCALL()) { MemoryContext oldcontext; funcctx = SRF_FIRSTCALL_INIT(); oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); funcctx->max_calls = (uint32)PG_GETARG_INT32(1); tupdesc = CreateTemplateTupleDesc(2, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "t", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "count", INT4OID, -1, 0); funcctx->user_fctx = BlessTupleDesc(tupdesc); MemoryContextSwitchTo(oldcontext); } funcctx = SRF_PERCALL_SETUP(); max_calls = funcctx->max_calls; call_cntr = funcctx->call_cntr; tupdesc = funcctx->user_fctx; if(call_cntr < max_calls && t != NULL) { bool nulls[2]; Datum values[2]; HeapTuple tuple; text *cpyt; cpyt = (text *)palloc(VARHDRSZ + VARSIZE_ANY_EXHDR(t)); SET_VARSIZE(cpyt, VARHDRSZ + VARSIZE_ANY_EXHDR(t)); memcpy((void *)VARDATA(cpyt), (void *)VARDATA(t), VARSIZE_ANY_EXHDR(t)); values[0] = PointerGetDatum(cpyt); values[1] = Int32GetDatum(call_cntr); nulls[0] = false; nulls[1] = false; tuple = heap_form_tuple(tupdesc, values, nulls); SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); } else { SRF_RETURN_DONE(funcctx); } }
#include "postgres.h" #include "catalog/pg_type.h" #include "fmgr.h" #include "funcapi.h" #include "executor/spi.h" #include "lib/stringinfo.h" #include "miscadmin.h" #include "utils/builtins.h" #include "utils/guc.h" #include "utils/lsyscache.h" #ifdef PG_MODULE_MAGIC PG_MODULE_MAGIC; #endif PG_FUNCTION_INFO_V1(srfmatpoc); Datum srfmatpoc(PG_FUNCTION_ARGS) { ReturnSetInfo *rsinfo; Tuplestorestate *tupstore; TupleDesc tupdesc; MemoryContext per_query_ctx; MemoryContext oldcontext; text *t, *cpyt; int ret, c, i; rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; if ((ret = SPI_connect()) < 0) elog(ERROR, "srfmatpoc: SPI_connect returned %d", ret); tupdesc = CreateTemplateTupleDesc(2, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "t", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "count", INT4OID, -1, 0); oldcontext = MemoryContextSwitchTo(per_query_ctx); tupdesc = CreateTupleDescCopy(tupdesc); tupstore = tuplestore_begin_heap(rsinfo->allowedModes & SFRM_Materialize_Random, false, work_mem); MemoryContextSwitchTo(oldcontext); t = (text *)PG_GETARG_TEXT_PP(0); c = (uint32)PG_GETARG_INT32(1); for(i=0; i<c; ++i) { HeapTuple tuple; Datum values[2]; bool nulls[2]; cpyt = (text *)palloc(VARHDRSZ + VARSIZE_ANY_EXHDR(t)); SET_VARSIZE(cpyt, VARHDRSZ + VARSIZE_ANY_EXHDR(t)); memcpy((void *)VARDATA(cpyt), (void *)VARDATA(t), VARSIZE_ANY_EXHDR(t)); values[0] = PointerGetDatum(cpyt); values[1] = Int32GetDatum(i); nulls[0] = false; nulls[1] = false; tuple = heap_form_tuple(tupdesc, values, nulls); tuplestore_puttuple(tupstore, tuple); heap_freetuple(tuple); } rsinfo->returnMode = SFRM_Materialize; rsinfo->setResult = tupstore; rsinfo->setDesc = tupdesc; SPI_finish(); return (Datum) 0; }
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers