Hi everybody.

First of all I have to thank you for your wonderful job! PostgreSQL rocks!

I am writing you because I am interested in understanding some specifics related
to PostgreSQL internals. More precisely, I am investigating the running time
of the different function implementation approaches, which is part of my BSc
thesis.

Here is the thing: I have implemented as a proof of concept three functions,
which are
a) text[] arraypoc(text, int); this returns an array of the form 'text,int'
   where the text is the copy of the text passed as parameter and int is a
   simple counter. The number of tuples is specified by the integer parameter.
   I have estimated the running time in this manner:
   SELECT *
     FROM unnest(arraypoc('abcdefghilmnopqrstuvz',1000000));
   The estimated running time is after 10 executions is:
   (791.571 + 797.163 + 677.331 + 686.674 + 686.691 + 686.438 +
    797.910 + 795.955 + 793.459 + 794.110)/10                         = 750.7302

b) TABLE(text,int) srfpoc(text, int); is similar as the previous one but this
   is a set returning function, which returns a table of a similar shape as in
   the previous case. Instead of a string, I return a text and an integer. Again
   text is just the copy of the parameter and int is a counter.
   I have estimated the running time in this manner:
   SELECT *
     FROM srfpoc('abcdefghilmnopqrstuvz',1000000);
   The estimated running time is after 10 executions is: 
   (665.016 + 778.100 + 640.605 + 787.102 + 785.501 + 791.307 +
    784.780 + 793.222 + 794.624 + 790.357)/10                         = 761.0614

c) TABLE(text,int) srfmatpoc(text, int); this does the same as the previous one,
   but in this case I wrote a SRF_Materialized using the SPI interface.  I have
   estimated the running time in this manner:
   SELECT *
     FROM srfmatpoc('abcdefghilmnopqrstuvz',1000000);
   The estimated running time is after 10 executions is: 
   (747.095 + 703.894 + 762.310 + 763.299 + 764.582 + 760.991 + 763.427 +
    764.033 + 731.292 + 770.895)/10                                   = 753.1818

I have executed all the tests on the same server. The functions are compiled
using the -O3 compilation parameter. I am using PostgreSQL 9.1.3.

I would have expected the version a) to be slower than b) and c) but it turns
out that it is actually the fastest (the difference is not so big anyway). What
am I doing wrong? What can I do to improve the functions? Have I misunderstood
something?

Attached you find the code of all three functions.

Thanks a lot!
Armando
#include "postgres.h"
#include <string.h>

#include "utils/array.h"
#include "utils/builtins.h"	/* text_to_cstring */
#include "catalog/pg_type.h"
#include "utils/lsyscache.h"
#include "fmgr.h"

#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif

PG_FUNCTION_INFO_V1(arraypoc);

Datum arraypoc(PG_FUNCTION_ARGS) {
  ArrayType   *result;
  Datum       *result_data = NULL;
  int16        o_typlen;
  bool         o_typbyval;
  char         o_typalign;
  text        *t,
              *cpyt;
  uint32       c;
  int         i, t_len;

  t = (text *)PG_GETARG_TEXT_PP(0);
  c = (uint32)PG_GETARG_INT32(1);

  if(c>0) result_data = (Datum *)palloc(sizeof(Datum) * c);

  for(i=0; i<c; ++i) {
    t_len = VARSIZE_ANY_EXHDR(t);

    cpyt = (text *)palloc(VARHDRSZ + t_len + 1 + 16);
    SET_VARSIZE(cpyt, VARHDRSZ + t_len + 1 + 16);
    memcpy((void *)VARDATA(cpyt), (void *)VARDATA(t), t_len);
    memcpy((void *)VARDATA(cpyt) + t_len, ",", 1);
    snprintf((void *)VARDATA(cpyt) + t_len + 1, 16, "%d", i);

    result_data[i] = PointerGetDatum(cpyt);
  }

  if(result_data == NULL || c <= 0) {
    PG_RETURN_NULL();

  } else {
    get_typlenbyvalalign(TEXTOID, &o_typlen, &o_typbyval, &o_typalign);
    result = construct_array(result_data, c, TEXTOID, o_typlen, o_typbyval,
                             o_typalign);
    PG_RETURN_ARRAYTYPE_P(result);
  }
}
#include "postgres.h"

#include <stdio.h>    /* snprintf */
#include <stdlib.h>    /* malloc, free */
#include <string.h>    /* strncpy, strlen */
#include <sys/types.h>  /* for uint32 and size_t*/

#include "catalog/pg_type.h"
#include "utils/builtins.h"  /* text_to_cstring */
#include "funcapi.h"
#include "fmgr.h"

#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif

PG_FUNCTION_INFO_V1(srfpoc);

Datum srfpoc(PG_FUNCTION_ARGS) {
  FuncCallContext *funcctx;
  TupleDesc        tupdesc;
  text            *t;
  int              max_calls,
                   call_cntr;

  t = (text *)PG_GETARG_TEXT_PP(0);
  
  if (SRF_IS_FIRSTCALL()) {
    MemoryContext    oldcontext;

    funcctx = SRF_FIRSTCALL_INIT();
    oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

    funcctx->max_calls = (uint32)PG_GETARG_INT32(1);

    tupdesc = CreateTemplateTupleDesc(2, false);
    TupleDescInitEntry(tupdesc, (AttrNumber) 1, "t", TEXTOID, -1, 0);
    TupleDescInitEntry(tupdesc, (AttrNumber) 2, "count", INT4OID, -1, 0);

    funcctx->user_fctx = BlessTupleDesc(tupdesc);

    MemoryContextSwitchTo(oldcontext);
  }

  funcctx = SRF_PERCALL_SETUP();
  max_calls = funcctx->max_calls;
  call_cntr = funcctx->call_cntr;
  tupdesc = funcctx->user_fctx;

  if(call_cntr < max_calls && t != NULL) {
    bool         nulls[2];
    Datum        values[2];
    HeapTuple    tuple;
    text        *cpyt;

    cpyt = (text *)palloc(VARHDRSZ + VARSIZE_ANY_EXHDR(t));
    SET_VARSIZE(cpyt, VARHDRSZ + VARSIZE_ANY_EXHDR(t));
    memcpy((void *)VARDATA(cpyt), (void *)VARDATA(t), VARSIZE_ANY_EXHDR(t));

    values[0] = PointerGetDatum(cpyt);
    values[1] = Int32GetDatum(call_cntr);
    nulls[0] = false;
    nulls[1] = false;

    tuple = heap_form_tuple(tupdesc, values, nulls);
    SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));

  } else {
    SRF_RETURN_DONE(funcctx);
  }
}
#include "postgres.h"

#include "catalog/pg_type.h"
#include "fmgr.h"
#include "funcapi.h"
#include "executor/spi.h"
#include "lib/stringinfo.h"
#include "miscadmin.h"
#include "utils/builtins.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"

#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif

PG_FUNCTION_INFO_V1(srfmatpoc);

Datum srfmatpoc(PG_FUNCTION_ARGS) {
  ReturnSetInfo   *rsinfo;
  Tuplestorestate *tupstore;
  TupleDesc        tupdesc;
  MemoryContext    per_query_ctx;
  MemoryContext    oldcontext;
  text            *t,
                  *cpyt;
  int              ret,
                   c,
                   i;

  rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
  per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;

  if ((ret = SPI_connect()) < 0)
    elog(ERROR, "srfmatpoc: SPI_connect returned %d", ret);

  tupdesc = CreateTemplateTupleDesc(2, false);
  TupleDescInitEntry(tupdesc, (AttrNumber) 1, "t", TEXTOID, -1, 0);
  TupleDescInitEntry(tupdesc, (AttrNumber) 2, "count", INT4OID, -1, 0);

  oldcontext = MemoryContextSwitchTo(per_query_ctx);
  tupdesc = CreateTupleDescCopy(tupdesc);
  tupstore =
    tuplestore_begin_heap(rsinfo->allowedModes & SFRM_Materialize_Random,
                          false, work_mem);
  MemoryContextSwitchTo(oldcontext);

  t = (text *)PG_GETARG_TEXT_PP(0);
  c = (uint32)PG_GETARG_INT32(1);

  for(i=0; i<c; ++i) {
    HeapTuple  tuple;
    Datum      values[2];
    bool       nulls[2];

    cpyt = (text *)palloc(VARHDRSZ + VARSIZE_ANY_EXHDR(t));
    SET_VARSIZE(cpyt, VARHDRSZ + VARSIZE_ANY_EXHDR(t));
    memcpy((void *)VARDATA(cpyt), (void *)VARDATA(t), VARSIZE_ANY_EXHDR(t));

    values[0] = PointerGetDatum(cpyt);
    values[1] = Int32GetDatum(i);
    nulls[0] = false;
    nulls[1] = false;

    tuple = heap_form_tuple(tupdesc, values, nulls);
    tuplestore_puttuple(tupstore, tuple);
    heap_freetuple(tuple);
  }

  rsinfo->returnMode = SFRM_Materialize;
  rsinfo->setResult = tupstore;
  rsinfo->setDesc = tupdesc;

  SPI_finish();

  return (Datum) 0;
}
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to