Changeset: 22cb9b53fd93 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=22cb9b53fd93
Modified Files:
        MonetDB5/src/mal/mal_errors.mx
        MonetDB5/src/mal/mal_linker.mx
        sql/src/backends/monet5/sql_scenario.mx
        sql/src/backends/monet5/vaults/mseed.mx
        sql/src/backends/monet5/vaults/mseed.sql
        sql/src/backends/monet5/vaults/vault.mx
Branch: default
Log Message:

Mseed loading experiments
Performance experimentation on eir on 17 dec 2010.
The code base uses the SQLstatementIntern to populate both the catalog and the 
mseed files.
Experiment 0 concerns loading the mseedN file using a sequence of inserts. This 
involves
the complete code path from generating SQL statement, parsing, optimization and 
execution.
No query caching is enabled. The first record from
ftp://www.orfeus-eu.org/pub/data/POND/1990/002/MN_BNI__BHN.1990.002.20.21.32.mseed
with 3768 events was loaded in 10.770 msec

The second experiment produced a copy file, separately loaded into MonetDB using
copy 400000 records into mseed2 from '/tmp/input/' delimiters ',','\n';
this track leads to a load time of 326ms for the first record.
The first file with 26 records and 97592 tuples was loaded this way within 8710 
msec.
This approach still uses the SQL logger. The expected speedup for direct update 
would
be something like a factor 3.

The third experiment produced the single binary BAT images followed by
copy into mseed2 from ('/tmp/time','/tmp/data');
Attach procedure for complete file ran in about 232 ms


diffs (truncated from 406 to 300 lines):

diff -r c509dd829627 -r 22cb9b53fd93 MonetDB5/src/mal/mal_errors.mx
--- a/MonetDB5/src/mal/mal_errors.mx    Fri Dec 17 16:31:36 2010 +0100
+++ b/MonetDB5/src/mal/mal_errors.mx    Fri Dec 17 16:58:51 2010 +0100
@@ -88,6 +88,7 @@
 #define RUNTIME_FILE_NOT_FOUND "File not found"
 #define RUNTIME_UNLINK "File could not be unlinked"
 #define RUNTIME_DIR_ERROR "Unable to open directory"
+#define RUNTIME_CREATE_ERROR "Unable to create file/directory"
 #define RUNTIME_STREAM_FAILED "Could not create stream"
 #define RUNTIME_STREAM_WRITE "Could not write to stream"
 #define RUNTIME_STREAM_INPUT "Could not read from stream"
diff -r c509dd829627 -r 22cb9b53fd93 MonetDB5/src/mal/mal_linker.mx
--- a/MonetDB5/src/mal/mal_linker.mx    Fri Dec 17 16:31:36 2010 +0100
+++ b/MonetDB5/src/mal/mal_linker.mx    Fri Dec 17 16:58:51 2010 +0100
@@ -92,7 +92,7 @@
 @c
 #include "mal_config.h"
 #include "mal_linker.h"
-#include "mal_function.h"      /* for throwException() */
+#include "mal_function.h"      /* for throw() */
 #include "mal_import.h"                /* for slash_2_dir_sep() */
 
 #include <sys/types.h> /* opendir */
diff -r c509dd829627 -r 22cb9b53fd93 sql/src/backends/monet5/sql_scenario.mx
--- a/sql/src/backends/monet5/sql_scenario.mx   Fri Dec 17 16:31:36 2010 +0100
+++ b/sql/src/backends/monet5/sql_scenario.mx   Fri Dec 17 16:58:51 2010 +0100
@@ -597,6 +597,10 @@
 }
 
 /* #define _SQL_COMPILE */
+...@-
+BEWARE, the SQLstatementIntern turns off the autocommit mode.
+It is contrary to normal behavior.
+...@c
 str
 SQLstatementIntern(Client c, str *expr, str nme, int execute, bit output)
 {
diff -r c509dd829627 -r 22cb9b53fd93 sql/src/backends/monet5/vaults/mseed.mx
--- a/sql/src/backends/monet5/vaults/mseed.mx   Fri Dec 17 16:31:36 2010 +0100
+++ b/sql/src/backends/monet5/vaults/mseed.mx   Fri Dec 17 16:58:51 2010 +0100
@@ -35,7 +35,6 @@
 -- all records in the mseed files correspond to a row in the catalog
 CREATE TABLE mseedCatalog (
 mseed                  int,                    -- Vault file id
-chunk                  varchar(255),   -- SQL volumn storage container name
 seqno                  int,                    -- SEED record sequence number, 
should be between 0 and 999999
                 PRIMARY KEY (mseed,seqno),
 dataquality    char,                   -- Data record indicator, should be 
'D=data unknown qual', 
@@ -54,19 +53,14 @@
 
 -- this function inserts the mseed record information into the catalog
 -- errors are returned for off-line analysis.
-CREATE FUNCTION mseedImport(vid int, source string, target string)
-       RETURNS string
+CREATE PROCEDURE mseedImport(vid int, source string, target string)
 EXTERNAL NAME mseed.import;
 
--- mseed data volumns may appear in different formats
--- we try to postpone them, assuming the optimizer can guide JIT.
---CREATE TABLE chunkname (
+-- The records are collected in SQL tables of the following structure
+-- The are ordered on timestamp
+--CREATE TABLE chunk<mseed> (
 --time timestamp,
---mseed        int,
---adata        varchar(20),    dependent on type
---idata        int,
---fdata        float,
---ddata        double
+--data int (or float,double,varchar(20),       dependent on type
 --); 
 @end verbatim
 
@@ -83,27 +77,40 @@
 @begin verbatim
 create table batch(vid int, source string, target string);
 
-
 insert into batch
 select vid, source, target from vault where created is null limit 2;
-call batchload( select vid, source, target from vault where created is null 
limit 2);
+call mseedImport( (select vid, source, target from vault where created is null 
limit 2) );
 drop table batch;
 @end verbatim
 
 @mal
 module mseed;
-pattern import(vid:int, source:str, target:str):str
+pattern import{unsafe}(vid:int, source:str, target:str)
 address MseedImport
 comment "Dump the record content of an mseed file from the vault into the 
mseed catalog";
 
+...@{
 @-
-The intended use is to simply rely on SQL to select elements
-in the vault table for import and to apply the mseed import operation.
-It returns the id list of succesful imports.
-...@verbatim
-select mseedImport(vid,source,target) from vault where ...
-...@end verbatim
-...@{
+Performance experimentation on eir on 17 dec 2010.
+The code base uses the SQLstatementIntern to populate both the catalog and the 
mseed files.
+Experiment 0 concerns loading the mseedN file using a sequence of inserts. 
This involves
+the complete code path from generating SQL statement, parsing, optimization 
and execution.
+No query caching is enabled. The first record from
+ftp://www.orfeus-eu.org/pub/data/POND/1990/002/MN_BNI__BHN.1990.002.20.21.32.mseed
+with 3768 events was loaded in 10.770 msec
+
+The second experiment produced a copy file, separately loaded into MonetDB 
using
+copy 400000 records into mseed2 from '/tmp/input/' delimiters ',','\n';
+this track leads to a load time of 326ms for the first record.
+The first file with 26 records and 97592 tuples was loaded this way within 
8710 msec.
+This approach still uses the SQL logger. The expected speedup for direct 
update would
+be something like a factor 3.
+
+The third experiment produced the single binary BAT images followed by
+copy into mseed2 from ('/tmp/time','/tmp/data');
+Attach procedure for complete file ran in about 232 ms.
+
+
 @h
 #ifndef _MSEED_H
 #define _MSEED_H
@@ -134,16 +141,20 @@
 @c
 #include "mseed.h"
 #include "vault.h"
+
+#define EXPERIMENT0 0
+#define EXPERIMENT1 0
+#define EXPERIMENT2 2
+
 str SQLstatementIntern(Client c, str *expr, str nme, int execute, bit output);
 
-#define QRYinsert "INSERT INTO mseedCatalog(mseed, seqno, dataquality, 
network, \
-        station, location, channel, starttime , samplerate, samplecnt, 
sampletype) \
-        VALUES(%d, %d,'%c','%s', '%s','%s','%s','%s',%f,%d,%s);"
+#define QRYinsertI "INSERT INTO mseedCatalog(mseed, seqno, dataquality, 
network, \
+        station, location, channel, starttime , samplerate, samplecnt, 
sampletype, minval,maxval) \
+        VALUES(%d, %d,'%c','%s', '%s','%s','%s','%s',%f,%d,%s,%d,%d);"
 
 str
 MseedImport(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
 {
-       str *ret = (str*) getArgReference(stk,pci,0); 
        str msg = MAL_SUCCEED;
        int *vid = (int*) getArgReference(stk,pci,1); 
        str *sourcefile = (str*) getArgReference(stk,pci,2); 
@@ -167,20 +178,22 @@
        char starttime[BUFSIZ];
        char stoptime[BUFSIZ];
        int stepsize;
+       int imin,imax;
+       int nobat=0;
+       FILE *data=0, *time=0;
 
        (void) mb;
 
        if ( strcmp(*sourcefile, *targetfile) ) {
                if ( vaultpath[0] == 0){
                        msg= createException(MAL,"mseed.dump","Vault not 
initialized");
-                       *ret = GDKstrdup(msg);
                        return msg;
                }
                snprintf(file,BUFSIZ,"%s%c%s",vaultpath, DIR_SEP,*targetfile);
 
                /* only fetch the file if it is not already in the local vault 
*/
                if ( access(file, R_OK) ){
-                       mnstr_printf(cntxt->fdout, "FTP fetch %s -> 
%s\n",*sourcefile,file);
+                       mnstr_printf(cntxt->fdout, "#FTP fetch %s -> 
%s\n",*sourcefile,file);
                        msg= VLTimport(&answ, sourcefile, targetfile);
                        if ( msg)
                                return msg;
@@ -197,7 +210,7 @@
 
        /* Loop over the input file */
        s= buf;
-       while ( (retcode = ms_readmsr (&msr, file, reclen, NULL, NULL, 1, 
dataflag, verbose)) == MS_NOERROR )
+       while ( (retcode = ms_readmsr (&msr, file, reclen, NULL, NULL, 1, 
dataflag, verbose)) == MS_NOERROR  )
        {
                totalrecs++;
                totalsamps += msr->samplecnt;
@@ -209,14 +222,91 @@
 
                switch(msr->sampletype){
                        case 'a': kind="'string'"; break;
-                       case 'i': kind="'int'"; break;
+                       case 'i': kind="'int'"; imin= int_nil; imax= int_nil; 
break;
                        case 'f': kind="'flt'"; break;
                        case 'd': kind="'dbl'"; break;
                        default : kind="null";
                }
-               snprintf(buf,BUFSIZ,QRYinsert, *vid, 
msr->sequence_number,msr->dataquality,msr->network, msr->station, 
msr->location, msr->channel,
-               starttime,msr->samprate, msr->samplecnt,kind);
-               printf("%s\n",buf);
+               if ( nobat==0 ){
+                       snprintf(buf,BUFSIZ,"CREATE TABLE sys.mseed%d(time 
timestamp, data int);",*vid);
+                       msg =SQLstatementIntern(cntxt,&s,"mseed.import 
file",TRUE,FALSE);
+                       if ( msg)
+                               return msg;
+               }
+               if ( EXPERIMENT1 )
+                       data = fopen("/tmp/data","w");
+               if ( EXPERIMENT2  && nobat == 0){
+                       snprintf(buf, BUFSIZ, "%s/time",vaultpath);
+                       time = fopen(buf,"w");
+                       if ( time == 0)
+                               throw(MAL,"mseed.import",RUNTIME_CREATE_ERROR);
+                       snprintf(buf, BUFSIZ, "%s/data",vaultpath);
+                       data = fopen(buf,"w");
+                       if ( data == 0)
+                               throw(MAL,"mseed.import",RUNTIME_CREATE_ERROR);
+               }
+               nobat++;
+
+               if (msr->datasamples)
+               for ( j=0;j< msr->samplecnt; j++){
+                       t= MS_HPTIME2EPOCH(msr->starttime);
+                       tm = gmtime(&t);
+                       snprintf(stoptime,BUFSIZ,"%d-%02d-%02d 
%02d:%02d:%02d.%06ld", tm->tm_year +(tm->tm_year > 80?1900:2000), 
tm->tm_mon+1,tm->tm_mday, tm->tm_hour, tm->tm_min,tm->tm_sec, msr->starttime % 
HPTMODULUS);
+                       msr->starttime += stepsize;
+                       switch(msr->sampletype){
+                       case 'a':
+                               if ( EXPERIMENT0 )
+                                       snprintf(buf,BUFSIZ, "INSERT INTO 
mseed%d(time,adata) VALUES (timestamp '%s', %s);", *vid, stoptime, ((char**) 
msr->datasamples)[j]);
+                               if ( EXPERIMENT1 )
+                                       snprintf(buf,BUFSIZ, "%s, %s", 
stoptime, ((char**) msr->datasamples)[j]);
+                               break;
+                       case 'i':
+                               if ( EXPERIMENT0 )
+                                       snprintf(buf,BUFSIZ, "INSERT INTO 
mseed%d(time,data) VALUES (timestamp '%s', %d);", *vid, stoptime, ((int*) 
msr->datasamples)[j]);
+                               if ( EXPERIMENT1 )
+                                       snprintf(buf,BUFSIZ, "%s, %d", 
stoptime, ((int*) msr->datasamples)[j]);
+
+                               if ( EXPERIMENT2 ){
+                                       fwrite((char*) &tm, sizeof(timestamp), 
1, time);
+                                       fwrite( (char*) &((int*) 
msr->datasamples)[j], sizeof(int), 1, data);
+                               }
+                               if ( imin == int_nil || imin > ((int*) 
msr->datasamples)[j]) imin = ((int*) msr->datasamples)[j];
+                               if ( imax == int_nil || imax < ((int*) 
msr->datasamples)[j]) imax = ((int*) msr->datasamples)[j];
+                               break;
+                       case 'f':
+                               if ( EXPERIMENT0 )
+                                       snprintf(buf,BUFSIZ, "INSERT INTO 
mseed%d(time,fdata) VALUES (timestamp '%s', %f);", *vid, stoptime, ((flt*) 
msr->datasamples)[j]);
+                               if ( EXPERIMENT1 )
+                                       snprintf(buf,BUFSIZ, "%s, %f", 
stoptime, ((flt*) msr->datasamples)[j]);
+                               break;
+                       case 'd':
+                               if ( EXPERIMENT0 )
+                                       snprintf(buf,BUFSIZ, "INSERT INTO 
mseed%d(time,ddata) VALUES ( timestamp '%s', %f);", *vid, stoptime, ((dbl*) 
msr->datasamples)[j]);
+                               if ( EXPERIMENT1 )
+                                       snprintf(buf,BUFSIZ, "%s, %f", 
stoptime, ((dbl*) msr->datasamples)[j]);
+                               break;
+                       default:
+                               snprintf(buf,BUFSIZ,"undef %d",msr->encoding);
+                       }
+                       if ( EXPERIMENT1 )
+                               fprintf(data,"%s\n",buf);
+                       if ( EXPERIMENT0 ){
+                               msg 
=SQLstatementIntern(cntxt,&s,"mseed.import",TRUE,FALSE);
+                               if ( msg != MAL_SUCCEED)
+                                       break;
+                       }
+               }
+               switch(msr->sampletype){
+                       case 'a': kind="'string'"; break;
+                       case 'i': 
+                               snprintf(buf,BUFSIZ,QRYinsertI, *vid, 
msr->sequence_number,msr->dataquality,msr->network, msr->station, 
msr->location, msr->channel,
+                               starttime,msr->samprate, 
msr->samplecnt,kind,imin,imax);
+                               break;
+                       case 'f': kind="'flt'"; break;
+                       case 'd': kind="'dbl'"; break;
+                       default : kind="null";
+               }
+
                msg =SQLstatementIntern(cntxt,&s,"mseed.import",TRUE,FALSE);
                if ( msg != MAL_SUCCEED){
                        break;
@@ -229,42 +319,37 @@
                        t= MS_HPTIME2EPOCH(msr->starttime);
                        tm = gmtime(&t);
                        snprintf(stoptime,BUFSIZ,"%d-%02d-%02d 
%02d:%02d:%02d.%06ld", tm->tm_year +(tm->tm_year > 80?1900:2000), 
tm->tm_mon+1,tm->tm_mday, tm->tm_hour, tm->tm_min,tm->tm_sec, msr->starttime % 
HPTMODULUS);
-                       printf("%s\n",stoptime);
                }
-               if (msr->datasamples)
-               for ( j=0;j< msr->samplecnt; j++){
-                       t= MS_HPTIME2EPOCH(msr->starttime);
-                       tm = gmtime(&t);
-                       snprintf(stoptime,BUFSIZ,"%d-%02d-%02d 
%02d:%02d:%02d.%06ld", tm->tm_year +(tm->tm_year > 80?1900:2000), 
tm->tm_mon+1,tm->tm_mday, tm->tm_hour, tm->tm_min,tm->tm_sec, msr->starttime % 
HPTMODULUS);
-                       msr->starttime += stepsize;
-                       switch(msr->sampletype){
-                       case 'a':
-                               snprintf(buf,BUFSIZ, "INSERT INTO 
mseedVolumn(time,adata) VALUES (%s, %d,%s);", stoptime, *vid, ((char**) 
msr->datasamples)[j]);
-                               break;
-                       case 'i':
-                               snprintf(buf,BUFSIZ, "INSERT INTO 
mseedVolumn(time,idata) VALUES (%s, %d,%d);", stoptime, *vid, ((int*) 
msr->datasamples)[j]);
-                               break;
-                       case 'f':
-                               snprintf(buf,BUFSIZ, "INSERT INTO 
mseedVolumn(time,fdata) VALUES (%s, %d,%f);", stoptime, *vid, ((flt*) 
msr->datasamples)[j]);
_______________________________________________
Checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to