Changeset: ce2b7137b310 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=ce2b7137b310
Modified Files:
sql/src/backends/monet5/vaults/mseed.mx
Branch: default
Log Message:
Defensive against errors
The code has been simplified somewhat and made more robust against errors.
The interface is turned into a function call, which allows us to
perform bulk updates as follows:
select vid from batch where mseedimport(vid,source,target) is not null;
This uses a side-effect function, which leaves new tables behind and updates
the catalog.
diffs (truncated from 378 to 300 lines):
diff -r 0bfaa8565a84 -r ce2b7137b310 sql/src/backends/monet5/vaults/mseed.mx
--- a/sql/src/backends/monet5/vaults/mseed.mx Sun Dec 26 15:31:00 2010 +0100
+++ b/sql/src/backends/monet5/vaults/mseed.mx Sun Dec 26 15:33:50 2010 +0100
@@ -53,50 +53,54 @@
-- this function inserts the mseed record information into the catalog
-- errors are returned for off-line analysis.
-CREATE PROCEDURE mseedImport(vid int, source string, target string)
+
+CREATE FUNCTION mseedImport(vid int, source string, target string)
+RETURNS int
EXTERNAL NAME mseed.import;
-CREATE PROCEDURE mseedLoad(vid int, source string, target string)
+
+CREATE FUNCTION mseedLoad(vid int, source string, target string)
+RETURNS int
EXTERNAL NAME mseed.load;
-
-- The records are collected in SQL tables of the following structure
-- They are ordered on timestamp
---CREATE TABLE chunk<mseed> (
+--CREATE TABLE mseed<mseedid> (
--time timestamp,
--data int (or float,double,varchar(20), dependent on type
--);
@end verbatim
@- How to use the mseed catalog.
-First, the vault directory is populated with the location of the mseed source
files
-in a remote site. The corresponding local name is set using the basename
property,
+First, the vault directory is populated with the location of the mseed source
files.
+The corresponding local name is set using the basename property,
and all file creation and access times are set to null.
Following, a limited number of files are loaded into the vault and analysed.
The information extracted ends up in the catalog, and remains there forever.
The underlying mseed file is not decrypted directly; this is done as soon
as a query requests it.
-A test sequence (after the vault director has been populated)
+A test sequence (after the vault directory has been populated)
+to populate the mseedcatalog.
@begin verbatim
create table batch(vid int, source string, target string);
insert into batch
select vid, source, target from vault where created is null limit 2;
-call mseedImport( (select vid, source, target from vault where created is null
limit 2) );
+select vid from batch where mseedImport( vid, source, target ) is not null;
drop table batch;
@end verbatim
@mal
module mseed;
-pattern import{unsafe}(vid:int, source:str, target:str)
+pattern import{unsafe}(vid:int, source:str, target:str):int
address MseedImport
comment "Fetch the record descriptor of a mseed file from the vault into the
mseed catalog";
-pattern load{unsafe}(vid:int, source:str, target:str)
+pattern load{unsafe}(vid:int, source:str, target:str):int
address MseedLoad
comment "Load the content of a mseed file from the vault into a mseed table";
@{
@-
-Performance experimentation on eir on 17 dec 2010.
+Performance experimentation on Eir on Dec 17, 2010.
The code base uses the SQLstatementIntern to populate both the catalog and the
mseed files.
Experiment INSERTLOAD concerns loading the mseedN file using a sequence of
inserts. This involves
the complete code path from generating SQL statement, parsing, optimization
and execution.
@@ -109,7 +113,7 @@
copy 400000 records into mseed2 from '/tmp/data/' delimiters ',','\n';
this track leads to a load time of 326ms for the first record.
The first file with 26 records and 97592 tuples was loaded this way within
8710 msec.
-This approach still uses the SQL logger. The expected speedup for direct
update would
+This approach still uses the SQL logger. The expected speedup for LOCKED copy
into would
be something like a factor 3.
Experiment ATTACHLOAD produced the single binary BAT images followed by
@@ -120,7 +124,8 @@
Experiment MANUALLOAD generates a file with insert statements, which is
executed manually
against mclient. The total time for this first batch is 34 sec.
This shows the gains from cached plans. (compared to experiment 0)
-The file grows from 0.1GB to 1GB
+
+The database explosion factor is 10.
-rw-r--r-- 1 mk ins 721464 2010-12-18 10:01 4156.tail
-rw-r--r-- 1 mk ins 360732 2010-12-18 10:01 4272.tail
@@ -129,7 +134,7 @@
the following optimistic load times are foreseen:
Experiment Loadtime
INSERTLOAD 28 yrs
-COPYLOAD 322 days
+COPYLOAD 322 days (107 days with LOCKED)
ATTACHLOAD 10 days (37days at 1sec)
MANUALLOAD 3.2 yrs
@@ -165,19 +170,20 @@
#include "mseed.h"
#include "vault.h"
-#define INSERTLOAD 0
-#define COPYLOAD 0
-#define ATTACHLOAD 0
-#define MANUALLOAD 1
+#define INSERTLOAD 1
+#define COPYLOAD 2
+#define ATTACHLOAD 4
+#define MANUALLOAD 8
+static int experiment = ATTACHLOAD;
str SQLstatementIntern(Client c, str *expr, str nme, int execute, bit output);
#define QRYinsertI "INSERT INTO mseedCatalog(mseed, seqno, dataquality,
network, \
station, location, channel, starttime , samplerate, samplecnt,
sampletype, minval,maxval) \
- VALUES(%d, %d,'%c','%s', '%s','%s','%s','%s',%f,%d,%s,%d,%d);"
+ VALUES(%d, %d,'%c','%s', '%s','%s','%s','%s',%f,%d,'%s',%d,%d);"
static str
-MseedInternal(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, int
load)
+MseedInternal(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, int
forceload)
{
str msg = MAL_SUCCEED;
int *vid = (int*) getArgReference(stk,pci,1);
@@ -195,7 +201,7 @@
int j;
time_t t;
struct tm *tm;
- char *s, *kind;
+ char *s;
timestamp answ;
char file[BUFSIZ];
char buf[BUFSIZ];
@@ -221,7 +227,7 @@
msg= VLTimport(&answ, sourcefile, targetfile);
if ( msg)
return msg;
- /* remember the location of the copy */
+ /* remember the freshness of the copy */
s= buf;
snprintf(buf,BUFSIZ,"UPDATE vault SET created=now()
WHERE vid = %d;", *vid);
msg =SQLstatementIntern(cntxt,&s,"mseed.import
file",TRUE,FALSE);
@@ -246,31 +252,38 @@
snprintf(starttime,BUFSIZ,"%d-%02d-%02d %02d:%02d:%02d.%06ld",
tm->tm_year +(tm->tm_year > 80?1900:2000), tm->tm_mon+1,tm->tm_mday,
tm->tm_hour, tm->tm_min,tm->tm_sec, msr->starttime % HPTMODULUS);
switch(msr->sampletype){
- case 'a': kind="'string'"; break;
- case 'i': kind="'int'"; imin = INT_MAX; imax = INT_MIN;
break;
- case 'f': kind="'flt'"; break;
- case 'd': kind="'dbl'"; break;
- default : kind="null";
+ case 'i':
+ imin = INT_MAX; imax = INT_MIN;
+ if ( nobat==0 ){
+ snprintf(buf,BUFSIZ,"CREATE TABLE
sys.mseed%d(time timestamp, data int);",*vid);
+ msg
=SQLstatementIntern(cntxt,&s,"mseed.import file",TRUE,FALSE);
+ if ( msg)
+ goto wrapup;
+ }
+ break;
+ case 'a': case 'f': case 'd':
+ default :
+ msg = createException(MAL,"mseed.import","data
type not yet implemented");
+ goto wrapup;
}
- if ( nobat==0 ){
- snprintf(buf,BUFSIZ,"CREATE TABLE sys.mseed%d(time
timestamp, data int);",*vid);
- msg =SQLstatementIntern(cntxt,&s,"mseed.import
file",TRUE,FALSE);
- if ( msg)
- return msg;
- }
- if ( load && MANUALLOAD && nobat==0 )
+
+ if ( forceload && experiment & MANUALLOAD && nobat==0 )
data = fopen("/tmp/data","w");
- if ( load && COPYLOAD )
+ if ( forceload && experiment & COPYLOAD )
data = fopen("/tmp/data","w");
- if ( load && ATTACHLOAD && nobat == 0){
+ if ( forceload && experiment & ATTACHLOAD && nobat == 0){
snprintf(buf, BUFSIZ, "%s/time",vaultpath);
time = fopen(buf,"w");
- if ( time == 0)
- throw(MAL,"mseed.import",RUNTIME_CREATE_ERROR);
+ if ( time == 0) {
+ msg=
createException(MAL,"mseed.import",RUNTIME_CREATE_ERROR);
+ break;
+ }
snprintf(buf, BUFSIZ, "%s/data",vaultpath);
data = fopen(buf,"w");
- if ( data == 0)
- throw(MAL,"mseed.import",RUNTIME_CREATE_ERROR);
+ if ( data == 0){
+ msg=
createException(MAL,"mseed.import",RUNTIME_CREATE_ERROR);
+ break;
+ }
}
nobat++;
@@ -282,120 +295,118 @@
msr->starttime += stepsize;
switch(msr->sampletype){
case 'a':
- if ( load && (INSERTLOAD || MANUALLOAD))
+ if ( forceload && (experiment & (INSERTLOAD |
MANUALLOAD)))
snprintf(buf,BUFSIZ, "INSERT INTO
mseed%d(time,adata) VALUES (timestamp '%s', %s);", *vid, stoptime, ((char**)
msr->datasamples)[j]);
- if ( load && COPYLOAD )
+ if ( forceload && experiment & COPYLOAD )
snprintf(buf,BUFSIZ, "%s, %s",
stoptime, ((char**) msr->datasamples)[j]);
break;
case 'i':
- if ( load && (INSERTLOAD || MANUALLOAD ))
+ if ( forceload && experiment & (INSERTLOAD |
MANUALLOAD ))
snprintf(buf,BUFSIZ, "INSERT INTO
mseed%d(time,data) VALUES (timestamp '%s', %d);", *vid, stoptime, ((int*)
msr->datasamples)[j]);
- if ( load && COPYLOAD )
+ if ( forceload && experiment & COPYLOAD )
snprintf(buf,BUFSIZ, "%s, %d",
stoptime, ((int*) msr->datasamples)[j]);
- if ( load && ATTACHLOAD ){
+ if ( forceload && experiment & ATTACHLOAD ){
if (fwrite((char*) &tm,
sizeof(timestamp), 1, time) < 1 ||
fwrite((char*) &((int*)
msr->datasamples)[j], sizeof(int), 1, data) < 1) {
- fclose(time);
- fclose(data);
msg=
createException(MAL,"mseed.dump","fwrite() to 'time' or 'data' file failed");
- return msg;
+ goto wrapup;
}
}
if ( imin > ((int*) msr->datasamples)[j]) imin
= ((int*) msr->datasamples)[j];
if ( imax < ((int*) msr->datasamples)[j]) imax
= ((int*) msr->datasamples)[j];
break;
case 'f':
- if ( load && (INSERTLOAD || MANUALLOAD ))
+ if ( forceload && experiment & (INSERTLOAD |
MANUALLOAD ))
snprintf(buf,BUFSIZ, "INSERT INTO
mseed%d(time,fdata) VALUES (timestamp '%s', %f);", *vid, stoptime, ((flt*)
msr->datasamples)[j]);
- if ( load && COPYLOAD )
+ if ( forceload && experiment & COPYLOAD )
snprintf(buf,BUFSIZ, "%s, %f",
stoptime, ((flt*) msr->datasamples)[j]);
break;
case 'd':
- if ( load && (INSERTLOAD || MANUALLOAD ))
+ if ( forceload && experiment & (INSERTLOAD |
MANUALLOAD ))
snprintf(buf,BUFSIZ, "INSERT INTO
mseed%d(time,ddata) VALUES ( timestamp '%s', %f);", *vid, stoptime, ((dbl*)
msr->datasamples)[j]);
- if ( load && COPYLOAD )
+ if ( forceload && experiment & COPYLOAD )
snprintf(buf,BUFSIZ, "%s, %f",
stoptime, ((dbl*) msr->datasamples)[j]);
break;
default:
snprintf(buf,BUFSIZ,"undef %d",msr->encoding);
}
- if ( load && (COPYLOAD || MANUALLOAD ))
+
+ if ( forceload && experiment & (COPYLOAD || MANUALLOAD
))
fprintf(data,"%s\n",buf);
- if ( load && INSERTLOAD ){
+
+ if ( forceload && experiment & INSERTLOAD ){
msg
=SQLstatementIntern(cntxt,&s,"mseed.import",TRUE,FALSE);
if ( msg != MAL_SUCCEED)
- break;
- }
- }
- switch(msr->sampletype){
- case 'a': kind="'string'"; break;
- case 'i':
- snprintf(buf,BUFSIZ,QRYinsertI, *vid,
msr->sequence_number,msr->dataquality,msr->network, msr->station,
msr->location, msr->channel,
- starttime,msr->samprate,
msr->samplecnt,kind,imin,imax);
- break;
- case 'f': kind="'flt'"; break;
- case 'd': kind="'dbl'"; break;
- default : kind="null";
- }
-
- if ( load == 0 ){
- msg
=SQLstatementIntern(cntxt,&s,"mseed.import",TRUE,FALSE);
- if ( msg != MAL_SUCCEED){
- break;
+ goto wrapup;
}
}
- if (msr->samplecnt) {
- t= MS_HPTIME2EPOCH(msr->starttime);
- tm = gmtime(&t);
- snprintf(stoptime,BUFSIZ,"%d-%02d-%02d
%02d:%02d:%02d.%06ld", tm->tm_year +(tm->tm_year > 80?1900:2000),
tm->tm_mon+1,tm->tm_mday, tm->tm_hour, tm->tm_min,tm->tm_sec, msr->starttime %
HPTMODULUS);
+ switch(msr->sampletype){
+ case 'i':
+ snprintf(buf,BUFSIZ,QRYinsertI, *vid,
msr->sequence_number,msr->dataquality,msr->network, msr->station,
msr->location, msr->channel,
+ starttime,msr->samprate,
msr->samplecnt,"int",imin,imax);
+ break;
+ default :
+ msg = createException(MAL,"mseed.import","data
type not yet implemented");
+ goto wrapup;
}
- if(INSERTLOAD && nobat) {
+
+ if ( forceload == 0 && ( msg
=SQLstatementIntern(cntxt,&s,"mseed.import",TRUE,FALSE)) != MAL_SUCCEED)
+ break;
+
+ if(experiment & INSERTLOAD && nobat) {
_______________________________________________
Checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list