It does not appear that the recent INSERT checkin speeds up bulk inserts
on already populated tables with many indexes:
[3643] Add a new algorithm for handling INSERT
which reduces fragmentation on a VACUUM
http://www.sqlite.org/cvstrac/chngview?cn=3643
http://www.sqlite.org/cvstrac/tktview?tn=2075
Regarding the newly optimized code for the statement:
INSERT INTO table1 SELECT * FROM table2;
I created a patch (attached) in the hope of speeding up the sqlite3
shell .import command using the same INSERT INTO construct.
It creates a staging table, an exact schema copy of the target table
and its indexes (with code borrowed from vacuum.c), in an attached
:memory: database, and bulk-inserts the staged data into the real
table batch by batch. The supplied patch stages rows in 10,000-row batches.
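In SQL terms, the flow the patch generates looks roughly like this
(shown here against the foo table from the test script below; the
shell builds these statements itself from sqlite_master):

  ATTACH DATABASE ':memory:' AS import_db;

  -- exact schema copy of the target table and its indexes
  CREATE TABLE import_db.foo(
    a text unique,
    b text unique not null,
    c text unique not null,
    primary key(c,a,b)
  );
  CREATE INDEX import_db.foo_ba ON foo(b,a);

  -- each input line is bound and inserted into the staging table
  INSERT INTO import_db.foo VALUES(?,?,?);

  -- every IMPORT_BATCH_SIZE rows, the batch is flushed and cleared
  INSERT INTO foo SELECT * FROM import_db.foo;
  DELETE FROM import_db.foo;

  DETACH DATABASE import_db;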
Unfortunately, its results are not very promising.
Although checkin [3643] greatly improves VACUUM speed and index locality
after a VACUUM, it does not help in situations where you're trying to
insert into already populated large tables with numerous indexes.
In such cases, simply using a high value for cache_size
pragma cache_size=200000;
yields substantially faster .import times than the patched .import
running with the default cache_size.
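(For reference, the high-cache timings in the table below amount to
running something like the following from inside the shell before the
import - no code changes at all, just a larger page cache:

  pragma cache_size=200000;
  .import t59049.csv foo
)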
Only if you increase the patch's IMPORT_BATCH_SIZE value to a number greater
than the number of rows being imported (say 100000) - and the table being
imported into is empty prior to .import - do you see any speed
improvement. Even under these ideal conditions the patched .import is still
2 times slower than the non-patched .import command with a very high pragma
cache_size.
The test scenario is below. 59,049 rows are to be .imported into table foo
which has numerous indexes. Try it with the latest CVS sqlite3, as well
as the latest CVS plus the .import patch.
.import patch  cache_size  IMPORT_BATCH_SIZE  time (s)
-------------  ----------  -----------------  --------
no                   2000                n/a      1201
no                 200000                n/a        22
yes                  2000              10000      1665
yes                  2000              30000       966
yes                200000              30000        78
yes                  2000             100000        51
Perhaps SQLite insert speed could benefit from having blocks of contiguous
pages reserved for exclusive use by each index. There seems to be a great
deal of disk activity when the cache_size is low.
#!/bin/bash
rm -f foo.db t59049.csv

# create the target table; the unique columns and the composite primary
# key give it several implicit indexes besides the explicit one
sqlite3 foo.db <<EOF
CREATE TABLE foo(
  a text unique,
  b text unique not null,
  c text unique not null,
  primary key(c,a,b)
);
CREATE INDEX foo_ba on foo(b,a);
EOF

# generate 3^10 = 59,049 rows of random data in the shell's default
# '|'-separated list format
sqlite3 >t59049.csv <<EOF
create view v1 as
select 1 union all select 2 union all select 3;
select hex(randomblob(17)),
       hex(randomblob(13)),
       hex(randomblob(131))
from v1,v1,v1,v1,v1,v1,v1,v1,v1,v1;
EOF

time sqlite3 foo.db ".import t59049.csv foo"
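A quick sanity check after the import (not part of the timings, and not
in the original script) is:

  sqlite3 foo.db "select count(*) from foo"

which should print 59049.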
Index: src/shell.c
===================================================================
RCS file: /sqlite/sqlite/src/shell.c,v
retrieving revision 1.158
diff -u -3 -p -r1.158 shell.c
--- src/shell.c 8 Jan 2007 14:31:36 -0000 1.158
+++ src/shell.c 18 Feb 2007 17:52:11 -0000
@@ -144,6 +144,41 @@ static void shellstaticFunc(
sqlite3_result_text(context, zShellStatic, -1, SQLITE_STATIC);
}
+#define WANT_execSql_DEBUGGING 1
+#if WANT_execSql_DEBUGGING
+# define D(x) x
+#else
+# define D(x)
+#endif
+
+static int execSql(sqlite3 *db, const char *zSql){
+ sqlite3_stmt *pStmt;
+ D(fprintf(stderr, "begin execSql: %s\n", zSql));
+ if( SQLITE_OK!=sqlite3_prepare(db, zSql, -1, &pStmt, 0) ){
+ return sqlite3_errcode(db);
+ }
+ while( SQLITE_ROW==sqlite3_step(pStmt) ){}
+ D(fprintf(stderr, "end execSql: %s\n", zSql));
+ return sqlite3_finalize(pStmt);
+}
+static int execExecSql(sqlite3 *db, const char *zSql){
+ sqlite3_stmt *pStmt;
+ int rc;
+ rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
+ if( rc!=SQLITE_OK ) return rc;
+ while( SQLITE_ROW==sqlite3_step(pStmt) ){
+ rc = execSql(db, (char*)sqlite3_column_text(pStmt, 0));
+ if( rc!=SQLITE_OK ){
+ sqlite3_finalize(pStmt);
+ return rc;
+ }
+ }
+ return sqlite3_finalize(pStmt);
+}
+static int detach_import_db(int rc) {
+ execSql(db, "detach database import_db");
+ return rc;
+}
/*
** This routine reads a line of text from FILE in, stores
@@ -1103,6 +1138,9 @@ static int do_meta_command(char *zLine,
char *zCommit; /* How to commit changes */
FILE *in; /* The input file */
int lineno = 0; /* Line number of input file */
+ char* zInsertInto;
+ char* zTruncate;
+ int rows_imported_counter = 0;
open_db(p);
nSep = strlen(p->separator);
@@ -1124,9 +1162,30 @@ static int do_meta_command(char *zLine,
}
sqlite3_finalize(pStmt);
if( nCol==0 ) return 0;
- zSql = malloc( nByte + 20 + nCol*2 );
- if( zSql==0 ) return 0;
- sqlite3_snprintf(nByte+20, zSql, "INSERT INTO '%q' VALUES(?", zTable);
+
+ rc = execSql(p->db, "attach database ':memory:' as import_db");
+ zSql = sqlite3_mprintf(
+ "SELECT 'CREATE TABLE import_db.' || substr(sql,14,100000000) "
+ "FROM sqlite_master WHERE tbl_name='%q' AND type='table' "
+ "UNION ALL "
+ "SELECT 'CREATE INDEX import_db.' || substr(sql,14,100000000) "
+ "FROM sqlite_master WHERE tbl_name='%q' AND sql LIKE 'CREATE INDEX %%'
"
+ "UNION ALL "
+ "SELECT 'CREATE UNIQUE INDEX import_db.' || substr(sql,21,100000000) "
+ "FROM sqlite_master WHERE tbl_name='%q' "
+ " AND sql LIKE 'CREATE UNIQUE INDEX %%'",
+ zTable, zTable, zTable);
+ if (!rc) rc = execExecSql(p->db, zSql);
+ sqlite3_free(zSql);
+ if( rc ){
+ fprintf(stderr, "Error: %s\n", sqlite3_errmsg(db));
+ return detach_import_db(1);
+ }
+ zSql = malloc( nByte + 40 + nCol*2 );
+ if( zSql==0 ) return detach_import_db(0);
+
+ sqlite3_snprintf(nByte+40, zSql,
+ "INSERT INTO import_db.'%q' VALUES(?", zTable);
j = strlen(zSql);
for(i=1; i<nCol; i++){
zSql[j++] = ',';
@@ -1135,25 +1194,30 @@ static int do_meta_command(char *zLine,
zSql[j++] = ')';
zSql[j] = 0;
rc = sqlite3_prepare(p->db, zSql, -1, &pStmt, 0);
free(zSql);
if( rc ){
fprintf(stderr, "Error: %s\n", sqlite3_errmsg(db));
sqlite3_finalize(pStmt);
- return 1;
+ return detach_import_db(1);
}
in = fopen(zFile, "rb");
if( in==0 ){
fprintf(stderr, "cannot open file: %s\n", zFile);
sqlite3_finalize(pStmt);
- return 0;
+ return detach_import_db(0);
}
azCol = malloc( sizeof(azCol[0])*(nCol+1) );
if( azCol==0 ){
fclose(in);
- return 0;
+ return detach_import_db(0);
}
sqlite3_exec(p->db, "BEGIN", 0, 0, 0);
zCommit = "COMMIT";
+
+ zInsertInto = sqlite3_mprintf(
+ "INSERT INTO '%q' SELECT * FROM import_db.'%q'", zTable, zTable);
+ zTruncate = sqlite3_mprintf("DELETE FROM import_db.'%q'", zTable);
+
while( (zLine = local_getline(0, in))!=0 ){
char *z;
i = 0;
@@ -1180,7 +1244,15 @@ static int do_meta_command(char *zLine,
sqlite3_bind_text(pStmt, i+1, azCol[i], -1, SQLITE_STATIC);
}
sqlite3_step(pStmt);
- rc = sqlite3_reset(pStmt);
+
+#define IMPORT_BATCH_SIZE 10000
+ /* printf("loop: %d\n", rows_imported_counter); */
+ if (++rows_imported_counter % IMPORT_BATCH_SIZE == 0) {
+ rc = execSql(p->db, zInsertInto);
+ if (rc==SQLITE_OK ) rc = execSql(p->db, zTruncate);
+ }
+
+ if (rc==SQLITE_OK ) rc = sqlite3_reset(pStmt);
free(zLine);
if( rc!=SQLITE_OK ){
fprintf(stderr,"Error: %s\n", sqlite3_errmsg(db));
@@ -1189,10 +1261,21 @@ static int do_meta_command(char *zLine,
break;
}
}
+ if (rc==SQLITE_OK) {
+ rc = execSql(p->db, zInsertInto);
+ if (rc==SQLITE_OK ) rc = execSql(p->db, zTruncate);
+ }
+ if( rc!=SQLITE_OK ){
+ fprintf(stderr,"Error: %s\n", sqlite3_errmsg(db));
+ zCommit = "ROLLBACK";
+ }
+ sqlite3_free(zInsertInto);
+ sqlite3_free(zTruncate);
free(azCol);
fclose(in);
sqlite3_finalize(pStmt);
sqlite3_exec(p->db, zCommit, 0, 0, 0);
+ detach_import_db(0);
}else
if( c=='i' && strncmp(azArg[0], "indices", n)==0 && nArg>1 ){