It does not appear that the recent INSERT checkin speeds up bulk inserts
on already populated tables with many indexes:

  [3643] Add a new algorithm for handling INSERT 
         which reduces fragmentation on a VACUUM

  http://www.sqlite.org/cvstrac/chngview?cn=3643
  http://www.sqlite.org/cvstrac/tktview?tn=2075

Regarding the newly optimized code for the statement:

  INSERT INTO table1 SELECT * FROM table2;

I created a patch (attached) in the hope of speeding up the sqlite3 
shell .import command using the same INSERT INTO construct.
It uses a staging table in an exact schema copy of the table and 
indexes (with code borrowed from vacuum.c) into an attached :memory: 
database table which bulk inserts the data into the real table, batch 
by batch. The supplied patch stages in 10,000 row batches.

Unfortunately, its results are not very promising.

Although checkin [3643] greatly improves VACUUM speed and index locality 
after a VACUUM, it does not help in situations where you're trying to
insert into already populated large tables with numerous indexes.
In such cases, simply using a high value for cache_size

  pragma cache_size=200000;

yields substantially times faster .import times than the supplied 
patch .import with a default cache_size.

Only if you increase the patch's IMPORT_BATCH_SIZE value to a number greater
than the number of rows being imported (say 100000) - and the table being 
into imported into is empty prior to .import - do you see any speed 
improvement.  Even with these ideal conditions the patched .import is still 
2 times slower than the non-patched .import command with a very high pragma 
cache_size.

The test scenario is below. 59,049 rows are to be .imported into table foo
which has numerous indexes. Try it with the latest CVS sqlite3, as well
as the latest CVS plus the .import patch.

 .import patch  cache_size IMPORT_BATCH_SIZE  time (s)
 -------------  ---------- -----------------  --------
 no                   2000               n/a      1201
 no                 200000               n/a        22
 yes                  2000             10000      1665
 yes                  2000             30000       966
 yes                200000             30000        78
 yes                  2000            100000        51

Perhaps SQLite insert speed could benefit from having blocks of contiguous 
pages for exclusively use by each index. There seems to be a great deal
of disk activity when the cache_size is low.

#!/bin/bash

rm -f foo.db t59049.csv

sqlite3 foo.db <<EOF
  CREATE TABLE foo(
    a text unique, 
    b text unique not null, 
    c text unique not null, 
    primary key(c,a,b)
  );
  CREATE INDEX foo_ba on foo(b,a);
EOF

sqlite3 >t59049.csv <<EOF
  create view v1 as 
    select 1 union all select 2 union all select 3;

  select hex(randomblob(17)), 
         hex(randomblob(13)), 
         hex(randomblob(131))
  from v1,v1,v1,v1,v1,v1,v1,v1,v1,v1;
EOF

time sqlite3 foo.db ".import t59049.csv foo"



 
____________________________________________________________________________________
Do you Yahoo!?
Everyone is raving about the all-new Yahoo! Mail beta.
http://new.mail.yahoo.com
Index: src/shell.c
===================================================================
RCS file: /sqlite/sqlite/src/shell.c,v
retrieving revision 1.158
diff -u -3 -p -r1.158 shell.c
--- src/shell.c 8 Jan 2007 14:31:36 -0000       1.158
+++ src/shell.c 18 Feb 2007 17:52:11 -0000
@@ -144,6 +144,41 @@ static void shellstaticFunc(
   sqlite3_result_text(context, zShellStatic, -1, SQLITE_STATIC);
 }
 
+#define WANT_execSql_DEBUGGING 1
+#if WANT_execSql_DEBUGGING
+# define D(x) x
+#else
+# define D(x)
+#endif
+
+static int execSql(sqlite3 *db, const char *zSql){
+  sqlite3_stmt *pStmt;
+  D(fprintf(stderr, "begin execSql: %s\n", zSql));
+  if( SQLITE_OK!=sqlite3_prepare(db, zSql, -1, &pStmt, 0) ){
+    return sqlite3_errcode(db);
+  }
+  while( SQLITE_ROW==sqlite3_step(pStmt) ){}
+  D(fprintf(stderr, "end execSql: %s\n", zSql));
+  return sqlite3_finalize(pStmt);
+}
+static int execExecSql(sqlite3 *db, const char *zSql){
+  sqlite3_stmt *pStmt;
+  int rc;
+  rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
+  if( rc!=SQLITE_OK ) return rc;
+  while( SQLITE_ROW==sqlite3_step(pStmt) ){
+    rc = execSql(db, (char*)sqlite3_column_text(pStmt, 0));
+    if( rc!=SQLITE_OK ){
+      sqlite3_finalize(pStmt);
+      return rc;
+    }
+  }
+  return sqlite3_finalize(pStmt);
+}
+static int detach_import_db(int rc) {
+  execSql(db, "detach database import_db");
+  return rc;
+}
 
 /*
 ** This routine reads a line of text from FILE in, stores
@@ -1103,6 +1138,9 @@ static int do_meta_command(char *zLine, 
     char *zCommit;              /* How to commit changes */   
     FILE *in;                   /* The input file */
     int lineno = 0;             /* Line number of input file */
+    char* zInsertInto;
+    char* zTruncate;
+    int rows_imported_counter = 0;
 
     open_db(p);
     nSep = strlen(p->separator);
@@ -1124,9 +1162,30 @@ static int do_meta_command(char *zLine, 
     }
     sqlite3_finalize(pStmt);
     if( nCol==0 ) return 0;
-    zSql = malloc( nByte + 20 + nCol*2 );
-    if( zSql==0 ) return 0;
-    sqlite3_snprintf(nByte+20, zSql, "INSERT INTO '%q' VALUES(?", zTable);
+
+    rc = execSql(p->db, "attach database ':memory:' as import_db");
+    zSql = sqlite3_mprintf(
+        "SELECT 'CREATE TABLE import_db.' || substr(sql,14,100000000) "
+        "FROM sqlite_master WHERE tbl_name='%q' AND type='table' "
+        "UNION ALL "
+        "SELECT 'CREATE INDEX import_db.' || substr(sql,14,100000000) "
+        "FROM sqlite_master WHERE tbl_name='%q' AND sql LIKE 'CREATE INDEX %%' 
"
+        "UNION ALL "
+        "SELECT 'CREATE UNIQUE INDEX import_db.' || substr(sql,21,100000000) "
+        "FROM sqlite_master WHERE tbl_name='%q' "
+        " AND sql LIKE 'CREATE UNIQUE INDEX %%'",
+        zTable, zTable, zTable);
+    if (!rc) rc = execExecSql(p->db, zSql);
+    sqlite3_free(zSql);
+    if( rc ){
+      fprintf(stderr, "Error: %s\n", sqlite3_errmsg(db));
+      return detach_import_db(1);
+    }
+    zSql = malloc( nByte + 40 + nCol*2 );
+    if( zSql==0 ) return detach_import_db(0);
+
+    sqlite3_snprintf(nByte+40, zSql,
+      "INSERT INTO import_db.'%q' VALUES(?", zTable);
     j = strlen(zSql);
     for(i=1; i<nCol; i++){
       zSql[j++] = ',';
@@ -1135,25 +1194,30 @@ static int do_meta_command(char *zLine, 
     zSql[j++] = ')';
     zSql[j] = 0;
     rc = sqlite3_prepare(p->db, zSql, -1, &pStmt, 0);
-    free(zSql);
+    sqlite3_free(zSql);
     if( rc ){
       fprintf(stderr, "Error: %s\n", sqlite3_errmsg(db));
       sqlite3_finalize(pStmt);
-      return 1;
+      return detach_import_db(1);
     }
     in = fopen(zFile, "rb");
     if( in==0 ){
       fprintf(stderr, "cannot open file: %s\n", zFile);
       sqlite3_finalize(pStmt);
-      return 0;
+      return detach_import_db(0);
     }
     azCol = malloc( sizeof(azCol[0])*(nCol+1) );
     if( azCol==0 ){
       fclose(in);
-      return 0;
+      return detach_import_db(0);
     }
     sqlite3_exec(p->db, "BEGIN", 0, 0, 0);
     zCommit = "COMMIT";
+
+    zInsertInto = sqlite3_mprintf(
+      "INSERT INTO '%q' SELECT * FROM import_db.'%q'", zTable, zTable);
+    zTruncate = sqlite3_mprintf("DELETE FROM import_db.'%q'", zTable);
+
     while( (zLine = local_getline(0, in))!=0 ){
       char *z;
       i = 0;
@@ -1180,7 +1244,15 @@ static int do_meta_command(char *zLine, 
         sqlite3_bind_text(pStmt, i+1, azCol[i], -1, SQLITE_STATIC);
       }
       sqlite3_step(pStmt);
-      rc = sqlite3_reset(pStmt);
+    
+#define IMPORT_BATCH_SIZE 10000
+      /* printf("loop: %d\n", rows_imported_counter); */
+      if (++rows_imported_counter % IMPORT_BATCH_SIZE == 0) {
+        rc = execSql(p->db, zInsertInto);
+        if (rc==SQLITE_OK ) rc = execSql(p->db, zTruncate);
+      }
+
+      if (rc==SQLITE_OK ) rc = sqlite3_reset(pStmt);
       free(zLine);
       if( rc!=SQLITE_OK ){
         fprintf(stderr,"Error: %s\n", sqlite3_errmsg(db));
@@ -1189,10 +1261,21 @@ static int do_meta_command(char *zLine, 
         break;
       }
     }
+    if (rc==SQLITE_OK) {
+      rc = execSql(p->db, zInsertInto);
+      if (rc==SQLITE_OK ) rc = execSql(p->db, zTruncate);
+    }
+    if( rc!=SQLITE_OK ){
+      fprintf(stderr,"Error: %s\n", sqlite3_errmsg(db));
+      zCommit = "ROLLBACK";
+    }
+    sqlite3_free(zInsertInto);
+    sqlite3_free(zTruncate);
     free(azCol);
     fclose(in);
     sqlite3_finalize(pStmt);
     sqlite3_exec(p->db, zCommit, 0, 0, 0);
+    detach_import_db(0);
   }else
 
   if( c=='i' && strncmp(azArg[0], "indices", n)==0 && nArg>1 ){

-----------------------------------------------------------------------------
To unsubscribe, send email to [EMAIL PROTECTED]
-----------------------------------------------------------------------------

Reply via email to