Hi again,

I tune a little the SQLite experiment :
- to get rid of the 19th columns message,
- to measure the previous tests with more precise figures,
- the effect of the suggested index :
   CREATE INDEX xyzzy2 ON fec(cand_nm, contbr_st, contb_receipt_amt);
- the effect of using a filesystem database.

results : (time +/-1 seconds, windows timing doesn't show below the second)
- feeding data :
   . in disk database : 151 seconds
   . in memory database :  131 seconds (25% = 1 cpu used out of 4)

 - creating index CREATE INDEX xyzzy2 ON fec(cand_nm, contbr_st,
contb_receipt_amt) :
   . in disk database : 43 seconds
   . in memory database :  38 seconds

- select cand_nm, sum(contb_receipt_amt) as total from fec group by cand_nm
;
  . in disk database : 22 seconds
  . in memory database :  19 seconds
  . in disk database with index: 3 seconds
  . in memory database with index :  3 seconds


- select cand_nm, sum(contb_receipt_amt) as total from fec group by cand_nm ;
 . in disk database : 27 seconds
 . in memory database :  24 seconds
 . in disk database with index: 4 seconds
 . in memory database with index :  4 seconds


Remarks :

- with an expert index, SQLite is 6 times quicker.
   ==> Why such a 'x6' speed-up, as we need to scan the whole table anyway ?

- the ":memory:" database is barely quicker than the disk database.
   ==> How can a rotating disk (7200rpm) database compete with a pure
in-memory database ?



*** ANNEXE ********************
script to launch with ":memory:" or with "turlututu.db"  :
(I measure the file LastWrite time, on windows via powershell to get seconds)


.header on
.mod csv
.separator ","

create table fec(
CMTE_ID ,CAND_ID ,CAND_NM ,CONTBR_NM ,
CONTBR_CITY ,CONTBR_ST , CONTBR_ZIP ,
CONTBR_EMPLOYER ,CONTBR_OCCUPATION ,CONTB_RECEIPT_AMT double
precision,
CONTB_RECEIPT_DT ,RECEIPT_DESC ,MEMO_CD ,
MEMO_TEXT  ,FORM_TP ,FILE_NUM ,
TRAN_ID ,ELECTION_TP ,USELESS_COLUMN
);
.import "P00000001-ALL.csv" fec


--5 344 498 record read with warning as 19th empty column
.output fec_test0.csv
select *  from fec limit 1;

.output stdout
.output fec_test1.csv
select count(*) from fec;

.output stdout
.output fec_test2.csv

select cand_nm, sum(contb_receipt_amt) as total from fec group by cand_nm;

.output stdout
.output fec_test3.csv

select cand_nm, contbr_st, sum(contb_receipt_amt) as total from fec group
by cand_nm, contbr_st;

.output stdout

-- in memory, with index   ---------------------------------------------
CREATE INDEX xyzzy2 ON fec(cand_nm, contbr_st, contb_receipt_amt);
.output fec_test0c.csv
select *  from fec limit 1;

.output stdout

.output fec_test1c.csv
select count(*) from fec;

.output stdout
.output fec_test2c.csv

select cand_nm, sum(contb_receipt_amt) as total from fec group by cand_nm;

.output stdout
.output fec_test3c.csv

select cand_nm, contbr_st, sum(contb_receipt_amt) as total from fec group
by cand_nm, contbr_st;

.output stdout
_______________________________________________
sqlite-users mailing list
sqlite-users@sqlite.org
http://sqlite.org:8080/cgi-bin/mailman/listinfo/sqlite-users

Reply via email to