Hello !  

Can someone give any insight here ?  

For me the main problem is the wall size that becomes stable at 387MB (+- 8%
of the database, but around 15 times the size of the tables it's
manipulating).  

It seems to me that something is not working as advertised.   

I'm testing sqlite3 with wall enabled and while running a program that
exclusively open a database I get this on disk while doing it's work:  

ls -l
-rw-r--r-- 1 xxxx xxxx 6185697280 Mar 23 21:34 hacker-news-items.db // 5.7GB
-rw-r--r-- 1 xxxx xxxx???? 786432 Mar 23 21:34 hacker-news-items.db-shm
// 786KB
-rw-r--r-- 1 xxxx xxxx? 397299872 Mar 23 21:34 hacker-news-items.db-wal //
387MB stable at this size  

To be able to compare I dumped all users and users_submitted on a separated
database to measure it's total size
-rw-r--r-- 1 xxxx xxxx?? 25014272 Mar 23 22:06 hacker-news-items-diff.db
//24MB db with all users and users_submitted  

The schema:  

CREATE TABLE 'users' (
??? 'id' INTEGER PRIMARY KEY NOT NULL,
??? 'name' TEXT UNIQUE NOT NULL,
??? 'delay' INTEGER DEFAULT 0,
??? 'created' INTEGER DEFAULT 0,
??? 'karma' INTEGER DEFAULT 0,
??? 'about' TEXT DEFAULT ''
);  

CREATE TABLE users_submitted(user_id integer not null, item_id integer not
null, unique(user_id, item_id));  

Some stats about the data:  

select count(*) from users -> 389514  

select count(*) from users_submitted -> 9381845  

select max(length(about)) from users -> 83197  

select count(*) from users where length(about) > 256 -> 4704  

select count(*) from users where length(about) > 1024 -> 347  

select count(*) from users where length(about) > 2048 -> 92  

select count(*) from users where length(about) > 4096 -> 22  

select count(*) from (
select count(*) as submissions
from users_submitted 
group by user_id
)
where submissions > 10 -> 64639  

?  

The program (with not important parts stripped):  


local db = SQLite3("hacker-news-items.db");
db.exec_dml("PRAGMA synchronous =OFF;");
db.exec_dml("PRAGMA journal_mode = WAL");
db.exec_dml("CREATE TABLE IF NOT EXISTS sync_all_users(id integer primary
key, name varchar, sync_date timestamp default current_timestamp);");
db.exec_dml("insert or ignore into sync_all_users(id, name) values(1, '');");
//only store one record all the time

local users_update_null = SQLite3Stmt(db, "update users set name=? where
name=?");
local users_update_stmt = SQLite3Stmt(db, "update users set delay=?, karma=?,
about=? where name=?");
local users_submitted_insert_stmt = SQLite3Stmt(db, "insert into
users_submitted(user_id , item_id) values(?,?)");
local users_exist_stmt = SQLite3Stmt(db, "select id from users where
name=?");
local sync_all_users_stmt = SQLite3Stmt(db, "update sync_all_users set
name=?, sync_date=CURRENT_TIMESTAMP where id=1;");


function getUserDataAndUpdate(user_id)
{
??? local user = getUserFromSomeWhere(user_id);
??? if(user == "null")
??? {
??? ??? users_update_null.bind(1, "_null_" + user_id);
??? ??? users_update_null.bind(2, user_id);
??? ??? users_update_null.step();
??? ??? users_update_null.reset();
??? ??? return;
??? }

??? local record = json2var(user);
??? local rec_id = type(record) == "table" ? table_rawget(record, "id",
false) : false;
??? if(rec_id)
??? {
??? ??? users_update_stmt.bind(1, table_rawget(record, "delay", 0));
??? ??? users_update_stmt.bind(2, table_rawget(record, "karma", 0));
??? ??? users_update_stmt.bind(3, table_rawget(record, "about", null));
??? ??? users_update_stmt.bind(4, rec_id);
??? ??? users_update_stmtstep();
??? ??? users_update_stmt.reset();
??? ??? 
??? ??? local user_id = db.last_row_id();
??? ??? 
??? ??? local submitted = table_rawget(record, "submitted", false);
??? ??? if(submitted)
??? ??? {
??? ??? ??? for(local i = 0, len = submitted.len(); i < len; ++i)
??? ??? ??? {
??? ??? ??? ??? //print("submitted", submitted[i]);
??? ??? ??? ??? users_submitted_insert_stmt.bind(1, user_id);
??? ??? ??? ??? users_submitted_insert_stmt.bind(2,
submitted[i]);
??? ??? ??? ??? users_submitted_insert_stmt.step();
??? ??? ??? ??? users_submitted_insert_stmt.reset();
??? ??? ??? }
??? ??? }
??? }
}

function updateAllUsers()
{
??? local stmt_all = db.prepare("select? name from users where name >= ?
and name not like '|_null|_%' escape '|';");
??? local name = db.exec_get_one("select name from sync_all_users where
id=1");
??? stmt_all.bind(1, name);
??? local count = 0;
??? //db.exec_dml("begin;");
??? while(stmt_all.next_row())
??? {
??? ??? name = stmt_all.col(0);
??? ??? print("Now updating user", name, count);
??? ??? getUserDataAndUpdate(name);
??? ??? 
??? ??? if( (++count % 1000) == 0 )
??? ??? {
??? ??? ??? stmt_all.reset(); //stop the unique active reader to do
a checkpoint
??? ??? ??? print("Now processing", name, count);
??? ??? ??? db.exec_dml("PRAGMA wal_checkpoint(FULL);");
??? ??? ??? //db.exec_dml("commit;");
??? ??? ??? //db.exec_dml("begin;");
??? ??? ??? sync_all_users_stmt.bind(1, name);
??? ??? ??? sync_all_users_stmt.step();
??? ??? ??? sync_all_users_stmt.reset();
??? ??? ??? stmt_all.bind(1, name);
??? ??? }
??? }
??? //db.exec_dml("commit;");
??? stmt_all.finalize();
}

updateAllUsers();  

?  

?

Reply via email to