I have a .NET C# application that is using System.Data.SQLite. The application goes through a filesystem and extracts file names, builds md5sums on every file and builds up a database. Reason behind is, to reduce the number of duplicates.
The files as a tar file sum up to 66 GB. It's about a million files. I learnt that it is a good idea to do all in one transaction in SQLite, but I wonder if it is still a good idea when inserting many files as BLOBs (I decided to put everything into the database). My first attempt ran overnight and when I came to the machine next morning, the SQLITe database file was grown to (only) 3.4GB and an alert box had popped up saying something like "bad library use" of System.Data.Sqlite. Would it be possibly better to commit and close the transaction after every blob update? Whether or not I'm inserting a file into the list of resources depends on the existence of a unique ref_id which I build from the name_md5sum_size of the file. So, when two files have the same name, same size and the same md5sum, I decide they are identical. When I hit a UNIQUE violation (try ExecuteNonQuery()) I decide not to UPDATE the record with the BLOB. If it's a first time entry, I decide to UPDATE the BLOB. Below is the code. Especially the transaction portion might be wirth considering. Thank you. -- Christoph using System; using System.Data; using System.Data.Common; using System.Security.Cryptography; using System.Text; using System.Windows.Forms; using System.IO; using System.Data.SQLite; namespace sqliteForm { ///<summary> /// Summary description for Form1 ///</summary> /// public class Form1 : System.Windows.Forms.Form public partial class Form1 : System.Windows.Forms.Form { internal System.Windows.Forms.Button btnSearch; internal System.Windows.Forms.TextBox txtFile; internal System.Windows.Forms.Label lblFile; internal System.Windows.Forms.Label lblDirectory; internal System.Windows.Forms.ListBox lstFilesFound; internal System.Windows.Forms.ComboBox cboDirectory; private int totalcount; private SQLiteTransaction dbTrans; private SQLiteCommand resource_cmd; private SQLiteCommand res_data_cmd; private SQLiteCommand cdcmd; private SQLiteCommand delcmd; private SQLiteParameter size; private SQLiteParameter creation_date; private SQLiteParameter name; private SQLiteParameter data; private SQLiteParameter md5sum_res; private SQLiteParameter md5sum_cd; private SQLiteParameter size_cd; private SQLiteParameter tag; private SQLiteParameter suite; private SQLiteParameter ref_id; private SQLiteParameter ref_id_cd; private SQLiteParameter prob_ref; private SQLiteParameter mandant; private SQLiteParameter basename; public Form1() { // // Required for Windows Form Designer support // InitializeComponent(); // // TODO: Add any constructor code after InitializeComponent call. // } ///<summary> /// Clean up any resources being used. ///</summary> protected override void Dispose( bool disposing ) { if( disposing ) { if (components != null) { components.Dispose(); } } base.Dispose( disposing ); } #region Windows Form Designer generated code ///<summary> /// Required method for Designer support: do not modify /// the contents of this method with the code editor. ///</summary> private void InitializeComponent() { this.btnSearch = new System.Windows.Forms.Button(); this.txtFile = new System.Windows.Forms.TextBox(); this.lblFile = new System.Windows.Forms.Label(); this.lblDirectory = new System.Windows.Forms.Label(); this.lstFilesFound = new System.Windows.Forms.ListBox(); this.cboDirectory = new System.Windows.Forms.ComboBox(); this.statusStrip1 = new System.Windows.Forms.StatusStrip(); this.toolStripStatusLabel1 = new System.Windows.Forms.ToolStripStatusLabel(); this.button1 = new System.Windows.Forms.Button(); this.button2 = new System.Windows.Forms.Button(); this.textBox1 = new System.Windows.Forms.TextBox(); this.label1 = new System.Windows.Forms.Label(); this.statusStrip1.SuspendLayout(); this.SuspendLayout(); // // btnSearch // this.btnSearch.Location = new System.Drawing.Point(725, 266); this.btnSearch.Name = "btnSearch"; this.btnSearch.Size = new System.Drawing.Size(75, 23); this.btnSearch.TabIndex = 0; this.btnSearch.Text = "Search"; this.btnSearch.Click += new System.EventHandler(this.btnSearch_Click); // // txtFile // this.txtFile.Location = new System.Drawing.Point(380, 432); this.txtFile.Name = "txtFile"; this.txtFile.Size = new System.Drawing.Size(120, 20); this.txtFile.TabIndex = 4; this.txtFile.Text = "*.*"; // // lblFile // this.lblFile.Location = new System.Drawing.Point(380, 413); this.lblFile.Name = "lblFile"; this.lblFile.Size = new System.Drawing.Size(144, 16); this.lblFile.TabIndex = 5; this.lblFile.Text = "Search for files containing:"; // // lblDirectory // this.lblDirectory.Location = new System.Drawing.Point(16, 413); this.lblDirectory.Name = "lblDirectory"; this.lblDirectory.Size = new System.Drawing.Size(321, 16); this.lblDirectory.TabIndex = 3; this.lblDirectory.Text = "Look In:"; // // lstFilesFound // this.lstFilesFound.Location = new System.Drawing.Point(19, 35); this.lstFilesFound.Name = "lstFilesFound"; this.lstFilesFound.Size = new System.Drawing.Size(781, 225); this.lstFilesFound.TabIndex = 1; // // cboDirectory // this.cboDirectory.DropDownWidth = 112; this.cboDirectory.Location = new System.Drawing.Point(16, 432); this.cboDirectory.Name = "cboDirectory"; this.cboDirectory.Size = new System.Drawing.Size(321, 21); this.cboDirectory.TabIndex = 2; this.cboDirectory.Text = "ComboBox1"; // // statusStrip1 // this.statusStrip1.Items.AddRange(new System.Windows.Forms.ToolStripItem[] { this.toolStripStatusLabel1}); this.statusStrip1.Location = new System.Drawing.Point(0, 485); this.statusStrip1.Name = "statusStrip1"; this.statusStrip1.Size = new System.Drawing.Size(847, 22); this.statusStrip1.TabIndex = 0; // // toolStripStatusLabel1 // this.toolStripStatusLabel1.Name = "toolStripStatusLabel1"; this.toolStripStatusLabel1.Size = new System.Drawing.Size(0, 17); // // button1 // this.button1.Location = new System.Drawing.Point(725, 318); this.button1.Name = "button1"; this.button1.Size = new System.Drawing.Size(75, 23); this.button1.TabIndex = 6; this.button1.Text = "Exit"; this.button1.UseVisualStyleBackColor = true; this.button1.Click += new System.EventHandler(this.button1_Click); // // button2 // this.button2.Location = new System.Drawing.Point(569, 266); this.button2.Name = "button2"; this.button2.Size = new System.Drawing.Size(78, 23); this.button2.TabIndex = 7; this.button2.Text = "clear all data"; this.button2.UseVisualStyleBackColor = true; this.button2.Click += new System.EventHandler(this.button2_Click); // // textBox1 // this.textBox1.Location = new System.Drawing.Point(18, 321); this.textBox1.Name = "textBox1"; this.textBox1.Size = new System.Drawing.Size(402, 20); this.textBox1.TabIndex = 8; this.textBox1.Text = "data source=C:\\Users\\kuku\\testsuite\\versionen_neu.sqlite"; this.textBox1.TextChanged += new System.EventHandler(this.textBox1_TextChanged); // // label1 // this.label1.AutoSize = true; this.label1.Location = new System.Drawing.Point(15, 305); this.label1.Name = "label1"; this.label1.Size = new System.Drawing.Size(129, 13); this.label1.TabIndex = 9; this.label1.Text = "SQLite Connection String:"; // // Form1 // this.AutoScaleBaseSize = new System.Drawing.Size(5, 13); this.ClientSize = new System.Drawing.Size(847, 507); this.Controls.Add(this.label1); this.Controls.Add(this.textBox1); this.Controls.Add(this.button2); this.Controls.Add(this.button1); this.Controls.Add(this.statusStrip1); this.Controls.Add(this.btnSearch); this.Controls.Add(this.txtFile); this.Controls.Add(this.lblFile); this.Controls.Add(this.lblDirectory); this.Controls.Add(this.lstFilesFound); this.Controls.Add(this.cboDirectory); this.Name = "Form1"; this.Text = "Form1"; this.Load += new System.EventHandler(this.Form1_Load); this.statusStrip1.ResumeLayout(false); this.statusStrip1.PerformLayout(); this.ResumeLayout(false); this.PerformLayout(); } private void button2_Click(object sender, EventArgs e) { // // sqLiteConnection1 // try { sqLiteConnection1 = new System.Data.SQLite.SQLiteConnection(); sqLiteConnection1.ConnectionString = textBox1.Text; sqLiteConnection1.DefaultTimeout = 30; // while (sqLiteConnection1.con) } catch { // exception catching is for communists } try { sqLiteConnection1.Open(); delcmd = sqLiteConnection1.CreateCommand(); this.dbTrans = sqLiteConnection1.BeginTransaction(); this.delcmd.Transaction = dbTrans; Console.WriteLine("State:" + sqLiteConnection1.State); delcmd.CommandText = "DELETE FROM RESOURCE"; delcmd.ExecuteNonQuery(); delcmd.CommandText = "DELETE FROM CANDIDATE"; delcmd.ExecuteNonQuery(); dbTrans.Commit(); sqLiteConnection1.Close(); } catch { // } } private void button1_Click(object sender, EventArgs e) { Application.Exit(); } #endregion ///<summary> /// The main entry point for the application ///</summary> //[STAThread] static void Main() { Application.Run(new Form1()); } private void btnSearch_Click(object sender, System.EventArgs e) { // DB stuff // // sqLiteConnection1 // try { sqLiteConnection1 = new System.Data.SQLite.SQLiteConnection(); sqLiteConnection1.ConnectionString = textBox1.Text; sqLiteConnection1.DefaultTimeout = 30; // while (sqLiteConnection1.con) } catch { // exception catching is for communists } // sqLiteConnection1.Open(); // while (sqLiteConnection1.State != System.Data.ConnectionState.Open) // // ; resource_cmd = sqLiteConnection1.CreateCommand(); cdcmd = sqLiteConnection1.CreateCommand(); res_data_cmd = sqLiteConnection1.CreateCommand(); dbTrans = sqLiteConnection1.BeginTransaction(); resource_cmd.Transaction = dbTrans; cdcmd.Transaction = dbTrans; res_data_cmd.Transaction = dbTrans; resource_cmd.CommandText = "INSERT INTO resource (ref_id,size,name,md5sum) VALUES(?,?,?,?)"; cdcmd.CommandText = "INSERT INTO candidate (ref_id,mandant,md5sum,basename,prob_ref,suite,tag,size,creation_date) VALUES(?,?,?,?,?,?,?,?,?)"; ; // resource_cmd ref_id = resource_cmd.CreateParameter(); size = resource_cmd.CreateParameter(); name = resource_cmd.CreateParameter(); md5sum_res = resource_cmd.CreateParameter(); //cdcmd ref_id_cd = cdcmd.CreateParameter(); mandant = cdcmd.CreateParameter(); md5sum_cd = cdcmd.CreateParameter(); basename = cdcmd.CreateParameter(); prob_ref = cdcmd.CreateParameter(); suite = cdcmd.CreateParameter(); tag = cdcmd.CreateParameter(); size_cd = cdcmd.CreateParameter(); creation_date = cdcmd.CreateParameter(); cdcmd.Parameters.Add(ref_id_cd); cdcmd.Parameters.Add(mandant); cdcmd.Parameters.Add(md5sum_cd); cdcmd.Parameters.Add(basename); cdcmd.Parameters.Add(prob_ref); cdcmd.Parameters.Add(suite); cdcmd.Parameters.Add(tag); cdcmd.Parameters.Add(size_cd); cdcmd.Parameters.Add(creation_date); resource_cmd.Parameters.Add(ref_id); resource_cmd.Parameters.Add(size); resource_cmd.Parameters.Add(name); resource_cmd.Parameters.Add(md5sum_res); lstFilesFound.Items.Clear(); txtFile.Enabled = false; cboDirectory.Enabled = false; btnSearch.Text = "Searching..."; this.Cursor = Cursors.WaitCursor; Application.DoEvents(); DirSearch(cboDirectory.Text); dbTrans.Commit(); sqLiteConnection1.Close(); sqLiteConnection1.Dispose(); btnSearch.Text = "Search"; this.Cursor = Cursors.Default; txtFile.Enabled = true; cboDirectory.Enabled = true; } private void Form1_Load(object sender, System.EventArgs e) { cboDirectory.Items.Clear(); foreach (string s in Directory.GetLogicalDrives()) { cboDirectory.Items.Add(s); } cboDirectory.Text = "C:\\usr\\local\\www\\data\\test\\mandanten\\"; } void DirSearch(string sDir) { try { foreach (var d in Directory.GetDirectories(sDir)) { lstFilesFound.Items.Add(d); foreach (var f in Directory.GetFiles(d, txtFile.Text)) { string[] words = f.Split('\\'); bool startcopy=false; totalcount++; toolStripStatusLabel1.Text = totalcount.ToString()+ " Files"; Application.DoEvents(); FileInfo fileInfo = new FileInfo(f); //lstFilesFound.Items.Add(f); //"INSERT INTO resource (ref_id,size,name,md5sum) VALUES(?,?,?)"; // \<mandant>\probanden\6.00.1\4711\name.txt // resource_cmd string retval = GetMD5HashFromFile(f); ref_id.Value = fileInfo.Name + "_" + words[words.Length - 5] + "_" + words[words.Length - 2] + "_" + retval + "_" + fileInfo.Length.ToString(); size.Value = fileInfo.Length; name.Value = words[words.Length - 1]; md5sum_res.Value = retval; try { resource_cmd.ExecuteNonQuery(); /// Jetzt Daten einlesen als BLOB FileStream fs = new FileStream(f, FileMode.OpenOrCreate, FileAccess.Read); try { byte[] MyData = new byte[fs.Length]; long fsize = fs.Length; fs.Read(MyData, 0, System.Convert.ToInt32(fs.Length)); fs.Close(); res_data_cmd.CommandText = "UPDATE resource set data=@blob where ref_id='" + ref_id.Value + "' "; res_data_cmd.Parameters.Add("@blob", DbType.Binary, (int) fsize).Value = MyData; res_data_cmd.ExecuteNonQuery(); } catch (System.Exception e) { lstFilesFound.Items.Add(e.Message); } } } catch (System.Exception e) { // Tue nichts in dem Fall; } //"INSERT INTO candidate (ref_id,mandant,md5sum,basename,prob_ref,suite,tag,size,creation_date) VALUES(?,?,?,?,?,?,?,?,?)" // cdcmd ref_id_cd.Value = fileInfo.Name + "_" + words[words.Length - 5]+"_" +words[words.Length-2] + "_" + retval + "_" + fileInfo.Length.ToString(); mandant.Value = words[words.Length - 5]; md5sum_cd.Value = retval; basename.Value = fileInfo.Name; prob_ref.Value = (words[words.Length - 4])[0].ToString(); // nimm nur p oder r (probanden/referenzen) suite.Value = words[words.Length-2]; tag.Value = words[words.Length-3]; size_cd.Value = fileInfo.Length; creation_date.Value = fileInfo.CreationTimeUtc; cdcmd.ExecuteNonQuery(); } DirSearch(d); } } catch (System.Exception excpt) { Console.WriteLine(excpt.Message); lstFilesFound.Items.Add(excpt.Message); } } private string toString(int i) { throw new NotImplementedException(); } protected string GetMD5HashFromFile(string fileName) { FileStream file = new FileStream(fileName, FileMode.Open); MD5 md5 = new MD5CryptoServiceProvider(); byte[] retVal = md5.ComputeHash(file); file.Close(); StringBuilder sb = new StringBuilder(); for (int i = 0; i< retVal.Length; i++) { sb.Append(retVal[i].ToString("x2")); } return sb.ToString(); } } } _______________________________________________ sqlite-users mailing list sqlite-users@sqlite.org http://sqlite.org:8080/cgi-bin/mailman/listinfo/sqlite-users