I have a .NET C# application that is using System.Data.SQLite.
The application goes through a filesystem and extracts file names,
builds md5sums on every file
and builds up a database. Reason behind is, to reduce the number of
duplicates.

The files as a tar file sum up to 66 GB. It's about a million files. I
learnt that it is a good idea to do all
in one transaction in SQLite, but I wonder if it is still a good idea
when inserting many files as BLOBs (I decided to put everything into the
database).

My first attempt ran overnight and when I came to the machine next
morning, the SQLITe database file was grown
to (only) 3.4GB and an alert box had popped up saying something like
"bad library use" of System.Data.Sqlite.

Would it be possibly better to commit and close the transaction after
every blob update?

Whether or not I'm inserting a file into the list of resources depends
on the existence of a unique
ref_id which I build from the name_md5sum_size of the file. So, when two
files have the same name, same size
and the same md5sum, I decide they are identical.

When I hit a UNIQUE violation (try ExecuteNonQuery()) I decide not to
UPDATE the record with the BLOB.
If it's a first time entry, I decide to UPDATE the BLOB.

Below is the code. Especially the transaction portion might be wirth
considering.

Thank you.

--
Christoph

using System;
using System.Data;
using System.Data.Common;

using System.Security.Cryptography;
using System.Text;
using System.Windows.Forms;

using System.IO;
using System.Data.SQLite;



namespace sqliteForm
{
 ///<summary>
 /// Summary description for Form1
 ///</summary>
 /// public class Form1 : System.Windows.Forms.Form
 public partial class Form1 : System.Windows.Forms.Form
 {
 internal System.Windows.Forms.Button btnSearch;
 internal System.Windows.Forms.TextBox txtFile;
 internal System.Windows.Forms.Label lblFile;
 internal System.Windows.Forms.Label lblDirectory;
 internal System.Windows.Forms.ListBox lstFilesFound;
 internal System.Windows.Forms.ComboBox cboDirectory;

 private int totalcount;
 private SQLiteTransaction dbTrans;
 private SQLiteCommand   resource_cmd;
 private SQLiteCommand res_data_cmd;

 private SQLiteCommand   cdcmd;
 private SQLiteCommand   delcmd;

 private SQLiteParameter size;
 private SQLiteParameter creation_date;
 private SQLiteParameter name;
 private SQLiteParameter data;

 private SQLiteParameter md5sum_res;
 private SQLiteParameter md5sum_cd;

 private SQLiteParameter size_cd;
 private SQLiteParameter tag;
 private SQLiteParameter suite;
 private SQLiteParameter ref_id;
     private SQLiteParameter ref_id_cd;



 private SQLiteParameter prob_ref;
 private SQLiteParameter mandant;
 private SQLiteParameter basename;



     public Form1()
 {
 //
 // Required for Windows Form Designer support
 //
 InitializeComponent();

 //
 // TODO: Add any constructor code after InitializeComponent call.
 //
 }

 ///<summary>
 /// Clean up any resources being used.
 ///</summary>
 protected override void Dispose( bool disposing )
 {
 if( disposing )
 {
 if (components != null)
 {
  components.Dispose();
 }
 }
 base.Dispose( disposing );
 }

 #region Windows Form Designer generated code
 ///<summary>
 /// Required method for Designer support: do not modify
 /// the contents of this method with the code editor.
 ///</summary>
 private void InitializeComponent()
 {
            this.btnSearch = new System.Windows.Forms.Button();
            this.txtFile = new System.Windows.Forms.TextBox();
            this.lblFile = new System.Windows.Forms.Label();
            this.lblDirectory = new System.Windows.Forms.Label();
            this.lstFilesFound = new System.Windows.Forms.ListBox();
            this.cboDirectory = new System.Windows.Forms.ComboBox();
            this.statusStrip1 = new System.Windows.Forms.StatusStrip();
            this.toolStripStatusLabel1 = new
System.Windows.Forms.ToolStripStatusLabel();
            this.button1 = new System.Windows.Forms.Button();
            this.button2 = new System.Windows.Forms.Button();
            this.textBox1 = new System.Windows.Forms.TextBox();
            this.label1 = new System.Windows.Forms.Label();
            this.statusStrip1.SuspendLayout();
            this.SuspendLayout();
            //
            // btnSearch
            //
            this.btnSearch.Location = new System.Drawing.Point(725, 266);
            this.btnSearch.Name = "btnSearch";
            this.btnSearch.Size = new System.Drawing.Size(75, 23);
            this.btnSearch.TabIndex = 0;
            this.btnSearch.Text = "Search";
            this.btnSearch.Click += new
System.EventHandler(this.btnSearch_Click);
            //
            // txtFile
            //
            this.txtFile.Location = new System.Drawing.Point(380, 432);
            this.txtFile.Name = "txtFile";
            this.txtFile.Size = new System.Drawing.Size(120, 20);
            this.txtFile.TabIndex = 4;
            this.txtFile.Text = "*.*";
            //
            // lblFile
            //
            this.lblFile.Location = new System.Drawing.Point(380, 413);
            this.lblFile.Name = "lblFile";
            this.lblFile.Size = new System.Drawing.Size(144, 16);
            this.lblFile.TabIndex = 5;
            this.lblFile.Text = "Search for files containing:";
            //
            // lblDirectory
            //
            this.lblDirectory.Location = new System.Drawing.Point(16, 413);
            this.lblDirectory.Name = "lblDirectory";
            this.lblDirectory.Size = new System.Drawing.Size(321, 16);
            this.lblDirectory.TabIndex = 3;
            this.lblDirectory.Text = "Look In:";
            //
            // lstFilesFound
            //
            this.lstFilesFound.Location = new System.Drawing.Point(19, 35);
            this.lstFilesFound.Name = "lstFilesFound";
            this.lstFilesFound.Size = new System.Drawing.Size(781, 225);
            this.lstFilesFound.TabIndex = 1;
            //
            // cboDirectory
            //
            this.cboDirectory.DropDownWidth = 112;
            this.cboDirectory.Location = new System.Drawing.Point(16, 432);
            this.cboDirectory.Name = "cboDirectory";
            this.cboDirectory.Size = new System.Drawing.Size(321, 21);
            this.cboDirectory.TabIndex = 2;
            this.cboDirectory.Text = "ComboBox1";
            //
            // statusStrip1
            //
            this.statusStrip1.Items.AddRange(new
System.Windows.Forms.ToolStripItem[] {
            this.toolStripStatusLabel1});
            this.statusStrip1.Location = new System.Drawing.Point(0, 485);
            this.statusStrip1.Name = "statusStrip1";
            this.statusStrip1.Size = new System.Drawing.Size(847, 22);
            this.statusStrip1.TabIndex = 0;
            //
            // toolStripStatusLabel1
            //
            this.toolStripStatusLabel1.Name = "toolStripStatusLabel1";
            this.toolStripStatusLabel1.Size = new
System.Drawing.Size(0, 17);
            //
            // button1
            //
            this.button1.Location = new System.Drawing.Point(725, 318);
            this.button1.Name = "button1";
            this.button1.Size = new System.Drawing.Size(75, 23);
            this.button1.TabIndex = 6;
            this.button1.Text = "Exit";
            this.button1.UseVisualStyleBackColor = true;
            this.button1.Click += new
System.EventHandler(this.button1_Click);
            //
            // button2
            //
            this.button2.Location = new System.Drawing.Point(569, 266);
            this.button2.Name = "button2";
            this.button2.Size = new System.Drawing.Size(78, 23);
            this.button2.TabIndex = 7;
            this.button2.Text = "clear all data";
            this.button2.UseVisualStyleBackColor = true;
            this.button2.Click += new
System.EventHandler(this.button2_Click);
            //
            // textBox1
            //
            this.textBox1.Location = new System.Drawing.Point(18, 321);
            this.textBox1.Name = "textBox1";
            this.textBox1.Size = new System.Drawing.Size(402, 20);
            this.textBox1.TabIndex = 8;
            this.textBox1.Text = "data
source=C:\\Users\\kuku\\testsuite\\versionen_neu.sqlite";
            this.textBox1.TextChanged += new
System.EventHandler(this.textBox1_TextChanged);
            //
            // label1
            //
            this.label1.AutoSize = true;
            this.label1.Location = new System.Drawing.Point(15, 305);
            this.label1.Name = "label1";
            this.label1.Size = new System.Drawing.Size(129, 13);
            this.label1.TabIndex = 9;
            this.label1.Text = "SQLite Connection String:";
            //
            // Form1
            //
            this.AutoScaleBaseSize = new System.Drawing.Size(5, 13);
            this.ClientSize = new System.Drawing.Size(847, 507);
            this.Controls.Add(this.label1);
            this.Controls.Add(this.textBox1);
            this.Controls.Add(this.button2);
            this.Controls.Add(this.button1);
            this.Controls.Add(this.statusStrip1);
            this.Controls.Add(this.btnSearch);
            this.Controls.Add(this.txtFile);
            this.Controls.Add(this.lblFile);
            this.Controls.Add(this.lblDirectory);
            this.Controls.Add(this.lstFilesFound);
            this.Controls.Add(this.cboDirectory);
            this.Name = "Form1";
            this.Text = "Form1";
            this.Load += new System.EventHandler(this.Form1_Load);
            this.statusStrip1.ResumeLayout(false);
            this.statusStrip1.PerformLayout();
            this.ResumeLayout(false);
            this.PerformLayout();

 }

     private void button2_Click(object sender, EventArgs e)
     {
         //
         // sqLiteConnection1
         //
         try
         {
             sqLiteConnection1 = new System.Data.SQLite.SQLiteConnection();
             sqLiteConnection1.ConnectionString = textBox1.Text;
             sqLiteConnection1.DefaultTimeout = 30;

             // while (sqLiteConnection1.con)
         }
         catch
         {
             // exception catching is for communists
         }

         try
         {

             sqLiteConnection1.Open();
             delcmd = sqLiteConnection1.CreateCommand();
             this.dbTrans = sqLiteConnection1.BeginTransaction();
             this.delcmd.Transaction = dbTrans;
             Console.WriteLine("State:" + sqLiteConnection1.State);
             delcmd.CommandText = "DELETE FROM RESOURCE";
             delcmd.ExecuteNonQuery();
             delcmd.CommandText = "DELETE FROM CANDIDATE";
             delcmd.ExecuteNonQuery();
             dbTrans.Commit();
             sqLiteConnection1.Close();
         }
         catch
         {
             //
         }
     }

     private void button1_Click(object sender, EventArgs e)
     {
         Application.Exit();
     }

     #endregion

 ///<summary>
 /// The main entry point for the application ///</summary>
 //[STAThread] static void Main() { Application.Run(new Form1()); }

 private void btnSearch_Click(object sender, System.EventArgs e)
 {

// DB stuff

     //
     // sqLiteConnection1
     //
     try
     {
         sqLiteConnection1 = new System.Data.SQLite.SQLiteConnection();
         sqLiteConnection1.ConnectionString = textBox1.Text;
         sqLiteConnection1.DefaultTimeout = 30;

         // while (sqLiteConnection1.con)
     }
     catch
     {
         // exception catching is for communists
     }

     //
     sqLiteConnection1.Open();
     // while (sqLiteConnection1.State !=
System.Data.ConnectionState.Open)
     //
     //   ;


     resource_cmd = sqLiteConnection1.CreateCommand();
     cdcmd = sqLiteConnection1.CreateCommand();
     res_data_cmd = sqLiteConnection1.CreateCommand();
     dbTrans = sqLiteConnection1.BeginTransaction();
     resource_cmd.Transaction = dbTrans;
     cdcmd.Transaction = dbTrans;
     res_data_cmd.Transaction = dbTrans;
     resource_cmd.CommandText = "INSERT INTO resource
(ref_id,size,name,md5sum) VALUES(?,?,?,?)";
     cdcmd.CommandText = "INSERT INTO candidate
(ref_id,mandant,md5sum,basename,prob_ref,suite,tag,size,creation_date)
VALUES(?,?,?,?,?,?,?,?,?)"; ;

     // resource_cmd
     ref_id = resource_cmd.CreateParameter();
     size = resource_cmd.CreateParameter();
     name = resource_cmd.CreateParameter();
     md5sum_res = resource_cmd.CreateParameter();
     //cdcmd
     ref_id_cd = cdcmd.CreateParameter();
     mandant = cdcmd.CreateParameter();
     md5sum_cd = cdcmd.CreateParameter();
     basename = cdcmd.CreateParameter();
     prob_ref = cdcmd.CreateParameter();
     suite = cdcmd.CreateParameter();
     tag = cdcmd.CreateParameter();
     size_cd = cdcmd.CreateParameter();
     creation_date = cdcmd.CreateParameter();

     cdcmd.Parameters.Add(ref_id_cd);
     cdcmd.Parameters.Add(mandant);
     cdcmd.Parameters.Add(md5sum_cd);
     cdcmd.Parameters.Add(basename);
     cdcmd.Parameters.Add(prob_ref);
     cdcmd.Parameters.Add(suite);
     cdcmd.Parameters.Add(tag);
     cdcmd.Parameters.Add(size_cd);
     cdcmd.Parameters.Add(creation_date);

     resource_cmd.Parameters.Add(ref_id);
     resource_cmd.Parameters.Add(size);
     resource_cmd.Parameters.Add(name);
     resource_cmd.Parameters.Add(md5sum_res);



    lstFilesFound.Items.Clear();
    txtFile.Enabled = false;
    cboDirectory.Enabled = false;
    btnSearch.Text = "Searching...";
    this.Cursor = Cursors.WaitCursor;
    Application.DoEvents();
    DirSearch(cboDirectory.Text);
    dbTrans.Commit();
    sqLiteConnection1.Close();
    sqLiteConnection1.Dispose();
    btnSearch.Text = "Search";
    this.Cursor = Cursors.Default;
    txtFile.Enabled = true;
    cboDirectory.Enabled = true;
 }

 private void Form1_Load(object sender, System.EventArgs e)
 {

    cboDirectory.Items.Clear();
    foreach (string s in Directory.GetLogicalDrives())
    {
       cboDirectory.Items.Add(s);
    }
    cboDirectory.Text = "C:\\usr\\local\\www\\data\\test\\mandanten\\";
 }

 void DirSearch(string sDir)
 {
    try
    {
       foreach (var d in Directory.GetDirectories(sDir))
       {

           lstFilesFound.Items.Add(d);
           foreach (var f in Directory.GetFiles(d, txtFile.Text))
          {

              string[] words = f.Split('\\');
              bool startcopy=false;
              totalcount++;
              toolStripStatusLabel1.Text = totalcount.ToString()+ " Files";
              Application.DoEvents();
              FileInfo fileInfo = new FileInfo(f);


              //lstFilesFound.Items.Add(f);
              //"INSERT INTO resource (ref_id,size,name,md5sum)
VALUES(?,?,?)";
              // \<mandant>\probanden\6.00.1\4711\name.txt
              // resource_cmd
              string retval = GetMD5HashFromFile(f);
              ref_id.Value = fileInfo.Name + "_" + words[words.Length -
5] + "_" + words[words.Length - 2] + "_" + retval + "_" +
fileInfo.Length.ToString();

              size.Value = fileInfo.Length;
              name.Value = words[words.Length - 1];
              md5sum_res.Value = retval;
              try
              {
                  resource_cmd.ExecuteNonQuery();

                  /// Jetzt Daten einlesen als BLOB
                  FileStream fs = new FileStream(f,
FileMode.OpenOrCreate, FileAccess.Read);


                      try
                      {
                          byte[] MyData = new byte[fs.Length];
                          long fsize = fs.Length;
                          fs.Read(MyData, 0,
System.Convert.ToInt32(fs.Length));
                          fs.Close();

                          res_data_cmd.CommandText = "UPDATE  resource
set data=@blob where ref_id='" + ref_id.Value +
                                                     "' ";
                          res_data_cmd.Parameters.Add("@blob",
DbType.Binary, (int) fsize).Value = MyData;

                          res_data_cmd.ExecuteNonQuery();
                      }
                      catch (System.Exception e)
                      {
                          lstFilesFound.Items.Add(e.Message);

                      }
                  }

              } catch (System.Exception e)
              {
                  // Tue nichts in dem Fall;

              }

              //"INSERT INTO candidate
(ref_id,mandant,md5sum,basename,prob_ref,suite,tag,size,creation_date)
VALUES(?,?,?,?,?,?,?,?,?)"
              // cdcmd
              ref_id_cd.Value = fileInfo.Name + "_" +
words[words.Length - 5]+"_" +words[words.Length-2] + "_" + retval + "_"
+ fileInfo.Length.ToString();
              mandant.Value = words[words.Length - 5];
              md5sum_cd.Value = retval;
              basename.Value = fileInfo.Name;
              prob_ref.Value = (words[words.Length - 4])[0].ToString();
// nimm nur p oder r (probanden/referenzen)
              suite.Value = words[words.Length-2];
              tag.Value = words[words.Length-3];
              size_cd.Value = fileInfo.Length;
              creation_date.Value = fileInfo.CreationTimeUtc;
              cdcmd.ExecuteNonQuery();
          }
          DirSearch(d);
       }
    }
    catch (System.Exception excpt)
    {
       Console.WriteLine(excpt.Message);
       lstFilesFound.Items.Add(excpt.Message);
    }
 }

     private string toString(int i)
     {
         throw new NotImplementedException();
     }


     protected string GetMD5HashFromFile(string fileName)
     {
         FileStream file = new FileStream(fileName, FileMode.Open);
         MD5 md5 = new MD5CryptoServiceProvider();
         byte[] retVal = md5.ComputeHash(file);
         file.Close();

         StringBuilder sb = new StringBuilder();
         for (int i = 0; i<  retVal.Length; i++)
         {
             sb.Append(retVal[i].ToString("x2"));
         }
         return sb.ToString();
     }



 }
}


_______________________________________________
sqlite-users mailing list
sqlite-users@sqlite.org
http://sqlite.org:8080/cgi-bin/mailman/listinfo/sqlite-users

Reply via email to