Eelco,
Thanks, that's a lot faster :)
Conversion started @ Sun Jul 28 17:24:37 2002
Conversion finished @ Sun Jul 28 17:28:20 2002
I had to edit the file a little bit so that it inputs files
into their respective mailboxes under one user. Also I had
to edit:
*mbox_delimiter_pattern = "^From .*  ";
to
*mbox_delimiter_pattern = "^From .* "; (one less space)
However this seems to confuse imapd I think, because the imported
folders where the above actually made a difference now die on:
Jul 28 17:53:45 java dbmail/imap4[18953]: imap_process(): Executing command
fetch...
Jul 28 17:53:48 java dbmail/imap4[18953]: mime_readheader(): no valid mime
headers found
Jul 28 17:53:48 java dbmail/imap4[18953]: db_fetch_headers(): error fetching
message, ID: 7518
Jul 28 17:53:48 java dbmail/imap4[18953]: db_fetch_headers(): got error at
level 0
Jul 28 17:53:48 java dbmail/imap4[18953]: Received SIGSEGV
Any ideas on this? Maybe I should just change the delimiters in the mailbox?
On Sat, Jul 27, 2002 at 11:15:26PM +0200, Eelco van Beek - IC&S wrote:
> You should take a look at the uni-one converter. It was especially
> written for a company called uni-hone here in the Netherlands to convert
> 6,5 Gbyte of mboxes. All boxes were converted in 51 minutes.
> >In preparation for migrating our office setup to dbmail, I've decided
> >to convert my personal mailsetup first. I have about 165Mb of mboxes
> >I want to convert to dbmail with the mbox2dbmail tool. For convenience
> >I edited mbox2dbmail so that I can give it a mailbox as second argument.
> >However, the mbox import seems to be a bit slow: it takes about half an
> >hour for a 5 MB / 240-mail mbox. Can anyone shed some light on this?
> >The hardware is a Via C3/800 (about the same as a Celeron) with 128 MB RAM,
> >all actions are done on localhost.
Regards,
-- Frido
/*
* this program traverses a directory tree and executes
* dbmail conversion on each file.
*
* slightly edited to parse this kind of directory
*
* /home/user/mail/box
* /home/user/mail/lists/mailinglist
*
* create mailbox for each mboxfile (box, lists/mailinglist)
* use constant for user
*
* did not change printf statements, maybe confusing
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <dirent.h>
#include <time.h>
#include <unistd.h>
#include "db.h"
#include "auth.h"
#include "dbmailtypes.h"
#include "debug.h"
#include <regex.h>
/* Maximum number of bytes (including the terminating NUL) read by each
 * fgets() call in process_mboxfile(). */
#define MAX_LINESIZE 1024
/* Size of the buffer that receives the generated unique message id. */
#define UID_SIZE 70
/* Regular expression, compiled with regcomp(), that recognises the "From "
 * line which starts each message in an mbox file. */
const char *mbox_delimiter_pattern = "^From .* ";
/* Shared read buffer: message data accumulates here until it is handed to
 * db_insert_message_block(). The extra MAX_LINESIZE + 1 bytes leave room to
 * read one more line once READ_BLOCK_SIZE bytes have accumulated.
 * READ_BLOCK_SIZE is not defined in this file -- presumably it comes from one
 * of the project headers included above. */
char blk[READ_BLOCK_SIZE + MAX_LINESIZE + 1];
/* program name passed to openlog() for syslog */
#define PNAME "dbmail/uni-one-convertor"
/* Returns a pointer to the part of path that follows its last '/'. */
char *getusername (char *path);
/* Walks path recursively and converts every non-directory entry found. */
int traverse (char *path);
/* Imports the mbox file `file` for the user identified by userid. */
int process_mboxfile(char *file, u64_t userid);
/*
 * Program entry point.
 *
 * Expects the directory to convert as the first command-line argument,
 * opens the syslog and database connections, runs traverse() on that
 * directory and finally reports when the conversion started and finished.
 *
 * Returns -1 when the argument is missing or a connection cannot be
 * opened, otherwise whatever traverse() returned.
 */
int main (int argc, char* argv[])
{
  time_t began;
  time_t ended;
  int status;

  if (argc < 2)
  {
    printf ("Error, traverse need a directory as argument\n");
    return -1;
  }

  /* log through syslog, errors only */
  openlog(PNAME, LOG_PID, LOG_MAIL);
  configure_debug(TRACE_ERROR, 1, 0);

  /* both the message database and the authentication database are needed;
   * the second connection is only attempted when the first succeeded */
  if (!(db_connect() == 0 && auth_connect() == 0))
  {
    printf("Error opening dbase connections\n");
    return -1;
  }

  began = time (NULL);
  status = traverse (argv[1]);
  ended = time (NULL);

  /* ctime() already terminates its result with a newline */
  printf ("Conversion started @ %s", ctime(&began));
  printf ("Conversion finished @ %s", ctime(&ended));

  return status;
}
/*
 * Return the last component of `path`: the text that follows its final '/'.
 *
 * When `path` contains no '/', the whole string is returned. (The previous
 * hand-written backwards scan returned path+1 in that case, which dropped
 * the first character and, for an empty string, pointed past the
 * terminating NUL.)
 *
 * The result points into `path` itself; nothing is allocated or modified.
 * A path that ends in '/' yields an empty string.
 */
char *getusername (char *path)
{
  char *slash = strrchr (path, '/');

  return (slash != NULL) ? slash + 1 : path;
}
int traverse (char *path)
{
char newpath [1024];
char *username;
struct dirent **namelist;
int n;
u64_t userid;
n = scandir (path, &namelist, 0, alphasort);
if (n < 0)
{
printf ("file %s\n",path);
username = getusername(path);
printf ("username %s\n", username);
printf("creating user...");
userid = auth_user_exists("frido");
if (userid != -1 && userid != 0)
{
printf("Ok id [%llu]\n", userid);
printf("converting mailbox...");
fflush(stdout);
n = process_mboxfile(path, userid);
if (n != 0)
printf("Warning: error converting mailbox\n");
else
printf ("done :)\n");
}
else
{
printf("user already exists. Skipping\n");
}
}
else
{
while (n--)
{
if ((strcmp(namelist[n]->d_name,"..")!=0) &&
(strcmp(namelist[n]->d_name,".")!=0))
{
sprintf (newpath,"%s/%s",path, namelist[n]->d_name);
traverse (newpath);
}
free (namelist[n]);
}
free(namelist);
}
return 0;
}
/*
 * Finish the message `msgid`: store the `cnt` bytes still pending in the
 * shared buffer `blk`, then record the message's unique id and its sizes.
 *
 * `size` is the number of bytes already stored for this message and
 * `newlines` the number of lines read for it; both are passed on to
 * db_update_message() exactly as the inline code did before.
 */
static void finish_message(u64_t msgid, u64_t userid, unsigned cnt,
                           u64_t size, unsigned newlines)
{
  char newunique[UID_SIZE];

  /* Whatever sits at blk[cnt] (the next delimiter line, or the NUL left by
   * fgets) is no longer needed, so terminate the pending data the same way
   * the other block writes do.
   * NOTE(review): whether db_insert_message_block() relies on the
   * terminator is not visible here. */
  blk[cnt] = '\0';
  db_insert_message_block(blk, cnt, msgid);

  /* NOTE(review): the id only has one-second resolution, so messages
   * finished within the same second receive the same id -- confirm whether
   * dbmail needs it to be unique. */
  snprintf(newunique, UID_SIZE, "%lluA%lu",
           (unsigned long long)userid, (unsigned long)time(NULL));
  db_update_message(msgid, newunique, size + cnt, size + cnt + newlines);

  trace(TRACE_ERROR, "message [%llu] inserted, [%llu] bytes",
        (unsigned long long)msgid, (unsigned long long)(size + cnt));
}

/*
 * Import the mbox file `file` into a mailbox owned by `userid`.
 *
 * The mailbox name is the path itself with any leading "./" removed; `file`
 * is not modified. The file is split into messages at lines matching
 * mbox_delimiter_pattern. For each message the header (everything up to and
 * including the first empty line) is stored as one block and the body in
 * blocks of READ_BLOCK_SIZE bytes. Lines that precede the first delimiter
 * belong to no message and are skipped.
 *
 * Returns 0 on success and -1 when the pattern cannot be compiled, the file
 * cannot be opened, a read error occurs or a header does not fit into the
 * block buffer.
 * NOTE(review): the results of the db_* calls are not checked, as before.
 */
int process_mboxfile(char *file, u64_t userid)
{
  regex_t preg;
  FILE *infile;
  char *mailbox = file;
  char saved;
  int in_msg = 0;
  int header_passed = 0;
  int status = 0;
  unsigned cnt = 0;
  unsigned len;
  unsigned newlines = 0;
  u64_t msgid = 0;
  u64_t size = 0;

  /* Only a leading "./" is dropped: searching the whole path would also
   * hit the tail of "../" and turn "../mail" into ".mail". */
  while (strncmp(mailbox, "./", 2) == 0)
    mailbox += 2;

  if (regcomp(&preg, mbox_delimiter_pattern, REG_NOSUB) != 0)
  {
    trace(TRACE_ERROR,"Regex compilation failed.");
    return -1;
  }

  if ((infile = fopen(file, "r")) == NULL)
  {
    trace(TRACE_ERROR,"Could not open file [%s]", file);
    regfree(&preg);
    return -1;
  }

  /* create the mailbox only once the file is known to be readable */
  db_createmailbox(mailbox, userid);

  while (fgets(&blk[cnt], MAX_LINESIZE, infile) != NULL)
  {
    /* check if this is an mbox delimiter */
    if (regexec(&preg, &blk[cnt], 0, NULL, 0) == 0)
    {
      if (in_msg)
        finish_message(msgid, userid, cnt, size, newlines);

      /* start new message; the delimiter line itself is not stored */
      in_msg = 1;
      msgid = db_insert_message(userid, mailbox, 0);
      header_passed = 0;
      cnt = 0;
      size = 0;
      newlines = 0;
      continue;
    }

    /* data in front of the first delimiter has no message to go into */
    if (!in_msg)
      continue;

    newlines++;
    len = (unsigned)strlen(&blk[cnt]);

    if (!header_passed)
    {
      /* we're still reading the header */
      if (strcmp(&blk[cnt], "\n") == 0)
      {
        /* the empty line ends the header; store it as one block */
        db_insert_message_block(blk, cnt + len, msgid);
        header_passed = 1;
        size += (cnt + len);
        cnt = 0;
      }
      else
      {
        cnt += len;
        if (cnt >= READ_BLOCK_SIZE)
        {
          /* another line could run past the end of blk; give up on this
           * file instead of corrupting memory.
           * NOTE(review): the message started above is left unfinished in
           * the database. */
          trace(TRACE_ERROR, "header too large in file [%s]", file);
          status = -1;
          goto done;
        }
      }
    }
    else
    {
      /* this is body data */
      cnt += len;
      if (cnt >= READ_BLOCK_SIZE)
      {
        /* write one full block, terminated for the duration of the call,
         * and move the remainder to the start of the buffer */
        saved = blk[READ_BLOCK_SIZE];
        blk[READ_BLOCK_SIZE] = '\0';
        db_insert_message_block(blk, READ_BLOCK_SIZE, msgid);
        blk[READ_BLOCK_SIZE] = saved;
        memmove(blk, &blk[READ_BLOCK_SIZE], cnt - (READ_BLOCK_SIZE));
        size += READ_BLOCK_SIZE;
        cnt -= READ_BLOCK_SIZE;
      }
    }
  }

  if (ferror(infile))
  {
    trace(TRACE_ERROR, "Error reading file [%s]", file);
    status = -1;
  }

  /* update & end the last message */
  if (msgid > 0)
    finish_message(msgid, userid, cnt, size, newlines);

done:
  fclose(infile);
  regfree(&preg);
  return status;
}