Update of /cvsroot/mahogany/M/lib/dspam
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv19217
Modified Files:
dspam.c libdspam.c libdspam.h libdspam_objects.h nodetree.c
storage_driver.h util.c
Log Message:
resolved merge conflicts with latest imported snapshot
Index: dspam.c
===================================================================
RCS file: /cvsroot/mahogany/M/lib/dspam/dspam.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -u -2 -r1.3 -r1.4
--- dspam.c 8 Jul 2004 07:44:07 -0000 1.3
+++ dspam.c 10 Jul 2004 16:26:02 -0000 1.4
@@ -54,4 +54,5 @@
#include "dspam.h"
+#include "pref.h"
#include "libdspam.h"
#include "language.h"
@@ -60,4 +61,5 @@
#include "md5.h"
#include "tbt.h"
+#include "pref.h"
#ifdef SYSTEM_LOGGING
@@ -102,4 +104,8 @@
#endif
+#ifdef DEBUG
+ DO_DEBUG = 0;
+#endif
+
srand (getpid ());
@@ -127,5 +133,5 @@
#ifdef TRUSTED_USER_SECURITY
p = getpwuid (getuid ());
- trusted = is_trusted(p);
+ trusted = _ds_is_trusted(p);
if (!trusted)
{
@@ -160,4 +166,12 @@
}
+#ifdef DEBUG
+ /* Debug */
+ if (!strcmp (argv[i], "--debug"))
+ {
+ DO_DEBUG = 1;
+ }
+#endif
+
/* Set training mode */
if (!strncmp (argv[i], "--mode=", 7))
@@ -317,5 +331,4 @@
}
-
/*
Which features should be enabled?
@@ -548,5 +561,5 @@
while (node_nt != NULL)
{
- PREF *PTX;
+ AGENT_PREF PTX;
struct stat s;
char filename[MAX_FILENAME_LENGTH];
@@ -555,5 +568,4 @@
#ifdef DEBUG
- DO_DEBUG = 0;
strcpy (filename, _ds_userdir_path (node_nt->ptr, "debug"));
if (!stat (filename, &s))
@@ -566,6 +578,4 @@
if (DO_DEBUG) {
- DO_DEBUG = 1;
-
LOGDEBUG ("DSPAM Instance Startup");
LOGDEBUG ("full arg list: %s", debug_args);
@@ -590,5 +600,5 @@
LOGDEBUG("Loading preferences for user %s", (const char *) node_nt->ptr);
- PTX = load_preferences(node_nt->ptr);
+ PTX = _ds_pref_load(node_nt->ptr);
#ifdef OPT_IN
@@ -641,5 +651,5 @@
/* Processing Error */
- if (result != DSR_ISINNOCENT)
+ if (result != DSR_ISINNOCENT && result != DSR_ISWHITELISTED)
{
LOG (LOG_WARNING,
@@ -673,6 +683,9 @@
if (ATX.source != DSS_CORPUS) {
if (ATX.spam_args[0] != 0 ||
- (PTX != NULL && (
- PTX->spam_mode == DPS_TAG || PTX->spam_mode == DPS_DELIVER)))
+ (PTX != NULL &&
+ ( !strcmp(_ds_pref_val(PTX, "spamAction"), "tag") ||
+ !strcmp(_ds_pref_val(PTX, "spamAction"), "deliver") )
+ )
+ )
{
if (ATX.spam_args[0] != 0) {
@@ -732,4 +745,5 @@
returns: DSR_ISINNOCENT - Message is innocent
DSR_ISSPAM - Message is spam
+ DSR_ISWHITELISTED - Message is whitelisted
(other) - Error
*/
@@ -737,5 +751,5 @@
int
process_message (AGENT_CTX *ATX,
- PREF *PTX,
+ AGENT_PREF PTX,
buffer * message,
const char *username)
@@ -784,4 +798,5 @@
/* Set Group Membership */
+ if (strcmp(_ds_pref_val(PTX, "ignoreGroups"), "on")) {
snprintf (filename, sizeof (filename), "%s/group", DSPAM_HOME);
file = fopen (filename, "r");
@@ -912,4 +927,5 @@
fclose (file);
}
+ }
/* Crunch our agent context into a DSPAM context */
@@ -922,6 +938,6 @@
/* If there is no preference, defer to commandline */
- if (PTX != NULL && PTX->feature_bnr != -1) {
- if (PTX->feature_bnr)
+ if (PTX != NULL && strcmp(_ds_pref_val(PTX, "enableBNR"), "")) {
+ if (!strcmp(_ds_pref_val(PTX, "enableBNR"), "on"))
f_all |= DSF_NOISE;
} else {
@@ -930,6 +946,6 @@
}
- if (PTX != NULL && PTX->feature_whitelist != -1) {
- if (PTX->feature_whitelist)
+ if (PTX != NULL && strcmp(_ds_pref_val(PTX, "enableWhitelist"), "")) {
+ if (!strcmp(_ds_pref_val(PTX, "enableWhitelist"), "on"))
f_all |= DSF_WHITELIST;
} else {
@@ -949,6 +965,9 @@
}
- if (PTX != NULL && PTX->training_buffer >= 0)
- CTX->training_buffer = PTX->training_buffer;
+ if (PTX != NULL && strcmp(_ds_pref_val(PTX, "statisticalSedation"), ""))
+ CTX->training_buffer = atoi(_ds_pref_val(PTX, "statisticalSedation"));
+
+ if (PTX != NULL && strcmp(_ds_pref_val(PTX, "whitelistThreshold"), ""))
+ CTX->wh_threshold = atoi(_ds_pref_val(PTX, "whitelistThreshold"));
if (ATX->classification != -1) {
@@ -957,8 +976,18 @@
}
- if (PTX != NULL && PTX->training_mode != -1)
- CTX->training_mode = PTX->training_mode;
+ if (PTX != NULL && strcmp(_ds_pref_val(PTX, "trainingMode"), "")) {
+ if (!strcmp(_ds_pref_val(PTX, "trainingMode"), "TEFT"))
+ CTX->training_mode = DST_TEFT;
+ else if (!strcmp(_ds_pref_val(PTX, "trainingMode"), "TOE"))
+ CTX->training_mode = DST_TOE;
+ else if (!strcmp(_ds_pref_val(PTX, "trainingMode"), "TUM"))
+ CTX->training_mode = DST_TUM;
+ else if (!strcmp(_ds_pref_val(PTX, "trainingMode"), "NOTRAIN"))
+ CTX->training_mode = DST_NOTRAIN;
else
CTX->training_mode = ATX->training_mode;
+ } else {
+ CTX->training_mode = ATX->training_mode;
+ }
if (CTX->training_buffer != -1)
@@ -1157,15 +1186,9 @@
char first_boundary[512];
int is_signed = 0;
-
-#ifdef SIGNATURE_IN_ATTACHMENTS
int final_boundary = 0;
-#else
-#ifndef WEBMAIL
char *signature_begin = NULL, *signature_end, *erase_begin;
int signature_length;
-#endif
struct nt_node *node_header;
struct nt_c c2;
-#endif
i = 0;
@@ -1188,8 +1211,9 @@
#endif
-/* mode: signature-attachments */
+ if (!strcmp(_ds_pref_val(PTX, "signatureLocation"), "attachment")) {
+
+ /* record the message's main boundary so we can make sure there
+ isn't any junk in the last component */
-#ifdef SIGNATURE_IN_ATTACHMENTS
- /* record the message's main boundary so we can make sure there isn't any junk
in the last component */
if (i == 0 && block->boundary != NULL)
strlcpy (first_boundary, block->boundary, sizeof (first_boundary));
@@ -1272,5 +1296,7 @@
(struct _ds_message_block *) prev_node->ptr;
- /* use the current block's terminating boundary, since we're removing it */
+ /* use the current block's terminating boundary, since we're
+ removing it */
+
free (prev_block->terminating_boundary);
prev_block->terminating_boundary = block->terminating_boundary;
@@ -1293,5 +1319,6 @@
}
-#else
+ } /* signatureLocation = attachment */
+ else {
if (block->media_type == MT_TEXT
|| block->media_type == MT_MESSAGE
@@ -1390,5 +1417,5 @@
}
-#ifdef SIGNATURE_IN_HEADERS
+ if (!strcmp(_ds_pref_val(PTX, "signatureLocation"), "headers")) {
if (block->headers != NULL && !have_signature)
{
@@ -1399,5 +1426,6 @@
while(node_header != NULL) {
head = (struct _ds_header_field *) node_header->ptr;
- if (head->heading && !strcmp(head->heading, "X-DSPAM-Signature")) {
+ if (head->heading &&
+ !strcmp(head->heading, "X-DSPAM-Signature")) {
body = head->data;
break;
@@ -1406,5 +1434,5 @@
}
}
-#endif
+ }
#ifndef WEBMAIL
/* Look for signature */
@@ -1430,5 +1458,5 @@
&&
(isalnum
- ((unsigned char) signature_end[0]) || signature_end[0] == 32))
+ ((int) signature_end[0]) || signature_end[0] == 32))
signature_end++;
@@ -1441,10 +1469,14 @@
memcpy (signature_key, signature_begin, signature_length);
signature_key[signature_length] = 0;
-#ifndef SIGNATURE_IN_HEADERS
+
+ if (strcmp(_ds_pref_val(PTX, "signatureLocation"),
+ "headers")) {
+
if (!is_signed && ATX->classification == -1) {
strcpy (erase_begin, signature_end + 1);
block->body->used = strlen(body);
}
-#endif
+
+ }
have_signature = 1;
LOGDEBUG ("found signature '%s'", signature_key);
@@ -1455,5 +1487,5 @@
#endif /* !WEBMAIL */
}
-#endif
+ } /* signatureLocation else */
prev_node = node_nt;
node_nt = c_nt_next (components->components, &c);
@@ -1537,6 +1569,6 @@
{
DSPAM_CTX *CLX;
- int match = (CTX->classification == DSR_ISSPAM) ? DSR_ISSPAM : DSR_ISINNOCENT;
-
+ int match = (CTX->classification == DSR_ISSPAM) ?
+ DSR_ISSPAM : DSR_ISINNOCENT;
iter++;
#endif
@@ -1695,6 +1727,10 @@
res = user_classify ((const char *) node_int->ptr,
CTX->signature, NULL);
+ if (res == DSR_ISWHITELISTED)
+ res = DSR_ISINNOCENT;
- if ((res == DSR_ISSPAM || res == DSR_ISINNOCENT) &&
r.total_incorrect+r.total_correct>4) {
+ if ((res == DSR_ISSPAM || res == DSR_ISINNOCENT) &&
+ r.total_incorrect+r.total_correct>4)
+ {
tbt_add (t,
(double) r.total_correct / (r.total_correct+r.total_incorrect),
@@ -1721,5 +1757,7 @@
/* include the top n reliable sources */
while(node_tbt != NULL && total_nodes>0) {
- float probability = (node_tbt->frequency == DSR_ISINNOCENT) ?
1-node_tbt->probability : node_tbt->probability;
+ float probability = (node_tbt->frequency == DSR_ISINNOCENT ||
+ node_tbt->frequency == DSR_ISWHITELISTED) ?
+ 1-node_tbt->probability : node_tbt->probability;
LOGDEBUG("including node %llu [%2.6f]", node_tbt->token, probability);
@@ -1798,5 +1836,5 @@
/* If the global user thinks it's innocent, and the user thought it was
spam, retrain the user as a false positive */
- if (result == DSR_ISINNOCENT && was_spam) {
+ if ((result == DSR_ISINNOCENT || result == DSR_ISWHITELISTED) && was_spam) {
DSPAM_CTX *CTC = malloc(sizeof(DSPAM_CTX));
if (CTC == NULL) {
@@ -1865,7 +1903,6 @@
{
_ds_create_signature_id (CTX, session, sizeof (session));
-#ifndef SIGNATURE_IN_ATTACHMENTS
- if (_ds_verify_signature (CTX, session))
-#endif
+ if (strcmp(_ds_pref_val(PTX, "signatureLocation"), "attachment") ||
+ _ds_verify_signature (CTX, session))
valid = 1;
}
@@ -1873,5 +1910,5 @@
LOGDEBUG ("saved signature as %s", session);
-#ifdef SIGNATURE_IN_ATTACHMENTS
+ if (!strcmp(_ds_pref_val(PTX, "signatureLocation"), "attachment")) {
/* Embed the signature as an attachment; convert email if necessary */
@@ -2286,7 +2323,7 @@
}
}
-
-#else
- if (CTX->classification == DSR_NONE && CTX->training_mode != DST_NOTRAIN) {
+ } else { /* signatureLocation != attachment */
+ if (CTX->classification == DSR_NONE && CTX->training_mode != DST_NOTRAIN)
+ {
#ifndef WEBMAIL
_ds_set_signature (CTX, CTX->signature, session);
@@ -2297,9 +2334,9 @@
#endif
}
-#endif
+ } /* signatureLocation != attachment */
}
- write_web_stats ((CTX->group == NULL || CTX->flags & DSF_MERGED) ? username :
CTX->group,
- &CTX->totals);
+ write_web_stats ((CTX->group == NULL || CTX->flags & DSF_MERGED) ?
+ username : CTX->group, &CTX->totals);
LOGDEBUG ("libdspam returned probability of %f, message result: %s",
@@ -2343,10 +2380,9 @@
if (result == DSR_ISSPAM)
class = 'S';
+ else if (result == DSR_ISWHITELISTED)
+ class = 'W';
else
class = 'I';
- if (CTX->probability == -2)
- class = 'W';
-
if (CTX->source == DSS_ERROR) {
if (CTX->classification == DSR_ISSPAM)
@@ -2374,9 +2410,9 @@
file = fopen(filename, "a");
if (file != NULL) {
- int i = get_fcntl_lock(fileno(file));
+ int i = _ds_get_fcntl_lock(fileno(file));
if (!i) {
fputs(x, file);
fputs("\n", file);
- free_fcntl_lock(fileno(file));
+ _ds_free_fcntl_lock(fileno(file));
} else {
LOGDEBUG("Failed to lock %s: %d: %s\n", filename, i, strerror(errno));
@@ -2395,5 +2431,5 @@
file = fopen(filename, "a");
if (file != NULL) {
- int i = get_fcntl_lock(fileno(file));
+ int i = _ds_get_fcntl_lock(fileno(file));
if (!i) {
double start, stop;
@@ -2407,5 +2443,5 @@
snprintf(s, sizeof(s), "%s\t%f\n", x, stop-start);
fputs(s, file);
- free_fcntl_lock(fileno(file));
+ _ds_free_fcntl_lock(fileno(file));
} else {
LOGDEBUG("Failed to lock %s: %d: %s\n", filename, i, strerror(errno));
@@ -2454,11 +2490,11 @@
#endif
if (PTX != NULL &&
- PTX->spam_mode == DPS_TAG &&
- PTX->spam_subject[0] != 0)
+ !strcmp(_ds_pref_val(PTX, "spamAction"), "tag") &&
+ strcmp(_ds_pref_val(PTX, "spamSubject"), ""))
do_tag = 1;
strcpy(spam_subject, "[SPAM]");
- if (PTX != NULL && PTX->spam_mode == DPS_TAG)
- strcpy(spam_subject, PTX->spam_subject);
+ if (PTX != NULL && !strcmp(_ds_pref_val(PTX, "spamAction"), "tag"))
+ strcpy(spam_subject, _ds_pref_val(PTX, "spamSubject"));
if (!strncmp(head->data, spam_subject, strlen(spam_subject))) {
@@ -2466,4 +2502,10 @@
}
+ if (head->original_data != NULL &&
+ !strncmp(head->original_data, spam_subject, strlen(spam_subject)))
+ {
+ strcpy(head->original_data, head->original_data+strlen(spam_subject)+1);
+ }
+
if (do_tag) {
long subject_length;
@@ -2479,7 +2521,20 @@
head->data = subject;
}
- }
+ if (head->original_data != NULL) {
+ subject_length =
+ strlen(head->original_data)+strlen(spam_subject)+2;
+ subject = malloc(subject_length);
+ if (subject != NULL) {
+ snprintf(subject,
+ subject_length, "%s %s",
+ spam_subject,
+ head->original_data);
+ free(head->original_data);
+ head->original_data = subject;
+ }
+ }
+ }
}
@@ -2523,6 +2578,16 @@
char data[128];
- snprintf(data, sizeof(data), "X-DSPAM-Result: %s",
- (result == DSR_ISSPAM) ? "Spam" : "Innocent");
+ strcpy(data, "X-DSPAM-Result: ");
+ switch (result) {
+ case DSR_ISSPAM:
+ strcat(data, "Spam");
+ break;
+ case DSR_ISWHITELISTED:
+ strcat(data, "Whitelisted");
+ break;
+ default:
+ strcat(data, "Innocent");
+ break;
+ }
head = _ds_create_header_field(data);
@@ -2563,10 +2628,10 @@
if (CTX->training_mode != DST_NOTRAIN && session[0] != 0) {
-#ifdef SIGNATURE_IN_HEADERS
+ if (!strcmp(_ds_pref_val(PTX, "signatureLocation"), "headers")) {
snprintf(data, sizeof(data), "X-DSPAM-Signature: %s%s%s",
SIGNATURE_BEGIN, session, SIGNATURE_END);
-#else
+ } else {
snprintf(data, sizeof(data), "X-DSPAM-Signature: %s", session);
-#endif
+ }
head = _ds_create_header_field(data);
@@ -2605,6 +2670,7 @@
-#ifndef SIGNATURE_IN_ATTACHMENTS
-#ifndef SIGNATURE_IN_HEADERS
+if (strcmp(_ds_pref_val(PTX, "signatureLocation"), "headers") &&
+ strcmp(_ds_pref_val(PTX, "signatureLocation"), "attachment")) {
+
#ifndef WEBMAIL
/* Embed the signature into all text segments.
@@ -2622,7 +2688,7 @@
&& block->media_subtype == MST_SIGNED && !i && block->boundary != NULL)
{
- size_t len = strlen (block->boundary);
- char *boundary = malloc(len + 1);
- char *term = malloc(len + 2);
+ size_t lenBoundary = strlen (block->boundary);
+ char *boundary = malloc(lenBoundary + 1);
+ char *term = malloc(lenBoundary + 2);
struct nt_node *node_nt, *prev_node = NULL;
struct nt_c c_nt;
@@ -2634,6 +2700,6 @@
*/
- strlcpy (boundary, block->boundary, sizeof (boundary));
- snprintf (term, sizeof (term), "%s--", boundary);
+ strlcpy (boundary, block->boundary, lenBoundary + 1);
+ snprintf (term, lenBoundary + 2, "%s--", boundary);
/* Strip the terminating boundary from the last block */
@@ -2663,4 +2729,5 @@
node_nt = c_nt_next (CTX->message->components, &c_nt);
}
+
node_nt = c_nt_first (CTX->message->components, &c_nt);
block = (struct _ds_message_block *) node_nt->ptr;
@@ -2670,5 +2737,5 @@
struct _ds_message_block *newblock;
struct _ds_header_field *field;
- char scratch[128], typedata[128], term[512];
+ char scratch[128], typedata[128];
snprintf (scratch, sizeof (scratch),
@@ -2694,5 +2761,5 @@
/* Create new block information */
- snprintf (term, sizeof (term), "%s--\n\n", boundary);
+ snprintf (term, lenBoundary + 2, "%s--\n\n", boundary);
newblock->boundary = NULL;
newblock->terminating_boundary = strdup (term);
@@ -2819,10 +2886,10 @@
}
}
+
node_nt = c_nt_next (CTX->message->components, &c_nt);
i++;
}
-#endif /* !WEBMAIL */
-#endif /* !SIGNATURE_IN_HEADERS */
-#endif /* !SIGNATURE_IN_ATTACHMENTS */
+#endif
+ }
/* reconstruct message from components */
@@ -2869,8 +2936,22 @@
if (CTX->operating_mode == DSM_CLASSIFY)
{
+ char data[128];
+
+ switch (CTX->result) {
+ case DSR_ISSPAM:
+ strcpy(data, "Spam");
+ break;
+ case DSR_ISWHITELISTED:
+ strcpy(data, "Whitelisted");
+ break;
+ default:
+ strcpy(data, "Innocent");
+ break;
+ }
+
printf("X-DSPAM-Result: %s; result=\"%s\"; probability=%01.4f; "
"confidence=%02.2f\n",
CTX->username,
- (result == DSR_ISSPAM) ? "Spam" : "Innocent",
+ data,
CTX->probability,
CTX->confidence);
@@ -3016,5 +3097,5 @@
}
- i = get_fcntl_lock(fileno(file));
+ i = _ds_get_fcntl_lock(fileno(file));
if (i) {
LOGDEBUG("Failed to lock %s: Error %d: %s\n", filename, i, strerror(errno));
@@ -3045,5 +3126,5 @@
fputs ("\n\n", file);
- free_fcntl_lock(fileno(file));
+ _ds_free_fcntl_lock(fileno(file));
fclose (file);
@@ -3265,109 +3346,2 @@
return 0;
}
-
-
-#ifdef TRUSTED_USER_SECURITY
-int is_trusted(struct passwd *p) {
- FILE *file;
- char filename[MAX_FILENAME_LENGTH];
- char buff[1024];
- int trusted = 0;
-
- snprintf (filename, MAX_FILENAME_LENGTH, "%s/trusted.users", DSPAM_HOME);
- file = fopen (filename, "r");
- if (file != NULL)
- {
- while (fgets (buff, sizeof (buff), file) != NULL)
- {
- ALLTRIM(buff);
- if (!strcmp (buff, p->pw_name))
- trusted = 1;
- }
- fclose (file);
- }
- else
- {
- LOG (LOG_CRIT,
- "unable to open %s/trusted.users for reading: %s.",
- DSPAM_HOME, strerror (errno));
- }
-
- if (!trusted)
- {
- LOG (LOG_INFO, "forcing username for untrusted user %s", p->pw_name);
- }
-
- return trusted;
-}
-#endif
-
-PREF *load_preferences(const char *username) {
- char filename[MAX_FILENAME_LENGTH];
- PREF *PTX = malloc(sizeof(PREF));
- char buff[128];
- FILE *file;
- char *p, *q;
-
- if (PTX == NULL)
- return NULL;
-
- PTX->training_mode = -1;
- PTX->spam_mode = -1;
- PTX->spam_subject[0] = 0;
- PTX->feature_bnr = -1;
- PTX->feature_whitelist = -1;
- PTX->training_buffer = -1;
-
- if (username == NULL) {
- snprintf(filename, MAX_FILENAME_LENGTH, "%s/default.prefs", DSPAM_HOME);
- } else {
- strcpy (filename, _ds_userdir_path (username, "prefs"));
- }
- file = fopen(filename, "r");
- if (file == NULL) {
- free(PTX);
- return (username == NULL) ? NULL : load_preferences(NULL);
- }
-
- while(fgets(buff, sizeof(buff), file)!=NULL) {
- p = strtok(buff, "=");
- if (p != NULL)
- q = strtok(NULL, "=");
- if (p != NULL && q != NULL) {
- chomp(q);
- if (!strcmp(p, "trainingMode")) {
- if (!strcmp(q, "TOE"))
- PTX->training_mode = DST_TOE;
- else if (!strcmp(q, "TEFT"))
- PTX->training_mode = DST_TEFT;
- else if (!strcmp(q, "TUM"))
- PTX->training_mode = DST_TUM;
- } else if (!strcmp(p, "spamAction")) {
- if (!strcmp(q, "quarantine"))
- PTX->spam_mode = DPS_QUARANTINE;
- else if (!strcmp(q, "tag"))
- PTX->spam_mode = DPS_TAG;
- else if (!strcmp(q, "deliver"))
- PTX->spam_mode = DPS_DELIVER;
- } else if (!strcmp(p, "spamSubject")) {
- strlcpy(PTX->spam_subject, q, sizeof(PTX->spam_subject));
- } else if (!strcmp(p, "enableBNR")) {
- if (!strcmp(q, "on"))
- PTX->feature_bnr = 1;
- else
- PTX->feature_bnr = 0;
- } else if (!strcmp(p, "enableWhitelist")) {
- if (!strcmp(q, "on"))
- PTX->feature_whitelist = 1;
- else
- PTX->feature_whitelist = 0;
- } else if (!strcmp(p, "statisticalSedation") && q[0] != 0) {
- PTX->training_buffer = atoi(q);
- }
- }
- }
- fclose(file);
-
- return PTX;
-}
-
Index: libdspam.c
===================================================================
RCS file: /cvsroot/mahogany/M/lib/dspam/libdspam.c,v
retrieving revision 1.7
retrieving revision 1.8
diff -b -u -2 -r1.7 -r1.8
--- libdspam.c 10 Jul 2004 16:01:18 -0000 1.7
+++ libdspam.c 10 Jul 2004 16:26:02 -0000 1.8
@@ -96,4 +96,5 @@
CTX->confidence = 0;
CTX->training_mode = DST_TEFT;
+ CTX->wh_threshold = 10;
CTX->training_buffer = 5;
CTX->classification = DSR_NONE;
@@ -382,10 +383,10 @@
#endif
- /* Robinson's Naive Bayesian, used to calculate confidence */
- float rob_top = 0.0; /* Robinson's Naive Bayesian */
+ /* Robinson's Geometric Mean, used to calculate confidence */
+ float rob_top = 0.0; /* Robinson's Geometric Mean */
float rob_bot = 0.0;
float rob_result = -1;
double p = 0.0, q = 0.0, s = 0.0; /* Robinson PQS Calculations */
- long rob_used = 0; /* Total tokens used in Robinson's Naive */
+ long rob_used = 0; /* Total tokens used in Robinson's GM */
#if defined(CHI_SQ) || defined(DEBUG)
@@ -414,4 +415,9 @@
int do_whitelist = 0;
+#ifdef DYNAMICBNR_EXTENSION
+ BNR_CTX *BTX = NULL;
+#endif
+
+
if (body == NULL)
body_length = 0;
@@ -759,4 +765,15 @@
int dub_length = 0;
+#ifdef DYNAMICBNR_EXTENSION
+ BTX = _ds_snr_load(CTX);
+ if (BTX == NULL) {
+ LOGDEBUG("unable to load snr values. starting from scratch");
+ BTX = calloc(1, sizeof(BNR_CTX));
+ if (BTX == NULL) {
+ LOG(LOG_CRIT, ERROR_MEM_ALLOC);
+ return EUNKNOWN;
+ }
+ }
+#endif
node_nt = c_nt_first(freq->order, &c_nt);
while(node_nt != NULL) {
@@ -940,4 +957,9 @@
LOGDEBUG("BNR noise ratio: %2.2f Spammy Tokens: %d Filtered: %d",
previous, spammy, i);
+
+#ifdef DYNAMICBNR_EXTENSION
+ if (BTX != NULL)
+ BTX->snr = previous;
+#endif
} else {
i = -1;
@@ -946,4 +968,10 @@
/* END: Bayesian Noise Reduction */
+
+ if (CTX->flags & DSF_WHITELIST)
+ {
+ LOGDEBUG("Whitelist threshold: %d", CTX->wh_threshold);
+ }
+
/* Create a binary tree index sorted by a token's delta from .5 */
node_lht = c_lht_first (freq, &c_lht);
@@ -956,5 +984,5 @@
if (node_lht->key == whitelist_token &&
node_lht->s.spam_hits == 0 &&
- node_lht->s.innocent_hits > 10 &&
+ node_lht->s.innocent_hits > CTX->wh_threshold &&
CTX->classification == DSR_NONE)
{
@@ -1121,5 +1149,5 @@
#endif
- /* Robinson's Naive Bayesian Definitions */
+ /* Robinson's Geometric Mean Definitions */
#define ROB_S 0.010 /* Sensitivity */
@@ -1256,6 +1284,6 @@
#if defined(ROBINSON) || defined(DEBUG)
- LOGDEBUG("Robinson's Naive Confidence: %f (Spamminess: %f, Non-Spamminess: "
- "%f, Samples: %ld)", rob_result, p, q, rob_used);
+ LOGDEBUG("Robinson's Geometric Confidence: %f (Spamminess: %f, "
+ "Non-Spamminess: %f, Samples: %ld)", rob_result, p, q, rob_used);
#endif
@@ -1288,6 +1316,5 @@
} else if (CTX->flags & DSF_WHITELIST && do_whitelist) {
LOGDEBUG("auto-whitelisting this message");
- CTX->result = DSR_ISINNOCENT;
- CTX->probability = -2;
+ CTX->result = DSR_ISWHITELISTED;
} else {
#ifdef BAYESIAN
@@ -1401,7 +1428,18 @@
}
+#ifdef DYNAMICBNR_EXTENSION
+ if (BTX != NULL) {
+ if (body_length<=3500)
+ BTX->ct_micro++;
+ else
+ BTX->ct_norm++;
+ _ds_snr_save(CTX, BTX);
+ }
+#endif
+
/* INNOCENT */
}
- else if (CTX->result == DSR_ISINNOCENT && CTX->operating_mode != DSM_CLASSIFY)
+ else if ((CTX->result == DSR_ISINNOCENT || CTX->result == DSR_ISWHITELISTED)
+ && CTX->operating_mode != DSM_CLASSIFY)
{
CTX->totals.innocent_learned++;
@@ -1422,5 +1460,5 @@
if (CTX->result == DSR_ISSPAM)
CTX->totals.spam_classified++;
- else if (CTX->result == DSR_ISINNOCENT)
+ else if (CTX->result == DSR_ISINNOCENT || CTX->result == DSR_ISWHITELISTED)
CTX->totals.innocent_classified++;
}
@@ -1530,5 +1568,5 @@
}
- return (CTX->result == DSR_ISSPAM) ? DSR_ISSPAM : DSR_ISINNOCENT;
+ return CTX->result;
bail_unknown:
Index: libdspam.h
===================================================================
RCS file: /cvsroot/mahogany/M/lib/dspam/libdspam.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -u -2 -r1.3 -r1.4
--- libdspam.h 10 Jul 2004 15:58:11 -0000 1.3
+++ libdspam.h 10 Jul 2004 16:26:02 -0000 1.4
@@ -29,4 +29,5 @@
#include "decode.h"
#include "libdspam_objects.h"
+#include "util.h"
#ifndef _LIBDSPAM_H
Index: libdspam_objects.h
===================================================================
RCS file: /cvsroot/mahogany/M/lib/dspam/libdspam_objects.h,v
retrieving revision 1.5
retrieving revision 1.6
diff -b -u -2 -r1.5 -r1.6
--- libdspam_objects.h 10 Jul 2004 15:58:11 -0000 1.5
+++ libdspam_objects.h 10 Jul 2004 16:26:02 -0000 1.6
@@ -246,5 +246,5 @@
result (output)
The final result of the requested operation. This is generally either
- DSR_ISSPAM or DSR_ISINNOCENT.
+ DSR_ISSPAM, DSR_ISINNOCENT, or DSR_ISWHITELISTED.
confidence (output)
@@ -328,4 +328,5 @@
int training_mode; /* DST_ */
int training_buffer; /* 0-10 */
+ int wh_threshold; /* Whitelisting Threshold (default 10) */
int classification; /* DSR_ */
int source; /* DSS_ */
@@ -385,4 +386,5 @@
#define DSR_ISSPAM 0x01
#define DSR_ISINNOCENT 0x02
+#define DSR_ISWHITELISTED 0x03
#define DSR_NONE 0xFF
@@ -398,3 +400,18 @@
#define DSS_NONE 0xFF
+#ifdef DYNAMICBNR_EXTENSION
+typedef struct {
+ float avgsnr_micro;
+ float avgsnr_norm;
+ long sm_micro;
+ long fp_micro;
+ long sm_norm;
+ long fp_norm;
+ long ct_micro;
+ long ct_norm;
+ char disk;
+ float snr; /* SNR of current message (for save+refactor) */
+} BNR_CTX;
+#endif
+
#endif /* _LIBDSPAM_OBJECTS */
Index: nodetree.c
===================================================================
RCS file: /cvsroot/mahogany/M/lib/dspam/nodetree.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -u -2 -r1.2 -r1.3
--- nodetree.c 7 Jul 2004 23:43:00 -0000 1.2
+++ nodetree.c 10 Jul 2004 16:26:02 -0000 1.3
@@ -20,4 +20,7 @@
#include <stdlib.h>
+#include <sys/types.h>
+#include <pwd.h>
+
#include "nodetree.h"
#include "util.h"
Index: storage_driver.h
===================================================================
RCS file: /cvsroot/mahogany/M/lib/dspam/storage_driver.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -u -2 -r1.3 -r1.4
--- storage_driver.h 10 Jul 2004 15:58:11 -0000 1.3
+++ storage_driver.h 10 Jul 2004 16:26:02 -0000 1.4
@@ -24,4 +24,7 @@
#include "libdspam_objects.h"
#include "lht.h"
+#ifdef PREFERENCES_EXTENSION
+#include "pref.h"
+#endif
struct _ds_storage_record
@@ -49,5 +52,4 @@
};
-
/* dspam_init_driver: called by the application to initialize the storage
driver. should only be called once. */
@@ -100,14 +102,34 @@
int _ds_get_decision (DSPAM_CTX * CTX, struct _ds_neural_decision *DEC,
const char *signature);
-
int _ds_set_decision (DSPAM_CTX * CTX, struct _ds_neural_decision *DEC,
const char *signature);
-
int _ds_delete_decision (DSPAM_CTX * CTX, const char *signature);
-/* Preferences Extensions */
-
-
-
+/*
+ Storage Driver Preferences Extension
+ When defined, the built-in preferences functions are overridden with
+ functions specific to the storage driver. This allows preferences to be
+ alternatively stored in the storage facility instead of flat files.
+*/
+
+#ifdef PREFERENCES_EXTENSION
+AGENT_PREF _ds_pref_load(const char *user);
+int _ds_pref_save(const char *user, AGENT_PREF PTX);
+int _ds_pref_set(const char *user, const char *attrib, const char *value);
+int _ds_pref_del(const char *username, const char *attrib);
+#endif
+
+/*
+ Dynamic Noise Reduction Extensions
+ Dynamic BNR Extensions allow for dynamic tracking of SNR margins in email
+ to provide more dynamic (and effective) Bayesian noise reduction. Without
+ this extension, fixed values for SNR thresholds are used and no
+ calibration is performed.
+*/
+
+#ifdef DYNAMICBNR_EXTENSION
+BNR_CTX * _ds_snr_load(DSPAM_CTX *CTX);
+int _ds_snr_save(DSPAM_CTX *CTX, BNR_CTX *BTX);
+#endif
#endif /* _STORAGE_DRIVER_H */
Index: util.c
===================================================================
RCS file: /cvsroot/mahogany/M/lib/dspam/util.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -b -u -2 -r1.4 -r1.5
--- util.c 10 Jul 2004 15:58:11 -0000 1.4
+++ util.c 10 Jul 2004 16:26:02 -0000 1.5
@@ -360,4 +360,9 @@
{
strlcat (path, dir, sizeof (path));
+#ifdef _WIN32
+ strlcat (path, "\\", sizeof (path));
+#else
+ strlcat (path, "/", sizeof (path));
+#endif
dir = strsep (&file, "/");
@@ -387,4 +392,5 @@
}
+
/* Subroutine: lc
Description: converts a string to lowercase
@@ -526,5 +532,5 @@
#endif
-int get_fcntl_lock(int fd) {
+int _ds_get_fcntl_lock(int fd) {
#ifdef _WIN32
return 0;
@@ -541,5 +547,5 @@
}
-int free_fcntl_lock(int fd) {
+int _ds_free_fcntl_lock(int fd) {
#ifdef _WIN32
return 0;
@@ -556,2 +562,37 @@
}
+#ifdef TRUSTED_USER_SECURITY
+int _ds_is_trusted(struct passwd *p) {
+ FILE *file;
+ char filename[MAX_FILENAME_LENGTH];
+ char buff[1024];
+ int trusted = 0;
+
+ snprintf (filename, MAX_FILENAME_LENGTH, "%s/trusted.users", DSPAM_HOME);
+ file = fopen (filename, "r");
+ if (file != NULL)
+ {
+ while (fgets (buff, sizeof (buff), file) != NULL)
+ {
+ ALLTRIM(buff);
+ if (!strcmp (buff, p->pw_name))
+ trusted = 1;
+ }
+ fclose (file);
+ }
+ else
+ {
+ LOG (LOG_CRIT,
+ "unable to open %s/trusted.users for reading: %s.",
+ DSPAM_HOME, strerror (errno));
+ }
+
+ if (!trusted)
+ {
+ LOG (LOG_INFO, "forcing username for untrusted user %s", p->pw_name);
+ }
+
+ return trusted;
+}
+#endif
+
-------------------------------------------------------
This SF.Net email sponsored by Black Hat Briefings & Training.
Attend Black Hat Briefings & Training, Las Vegas July 24-29 -
digital self defense, top technical experts, no vendor pitches,
unmatched networking opportunities. Visit www.blackhat.com
_______________________________________________
Mahogany-cvsupdates mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/mahogany-cvsupdates