On 7/28/2010 6:18 PM, thyago wrote:
I'm researching ways of updating a signature database on the fly, so
understanding how ClamAV does it would really help me out.
I mean, what structures are there, and how is it implemented?
Is there a data structure used to store the signatures in memory? If so, how
exactly is it updated?
What type of data structure is it: dynamic or static?
I need to know if you guys use a pointer to the structure and then just set
it to point to the new, updated structure,
and whether there is a condition that limits when this pointer can
be changed -- for example, a thread needing to finish first.
I tried to look for the implementation in the code itself, but it's so
big that I don't know which file to look in. =/
Thank you very much, for the help
Thyago
Attached is my implementation. As you can see I use a RW lock to
minimize contention.
/**
* @file /magma/providers/external/clamav.c
*
* @brief Interface for the ClamAV library.
*
* $Author: Ladar Levison $
* $Date: 2010/08/13 10:32:38 $
* $Revision: ecaee526d4ba88a141c5b889dd023b13c05c2654 $
*
*/
#include "magma.h"
/**
* The virus engine spool directory. Set by virus_start() to the "scan/"
* sub-directory of magma.spool and handed to ClamAV as CL_ENGINE_TMPDIR by
* virus_engine_create(). While NULL the temp directory is left unconfigured.
*/
char *virus_spool = NULL;
/**
* The mask used to generate temporary file names. A printf style format string
* built by virus_start() which ends in "scan_%lu"; virus_scan() fills in the
* calling thread's ID. While NULL virus_scan() falls back to "/tmp/scan_%lu".
*/
char *virus_spool_mask = NULL;
/**
* The status of the signatures directory. Initialized with cl_statinidir_d()
* and polled with cl_statchkdir_d() by virus_engine_refresh() to detect
* database updates.
*/
struct cl_stat virus_stat;
/**
* The number of signatures loaded by the virus engine. Updated together with
* virus_engine while virus_lock is write locked.
*/
unsigned int virus_sigs = 0;
/**
* The virus engine context pointer. virus_scan() uses it under a read lock and
* virus_engine_refresh() swaps it under a write lock.
*/
struct cl_engine *virus_engine = NULL;
/**
* The virus engine read/write lock. Guards virus_engine and virus_sigs.
*/
pthread_rwlock_t virus_lock = PTHREAD_RWLOCK_INITIALIZER;
/**
 * Reports how many virus signatures the active ClamAV engine context has loaded.
 *
 * The counter is sampled while holding a read lock on virus_lock, so the value is never
 * read while an engine swap is in progress.
 *
 * @return	the value of virus_sigs at the moment the read lock was held.
 */
uint64_t virus_sigs_loaded(void) {

	uint64_t count;

	pthread_rwlock_rdlock(&virus_lock);
	count = virus_sigs;
	pthread_rwlock_unlock(&virus_lock);

	return count;
}
/**
 * Counts the number of official signatures available inside the ClamAV database folder.
 *
 * The folder is taken from magma.iface.virus.signatures. Failures are logged and reported
 * as a count of zero.
 *
 * @return	the number of official signatures available inside the ClamAV database folder,
 * 			or 0 if the signatures could not be counted.
 */
uint64_t virus_sigs_total(void) {

	int state;
	unsigned int total = 0;

	if ((state = cl_countsigs_d(magma.iface.virus.signatures, CL_COUNTSIGS_OFFICIAL, &total)) != CL_SUCCESS) {
		log_error("ClamAV was unable to count the number of available signatures. {cl_countsigs = %i = %s}", state, cl_strerror_d(state));
		return 0;
	}

	return total;
}
/**
 * Frees a ClamAV engine context and sets the caller's pointer to NULL.
 *
 * Passing NULL, or a pointer to a NULL context, is reported through log_check() and is
 * otherwise a no-op.
 *
 * @param target	a doubly referenced pointer to a ClamAV engine context.
 */
void virus_engine_destroy(struct cl_engine **target) {

	log_check(!target || !*target);

	// NOTE(review): log_check() appears to only report the condition, so bail out explicitly
	// instead of dereferencing a NULL pointer below.
	if (!target || !*target) {
		return;
	}

	cl_engine_free_d(*target);
	*target = NULL;
	return;
}
/**
* Generates a new ClamAV engine context.
*
* @param signatures An optional pointer which will be used to record the
number of signatures loaded.
* @return Returns a pointer to the newly created context or NULL if an error
occurs.
*/
struct cl_engine * virus_engine_create(uint64_t *signatures) {
int state;
unsigned int loaded = 0;
struct cl_engine *target = NULL;
// Reset the signatures pointer if one was passed in.
if (*signatures) {
*signatures = 0;
}
// Allocate ClamAV engine context.
if ((target = cl_engine_new_d()) == NULL) {
log_error("ClamAV returned an error while allocating the engine
context. {cl_engine = NULL}");
return NULL;
}
// Load the current signature database.
if ((state = cl_load_d(magma.iface.virus.signatures, target, &loaded,
CL_DB_STDOPT)) != CL_SUCCESS) {
log_error("ClamAV returned an error while loading the database.
{cl_load = %i = %s}", state, cl_strerror_d(state));
cl_engine_free_d(target);
return NULL;
}
// Compile the internal lookup structures.
if ((state = cl_engine_compile_d(target)) != CL_SUCCESS) {
log_error("ClamAV database compilation error.
{cl_engine_compile = %i = %s}", state, cl_strerror_d(state));
cl_engine_free_d(target);
return NULL;
}
// Max scan size. 2048 MB.
// Sets the maximum amount of data to be scanned for each input file.
if ((state = cl_engine_set_num_d(target, CL_ENGINE_MAX_SCANSIZE, 2048ll
* 1048576ll)) != CL_SUCCESS) {
log_error("ClamAV configuration error. {cl_engine_set_num = %i
= %s}", state, cl_strerror_d(state));
cl_engine_free_d(target);
return NULL;
}
// Max file size. 512 MB.
// Files larger than this limit won't be scanned.
if ((state = cl_engine_set_num_d(target, CL_ENGINE_MAX_FILESIZE, 512 *
1048576)) != CL_SUCCESS) {
log_error("ClamAV configuration error. {cl_engine_set_num = %i
= %s}", state, cl_strerror_d(state));
cl_engine_free_d(target);
return NULL;
}
// Maximum recursion level for archives.
if ((state = cl_engine_set_num_d(target, CL_ENGINE_MAX_RECURSION, 32))
!= CL_SUCCESS) {
log_error("ClamAV configuration error. {cl_engine_set_num = %i
= %s}", state, cl_strerror_d(state));
cl_engine_free_d(target);
return NULL;
}
// Maximum number of files to scan within an archive.
if ((state = cl_engine_set_num_d(target, CL_ENGINE_MAX_FILES, 65536))
!= CL_SUCCESS) {
log_error("ClamAV configuration error. {cl_engine_set_num = %i
= %s}", state, cl_strerror_d(state));
cl_engine_free_d(target);
return NULL;
}
// This option sets the lowest number of social security numbers found
in a file to generate a detect.
if ((state = cl_engine_set_num_d(target, CL_ENGINE_MIN_SSN_COUNT,
1000000)) != CL_SUCCESS) {
log_error("ClamAV configuration error. {cl_engine_set_num = %i
= %s}", state, cl_strerror_d(state));
cl_engine_free_d(target);
return NULL;
}
// This option sets the lowest number of credit card numbers found in a
file to generate a detect.
if ((state = cl_engine_set_num_d(target, CL_ENGINE_MIN_CC_COUNT,
1000000)) != CL_SUCCESS) {
log_error("ClamAV configuration error. {cl_engine_set_num = %i
= %s}", state, cl_strerror_d(state));
cl_engine_free_d(target);
return NULL;
}
// Configure the bytecode engine evaluate all bytecode instructions
with suspicion.
if ((state = cl_engine_set_num_d(target, CL_ENGINE_BYTECODE_SECURITY,
CL_BYTECODE_TRUST_NOTHING)) != CL_SUCCESS) {
log_error("ClamAV configuration error. {cl_engine_set_num = %i
= %s}", state, cl_strerror_d(state));
cl_engine_free_d(target);
return NULL;
}
// Configure the directory where ClamAV should store spool/temp data
during scans.
if (virus_spool != NULL && (state = cl_engine_set_str_d(target,
CL_ENGINE_TMPDIR, virus_spool)) != CL_SUCCESS) {
log_error("ClamAV configuration error. {cl_engine_set_str = %i
= %s}", state, cl_strerror_d(state));
cl_engine_free_d(target);
return NULL;
}
// Configure the library to automatically delete temporary files when
its finished. Without historical data
// to reference the ClamAV engine will be unable to detect viruses that
have been split across multiple messages.
if ((state = cl_engine_set_num_d(target, CL_ENGINE_KEEPTMP, 0)) !=
CL_SUCCESS) {
log_error("ClamAV configuration error. {cl_engine_set_num = %i
= %s}", state, cl_strerror_d(state));
cl_engine_free_d(target);
return NULL;
}
// Reset the signatures pointer if one was passed in.
if (*signatures) {
*signatures = loaded;
}
return target;
}
/**
* Initializes the global ClamAV engine context and configures it appropriately.
*
* @return Returns true if the ClamAV engine was loaded correctly.
*/
bool_t virus_start(void) {
DIR *dir;
int state;
uint64_t loaded;
char buffer[1024];
// If we are not supposed to be scanning messages. So don't initialize
the engine.
if (!magma.iface.virus.available) {
return true;
}
// The ClamAV library must be initialized before any library function
is used.
if ((state = cl_init_d(CL_INIT_DEFAULT)) != CL_SUCCESS) {
log_critical("ClamAV returned an error during initialization.
{cl_init = %i = %s}", state, cl_strerror_d(state));
stats_increment_by_name("provider.virus.error");
return false;
}
// Configure the scanner spool directory. If spool is empty, use the
ClamAV default values.
if (magma.spool != NULL && ns_get_length(magma.spool) != 0) {
// Make sure the spool directory exists.
if ((dir = opendir(magma.spool)) == NULL) {
log_critical("Unable to access the spool directory.
{dir = %s}", magma.spool);
stats_increment_by_name("provider.virus.error");
return false;
}
closedir(dir);
// Check for the data sub-directory.
snprintf(buffer, 1024, "%s%s%s", magma.spool, (*(magma.spool +
ns_get_length(magma.spool) - 1) == '/' ? "" : "/"), "data/");
if ((dir = opendir(buffer)) == NULL) {
if (mkdir(buffer, S_IRWXU) != 0) {
log_critical("Unable to create the spool data
directory. {dir = %s}", buffer);
stats_increment_by_name("provider.virus.error");
return false;
} else {
log_info("Creating the spool data directory.
{dir = %s}", buffer);
}
} else {
closedir(dir);
}
// Check for the ClamAV sub-directory.
snprintf(buffer, 1024, "%s%s%s", magma.spool, (*(magma.spool +
ns_get_length(magma.spool) - 1) == '/' ? "" : "/"), "scan/");
if ((dir = opendir(buffer)) == NULL) {
if (mkdir(buffer, S_IRWXU) != 0) {
log_critical("Unable to create the spool ClamAV
directory. {dir = %s}", buffer);
stats_increment_by_name("provider.virus.error");
return false;
} else {
log_info("Creating the spool ClamAV directory.
{dir = %s}", buffer);
}
} else {
closedir(dir);
}
// Configure the directory where ClamAV should store spool/temp
data during scans.
snprintf(buffer, 1024, "%s%s%s", magma.spool, (*(magma.spool +
ns_get_length(magma.spool) - 1) == '/' ? "" : "/"), "scan/");
if ((virus_spool = ns_dupe(buffer)) == NULL) {
log_critical("Unable to generate the ClamAV spool
path.");
stats_increment_by_name("provider.virus.error");
return false;
}
// Build the format specifier. The double percent is used to
print a percent in the output. The format specifier %lu is then replaced with
the thread ID.
snprintf(buffer, 1024, "%s%s%sscan_%%lu", magma.spool,
(*(magma.spool + ns_get_length(magma.spool) - 1) == '/' ? "" : "/"), "data/");
if ((virus_spool_mask = ns_dupe(buffer)) == NULL) {
log_critical("Unable to generate the ClamAV spool file
format string.");
stats_increment_by_name("provider.virus.error");
return false;
}
}
// Setup the refresh data.
bl_clear(&virus_stat, sizeof(struct cl_stat));
cl_statinidir_d(magma.iface.virus.signatures, &virus_stat);
if ((virus_engine = virus_engine_create(&loaded)) == NULL) {
log_critical("Failed to construct a new ClamAV engine
context.");
stats_increment_by_name("provider.virus.error");
cl_statfree_d(&virus_stat);
return false;
}
// Record the number of signatures loaded.
virus_sigs = loaded;
// Update the ClamAV engine trackers.
stats_set_by_name("provider.virus.available", 1);
stats_set_by_name("provider.virus.signatures.loaded", loaded);
stats_set_by_name("provider.virus.signatures.total",
virus_sigs_total());
return true;
}
/**
* Shuts down the ClamAV library and releases any memory that is no longer
needed.
*/
void virus_stop(void) {
// If we are not supposed to be scanning messages. So don't free the
engine.
if (!magma.iface.virus.available) {
return;
}
// Free the memory associated with the virus scanning engine.
if (virus_spool_mask) {
ns_free(virus_spool_mask);
virus_spool_mask = NULL;
}
// Frees the engine context.
if (virus_engine) {
virus_engine_destroy(&virus_engine);
virus_engine = NULL;
virus_sigs = 0;
}
// Frees the database directory status.
cl_statfree_d(&virus_stat);
// Update the ClamAV engine trackers. The values below are used to
indicate a shutdown state.
stats_set_by_name("provider.virus.available", 0);
stats_set_by_name("provider.virus.signatures.loaded", 0);
return;
}
/**
 * Checks the virus database directory for new signatures. If new signatures are detected an
 * updated ClamAV engine context is created.
 *
 * The replacement context is built before any lock is taken. Only the pointer swap happens
 * while virus_lock is write locked, and the previous context is freed after the lock is released.
 *
 * @return	1 if the engine context is updated, 0 if no updates are necessary (or virus scanning
 * 			is disabled) and -1 in the event of an error.
 */
int virus_engine_refresh(void) {

	int hour;
	time_t utime;
	struct tm now;
	uint64_t loaded = 0, total;
	struct cl_engine *original, *fresh = NULL;

	// If we are not supposed to be scanning messages, don't bother refreshing the engine.
	if (!magma.iface.virus.available) {
		return 0;
	}

	// Anything other than 1 is treated as "nothing to do", which is how the original check was written.
	if (cl_statchkdir_d(&virus_stat) != 1) {
		return 0;
	}

	if ((fresh = virus_engine_create(&loaded)) == NULL) {
		log_error("Failed to construct a new ClamAV engine context.");
		stats_increment_by_name("provider.virus.error");
		return -1;
	}

	// Lock and then swap the pointer.
	pthread_rwlock_wrlock(&virus_lock);
	original = virus_engine;
	virus_engine = fresh;
	virus_sigs = loaded;
	pthread_rwlock_unlock(&virus_lock);

	// Free the old engine context.
	virus_engine_destroy(&original);

	// Recapture the directory status, so the next check is made against the updated database.
	cl_statfree_d(&virus_stat);
	bl_clear(&virus_stat, sizeof(struct cl_stat));
	cl_statinidir_d(magma.iface.virus.signatures, &virus_stat);

	// Update the engine counters with counts from the new signature database.
	total = virus_sigs_total();
	stats_set_by_name("provider.virus.signatures.loaded", loaded);
	stats_set_by_name("provider.virus.signatures.total", total);

	// If we have a problem calculating the local time, output the message without the time.
	if ((utime = time(NULL)) == ((time_t)-1) || (localtime_r(&utime, &now)) == NULL) {
		log_info("%llu out of %llu signatures were loaded.", (unsigned long long)loaded, (unsigned long long)total);
		return 1;
	}

	// Get the hour on a 12 hour clock.
	if (now.tm_hour == 0) {
		hour = 12;
	}
	else if (now.tm_hour > 12) {
		hour = now.tm_hour - 12;
	}
	else {
		hour = now.tm_hour;
	}

	log_info("%llu out of %llu signatures were loaded. (%.2i:%.2i %s %s)", (unsigned long long)loaded, (unsigned long long)total,
		hour, now.tm_min, (now.tm_hour < 12 ? "AM" : "PM"), tzname[(now.tm_isdst > 0 ? 1 : 0)]);

	return 1;
}
/**
 * Scans a message with the active ClamAV engine context.
 *
 * The message is written to an unlinked temporary file, which is scanned by descriptor while a
 * read lock is held on virus_lock so the engine can't be swapped out mid-scan.
 *
 * @param data	the message to scan.
 * @return	-1 if the message is invalid or the temporary file could not be prepared, 0 if the message is
 * 			clean (or scanning is disabled, or ClamAV matched a heuristic "Phishing" / "Joke" name), 1 if
 * 			the message is infected and 2 if it matched a signature based phishing name. An error
 * 			returned by the scan itself is logged and reported as 0.
 */
int virus_scan(stringer_t *data) {

	size_t length;
	ssize_t written;
	int fd, state, used, result = 0;
	unsigned long int scanned = 0;
	char *virname = NULL, filename[1024];

	// If we are not supposed to be scanning messages.
	if (!magma.iface.virus.available) {
		return 0;
	}

	// Lets make sure an actual message was passed.
	if (data == NULL || (length = st_get_length(data)) == 0) {
		log_error("An invalid message pointer was passed in.");
		return -1;
	}

	// Generate file name. A truncated name is rejected, along with the original's check for an empty result.
	used = snprintf(filename, sizeof(filename), (!virus_spool_mask ? "/tmp/scan_%lu" : virus_spool_mask), pthread_self());

	if (used <= 1 || (size_t)used >= sizeof(filename)) {
		log_error("Could not generate a valid temporary file name.");
		stats_increment_by_name("provider.virus.error");
		return -1;
	}

	// Open a new file and truncate it.
	if ((fd = open64(filename, O_CREAT | O_RDWR | O_TRUNC | O_SYNC, S_IRUSR | S_IWUSR)) == -1) {
		log_error("Could not create a temporary file. {filename = %s}", filename);
		stats_increment_by_name("provider.virus.error");
		return -1;
	}

	// Immediately unlink the file, so its deleted when the descriptor is closed.
	if (unlink(filename)) {
		log_error("Could not unlink the file. {filename = %s}", filename);
		stats_increment_by_name("provider.virus.error");
		close(fd);
		return -1;
	}

	// Stick the message in the file for ClamAV.
	if ((written = write(fd, st_get_data(data), length)) < 0 || (size_t)written != length) {
		log_error("Not all of the bytes were written to disk. Was %zi, but should have been %zu.", written, length);
		stats_increment_by_name("provider.virus.error");
		close(fd);
		return -1;
	}

	// Scan the message. The OLE code has a bug in it that causes segfaults.
	pthread_rwlock_rdlock(&virus_lock);
	state = cl_scandesc_d(fd, (const char **)&virname, &scanned, virus_engine, CL_SCAN_STDOPT);

	// If we found something, classify it before the lock is released, while virname is still being read.
	// http://wiki.clamav.net/Main/MalwareNaming has naming conventions.
	if (state == CL_VIRUS) {
		log_pedantic("%s detected by ClamAV.", virname);

		// These are signature based phishing matches.
		if (starts_ci_bl_bl("Email.Phishing", 14, virname, ns_get_length(virname)) ||
			starts_ci_bl_bl("HTML.Phishing", 13, virname, ns_get_length(virname))) {
			result = 2;
		}
		// We ignore email that ClamAV thinks is a phishing based on scanner's internal heuristic checks.
		else if (starts_ci_bl_bl("Phishing", 8, virname, ns_get_length(virname)) ||
			starts_ci_bl_bl("Joke", 4, virname, ns_get_length(virname))) {
			result = 0;
		}
		// Its probably a worm, trojan, virus or something similar.
		else {
			result = 1;
		}
	}

	pthread_rwlock_unlock(&virus_lock);
	close(fd);

	// Track the outcome. We can do the tracking after the lock is released.
	if (state == CL_VIRUS) {
		stats_increment_by_name("provider.virus.scan.total");

		if (result == 2) {
			stats_increment_by_name("provider.virus.scan.phishing");
		}
		else if (result == 1) {
			stats_increment_by_name("provider.virus.scan.infected");
		}
		else {
			stats_increment_by_name("provider.virus.scan.clean");
		}
	}
	else if (state == CL_CLEAN) {
		stats_increment_by_name("provider.virus.scan.total");
		stats_increment_by_name("provider.virus.scan.clean");
	}
	// NOTE(review): a scan error is reported to the caller as 0, the same as a clean message. Confirm
	// that failing open is intended before changing it.
	else {
		log_error("An error occurred while scanning a message. {cl_scandesc = %i = %s}", state, cl_strerror_d(state));
		stats_increment_by_name("provider.virus.error");
	}

	return result;
}
/**
 * Reports the version of the ClamAV library that was loaded at runtime.
 *
 * @return	the constant version string supplied by cl_retver_d().
 */
const char * lib_version_clamav(void) {

	const char *version = cl_retver_d();

	return version;
}
/**
* Loads the functions needed by the ClamAV interface.
*
* @return Returns true if all the functions loaded successfully.
*/
bool_t lib_load_clamav(void) {
symbol_t clamav[] = {
{
.name = "cl_retver",
.pointer = (void *)&cl_retver_d
},
{
.name = "cl_strerror",
.pointer = (void *)&cl_strerror_d
},
{
.name = "cl_statinidir",
.pointer = (void *)&cl_statinidir_d
},
{
.name = "cl_engine_new",
.pointer = (void *)&cl_engine_new_d
},
{
.name = "cl_engine_compile",
.pointer = (void *)&cl_engine_compile_d
},
{
.name = "cl_load",
.pointer = (void *)&cl_load_d
},
{
.name = "cl_engine_free",
.pointer = (void *)&cl_engine_free_d
},
{
.name = "cl_engine_set_num",
.pointer = (void *)&cl_engine_set_num_d
},
{
.name = "cl_engine_set_str",
.pointer = (void *)&cl_engine_set_str_d
},
{
.name = "cl_countsigs",
.pointer = (void *)&cl_countsigs_d
},
{
.name = "cl_statchkdir",
.pointer = (void *)&cl_statchkdir_d
},
{
.name = "cl_statfree",
.pointer = (void *)&cl_statfree_d
},
{
.name = "cl_scandesc",
.pointer = (void *)&cl_scandesc_d
}
};
if (lib_symbols(sizeof(clamav) / sizeof(symbol_t), clamav) != 1) {
return false;
}
return true;
}
_______________________________________________
http://lurker.clamav.net/list/clamav-devel.html
Please submit your patches to our Bugzilla: http://bugs.clamav.net