On 7/28/2010 6:18 PM, thyago wrote:
I'm researching ways of updating a signature database on the fly, so
understanding how ClamAV does it would really help me out.
I mean, what structures are there? how is it implemented?
Is there a data structure used to store the signatures in memory? If so, how
exactly is it updated?
What type of data structure is it? Dynamic or static?
I need to know if you guys use a pointer to the structure, and then just set
it to point to the new updated structure,
and if for example, there's a condition, that limits when this pointer can
be changed...like a thread needing to finish first....

I tried to look for the implementation in the code itself, but it's so
big that I don't know which file to look in. =/



Thank you very much, for the help

Thyago


Attached is my implementation. As you can see I use a RW lock to minimize contention.
/**
 * @file /magma/providers/external/clamav.c
 *
 * @brief Interface for the ClamAV library.
 *
 * $Author: Ladar Levison $
 * $Date: 2010/08/13 10:32:38 $
 * $Revision: ecaee526d4ba88a141c5b889dd023b13c05c2654 $
 *
 */

#include "magma.h"

/**
 * The virus engine spool directory. When a spool is configured, virus_start()
 * sets this to the "scan/" sub-directory of magma.spool, and
 * virus_engine_create() passes it to ClamAV as CL_ENGINE_TMPDIR. It stays NULL
 * when no spool is configured.
 */
char *virus_spool = NULL;

/**
 * The mask used to generate temporary file names. virus_start() builds it as
 * "<spool>/data/scan_%lu"; virus_scan() uses it as the snprintf() format and
 * supplies pthread_self() for the %lu. When it is NULL, virus_scan() falls back
 * to "/tmp/scan_%lu".
 */
char *virus_spool_mask = NULL;

/**
 * The status of the signatures directory. Initialized with cl_statinidir_d(),
 * polled with cl_statchkdir_d() by virus_engine_refresh(), and released with
 * cl_statfree_d().
 */
struct cl_stat virus_stat;

/**
 * The number of signatures loaded by the virus engine. Written together with
 * virus_engine while virus_lock is write locked, and read under a read lock by
 * virus_sigs_loaded().
 */
unsigned int virus_sigs = 0;

/**
 * The virus engine context pointer. virus_scan() uses it while holding a read
 * lock; virus_engine_refresh() replaces it while holding the write lock.
 */
struct cl_engine *virus_engine = NULL;

/**
 * The virus engine read/write lock. Guards virus_engine and virus_sigs: scans
 * and counter reads take the read side, the engine swap takes the write side.
 */
pthread_rwlock_t virus_lock = PTHREAD_RWLOCK_INITIALIZER;

/**
 * Reports how many virus signatures the active ClamAV engine context has loaded.
 *
 * The counter is copied while a read lock is held on the virus engine lock, so the
 * value always belongs to the engine context that was active at that moment.
 *
 * @return Returns the number of virus signatures loaded by the active ClamAV engine context.
 */
uint64_t virus_sigs_loaded(void) {

        uint64_t count;

        pthread_rwlock_rdlock(&virus_lock);
        count = virus_sigs;
        pthread_rwlock_unlock(&virus_lock);

        return count;
}

/**
 * Asks ClamAV how many official signatures are present in the configured database folder.
 *
 * @return Returns the number of official signatures available inside the ClamAV database folder, or 0 if
 *              ClamAV reports an error while counting.
 */
uint64_t virus_sigs_total(void) {

        unsigned int count = 0;
        int result = cl_countsigs_d(magma.iface.virus.signatures, CL_COUNTSIGS_OFFICIAL, &count);

        if (result == CL_SUCCESS) {
                return count;
        }

        log_error("ClamAV was unable to count the number of available signatures. {cl_countsigs = %i = %s}", result, cl_strerror_d(result));
        return 0;
}

/**
 * Frees a ClamAV engine context and sets the pointer to NULL.
 *
 * Passing NULL, or a pointer to a NULL context, is reported through log_check() and otherwise ignored.
 *
 * @param target A doubly referenced pointer to a ClamAV engine context.
 */
void virus_engine_destroy(struct cl_engine **target) {

        log_check(!target || !*target);

        // NOTE(review): log_check() is assumed to only report the condition, not to abort. Return here so
        // a NULL argument is never dereferenced below.
        if (!target || !*target) {
                return;
        }

        cl_engine_free_d(*target);
        *target = NULL;
}


/**
 * Generates a new ClamAV engine context.
 *
 * @param       signatures An optional pointer which will be used to record the 
number of signatures loaded.
 * @return Returns a pointer to the newly created context or NULL if an error 
occurs.
 */
struct cl_engine * virus_engine_create(uint64_t *signatures) {

        int state;
        unsigned int loaded = 0;
        struct cl_engine *target = NULL;

        // Reset the signatures pointer if one was passed in.
        if (*signatures) {
                *signatures = 0;
        }

        // Allocate ClamAV engine context.
        if ((target = cl_engine_new_d()) == NULL) {
                log_error("ClamAV returned an error while allocating the engine 
context. {cl_engine = NULL}");
                return NULL;
        }

        // Load the current signature database.
        if ((state = cl_load_d(magma.iface.virus.signatures, target, &loaded, 
CL_DB_STDOPT)) != CL_SUCCESS) {
                log_error("ClamAV returned an error while loading the database. 
{cl_load = %i = %s}", state, cl_strerror_d(state));
                cl_engine_free_d(target);
                return NULL;
        }

        // Compile the internal lookup structures.
        if ((state = cl_engine_compile_d(target)) != CL_SUCCESS) {
                log_error("ClamAV database compilation error. 
{cl_engine_compile = %i = %s}", state, cl_strerror_d(state));
                cl_engine_free_d(target);
                return NULL;
        }

        // Max scan size. 2048 MB.
        // Sets the maximum amount of data to be scanned for each input file.
        if ((state = cl_engine_set_num_d(target, CL_ENGINE_MAX_SCANSIZE, 2048ll 
* 1048576ll)) != CL_SUCCESS) {
                log_error("ClamAV configuration error. {cl_engine_set_num = %i 
= %s}", state, cl_strerror_d(state));
                cl_engine_free_d(target);
                return NULL;
        }

        // Max file size. 512 MB.
        // Files larger than this limit won't be scanned.
        if ((state = cl_engine_set_num_d(target, CL_ENGINE_MAX_FILESIZE, 512 * 
1048576)) != CL_SUCCESS) {
                log_error("ClamAV configuration error. {cl_engine_set_num = %i 
= %s}", state, cl_strerror_d(state));
                cl_engine_free_d(target);
                return NULL;
        }

        // Maximum recursion level for archives.
        if ((state = cl_engine_set_num_d(target, CL_ENGINE_MAX_RECURSION, 32)) 
!= CL_SUCCESS) {
                log_error("ClamAV configuration error. {cl_engine_set_num = %i 
= %s}", state, cl_strerror_d(state));
                cl_engine_free_d(target);
                return NULL;
        }

        // Maximum number of files to scan within an archive.
        if ((state = cl_engine_set_num_d(target, CL_ENGINE_MAX_FILES, 65536)) 
!= CL_SUCCESS) {
                log_error("ClamAV configuration error. {cl_engine_set_num = %i 
= %s}", state, cl_strerror_d(state));
                cl_engine_free_d(target);
                return NULL;
        }

        // This option sets the lowest number of social security numbers found 
in a file to generate a detect.
        if ((state = cl_engine_set_num_d(target, CL_ENGINE_MIN_SSN_COUNT, 
1000000)) != CL_SUCCESS) {
                log_error("ClamAV configuration error. {cl_engine_set_num = %i 
= %s}", state, cl_strerror_d(state));
                cl_engine_free_d(target);
                return NULL;
        }

        // This option sets the lowest number of credit card numbers found in a 
file to generate a detect.
        if ((state = cl_engine_set_num_d(target, CL_ENGINE_MIN_CC_COUNT, 
1000000)) != CL_SUCCESS) {
                log_error("ClamAV configuration error. {cl_engine_set_num = %i 
= %s}", state, cl_strerror_d(state));
                cl_engine_free_d(target);
                return NULL;
        }

        // Configure the bytecode engine evaluate all bytecode instructions 
with suspicion.
        if ((state = cl_engine_set_num_d(target, CL_ENGINE_BYTECODE_SECURITY, 
CL_BYTECODE_TRUST_NOTHING)) != CL_SUCCESS) {
                log_error("ClamAV configuration error. {cl_engine_set_num = %i 
= %s}", state, cl_strerror_d(state));
                cl_engine_free_d(target);
                return NULL;
        }

        // Configure the directory where ClamAV should store spool/temp data 
during scans.
        if (virus_spool != NULL && (state = cl_engine_set_str_d(target, 
CL_ENGINE_TMPDIR, virus_spool)) != CL_SUCCESS) {
                log_error("ClamAV configuration error. {cl_engine_set_str = %i 
= %s}", state, cl_strerror_d(state));
                cl_engine_free_d(target);
                return NULL;
        }

        // Configure the library to automatically delete temporary files when 
its finished. Without historical data
        // to reference the ClamAV engine will be unable to detect viruses that 
have been split across multiple messages.
        if ((state = cl_engine_set_num_d(target, CL_ENGINE_KEEPTMP, 0)) != 
CL_SUCCESS) {
                log_error("ClamAV configuration error. {cl_engine_set_num = %i 
= %s}", state, cl_strerror_d(state));
                cl_engine_free_d(target);
                return NULL;
        }

        // Reset the signatures pointer if one was passed in.
        if (*signatures) {
                *signatures = loaded;
        }

        return target;
}

/**
 * Initializes the global ClamAV engine context and configures it appropriately.
 *
 * @return Returns true if the ClamAV engine was loaded correctly.
 */
bool_t virus_start(void) {

        DIR *dir;
        int state;
        uint64_t loaded;
        char buffer[1024];

        // If we are not supposed to be scanning messages. So don't initialize 
the engine.
        if (!magma.iface.virus.available) {
                return true;
        }

        // The ClamAV library must be initialized before any library function 
is used.
        if ((state = cl_init_d(CL_INIT_DEFAULT)) != CL_SUCCESS) {
                log_critical("ClamAV returned an error during initialization. 
{cl_init = %i = %s}", state, cl_strerror_d(state));
                stats_increment_by_name("provider.virus.error");
                return false;
        }

        // Configure the scanner spool directory. If spool is empty, use the 
ClamAV default values.
        if (magma.spool != NULL && ns_get_length(magma.spool) != 0) {
                // Make sure the spool directory exists.
                if ((dir = opendir(magma.spool)) == NULL) {
                        log_critical("Unable to access the spool directory. 
{dir = %s}", magma.spool);
                        stats_increment_by_name("provider.virus.error");
                        return false;
                }

                closedir(dir);

                // Check for the data sub-directory.
                snprintf(buffer, 1024, "%s%s%s", magma.spool, (*(magma.spool + 
ns_get_length(magma.spool) - 1) == '/' ? "" : "/"), "data/");

                if ((dir = opendir(buffer)) == NULL) {
                        if (mkdir(buffer, S_IRWXU) != 0) {
                                log_critical("Unable to create the spool data 
directory. {dir = %s}", buffer);
                                stats_increment_by_name("provider.virus.error");
                                return false;
                        } else {
                                log_info("Creating the spool data directory. 
{dir = %s}", buffer);
                        }
                } else {
                        closedir(dir);
                }

                // Check for the ClamAV sub-directory.
                snprintf(buffer, 1024, "%s%s%s", magma.spool, (*(magma.spool + 
ns_get_length(magma.spool) - 1) == '/' ? "" : "/"), "scan/");

                if ((dir = opendir(buffer)) == NULL) {
                        if (mkdir(buffer, S_IRWXU) != 0) {
                                log_critical("Unable to create the spool ClamAV 
directory. {dir = %s}", buffer);
                                stats_increment_by_name("provider.virus.error");
                                return false;
                        } else {
                                log_info("Creating the spool ClamAV directory. 
{dir = %s}", buffer);
                        }
                } else {
                        closedir(dir);
                }

                // Configure the directory where ClamAV should store spool/temp 
data during scans.
                snprintf(buffer, 1024, "%s%s%s", magma.spool, (*(magma.spool + 
ns_get_length(magma.spool) - 1) == '/' ? "" : "/"), "scan/");

                if ((virus_spool = ns_dupe(buffer)) == NULL) {
                        log_critical("Unable to generate the ClamAV spool 
path.");
                        stats_increment_by_name("provider.virus.error");
                        return false;
                }

                // Build the format specifier. The double percent is used to 
print a percent in the output. The format specifier %lu is then replaced with 
the thread ID.
                snprintf(buffer, 1024, "%s%s%sscan_%%lu", magma.spool, 
(*(magma.spool + ns_get_length(magma.spool) - 1) == '/' ? "" : "/"), "data/");

                if ((virus_spool_mask = ns_dupe(buffer)) == NULL) {
                        log_critical("Unable to generate the ClamAV spool file 
format string.");
                        stats_increment_by_name("provider.virus.error");
                        return false;
                }
        }

        // Setup the refresh data.
        bl_clear(&virus_stat, sizeof(struct cl_stat));
        cl_statinidir_d(magma.iface.virus.signatures, &virus_stat);

        if ((virus_engine = virus_engine_create(&loaded)) == NULL) {
                log_critical("Failed to construct a new ClamAV engine 
context.");
                stats_increment_by_name("provider.virus.error");
                cl_statfree_d(&virus_stat);
                return false;
        }

        // Record the number of signatures loaded.
        virus_sigs = loaded;

        // Update the ClamAV engine trackers.
        stats_set_by_name("provider.virus.available", 1);
        stats_set_by_name("provider.virus.signatures.loaded", loaded);
        stats_set_by_name("provider.virus.signatures.total", 
virus_sigs_total());

        return true;
}

/**
 * Shuts down the ClamAV library and releases any memory that is no longer 
needed.
 */
void virus_stop(void) {

        // If we are not supposed to be scanning messages. So don't free the 
engine.
        if (!magma.iface.virus.available) {
                return;
        }

        // Free the memory associated with the virus scanning engine.
        if (virus_spool_mask) {
                ns_free(virus_spool_mask);
                virus_spool_mask = NULL;
        }

        // Frees the engine context.
        if (virus_engine) {
                virus_engine_destroy(&virus_engine);
                virus_engine = NULL;
                virus_sigs = 0;
        }

        // Frees the database directory status.
        cl_statfree_d(&virus_stat);

        // Update the ClamAV engine trackers. The values below are used to 
indicate a shutdown state.
        stats_set_by_name("provider.virus.available", 0);
        stats_set_by_name("provider.virus.signatures.loaded", 0);

        return;
}

/**
 * Checks the virus database directory for new signatures. If new signatures 
are detected an updated ClamAV engine context is created.
 *
 * @return Returns 1 if the engine context is updated, 0 if no updates are 
necessary and -1 in the event of an error.
 */
int virus_engine_refresh(void) {

        int state;
        time_t utime;
        struct tm now;
        uint64_t loaded, total;
        struct cl_engine *original, *new = NULL;

        // If we are not supposed to be scanning messages. So don't bother 
refreshing engine.
        if (!magma.iface.virus.available) {
                return 0;
        }

        if (cl_statchkdir_d(&virus_stat) == 1) {

                if ((new = virus_engine_create(&loaded)) == NULL) {
                        log_error("Failed to construct a new ClamAV engine 
context.");
                        stats_increment_by_name("provider.virus.error");
                        return -1;
                }

                // Lock and then swap the pointer.
                pthread_rwlock_wrlock(&virus_lock);
                original = virus_engine;
                virus_engine = new;
                virus_sigs = loaded;
                pthread_rwlock_unlock(&virus_lock);

                // Free the old engine context.
                virus_engine_destroy(&original);

                // Refresh the statistics, so we can properly log the update.
                cl_statfree_d(&virus_stat);
                bl_clear(&virus_stat, sizeof(struct cl_stat));
                cl_statinidir_d(magma.iface.virus.signatures, &virus_stat);

                // Update the engine counters with counts from the new 
signature database.
                stats_set_by_name("provider.virus.signatures.loaded", loaded);
                stats_set_by_name("provider.virus.signatures.total", (total = 
virus_sigs_total()));

                // If we have a problem calculating the local time, output the 
message without the time.
                if ((utime = time(NULL)) == ((time_t)-1) || 
(localtime_r(&utime, &now)) == NULL) {
                        log_info("%lu out of %lu signatures were loaded.", 
loaded, total);
                } else {

                        // Get the hour on a 12 hour clock.
                        if (now.tm_hour == 0) {
                                state = 12;
                        } else if (now.tm_hour > 12) {
                                state = now.tm_hour - 12;
                        } else {
                                state = now.tm_hour;
                        }

                        log_info("%lu out of %lu signatures were loaded. 
(%.2i:%.2i %s %s)", loaded, total, state, now.tm_min, (now.tm_hour < 12 ? "AM" 
: "PM"),
                                 tzname[(now.tm_isdst > 0 ? 1 : 0)]);
                }
        }

        return 1;
}

/**
 * Scans a message with the active ClamAV engine context.
 *
 * The message is written to a temporary file which is unlinked straight away, so it disappears once the
 * descriptor is closed. The scan itself, and every use of the virus name it reports, happens while a read
 * lock is held on the virus engine lock.
 *
 * @param data  The message to scan.
 * @return Returns -1 if the message is invalid or the temporary file can't be prepared, 2 for a signature based
 *              phishing match, 1 for any other detection that isn't ignored, and 0 when scanning is disabled, the
 *              message is clean, the detection is an ignored heuristic phishing or joke match, or the scan itself
 *              returns an error.
 */
int virus_scan(stringer_t *data) {

        int descriptor, result, verdict;
        ssize_t bytes;
        unsigned long int scanned;
        char *virname, *counter, path[1024];
        const char *mask;

        // If we are not supposed to be scanning messages.
        if (!magma.iface.virus.available) {
                return 0;
        }

        // Make sure an actual message was passed in.
        if (data == NULL || st_get_length(data) == 0) {
                log_error("An invalid message pointer was passed in.");
                return -1;
        }

        // Generate the file name from the spool mask, or the fallback location, and the thread ID.
        mask = (virus_spool_mask ? virus_spool_mask : "/tmp/scan_%lu");

        if (snprintf(path, sizeof(path), mask, pthread_self()) <= 1) {
                log_error("Could not generate a valid temporary file name.");
                stats_increment_by_name("provider.virus.error");
                return -1;
        }

        // Open a new file and truncate it.
        descriptor = open64(path, O_CREAT | O_RDWR | O_TRUNC | O_SYNC, S_IRUSR | S_IWUSR);

        if (descriptor == -1) {
                log_error("Could not create a temporary file. {filename = %s}", path);
                stats_increment_by_name("provider.virus.error");
                return -1;
        }

        // Immediately unlink the file, so its deleted when the descriptor is closed.
        if (unlink(path)) {
                log_error("Could not unlink the file. {filename = %s}", path);
                stats_increment_by_name("provider.virus.error");
                close(descriptor);
                return -1;
        }

        // Stick the message in the file for ClamAV.
        if ((bytes = write(descriptor, st_get_data(data), st_get_length(data))) != st_get_length(data)) {
                log_error("Not all of the bytes were written to disk. Was %zi, but should have been %zu.", bytes, st_get_length(data));
                stats_increment_by_name("provider.virus.error");
                close(descriptor);
                return -1;
        }

        // Scan the message. Original author's note: the OLE code has a bug in it that causes segfaults.
        pthread_rwlock_rdlock(&virus_lock);
        result = cl_scandesc_d(descriptor, (const char **)&virname, &scanned, virus_engine, CL_SCAN_STDOPT);

        // If we found something, classify it by name before the lock is released.
        // http://wiki.clamav.net/Main/MalwareNaming has naming conventions.
        if (result == CL_VIRUS) {

                log_pedantic("%s detected by ClamAV.", virname);

                // These are signature based phishing matches.
                if (starts_ci_bl_bl("Email.Phishing", 14, virname, ns_get_length(virname)) ||
                        starts_ci_bl_bl("HTML.Phishing", 13, virname, ns_get_length(virname))) {
                        verdict = 2;
                        counter = "provider.virus.scan.phishing";
                }
                // We ignore email that ClamAV thinks is a phishing based on scanner's internal heuristic checks.
                else if (starts_ci_bl_bl("Phishing", 8, virname, ns_get_length(virname)) ||
                        starts_ci_bl_bl("Joke", 4, virname, ns_get_length(virname))) {
                        verdict = 0;
                        counter = "provider.virus.scan.clean";
                }
                // Its probably a worm, trojan, virus or something similar.
                else {
                        verdict = 1;
                        counter = "provider.virus.scan.infected";
                }

                pthread_rwlock_unlock(&virus_lock);
                stats_increment_by_name("provider.virus.scan.total");
                stats_increment_by_name(counter);
                close(descriptor);
                return verdict;
        }

        pthread_rwlock_unlock(&virus_lock);
        close(descriptor);

        // Anything other than a clean result at this point is a scanner error.
        if (result != CL_CLEAN) {
                log_error("An error occurred while scanning a message. {cl_scandesc = %i = %s}", result, cl_strerror_d(result));
                stats_increment_by_name("provider.virus.error");
                return 0;
        }

        // Track the number of clean messages. We can do the tracking after the lock is released.
        stats_increment_by_name("provider.virus.scan.total");
        stats_increment_by_name("provider.virus.scan.clean");

        return 0;
}

/**
 * Reports the version of the ClamAV library that was loaded at runtime.
 *
 * @return The ClamAV version as a constant string, exactly as returned by cl_retver.
 */
const char * lib_version_clamav(void) {

        const char *version = cl_retver_d();

        return version;
}

/**
 * Loads the functions needed by the ClamAV interface.
 *
 * @return Returns true if all the functions loaded successfully.
 */
bool_t lib_load_clamav(void) {

        symbol_t clamav[] = {
                {
                        .name = "cl_retver",
                        .pointer = (void *)&cl_retver_d
                },
                {
                        .name = "cl_strerror",
                        .pointer = (void *)&cl_strerror_d
                },
                {
                        .name = "cl_statinidir",
                        .pointer = (void *)&cl_statinidir_d
                },
                {
                        .name = "cl_engine_new",
                        .pointer = (void *)&cl_engine_new_d
                },
                {
                        .name = "cl_engine_compile",
                        .pointer = (void *)&cl_engine_compile_d
                },
                {
                        .name = "cl_load",
                        .pointer = (void *)&cl_load_d
                },
                {
                        .name = "cl_engine_free",
                        .pointer = (void *)&cl_engine_free_d
                },
                {
                        .name = "cl_engine_set_num",
                        .pointer = (void *)&cl_engine_set_num_d
                },
                {
                        .name = "cl_engine_set_str",
                        .pointer = (void *)&cl_engine_set_str_d
                },
                {
                        .name = "cl_countsigs",
                        .pointer = (void *)&cl_countsigs_d
                },
                {
                        .name = "cl_statchkdir",
                        .pointer = (void *)&cl_statchkdir_d
                },
                {
                        .name = "cl_statfree",
                        .pointer = (void *)&cl_statfree_d
                },
                {
                        .name = "cl_scandesc",
                        .pointer = (void *)&cl_scandesc_d
                }
        };

        if (lib_symbols(sizeof(clamav) / sizeof(symbol_t), clamav) != 1) {
                return false;
        }

        return true;
}

_______________________________________________
http://lurker.clamav.net/list/clamav-devel.html
Please submit your patches to our Bugzilla: http://bugs.clamav.net

Reply via email to