基本流程可以参考《clamav中clamdscan --version 不生效》一文。

我们直接从解析command开始:parse_command函数返回COMMAND_RELOAD类型,然后进入execute_or_dispatch_command函数进行处理。

recvloop=>parse_dispatch_cmd=>execute_or_dispatch_command


/*
 * Handle a parsed command on the recvloop thread. Only the SHUTDOWN and
 * RELOAD cases are shown in this excerpt; the remaining cases were elided by
 * the article's author (see the "......" marker below).
 *
 * returns:
 *  <0 for error
 *     -1 out of memory
 *     -2 other
 *   0 for async dispatched
 *   1 for command completed (connection can be closed)
 */
int execute_or_dispatch_command(client_conn_t *conn, enum commands cmd, const char *argument)
{
    int desc                       = conn->sd;
    char term                      = conn->term;
    const struct cl_engine *engine = conn->engine;
    /* execute commands that can be executed quickly on the recvloop thread,
     * these must:
     *  - not involve any operation that can block for a long time, such as disk
     *  I/O
     *  - send of atomic message is allowed.
     * Dispatch other commands */
    if (conn->group) {
        switch (cmd) {
            case COMMAND_FILDES:
            case COMMAND_SCAN:
            case COMMAND_END:
            case COMMAND_INSTREAM:
            case COMMAND_INSTREAMSCAN:
            case COMMAND_VERSION:
            case COMMAND_PING:
            case COMMAND_STATS:
            case COMMAND_COMMANDS:
                /* These commands are accepted inside IDSESSION */
                break;
            default:
                /* these commands are not recognized inside an IDSESSION */
                conn_reply_error(conn, "Command invalid inside IDSESSION.");
                logg(LOGG_DEBUG_NV, "SESSION: command is not valid inside IDSESSION: %d\n", cmd);
                /* The connection leaves its session group before being reported as completed. */
                conn->group = NULL;
                return 1;
        }
    }
    switch (cmd) {
        case COMMAND_SHUTDOWN:
            /* Only raises the exit flag under its mutex; nothing is torn down here. */
            pthread_mutex_lock(&exit_mutex);
            progexit = 1;
            pthread_mutex_unlock(&exit_mutex);
            return 1;
        case COMMAND_RELOAD:
            pthread_mutex_lock(&reload_mutex);
            reload = 1; /* raise the reload flag */
            pthread_mutex_unlock(&reload_mutex);
            /* Acknowledge right away; no database is loaded in this function. */
            mdprintf(desc, "RELOADING%c", term);
            /* we set reload flag, and we'll reload before closing the
             * connection */
            return 1;
        /* The remaining command cases are elided in this excerpt. */
......}
}

然后返回到recvloop函数中,开始加载病毒库。由于代码太长,这里只贴出部分代码。

recvloop:

        /* DB reload */
        /*
         * Two flags drive the reload, each guarded by its own mutex:
         *  - `reload` (reload_mutex): a reload was requested and is not yet finished.
         *  - `reload_stage` (reload_stage_mutex): IDLE -> RELOADING -> NEW_DB_AVAILABLE.
         * The two mutexes are never held at the same time in this fragment.
         */
        pthread_mutex_lock(&reload_mutex);
        if (reload) {
            pthread_mutex_unlock(&reload_mutex);
            /* Reload was requested */
            pthread_mutex_lock(&reload_stage_mutex);
            if (reload_stage == RELOAD_STAGE__IDLE) {
                /* Reloading not already taking place */
                reload_stage = RELOAD_STAGE__RELOADING;
                /* Release the stage lock before calling reload_db(). */
                pthread_mutex_unlock(&reload_stage_mutex);
                if (CL_SUCCESS != reload_db(&engine, dboptions, opts, thr_pool)) {
                    logg(LOGG_WARNING, "Database reload setup failed, keeping the previous instance\n");
                    /* Setup failed: clear the request and go back to IDLE. */
                    pthread_mutex_lock(&reload_mutex);
                    reload = 0;
                    pthread_mutex_unlock(&reload_mutex);
                    pthread_mutex_lock(&reload_stage_mutex);
                    reload_stage = RELOAD_STAGE__IDLE;
                    pthread_mutex_unlock(&reload_stage_mutex);
                }
                /* Re-take the stage lock so that both paths reach the check below with it held. */
                pthread_mutex_lock(&reload_stage_mutex);
            }
            /* reload_stage_mutex is held here on every path. */
            if (reload_stage == RELOAD_STAGE__NEW_DB_AVAILABLE) {
                /* New database available */
                if (g_newengine) {
                    /* Reload succeeded */
                    logg(LOGG_INFO, "Activating the newly loaded database...\n");
                    thrmgr_setactiveengine(g_newengine);
                    if (optget(opts, "ConcurrentDatabaseReload")->enabled) {
                        /* If concurrent database reload, we now need to free the old engine. */
                        cl_engine_free(engine);
                    }
                    /* Take over the new engine and clear the hand-off pointer. */
                    engine      = g_newengine;
                    g_newengine = NULL;
                } else {
                    logg(LOGG_WARNING, "Database reload failed, keeping the previous instance\n");
                }
                reload_stage = RELOAD_STAGE__IDLE;
                pthread_mutex_unlock(&reload_stage_mutex);
                /* The request is now complete whether or not a new engine was produced. */
                pthread_mutex_lock(&reload_mutex);
                reload = 0;
                pthread_mutex_unlock(&reload_mutex);
                time(&reloaded_time);
            } else {
                /* Still reloading (or idle after a failed setup): nothing to activate yet. */
                pthread_mutex_unlock(&reload_stage_mutex);
            }
        } else {
            pthread_mutex_unlock(&reload_mutex);
        }

recvloop=>reload_db


/**
 * @brief Reload the database.
 *
 * Builds a reload context (a copy of the current engine settings, the database
 * directory and the database options), refreshes the global `dbstat` snapshot
 * of that directory, and spawns reload_th() to load and compile a new engine.
 *
 * Ownership: the reload context is owned by this function until
 * pthread_create() succeeds. From then on it belongs to reload_th(), which
 * frees it, so it must not be freed here even if waiting for the thread fails.
 *
 * When "ConcurrentDatabaseReload" is disabled, the current engine is released
 * and `*engine` is set to NULL before the thread is started, and this function
 * joins the thread before returning.
 *
 * @param[in,out] engine    The current scan engine, used to copy the settings.
 * @param dboptions         The current database options, used to copy the options.
 * @param opts              The command line options, used to get the database directory.
 * @param thr_pool          The scan thread pool, waited on for non-concurrent reloads.
 * @return cl_error_t       CL_SUCCESS if the reload thread was successfully started. This does not mean that the database has reloaded successfully.
 */
static cl_error_t reload_db(struct cl_engine **engine, unsigned int dboptions, const struct optstruct *opts, threadpool_t *thr_pool)
{
    cl_error_t status = CL_EMALFDB;
    cl_error_t retval;
    int th_err;
    struct reload_th_t *rldata = NULL;
    pthread_t th;
    pthread_attr_t th_attr;

    if (NULL == opts || NULL == engine) {
        logg(LOGG_ERROR, "reload_db: Invalid arguments, unable to load signature databases.\n");
        status = CL_EARG;
        goto done;
    }

    /* Zero-initialised, so the cleanup code can tell which members were set. */
    rldata = calloc(1, sizeof(*rldata));
    if (!rldata) {
        logg(LOGG_ERROR, "Failed to allocate reload context\n");
        status = CL_EMEM;
        goto done;
    }

    rldata->dboptions = dboptions;

    if (*engine) {
        /* Snapshot the current engine's settings; reload_th() applies them to the new engine. */
        rldata->settings = cl_engine_settings_copy(*engine);
        if (!rldata->settings) {
            logg(LOGG_ERROR, "Can't make a copy of the current engine settings\n");
            goto done;
        }
    }

    /* Database directory as configured by the DatabaseDirectory option. */
    rldata->dbdir = strdup(optget(opts, "DatabaseDirectory")->strarg);
    if (!rldata->dbdir) {
        logg(LOGG_ERROR, "Can't duplicate the database directory path\n");
        goto done;
    }

    /* Rebuild the global stat snapshot of the database directory. */
    if (dbstat.entries) {
        cl_statfree(&dbstat);
    }
    memset(&dbstat, 0, sizeof(struct cl_stat));
    retval = cl_statinidir(rldata->dbdir, &dbstat);
    if (CL_SUCCESS != retval) {
        logg(LOGG_ERROR, "cl_statinidir() failed: %s\n", cl_strerror(retval));
        goto done;
    }

    if (*engine) {
        if (!optget(opts, "ConcurrentDatabaseReload")->enabled) {
            /*
             * If concurrent reload disabled, we'll NULL out the current engine and deref it.
             * It will only actually be free'd once the last scan finishes.
             */
            thrmgr_setactiveengine(NULL);
            cl_engine_free(*engine);
            *engine = NULL;

            /* Wait for all scans to finish */
            thrmgr_wait_for_threads(thr_pool);
        }
    }

    if (pthread_attr_init(&th_attr)) {
        logg(LOGG_ERROR, "Failed to init reload thread attributes\n");
        goto done;
    }

    if (optget(opts, "ConcurrentDatabaseReload")->enabled) {
        /* For concurrent reloads: set detached, so we don't leak thread resources */
        pthread_attr_setdetachstate(&th_attr, PTHREAD_CREATE_DETACHED);
    }

    /* Start the thread that loads the signature databases. */
    th_err = pthread_create(&th, &th_attr, reload_th, rldata);
    if (pthread_attr_destroy(&th_attr))
        logg(LOGG_WARNING, "Failed to release reload thread attributes\n");
    if (th_err) {
        logg(LOGG_ERROR, "Failed to spawn reload thread\n");
        goto done;
    }

    /*
     * The thread exists and now owns the reload context: reload_th() frees it.
     * Drop our pointer so that a pthread_join() failure below cannot free it a
     * second time in the cleanup section.
     */
    rldata = NULL;

    if (!optget(opts, "ConcurrentDatabaseReload")->enabled) {
        /* For non-concurrent reloads: join the thread */
        int join_ret = pthread_join(th, NULL);
        switch (join_ret) {
            case 0:
                logg(LOGG_INFO, "Database reload completed.\n");
                break;

            case EDEADLK:
                logg(LOGG_ERROR, "A deadlock was detected when waiting for the database reload thread.\n");
                goto done;

            case ESRCH:
                logg(LOGG_ERROR, "Failed to find database reload thread.\n");
                goto done;

            case EINVAL:
                logg(LOGG_ERROR, "The database reload thread is not a joinable thread.\n");
                goto done;

            default:
                logg(LOGG_ERROR, "An unknown error occured when waiting for the database reload thread: %d\n", join_ret);
                goto done;
        }
    }

    status = CL_SUCCESS;

done:

    if (CL_SUCCESS != status) {
        /*
         * rldata is only non-NULL here when the thread was never started, in
         * which case we are still responsible for cleaning it up.
         */
        if (NULL != rldata) {
            if (NULL != rldata->settings) {
                cl_engine_settings_free(rldata->settings);
            }
            if (NULL != rldata->dbdir) {
                free(rldata->dbdir);
            }
            free(rldata);
        }
    }

    return status;
}

recvloop=>reload_db=>reload_th


/*** @brief Thread entry point to load the signature databases & compile a new scanning engine.** Once loaded, an event will be set to indicate that the new engine is ready.** @param arg   A reload_th_t structure defining the db directory, db settings, engine settings.* @return void**/
static void *reload_th(void *arg)
{cl_error_t status = CL_EMALFDB;struct reload_th_t *rldata = arg;struct cl_engine *engine   = NULL;unsigned int sigs          = 0;int retval;if (NULL == rldata || NULL == rldata->dbdir || NULL == rldata->settings) {logg(LOGG_ERROR, "reload_th: Invalid arguments, unable to load signature databases.\n");status = CL_EARG;goto done;}logg(LOGG_INFO, "Reading databases from %s\n", rldata->dbdir);if (NULL == (engine = cl_engine_new())) {//创建引擎对象logg(LOGG_ERROR, "reload_th: Can't initialize antivirus engine\n");goto done;}retval = cl_engine_settings_apply(engine, rldata->settings);//复制旧引擎的配置过来if (CL_SUCCESS != retval) {logg(LOGG_ERROR, "reload_th: Failed to apply previous engine settings: %s\n", cl_strerror(retval));status = CL_EMEM;goto done;}retval = cl_load(rldata->dbdir, engine, &sigs, rldata->dboptions);//加载病毒库if (CL_SUCCESS != retval) {logg(LOGG_ERROR, "reload_th: Database load failed: %s\n", cl_strerror(retval));goto done;}retval = cl_engine_compile(engine);//编译病毒库if (CL_SUCCESS != retval) {logg(LOGG_ERROR, "reload_th: Database initialization error: can't compile engine: %s\n", cl_strerror(retval));goto done;}logg(LOGG_INFO, "Database correctly reloaded (%u signatures)\n", sigs);status = CL_SUCCESS;done:if (NULL != rldata) {if (NULL != rldata->settings) {cl_engine_settings_free(rldata->settings);}if (NULL != rldata->dbdir) {free(rldata->dbdir);}free(rldata);}if (CL_SUCCESS != status) {if (NULL != engine) {cl_engine_free(engine);engine = NULL;}}pthread_mutex_lock(&reload_stage_mutex);reload_stage = RELOAD_STAGE__NEW_DB_AVAILABLE; /* New DB available */g_newengine  = engine;pthread_mutex_unlock(&reload_stage_mutex);#ifdef _WIN32SetEvent(event_wake_recv);
#elseif (syncpipe_wake_recv_w != -1)if (write(syncpipe_wake_recv_w, "", 1) != 1)logg(LOGG_DEBUG_NV, "Failed to write to syncpipe\n");
#endifreturn NULL;
}

recvloop=>reload_db=>reload_th=>cl_load


cl_error_t cl_load(const char *path, struct cl_engine *engine, unsigned int *signo, unsigned int dboptions)
{STATBUF sb;int ret;if (!engine) {cli_errmsg("cl_load: engine == NULL\n");return CL_ENULLARG;}if (engine->dboptions & CL_DB_COMPILED) {cli_errmsg("cl_load(): can't load new databases when engine is already compiled\n");return CL_EARG;}if (CLAMSTAT(path, &sb) == -1) {//检查访问权限switch (errno) {
#if defined(EACCES)case EACCES:cli_errmsg("cl_load(): Access denied for path: %s\n", path);break;
#endif
#if defined(ENOENT)case ENOENT:cli_errmsg("cl_load(): No such file or directory: %s\n", path);break;
#endif
#if defined(ELOOP)case ELOOP:cli_errmsg("cl_load(): Too many symbolic links encountered in path: %s\n", path);break;
#endif
#if defined(EOVERFLOW)case EOVERFLOW:cli_errmsg("cl_load(): File size is too large to be recognized. Path: %s\n", path);break;
#endif
#if defined(EIO)case EIO:cli_errmsg("cl_load(): An I/O error occurred while reading from path: %s\n", path);break;
#endifdefault:cli_errmsg("cl_load: Can't get status of: %s\n", path);break;}return CL_ESTAT;}if ((dboptions & CL_DB_PHISHING_URLS) && !engine->phishcheck && (engine->dconf->phishing & PHISHING_CONF_ENGINE))if (CL_SUCCESS != (ret = phishing_init(engine)))return ret;if ((dboptions & CL_DB_BYTECODE) && !engine->bcs.inited) {if (CL_SUCCESS != (ret = cli_bytecode_init(&engine->bcs)))return ret;} else {cli_dbgmsg("Bytecode engine disabled\n");}if (!engine->cache && cli_cache_init(engine))return CL_EMEM;engine->dboptions |= dboptions;switch (sb.st_mode & S_IFMT) {//检查路径是目录还是文件,目录需要按优先级循环读取所有病毒库文件,我们这里是个目录。case S_IFREG:/* Count # of sigs in the database now */engine->num_total_signatures += count_signatures(path, engine, dboptions);ret = cli_load(path, engine, signo, dboptions, NULL);break;case S_IFDIR:/* Count # of signatures inside cli_loaddbdir(), before loading */ret = cli_loaddbdir(path, engine, signo, dboptions | CL_DB_DIRECTORY);break;default:cli_errmsg("cl_load(%s): Not supported database file type\n", path);return CL_EOPEN;}if (engine->cb_sigload_progress) {/* Let the progress callback function know we're done! */(void)engine->cb_sigload_progress(*signo, *signo, engine->cb_sigload_progress_ctx);}#ifdef YARA_PROTOif (yara_total) {cli_yaramsg("$$$$$$$$$$$$ YARA $$$$$$$$$$$$\n");cli_yaramsg("\tTotal Rules: %u\n", yara_total);cli_yaramsg("\tRules Loaded: %u\n", yara_loaded);cli_yaramsg("\tComplex Conditions: %u\n", yara_complex);cli_yaramsg("\tMalformed/Unsupported Rules: %u\n", yara_malform);cli_yaramsg("\tEmpty Rules: %u\n", yara_empty);cli_yaramsg("$$$$$$$$$$$$ YARA $$$$$$$$$$$$\n");}
#endifreturn ret;
}

recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir


/*
 * Load every supported database file found in a directory.
 *
 * Works in two passes:
 *  1. Scan the directory. For each entry accepted by CLI_DBEXT(), build its
 *     full path, add its signature count to engine->num_total_signatures,
 *     assign it a load priority and insert it into a linked list through
 *     cli_insertdbtoll().
 *  2. Walk the list and load each file with cli_load(), skipping whichever of
 *     daily.cld / daily.cvd loses the version comparison.
 *
 * dirname: directory to scan.
 * engine:  engine that receives the signatures.
 * signo:   signature counter handed on to cli_load().
 * options: CL_DB_* options; CL_DB_OFFICIAL_ONLY filters unofficial files.
 *
 * Returns the result of the last cli_load() call, or an error code from the
 * scan. `ret` starts as CL_EOPEN and is only changed by an error path or by
 * cli_load(), so a directory with nothing to load returns CL_EOPEN and logs
 * the "No supported database files found" message.
 */
static cl_error_t cli_loaddbdir(const char *dirname, struct cl_engine *engine, unsigned int *signo, unsigned int options)
{
    cl_error_t ret = CL_EOPEN;
    DIR *dd = NULL;
    struct dirent *dent;
    char *dbfile      = NULL;
    int ends_with_sep = 0;
    size_t dirname_len;
    struct cl_cvd *daily_cld = NULL;
    struct cl_cvd *daily_cvd = NULL;
    struct db_ll_entry *head = NULL;
    struct db_ll_entry *iter;
    struct db_ll_entry *next;

    cli_dbgmsg("Loading databases from %s\n", dirname);

    /* Open the database directory. */
    if ((dd = opendir(dirname)) == NULL) {
        cli_errmsg("cli_loaddbdir: Can't open directory %s\n", dirname);
        ret = CL_EOPEN;
        goto done;
    }

    /* Remember whether dirname already ends with the path separator, so the
     * joined path does not get a second one. */
    dirname_len = strlen(dirname);
    if (dirname_len >= strlen(PATHSEP)) {
        if (strcmp(dirname + dirname_len - strlen(PATHSEP), PATHSEP) == 0) {
            cli_dbgmsg("cli_loaddbdir: dirname ends with separator\n");
            ends_with_sep = 1;
        }
    }

    /* Pass 1: read every directory entry. */
    while ((dent = readdir(dd))) {
        struct db_ll_entry *entry;
        unsigned int load_priority;

        if (!dent->d_ino) {
            continue;
        }

        if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) {
            continue;
        }

        if (!CLI_DBEXT(dent->d_name)) {
            continue;
        }

        /* +2 leaves room for the separator and the terminating NUL
         * (assumes PATHSEP is a single character; TODO confirm). */
        dbfile = (char *)cli_malloc(strlen(dent->d_name) + dirname_len + 2);
        if (!dbfile) {
            cli_errmsg("cli_loaddbdir: dbfile == NULL\n");
            ret = CL_EMEM;
            goto done;
        }
        if (ends_with_sep)
            sprintf(dbfile, "%s%s", dirname, dent->d_name);
        else
            sprintf(dbfile, "%s" PATHSEP "%s", dirname, dent->d_name);

/* Load priorities: a lower number is meant to be loaded earlier. */
#define DB_LOAD_PRIORITY_IGN 1
#define DB_LOAD_PRIORITY_DAILY_CLD 2
#define DB_LOAD_PRIORITY_DAILY_CVD 3
#define DB_LOAD_PRIORITY_LOCAL_GDB 4
#define DB_LOAD_PRIORITY_DAILY_CFG 5
#define DB_LOAD_PRIORITY_CRB 6
#define DB_LOAD_PRIORITY_NORMAL 7

        if (cli_strbcasestr(dent->d_name, ".ign") || cli_strbcasestr(dent->d_name, ".ign2")) {
            /* load .ign and .ign2 files first */
            load_priority = DB_LOAD_PRIORITY_IGN;

            engine->num_total_signatures += count_line_based_signatures(dbfile);

        } else if (!strcmp(dent->d_name, "daily.cld")) {
            /* The daily db must be loaded before main, this way, the
               daily ign & ign2 signatures prevent ign'ored signatures
               in all databases from being loaded. */
            load_priority = DB_LOAD_PRIORITY_DAILY_CLD;

            if (0 == access(dbfile, R_OK)) {
                /* The parsed header is kept for the version comparison in pass 2. */
                daily_cld = cl_cvdhead(dbfile);
                if (!daily_cld) {
                    cli_errmsg("cli_loaddbdir: error parsing header of %s\n", dbfile);
                    ret = CL_EMALFDB;
                    goto done;
                }

                /* Successfully opened the daily CLD file and read the header info. */
                engine->num_total_signatures += daily_cld->sigs;
            } else {
                /* Unreadable file: drop it silently and move on. */
                free(dbfile);
                dbfile = NULL;
                continue;
            }

        } else if (!strcmp(dent->d_name, "daily.cvd")) {
            load_priority = DB_LOAD_PRIORITY_DAILY_CVD;

            if (0 == access(dbfile, R_OK)) {
                /* The parsed header is kept for the version comparison in pass 2. */
                daily_cvd = cl_cvdhead(dbfile);
                if (!daily_cvd) {
                    cli_errmsg("cli_loaddbdir: error parsing header of %s\n", dbfile);
                    ret = CL_EMALFDB;
                    goto done;
                }

                /* Successfully opened the daily CVD file and ready the header info. */
                engine->num_total_signatures += daily_cvd->sigs;
            } else {
                /* Unreadable file: drop it silently and move on. */
                free(dbfile);
                dbfile = NULL;
                continue;
            }

        } else if (!strcmp(dent->d_name, "local.gdb")) {
            load_priority = DB_LOAD_PRIORITY_LOCAL_GDB;

            engine->num_total_signatures += count_line_based_signatures(dbfile);

        } else if (!strcmp(dent->d_name, "daily.cfg")) {
            load_priority = DB_LOAD_PRIORITY_DAILY_CFG;

            engine->num_total_signatures += count_line_based_signatures(dbfile);

        } else if ((options & CL_DB_OFFICIAL_ONLY) &&
                   !strstr(dirname, "clamav-") &&            // Official databases that are temp-files (in the process of updating).
                   !cli_strbcasestr(dent->d_name, ".cld") && // Official databases that have been updated using incremental updates.
                   !cli_strbcasestr(dent->d_name, ".cvd")) { // Official databases.
            // TODO Should this be higher up in the list? Should we
            // ignore .ign/.ign2 files and the local.gdb file when this
            // flag is set?
            cli_dbgmsg("Skipping unofficial database %s\n", dent->d_name);
            free(dbfile);
            dbfile = NULL;
            continue;

        } else if (cli_strbcasestr(dent->d_name, ".crb")) {
            /* .cat files cannot be loaded successfully unless there are .crb
             * rules that trust the certs used to sign the catalog files.
             * Therefore, we need to ensure the .crb rules are loaded prior */
            load_priority = DB_LOAD_PRIORITY_CRB;

            engine->num_total_signatures += count_line_based_signatures(dbfile);

        } else {
            load_priority = DB_LOAD_PRIORITY_NORMAL;

            /* Add this database's signature count to the running total. */
            engine->num_total_signatures += count_signatures(dbfile, engine, options);
        }

        entry = malloc(sizeof(*entry));
        if (NULL == entry) {
            cli_errmsg("cli_loaddbdir: failed to allocate memory for database load list entry\n");
            ret = CL_EMEM;
            goto done;
        }

        /* The list entry takes ownership of the path string; dbfile is
         * cleared so the cleanup code does not free it a second time. */
        entry->path          = dbfile;
        dbfile               = NULL;
        entry->load_priority = load_priority;

        /* Insert the entry into the list headed by `head`. */
        cli_insertdbtoll(&head, entry);
    }

    /* The list entries are stored in priority order, so now just loop through
     * and load everything.
     * NOTE: If there's a daily.cld and a daily.cvd, we'll only load whichever
     * has the highest version number.  If they have the same version number
     * we load daily.cld, since that will load faster (it won't attempt to
     * verify the digital signature of the db).
     *
     * TODO It'd be ideal if we treated all cld/cvd pairs like we do the daily
     * ones, and only loaded the one with the highest version. */
    /* Pass 2: load the collected databases. */
    for (iter = head; iter != NULL; iter = iter->next) {
        if (DB_LOAD_PRIORITY_DAILY_CLD == iter->load_priority) {
            /* iter is the daily.cld. If we also have the cvd and the cvd is newer, skip the cld. */
            if ((NULL != daily_cvd) && (daily_cld->version < daily_cvd->version)) {
                continue;
            }
        } else if (DB_LOAD_PRIORITY_DAILY_CVD == iter->load_priority) {
            /* iter is the daily.cvd. If we also have the cld and the cld is same or newer, skip the cvd. */
            if ((NULL != daily_cld) && (daily_cld->version >= daily_cvd->version)) {
                continue;
            }
        }

        /* The first failing database aborts the whole directory load. */
        ret = cli_load(iter->path, engine, signo, options, NULL);
        if (ret) {
            cli_errmsg("cli_loaddbdir: error loading database %s\n", iter->path);
            goto done;
        }
    }

done:

    /* Free the load list together with the path strings it owns. */
    for (iter = head; iter != NULL; iter = next) {
        next = iter->next;
        free(iter->path);
        free(iter);
    }

    /* Non-NULL only when an error occurred before the path was handed to a list entry. */
    if (NULL != dbfile) {
        free(dbfile);
    }

    if (NULL != dd) {
        closedir(dd);
    }

    if (NULL != daily_cld) {
        cl_cvdfree(daily_cld);
    }

    if (NULL != daily_cvd) {
        cl_cvdfree(daily_cvd);
    }

    /* NOTE(review): this message is printed for every CL_EOPEN result,
     * including a failed opendir() or a cli_load() that returned CL_EOPEN. */
    if (ret == CL_EOPEN)
        cli_errmsg("cli_loaddbdir: No supported database files found in %s\n", dirname);

    return ret;
}

recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>count_signatures


/*** @brief Count the number of signatures in a database file.** Non-database files will be ignored, and count as 0 signatures.* Database validation is not done, just signature counting.** CVD/CLD/CUD database archives are not counted the hard way, we just trust* signature count in the header. Yara rules and bytecode sigs count as 1 each.** @param filepath  Filepath of the database file to count.* @return size_t   The number of signatures.*/
static size_t count_signatures(const char *filepath, struct cl_engine *engine, unsigned int options)
{size_t num_signatures            = 0;struct cl_cvd *db_archive_header = NULL;if (cli_strbcasestr(filepath, ".cld") ||cli_strbcasestr(filepath, ".cvd") ||cli_strbcasestr(filepath, ".cud")) {/* use the CVD head to get the sig count. */if (0 == access(filepath, R_OK)) {db_archive_header = cl_cvdhead(filepath);//读取文件头信息,里面包含了sig个数if (!db_archive_header) {cli_errmsg("cli_loaddbdir: error parsing header of %s\n", filepath);goto done;}num_signatures += db_archive_header->sigs;//累加sig 个数}} else if ((CL_BYTECODE_TRUST_ALL == engine->bytecode_security) &&cli_strbcasestr(filepath, ".cbc")) {/* Counts as 1 signature if loading plain .cbc files. */num_signatures += 1;} else if ((options & CL_DB_YARA_ONLY) &&(cli_strbcasestr(filepath, ".yar") || cli_strbcasestr(filepath, ".yara"))) {/* Counts as 1 signature. */num_signatures += 1;} else if (cli_strbcasestr(filepath, ".db") ||cli_strbcasestr(filepath, ".crb") ||cli_strbcasestr(filepath, ".hdb") || cli_strbcasestr(filepath, ".hsb") ||cli_strbcasestr(filepath, ".hdu") || cli_strbcasestr(filepath, ".hsu") ||cli_strbcasestr(filepath, ".fp") || cli_strbcasestr(filepath, ".sfp") ||cli_strbcasestr(filepath, ".mdb") || cli_strbcasestr(filepath, ".msb") ||cli_strbcasestr(filepath, ".imp") ||cli_strbcasestr(filepath, ".mdu") || cli_strbcasestr(filepath, ".msu") ||cli_strbcasestr(filepath, ".ndb") || cli_strbcasestr(filepath, ".ndu") || cli_strbcasestr(filepath, ".sdb") ||cli_strbcasestr(filepath, ".ldb") || cli_strbcasestr(filepath, ".ldu") ||cli_strbcasestr(filepath, ".zmd") || cli_strbcasestr(filepath, ".rmd") ||cli_strbcasestr(filepath, ".cfg") ||cli_strbcasestr(filepath, ".wdb") ||cli_strbcasestr(filepath, ".pdb") || cli_strbcasestr(filepath, ".gdb") ||cli_strbcasestr(filepath, ".ftm") ||cli_strbcasestr(filepath, ".ign") || cli_strbcasestr(filepath, ".ign2") ||cli_strbcasestr(filepath, ".idb") ||cli_strbcasestr(filepath, ".cdb") ||cli_strbcasestr(filepath, ".cat") ||cli_strbcasestr(filepath, ".ioc") ||cli_strbcasestr(filepath, 
".pwdb")) {/* Should be a line-based signaure file, count it the old fashioned way */num_signatures += count_line_based_signatures(filepath);}done:if (NULL != db_archive_header) {cl_cvdfree(db_archive_header);}return num_signatures;
}

recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>cli_load


cl_error_t cli_load(const char *filename, struct cl_engine *engine, unsigned int *signo, unsigned int options, struct cli_dbio *dbio)
{cl_error_t ret = CL_SUCCESS;FILE *fs        = NULL;uint8_t skipped = 0;const char *dbname;char buff[FILEBUFF];if (dbio && dbio->chkonly) {while (cli_dbgets(buff, FILEBUFF, NULL, dbio)) continue;return CL_SUCCESS;}if (!dbio && (fs = fopen(filename, "rb")) == NULL) {//打开文件if (options & CL_DB_DIRECTORY) { /* bb#1624 */if (access(filename, R_OK)) {if (errno == ENOENT) {cli_dbgmsg("Detected race condition, ignoring old file %s\n", filename);return CL_SUCCESS;}}}cli_errmsg("cli_load(): Can't open file %s\n", filename);return CL_EOPEN;}if ((dbname = strrchr(filename, *PATHSEP)))dbname++;elsedbname = filename;#ifdef HAVE_YARAif (options & CL_DB_YARA_ONLY) {if (cli_strbcasestr(dbname, ".yar") || cli_strbcasestr(dbname, ".yara"))ret = cli_loadyara(fs, engine, signo, options, dbio, filename);elseskipped = 1;} else
#endifif (cli_strbcasestr(dbname, ".db")) {ret = cli_loaddb(fs, engine, signo, options, dbio, dbname);} else if (cli_strbcasestr(dbname, ".cvd")) {//先走这里ret = cli_cvdload(fs, engine, signo, options, 0, filename, 0);} else if (cli_strbcasestr(dbname, ".cld")) {ret = cli_cvdload(fs, engine, signo, options, 1, filename, 0);} else if (cli_strbcasestr(dbname, ".cud")) {ret = cli_cvdload(fs, engine, signo, options, 2, filename, 0);} else if (cli_strbcasestr(dbname, ".crb")) {ret = cli_loadcrt(fs, engine, dbio);} else if (cli_strbcasestr(dbname, ".hdb") || cli_strbcasestr(dbname, ".hsb")) {ret = cli_loadhash(fs, engine, signo, MD5_HDB, options, dbio, dbname);} else if (cli_strbcasestr(dbname, ".hdu") || cli_strbcasestr(dbname, ".hsu")) {if (options & CL_DB_PUA)ret = cli_loadhash(fs, engine, signo, MD5_HDB, options | CL_DB_PUA_MODE, dbio, dbname);elseskipped = 1;} else if (cli_strbcasestr(dbname, ".fp") || cli_strbcasestr(dbname, ".sfp")) {ret = cli_loadhash(fs, engine, signo, MD5_FP, options, dbio, dbname);} else if (cli_strbcasestr(dbname, ".mdb") || cli_strbcasestr(dbname, ".msb")) {ret = cli_loadhash(fs, engine, signo, MD5_MDB, options, dbio, dbname);} else if (cli_strbcasestr(dbname, ".imp")) {ret = cli_loadhash(fs, engine, signo, MD5_IMP, options, dbio, dbname);} else if (cli_strbcasestr(dbname, ".mdu") || cli_strbcasestr(dbname, ".msu")) {if (options & CL_DB_PUA)ret = cli_loadhash(fs, engine, signo, MD5_MDB, options | CL_DB_PUA_MODE, dbio, dbname);elseskipped = 1;} else if (cli_strbcasestr(dbname, ".ndb")) {ret = cli_loadndb(fs, engine, signo, 0, options, dbio, dbname);} else if (cli_strbcasestr(dbname, ".ndu")) {if (!(options & CL_DB_PUA))skipped = 1;elseret = cli_loadndb(fs, engine, signo, 0, options | CL_DB_PUA_MODE, dbio, dbname);} else if (cli_strbcasestr(filename, ".ldb")) {ret = cli_loadldb(fs, engine, signo, options, dbio, dbname);} else if (cli_strbcasestr(filename, ".ldu")) {if (options & CL_DB_PUA)ret = cli_loadldb(fs, engine, signo, options | 
CL_DB_PUA_MODE, dbio, dbname);elseskipped = 1;} else if (cli_strbcasestr(filename, ".cbc")) {if (options & CL_DB_BYTECODE)ret = cli_loadcbc(fs, engine, signo, options, dbio, dbname);elseskipped = 1;} else if (cli_strbcasestr(dbname, ".sdb")) {ret = cli_loadndb(fs, engine, signo, 1, options, dbio, dbname);} else if (cli_strbcasestr(dbname, ".zmd")) {ret = cli_loadmd(fs, engine, signo, 1, options, dbio, dbname);} else if (cli_strbcasestr(dbname, ".rmd")) {ret = cli_loadmd(fs, engine, signo, 2, options, dbio, dbname);} else if (cli_strbcasestr(dbname, ".cfg")) {ret = cli_dconf_load(fs, engine, options, dbio);} else if (cli_strbcasestr(dbname, ".info")) {ret = cli_loadinfo(fs, engine, options, dbio);} else if (cli_strbcasestr(dbname, ".wdb")) {if (options & CL_DB_PHISHING_URLS) {ret = cli_loadwdb(fs, engine, options, dbio);} elseskipped = 1;} else if (cli_strbcasestr(dbname, ".pdb") || cli_strbcasestr(dbname, ".gdb")) {if (options & CL_DB_PHISHING_URLS) {ret = cli_loadpdb(fs, engine, signo, options, dbio);} elseskipped = 1;} else if (cli_strbcasestr(dbname, ".ftm")) {ret = cli_loadftm(fs, engine, options, 0, dbio);} else if (cli_strbcasestr(dbname, ".ign") || cli_strbcasestr(dbname, ".ign2")) {ret = cli_loadign(fs, engine, options, dbio);} else if (cli_strbcasestr(dbname, ".idb")) {ret = cli_loadidb(fs, engine, signo, options, dbio);} else if (cli_strbcasestr(dbname, ".cdb")) {ret = cli_loadcdb(fs, engine, signo, options, dbio);} else if (cli_strbcasestr(dbname, ".cat")) {ret = cli_loadmscat(fs, dbname, engine, options, dbio);} else if (cli_strbcasestr(dbname, ".ioc")) {ret = cli_loadopenioc(fs, dbname, engine, options);
#ifdef HAVE_YARA} else if (cli_strbcasestr(dbname, ".yar") || cli_strbcasestr(dbname, ".yara")) {if (!(options & CL_DB_YARA_EXCLUDE))ret = cli_loadyara(fs, engine, signo, options, dbio, filename);elseskipped = 1;
#endif} else if (cli_strbcasestr(dbname, ".pwdb")) {ret = cli_loadpwdb(fs, engine, options, 0, dbio);} else {cli_warnmsg("cli_load: unknown extension - skipping %s\n", filename);skipped = 1;}if (ret) {cli_errmsg("Can't load %s: %s\n", filename, cl_strerror(ret));} else {if (skipped)cli_dbgmsg("%s skipped\n", filename);elsecli_dbgmsg("%s loaded\n", filename);}if (fs)fclose(fs);if (engine->cb_sigload_progress) {/* Let the progress callback function know how we're doing */(void)engine->cb_sigload_progress(engine->num_total_signatures, *signo, engine->cb_sigload_progress_ctx);}return ret;
}

recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>cli_load=>cli_cvdload


int cli_cvdload(FILE *fs, struct cl_engine *engine, unsigned int *signo, unsigned int options, unsigned int dbtype, const char *filename, unsigned int chkonly)
{struct cl_cvd cvd, dupcvd;FILE *dupfs;int ret;time_t s_time;int cfd;struct cli_dbio dbio;struct cli_dbinfo *dbinfo = NULL;char *dupname;dbio.hashctx = NULL;cli_dbgmsg("in cli_cvdload()\n");/* verify */if ((ret = cli_cvdverify(fs, &cvd, dbtype)))return ret;if (dbtype <= 1) {/* check for duplicate db */dupname = cli_strdup(filename);if (!dupname)return CL_EMEM;dupname[strlen(dupname) - 2] = (dbtype == 1 ? 'v' : 'l');if (!access(dupname, R_OK) && (dupfs = fopen(dupname, "rb"))) {if ((ret = cli_cvdverify(dupfs, &dupcvd, !dbtype))) {fclose(dupfs);free(dupname);return ret;}fclose(dupfs);if (dupcvd.version > cvd.version) {cli_warnmsg("Detected duplicate databases %s and %s. The %s database is older and will not be loaded, you should manually remove it from the database directory.\n", filename, dupname, filename);free(dupname);return CL_SUCCESS;} else if (dupcvd.version == cvd.version && !dbtype) {cli_warnmsg("Detected duplicate databases %s and %s, please manually remove one of them\n", filename, dupname);free(dupname);return CL_SUCCESS;}}free(dupname);}if (strstr(filename, "daily.")) {time(&s_time);if (cvd.stime > s_time) {if (cvd.stime - (unsigned int)s_time > 3600) {cli_warnmsg("******************************************************\n");cli_warnmsg("***      Virus database timestamp in the future!   ***\n");cli_warnmsg("***  Please check the timezone and clock settings  ***\n");cli_warnmsg("******************************************************\n");}} else if ((unsigned int)s_time - cvd.stime > 604800) {cli_warnmsg("**************************************************\n");cli_warnmsg("***  The virus database is older than 7 days!  ***\n");cli_warnmsg("***   Please update it as soon as possible.    
***\n");cli_warnmsg("**************************************************\n");}engine->dbversion[0] = cvd.version;engine->dbversion[1] = cvd.stime;}if (cvd.fl > cl_retflevel()) {cli_warnmsg("*******************************************************************\n");cli_warnmsg("***  This version of the ClamAV engine is outdated.             ***\n");cli_warnmsg("***   Read https://docs.clamav.net/manual/Installing.html       ***\n");cli_warnmsg("*******************************************************************\n");}cfd          = fileno(fs);dbio.chkonly = 0;if (dbtype == 2)ret = cli_tgzload(cfd, engine, signo, options | CL_DB_UNSIGNED, &dbio, NULL);elseret = cli_tgzload(cfd, engine, signo, options | CL_DB_OFFICIAL, &dbio, NULL);//加载病毒库if (ret != CL_SUCCESS)return ret;dbinfo = engine->dbinfo;if (!dbinfo || !dbinfo->cvd || (dbinfo->cvd->version != cvd.version) || (dbinfo->cvd->sigs != cvd.sigs) || (dbinfo->cvd->fl != cvd.fl) || (dbinfo->cvd->stime != cvd.stime)) {cli_errmsg("cli_cvdload: Corrupted CVD header\n");return CL_EMALFDB;}dbinfo = engine->dbinfo ? engine->dbinfo->next : NULL;if (!dbinfo) {cli_errmsg("cli_cvdload: dbinfo error\n");return CL_EMALFDB;}dbio.chkonly = chkonly;if (dbtype == 2)options |= CL_DB_UNSIGNED;elseoptions |= CL_DB_SIGNED | CL_DB_OFFICIAL;ret = cli_tgzload(cfd, engine, signo, options, &dbio, dbinfo);while (engine->dbinfo) {dbinfo         = engine->dbinfo;engine->dbinfo = dbinfo->next;MPOOL_FREE(engine->mempool, dbinfo->name);MPOOL_FREE(engine->mempool, dbinfo->hash);if (dbinfo->cvd)cl_cvdfree(dbinfo->cvd);MPOOL_FREE(engine->mempool, dbinfo);}return ret;
}

recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>cli_load=>cli_cvdload=>cli_tgzload

static int cli_tgzload(int fd, struct cl_engine *engine, unsigned int *signo, unsigned int options, struct cli_dbio *dbio, struct cli_dbinfo *dbinfo)
{char osize[13], name[101];char block[TAR_BLOCKSIZE];int nread, fdd, ret;unsigned int type, size, pad, compr = 1;off_t off;struct cli_dbinfo *db;char hash[32];cli_dbgmsg("in cli_tgzload()\n");if (lseek(fd, 512, SEEK_SET) < 0) {return CL_ESEEK;}if (cli_readn(fd, block, 7) != 7)return CL_EFORMAT; /* truncated file? */if (!strncmp(block, "COPYING", 7))//检查是否为压缩文件compr = 0;if (lseek(fd, 512, SEEK_SET) < 0) {return CL_ESEEK;}if ((fdd = dup(fd)) == -1) {cli_errmsg("cli_tgzload: Can't duplicate descriptor %d\n", fd);return CL_EDUP;}if (compr) {if ((dbio->gzs = gzdopen(fdd, "rb")) == NULL) {//打开压缩文件cli_errmsg("cli_tgzload: Can't gzdopen() descriptor %d, errno = %d\n", fdd, errno);if (fdd > -1)close(fdd);return CL_EOPEN;}dbio->fs = NULL;} else {if ((dbio->fs = fdopen(fdd, "rb")) == NULL) {//打开普通文件cli_errmsg("cli_tgzload: Can't fdopen() descriptor %d, errno = %d\n", fdd, errno);if (fdd > -1)close(fdd);return CL_EOPEN;}dbio->gzs = NULL;}dbio->bufsize = CLI_DEFAULT_DBIO_BUFSIZE;dbio->buf     = cli_malloc(dbio->bufsize);if (!dbio->buf) {cli_errmsg("cli_tgzload: Can't allocate memory for dbio->buf\n");cli_tgzload_cleanup(compr, dbio, fdd);return CL_EMALFDB;}dbio->bufpt  = NULL;dbio->usebuf = 1;dbio->readpt = dbio->buf;while (1) {if (compr)nread = gzread(dbio->gzs, block, TAR_BLOCKSIZE);elsenread = fread(block, 1, TAR_BLOCKSIZE, dbio->fs);if (!nread)break;if (nread != TAR_BLOCKSIZE) {cli_errmsg("cli_tgzload: Incomplete block read\n");cli_tgzload_cleanup(compr, dbio, fdd);return CL_EMALFDB;}if (block[0] == '\0') /* We're done *///读取结束break;strncpy(name, block, 100);name[100] = '\0';//获取文件名if (strchr(name, '/')) {cli_errmsg("cli_tgzload: Slash separators are not allowed in CVD\n");cli_tgzload_cleanup(compr, dbio, fdd);return CL_EMALFDB;}type = block[156];switch (type) {case '0':case '\0':break;case '5':cli_errmsg("cli_tgzload: Directories are not supported in CVD\n");cli_tgzload_cleanup(compr, dbio, fdd);return CL_EMALFDB;default:cli_errmsg("cli_tgzload: Unknown type flag '%c'\n", 
type);cli_tgzload_cleanup(compr, dbio, fdd);return CL_EMALFDB;}strncpy(osize, block + 124, 12);osize[12] = '\0';if ((sscanf(osize, "%o", &size)) == 0) {//获取文件大小cli_errmsg("cli_tgzload: Invalid size in header\n");cli_tgzload_cleanup(compr, dbio, fdd);return CL_EMALFDB;}dbio->size     = size;dbio->readsize = dbio->size < dbio->bufsize ? dbio->size : dbio->bufsize - 1;dbio->bufpt    = NULL;dbio->readpt   = dbio->buf;if (!(dbio->hashctx)) {dbio->hashctx = cl_hash_init("sha256");//初始化hash上下文,后面会用这个对文件内容进行计算hash验证if (!(dbio->hashctx)) {cli_tgzload_cleanup(compr, dbio, fdd);return CL_EMALFDB;}}dbio->bread = 0;/* cli_dbgmsg("cli_tgzload: Loading %s, size: %u\n", name, size); */if (compr)off = (off_t)gzseek(dbio->gzs, 0, SEEK_CUR);elseoff = ftell(dbio->fs);if ((!dbinfo && cli_strbcasestr(name, ".info")) || (dbinfo && CLI_DBEXT(name))) {ret = cli_load(name, engine, signo, options, dbio);//加载病毒库if (ret) {cli_errmsg("cli_tgzload: Can't load %s\n", name);cli_tgzload_cleanup(compr, dbio, fdd);return CL_EMALFDB;}if (!dbinfo) {cli_tgzload_cleanup(compr, dbio, fdd);return CL_SUCCESS;} else {db = dbinfo;while (db && strcmp(db->name, name))db = db->next;if (!db) {cli_errmsg("cli_tgzload: File %s not found in .info\n", name);cli_tgzload_cleanup(compr, dbio, fdd);return CL_EMALFDB;}if (dbio->bread) {if (db->size != dbio->bread) {cli_errmsg("cli_tgzload: File %s not correctly loaded\n", name);cli_tgzload_cleanup(compr, dbio, fdd);return CL_EMALFDB;}cl_finish_hash(dbio->hashctx, hash);dbio->hashctx = cl_hash_init("sha256");if (!(dbio->hashctx)) {cli_tgzload_cleanup(compr, dbio, fdd);return CL_EMALFDB;}if (memcmp(db->hash, hash, 32)) {//验证hashcli_errmsg("cli_tgzload: Invalid checksum for file %s\n", name);cli_tgzload_cleanup(compr, dbio, fdd);return CL_EMALFDB;}}}}pad = size % TAR_BLOCKSIZE ? 
(TAR_BLOCKSIZE - (size % TAR_BLOCKSIZE)) : 0;if (compr) {if (off == gzseek(dbio->gzs, 0, SEEK_CUR))gzseek(dbio->gzs, size + pad, SEEK_CUR);else if (pad)gzseek(dbio->gzs, pad, SEEK_CUR);} else {if (off == ftell(dbio->fs))fseek(dbio->fs, size + pad, SEEK_CUR);else if (pad)fseek(dbio->fs, pad, SEEK_CUR);}}cli_tgzload_cleanup(compr, dbio, fdd);return CL_SUCCESS;
}

recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>cli_load=>cli_cvdload=>cli_tgzload=>cli_load

/*我们以最简单的hdb病毒库为例,进行讲解,加压后原始内容格式如下* 44d88612fea8a8f36de82e1278abb02f:68:Eicar-Test-Signature*/
cl_error_t cli_load(const char *filename, struct cl_engine *engine, unsigned int *signo, unsigned int options, struct cli_dbio *dbio)
{cl_error_t ret = CL_SUCCESS;FILE *fs        = NULL;uint8_t skipped = 0;const char *dbname;char buff[FILEBUFF];if (dbio && dbio->chkonly) {while (cli_dbgets(buff, FILEBUFF, NULL, dbio)) continue;return CL_SUCCESS;}if (!dbio && (fs = fopen(filename, "rb")) == NULL) {if (options & CL_DB_DIRECTORY) { /* bb#1624 */if (access(filename, R_OK)) {if (errno == ENOENT) {cli_dbgmsg("Detected race condition, ignoring old file %s\n", filename);return CL_SUCCESS;}}}cli_errmsg("cli_load(): Can't open file %s\n", filename);return CL_EOPEN;}if ((dbname = strrchr(filename, *PATHSEP)))dbname++;elsedbname = filename;#ifdef HAVE_YARAif (options & CL_DB_YARA_ONLY) {if (cli_strbcasestr(dbname, ".yar") || cli_strbcasestr(dbname, ".yara"))ret = cli_loadyara(fs, engine, signo, options, dbio, filename);elseskipped = 1;} else
#endifif (cli_strbcasestr(dbname, ".db")) {ret = cli_loaddb(fs, engine, signo, options, dbio, dbname);} else if (cli_strbcasestr(dbname, ".cvd")) {ret = cli_cvdload(fs, engine, signo, options, 0, filename, 0);} else if (cli_strbcasestr(dbname, ".cld")) {ret = cli_cvdload(fs, engine, signo, options, 1, filename, 0);} else if (cli_strbcasestr(dbname, ".cud")) {ret = cli_cvdload(fs, engine, signo, options, 2, filename, 0);} else if (cli_strbcasestr(dbname, ".crb")) {ret = cli_loadcrt(fs, engine, dbio);} else if (cli_strbcasestr(dbname, ".hdb") || cli_strbcasestr(dbname, ".hsb")) {ret = cli_loadhash(fs, engine, signo, MD5_HDB, options, dbio, dbname);//进入此函数进行读取}
......if (fs)fclose(fs);if (engine->cb_sigload_progress) {/* Let the progress callback function know how we're doing */(void)engine->cb_sigload_progress(engine->num_total_signatures, *signo, engine->cb_sigload_progress_ctx);}return ret;
}

recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>cli_load=>cli_cvdload=>cli_tgzload=>cli_load=>cli_loadhash

static int cli_loadhash(FILE *fs, struct cl_engine *engine, unsigned int *signo, unsigned int mode, unsigned int options, struct cli_dbio *dbio, const char *dbname)
{const char *tokens[MD5_TOKENS + 1];char buffer[FILEBUFF], *buffer_cpy = NULL;const char *pt, *virname;int ret                 = CL_SUCCESS;unsigned int size_field = 1, md5_field = 0, line = 0, sigs = 0, tokens_count;unsigned int req_fl = 0;struct cli_matcher *db;unsigned long size;if (mode == MD5_MDB) {size_field = 0;md5_field  = 1;db         = engine->hm_mdb;} else if (mode == MD5_HDB)//类型为MD5_HDB, md5_field  为0, size_field 为1db = engine->hm_hdb;else if (mode == MD5_IMP)db = engine->hm_imp;elsedb = engine->hm_fp;if (!db) {if (!(db = MPOOL_CALLOC(engine->mempool, 1, sizeof(*db))))return CL_EMEM;
#ifdef USE_MPOOLdb->mempool = engine->mempool;
#endifif (mode == MD5_HDB)engine->hm_hdb = db;else if (mode == MD5_MDB)engine->hm_mdb = db;else if (mode == MD5_IMP)engine->hm_imp = db;elseengine->hm_fp = db;}if (engine->ignored)if (!(buffer_cpy = cli_malloc(FILEBUFF))) {cli_errmsg("cli_loadhash: Can't allocate memory for buffer_cpy\n");return CL_EMEM;}while (cli_dbgets(buffer, FILEBUFF, fs, dbio)) {//循环读取一行内容来处理line++;if (buffer[0] == '#')//注释跳过continue;cli_chomp(buffer);if (engine->ignored)strcpy(buffer_cpy, buffer);tokens_count = cli_strtokenize(buffer, ':', MD5_TOKENS + 1, tokens);if (tokens_count < 3) {//少于3个,报错ret = CL_EMALFDB;break;}if (tokens_count > MD5_TOKENS - 2) {req_fl = atoi(tokens[MD5_TOKENS - 2]);if (tokens_count > MD5_TOKENS) {ret = CL_EMALFDB;break;}if (cl_retflevel() < req_fl)continue;if (tokens_count == MD5_TOKENS) {int max_fl = atoi(tokens[MD5_TOKENS - 1]);if (cl_retflevel() > (unsigned int)max_fl)continue;}}if (strcmp(tokens[size_field], "*")) {//指定了size 为68,进行转换成数值类型size = strtoul(tokens[size_field], (char **)&pt, 10);if (*pt || !size || size >= 0xffffffff) {cli_errmsg("cli_loadhash: Invalid value for the size field\n");ret = CL_EMALFDB;break;}} else {size = 0;// The wildcard feature was added in FLEVEL 73, so for backwards// compatibility with older clients, ensure that a minimum FLEVEL// is specified.  
This check doesn't apply to .imp rules, though,// since this rule category wasn't introduced until FLEVEL 90, and// has always supported wildcard usage in rules.if (mode != MD5_IMP && ((tokens_count < MD5_TOKENS - 1) || (req_fl < 73))) {cli_errmsg("cli_loadhash: Minimum FLEVEL field must be at least 73 for wildcard size hash signatures."" For reference, running FLEVEL is %d\n",cl_retflevel());ret = CL_EMALFDB;break;}}pt = tokens[2]; /* virname *///病毒名Eicar-Test-Signatureif (engine->pua_cats && (options & CL_DB_PUA_MODE) && (options & (CL_DB_PUA_INCLUDE | CL_DB_PUA_EXCLUDE)))if (cli_chkpua(pt, engine->pua_cats, options))continue;if (engine->ignored && cli_chkign(engine->ignored, pt, buffer_cpy))continue;if (engine->cb_sigload) {const char *dot = strchr(dbname, '.');if (!dot)dot = dbname;elsedot++;if (engine->cb_sigload(dot, pt, ~options & CL_DB_OFFICIAL, engine->cb_sigload_ctx)) {cli_dbgmsg("cli_loadhash: skipping %s (%s) due to callback\n", pt, dot);continue;}}virname = CLI_MPOOL_VIRNAME(engine->mempool, pt, options & CL_DB_OFFICIAL);if (!virname) {ret = CL_EMALFDB;break;}/*md5 hash 44d88612fea8a8f36de82e1278abb02f 转换成实际的hash,小一倍长度,并存入一个关于长度的hash数组(我们的长度为16),后面编译时,对其进行排序,匹配时按二分查找定位*/if (CL_SUCCESS != (ret = hm_addhash_str(db, tokens[md5_field], size, virname))) {cli_errmsg("cli_loadhash: Malformed hash string at line %u\n", line);MPOOL_FREE(engine->mempool, (void *)virname);break;}sigs++;//sig计数加一, 一个sig解析成功,为下面的进度计算提供输入if (engine->cb_sigload_progress && ((*signo + sigs) % 10000 == 0)) {//sig进度条/* Let the progress callback function know how we're doing */(void)engine->cb_sigload_progress(engine->num_total_signatures, *signo + sigs, engine->cb_sigload_progress_ctx);}}if (engine->ignored)free(buffer_cpy);if (!line) {cli_errmsg("cli_loadhash: Empty database file\n");return CL_EMALFDB;}if (ret) {cli_errmsg("cli_loadhash: Problem parsing database at line %u\n", line);return ret;}if (signo)*signo += sigs;return CL_SUCCESS;
}

recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>cli_load=>cli_cvdload=>cli_tgzload=>cli_load=>cli_loadhash=>hm_addhash_str

int hm_addhash_str(struct cli_matcher *root, const char *strhash, uint32_t size, const char *virusname)
{enum CLI_HASH_TYPE type;char binhash[CLI_HASHLEN_MAX];int hlen;if (!root || !strhash) {cli_errmsg("hm_addhash_str: NULL root or hash\n");return CL_ENULLARG;}/* size 0 here is now a wildcard size match */if (size == (uint32_t)-1) {cli_errmsg("hm_addhash_str: null or invalid size (%u)\n", size);return CL_EARG;}hlen = strlen(strhash);switch (hlen) {case 32:type = CLI_HASH_MD5;//此例为这个typebreak;case 40:type = CLI_HASH_SHA1;break;case 64:type = CLI_HASH_SHA256;break;default:cli_errmsg("hm_addhash_str: invalid hash %s -- FIXME!\n", strhash);return CL_EARG;}if (cli_hex2str_to(strhash, (char *)binhash, hlen)) {//hex转换成strcli_errmsg("hm_addhash_str: invalid hash %s\n", strhash);return CL_EARG;}return hm_addhash_bin(root, binhash, type, size, virusname);//存入哈希数组
}

recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>cli_load=>cli_cvdload=>cli_tgzload=>cli_load=>cli_loadhash=>hm_addhash_str=>hm_addhash_bin

int hm_addhash_bin(struct cli_matcher *root, const void *binhash, enum CLI_HASH_TYPE type, uint32_t size, const char *virusname)
{const unsigned int hlen = hashlen[type];const struct cli_htu32_element *item;struct cli_sz_hash *szh;struct cli_htu32 *ht;int i;if (size) {/* size non-zero, find sz_hash element in size-driven hashtable hash长度哈希表 */ht = &root->hm.sizehashes[type];if (!root->hm.sizehashes[type].capacity) {i = cli_htu32_init(ht, 64, root->mempool);if (i) return i;}item = cli_htu32_find(ht, size);if (!item) {struct cli_htu32_element htitem;szh = MPOOL_CALLOC(root->mempool, 1, sizeof(*szh));if (!szh) {cli_errmsg("hm_addhash_bin: failed to allocate size hash\n");return CL_EMEM;}htitem.key         = size;//长度作为keyhtitem.data.as_ptr = szh;i                  = cli_htu32_insert(ht, &htitem, root->mempool);if (i) {cli_errmsg("hm_addhash_bin: failed to add item to hashtab");MPOOL_FREE(root->mempool, szh);return i;}} elseszh = (struct cli_sz_hash *)item->data.as_ptr;} else {/* size 0 = wildcard */szh = &root->hwild.hashes[type];}szh->items++;//增加key对应value的元素个数,好重新分配内存szh->hash_array = MPOOL_REALLOC2(root->mempool, szh->hash_array, hlen * szh->items);if (!szh->hash_array) {cli_errmsg("hm_addhash_bin: failed to grow hash array to %u entries\n", szh->items);szh->items = 0;MPOOL_FREE(root->mempool, szh->virusnames);szh->virusnames = NULL;return CL_EMEM;}szh->virusnames = MPOOL_REALLOC2(root->mempool, szh->virusnames, sizeof(*szh->virusnames) * szh->items);//分配内存存入对应的病毒名称if (!szh->virusnames) {cli_errmsg("hm_addhash_bin: failed to grow virusname array to %u entries\n", szh->items);szh->items = 0;MPOOL_FREE(root->mempool, szh->hash_array);szh->hash_array = NULL;return CL_EMEM;}memcpy(&szh->hash_array[(szh->items - 1) * hlen], binhash, hlen);//存入病毒的哈希szh->virusnames[(szh->items - 1)] = virusname;//存入病毒名称return 0;
}

接着就是循环调用读取文件中的sig然后解析,添加到这个哈希数组中。

我们加载完病毒库后,需要对其进行编译。

recvloop=>reload_db=>reload_th=>cl_engine_compile


cl_error_t cl_engine_compile(struct cl_engine *engine)
{
......if (engine->hm_hdb)hm_flush(engine->hm_hdb);//编译哈希数组TASK_COMPLETE();
.....engine->dboptions |= CL_DB_COMPILED;return CL_SUCCESS;
}

recvloop=>reload_db=>reload_th=>cl_engine_compile=>hm_flush

/* flush both size-specific and agnostic hash sets */
void hm_flush(struct cli_matcher *root)
{enum CLI_HASH_TYPE type;unsigned int keylen;struct cli_sz_hash *szh;if (!root)return;for (type = CLI_HASH_MD5; type < CLI_HASH_AVAIL_TYPES; type++) {struct cli_htu32 *ht                 = &root->hm.sizehashes[type];const struct cli_htu32_element *item = NULL;szh                                  = NULL;if (!root->hm.sizehashes[type].capacity)continue;while ((item = cli_htu32_next(ht, item))) {szh    = (struct cli_sz_hash *)item->data.as_ptr;keylen = hashlen[type];if (szh->items > 1)hm_sort(szh, 0, szh->items, keylen);//排序}}for (type = CLI_HASH_MD5; type < CLI_HASH_AVAIL_TYPES; type++) {szh    = &root->hwild.hashes[type];keylen = hashlen[type];if (szh->items > 1)hm_sort(szh, 0, szh->items, keylen);}
}

recvloop=>reload_db=>reload_th=>cl_engine_compile=>hm_flush=>hm_sort

/*
 * Sort the entries of one bucket in place (recursive quicksort).
 *
 * Every digest of a given type has the same fixed length, so hash_array is
 * treated as an array of keylen-byte records. virusnames[] is permuted in
 * lockstep so each name stays with its hash.
 *
 * szh    - bucket whose hash_array / virusnames are sorted.
 * l, r   - half-open index range [l, r) to sort.
 * keylen - size of one hash record in bytes.
 *
 * Ordering is whatever hm_cmp() defines.
 */
static void hm_sort(struct cli_sz_hash *szh, size_t l, size_t r, unsigned int keylen)
{
    uint8_t piv[CLI_HASHLEN_MAX], tmph[CLI_HASHLEN_MAX];
    size_t l1, r1;
    const char *tmpv;

    /* Ranges of zero or one element are already sorted. */
    if (l + 1 >= r)
        return;

    l1 = l + 1, r1 = r;

    /* The first element of the range is the pivot. */
    memcpy(piv, &szh->hash_array[keylen * l], keylen);
    while (l1 < r1) {
        if (hm_cmp(&szh->hash_array[keylen * l1], piv, keylen) > 0) {
            /* Element compares greater than the pivot: swap it with the
             * last not-yet-classified slot at the right end. */
            r1--;
            if (l1 == r1) break;
            memcpy(tmph, &szh->hash_array[keylen * l1], keylen);
            tmpv = szh->virusnames[l1];
            memcpy(&szh->hash_array[keylen * l1], &szh->hash_array[keylen * r1], keylen);
            szh->virusnames[l1] = szh->virusnames[r1];
            memcpy(&szh->hash_array[keylen * r1], tmph, keylen);
            szh->virusnames[r1] = tmpv;
        } else
            l1++;
    }

    /* Move the pivot from slot l to slot l1, between the two partitions. */
    l1--;
    if (l1 != l) {
        memcpy(tmph, &szh->hash_array[keylen * l1], keylen);
        tmpv = szh->virusnames[l1];
        memcpy(&szh->hash_array[keylen * l1], &szh->hash_array[keylen * l], keylen);
        szh->virusnames[l1] = szh->virusnames[l];
        memcpy(&szh->hash_array[keylen * l], tmph, keylen);
        szh->virusnames[l] = tmpv;
    }

    /* Sort the two partitions on either side of the pivot. */
    hm_sort(szh, l, l1, keylen);
    hm_sort(szh, r1, r, keylen);
}

到此,整个病毒库的加载就算是结束了。

clamav --reload 加载病毒库源码分析相关推荐

  1. 这篇文章绝对让你深刻理解java类的加载以及ClassLoader源码分析

    前言 package com.jvm.classloader;class Father2{public static String strFather="HelloJVM_Father&qu ...

  2. jQuery deferred应用dom加载完毕详细源码分析(三)

    我承认上章ajax部分写得不好,不要怪我,它的ajax代码太多了,而且跨越大,方法跳跃多,实在不好排版与讲解,但如果你真正想研究源码并且仔细读了得话,你的 收获应该会很大,至少你明白了js的ajax是 ...

  3. 描述一下JAVA的加载过程_JVM源码分析之Java类的加载过程

    简书 占小狼 转载请注明原创出处,谢谢! 趁着年轻,多学习 背景 最近对Java细节的底层实现比较感兴趣,比如Java类文件是如何加载到虚拟机的,类对象和方法是以什么数据结构存在于虚拟机中?虚方法.实 ...

  4. Android开发必会技术!Flutter中网络图片加载和缓存源码分析,完整PDF

    起因 事情是这样的. 4年前毕业那会,呆在公司的短视频项目,做 视频.那会做得比抖音还早,但是由于短视频太烧钱了,项目被公司关掉了.当时需要开发横竖屏直播/异步视频的场景,就研究下了市场上的 app, ...

  5. 下血本买的!Flutter中网络图片加载和缓存源码分析,看看这篇文章吧!

    目录 想要成为一名优秀的Android开发,你需要一份完备的知识体系,在这里,让我们一起成长为自己所想的那样. PagerAdapter 介绍 ViwePager 缓存策略 ViewPager 布局处 ...

  6. Flutter中网络图片加载和缓存源码分析,踩坑了

    关于Android的近况 大家都知道,今年移动开发不那么火热了,完全没有了前两年Android开发那种火热的势头,如此同时,AI热火朝天,很多言论都说Android不行了.其实不光是Android,i ...

  7. 2款不同样式的CSS3 Loading加载动画 附源码

    原文:2款不同样式的CSS3 Loading加载动画 附源码 我们经常看到的Loading加载很多都是转圈圈的那种,今天我们来换一种有创意的CSS3 Loading加载动画,一种是声波形状的动画,另一 ...

  8. 2.2 LayoutInflater 加载布局文件源码

    LayoutInflater 加载布局文件源码 LayoutInflater是一个用于将xml布局文件加载为View或者ViewGroup对象的工具,我们可以称之为布局加载器. 获取LayoutInf ...

  9. html动画爱心制作代码,CSS心形加载的动画源码的实现

    废话不多说上代码,代码很简答,研究一下就明白了,有不明白的可以问我. .heart-loading { margin-top: 120px; width: 200px; height: 200px; ...

最新文章

  1. matlab s% d%,matlab中var函数的翻译For N-D arrays, VAR operates along the first
  2. go语言之进阶篇http客户端编程
  3. 三级工作台抽奖出啥_【早早聊】如何落地一体化运营工作台
  4. 论文阅读——《Exposure Control using Bayesian Optimization based on Entropy Weighted Image Gradient》
  5. 操作系统安装必备基础知识----浅谈电脑系统里的那些UEFI, BIOS, MBR, GPT。
  6. flashcom网址收集
  7. Android的面孔_Actiyity
  8. 由***Web挂机产生的想法!
  9. Hibernate 框架基本知识
  10. 使用FileReader对象的readAsDataURL方法来读取图像文件
  11. 【BZOJ 2726】任务安排【斜率优化dp】
  12. android代码设置drawable,Android:Textview 通过代码设置 Drawable
  13. 电路实验---全桥整流电路
  14. js自动缩放页面自适应屏幕分辨率
  15. 运动耳机什么牌子好、这五款是最值得推荐的运动耳机
  16. H3C路由器交换机配置sflow
  17. 三维点云数据集汇总(分类+检测+识别)
  18. C语言结构体详解(结构体定义,使用,结构体大小等)
  19. Java基础:数据类型与变量
  20. Java 编译与反编译

热门文章

  1. Airflow Timezone
  2. 百度智能音箱入局正当时,如何平台+硬件两不误?
  3. 为了防止女朋友怼我,我就先用python爬了3600个怼人表情包等她来战!
  4. PAKDD2018小结
  5. 英语中For和To的区别
  6. 做教育怎么引流?教育行业怎么引流?培训机构引流如何转化?
  7. 华为p60pro和mate50pro哪个好 华为p60pro和mate50pro参数对比
  8. 三角形面积的勾股定理
  9. AdaBoost.M1算法
  10. android 获取单个通讯录联系人信息(无权限跳转权限设置页面)