diff --git a/libclamav/htmlnorm.c b/libclamav/htmlnorm.c
index a090e731f6..edd1bc00d7 100644
--- a/libclamav/htmlnorm.c
+++ b/libclamav/htmlnorm.c
@@ -490,7 +490,6 @@ static inline void html_tag_contents_append(struct tag_contents *cont, const uns
uint32_t mbchar = 0;
if (!begin || !end)
return;
-
for (i = cont->pos; i < MAX_TAG_CONTENTS_LENGTH && (begin < end); i++) {
uint8_t c = *begin++;
if (mbchar && (c < 0x80 || mbchar >= 0x10000)) {
@@ -688,7 +687,6 @@ static bool cli_html_normalise(cli_ctx *ctx, int fd, m_area_t *m_area, const cha
uint32_t mbchar = 0;
uint32_t mbchar2 = 0;
-
/*
* Initialize stack buffers.
*/
@@ -1931,7 +1929,6 @@ static bool cli_html_normalise(cli_ctx *ctx, int fd, m_area_t *m_area, const cha
cli_js_destroy(js_state);
js_state = NULL;
}
-
html_tag_arg_free(&tag_args);
if (!m_area) {
fclose(stream_in);
diff --git a/libclamav/others.h b/libclamav/others.h
index 4ffb7d0a50..fdff4283ca 100644
--- a/libclamav/others.h
+++ b/libclamav/others.h
@@ -552,7 +552,7 @@ extern LIBCLAMAV_EXPORT int have_rar;
#define SCAN_HEURISTICS (ctx->options->general & CL_SCAN_GENERAL_HEURISTICS)
#define SCAN_HEURISTIC_PRECEDENCE (ctx->options->general & CL_SCAN_GENERAL_HEURISTIC_PRECEDENCE)
#define SCAN_UNPRIVILEGED (ctx->options->general & CL_SCAN_GENERAL_UNPRIVILEGED)
-#define STORE_HTML_URLS (ctx->options->general & CL_SCAN_STORE_HTML_URLS)
+#define STORE_HTML_URLS (ctx->options->general & CL_SCAN_STORE_HTML_URLS)
#define SCAN_PARSE_ARCHIVE (ctx->options->parse & CL_SCAN_PARSE_ARCHIVE)
#define SCAN_PARSE_ELF (ctx->options->parse & CL_SCAN_PARSE_ELF)
diff --git a/libclamav/scanners.c b/libclamav/scanners.c
index d9a577f02b..b4c47df156 100644
--- a/libclamav/scanners.c
+++ b/libclamav/scanners.c
@@ -2082,32 +2082,418 @@ static cl_error_t cli_ole2_tempdir_scan_for_xlm_and_images(const char *dir, cli_
return ret;
}
-const char * const HTML_URLS_JSON_KEY = "HTMLUrls";
-
-
-
-static bool is_url(const char * const str){
+const char *const HTML_URLS_JSON_KEY = "HTMLUrls";
+/* Scheme prefixes matched by is_url(); file-local and read-only. Registry: https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml */
+/* clang-format off */
+static const char *const URI_LIST[] = {
+    "aaa://"
+    , "aaas://"
+    , "about://"
+    , "acap://"
+    , "acct://"
+    , "acd://"
+    , "acr://"
+    , "adiumxtra://"
+    , "adt://"
+    , "afp://"
+    , "afs://"
+    , "aim://"
+    , "amss://"
+    , "android://"
+    , "appdata://"
+    , "apt://"
+    , "ar://"
+    , "ark://"
+    , "at://"
+    , "attachment://"
+    , "aw://"
+    , "barion://"
+    , "bb://"
+    , "beshare://"
+    , "bitcoin://"
+    , "bitcoincash://"
+    , "blob://"
+    , "bolo://"
+    , "brid://"
+    , "browserext://"
+    , "cabal://"
+    , "calculator://"
+    , "callto://"
+    , "cap://"
+    , "cast://"
+    , "casts://"
+    , "chrome://"
+    , "chrome-extension://"
+    , "cid://"
+    , "coap://"
+    , "coap+tcp://"
+    , "coap+ws://"
+    , "coaps://"
+    , "coaps+tcp://"
+    , "coaps+ws://"
+    , "com-eventbrite-attendee://"
+    , "content://"
+    , "content-type://"
+    , "crid://"
+    , "cstr://"
+    , "cvs://"
+    , "dab://"
+    , "dat://"
+    , "data://"
+    , "dav://"
+    , "dhttp://"
+    , "diaspora://"
+    , "dict://"
+    , "did://"
+    , "dis://"
+    , "dlna-playcontainer://"
+    , "dlna-playsingle://"
+    , "dns://"
+    , "dntp://"
+    , "doi://"
+    , "dpp://"
+    , "drm://"
+    , "drop://"
+    , "dtmi://"
+    , "dtn://"
+    , "dvb://"
+    , "dvx://"
+    , "dweb://"
+    , "ed2k://"
+    , "eid://"
+    , "elsi://"
+    , "embedded://"
+    , "ens://"
+    , "ethereum://"
+    , "example://"
+    , "facetime://"
+    , "fax://"
+    , "feed://"
+    , "feedready://"
+    , "fido://"
+    , "file://"
+    , "filesystem://"
+    , "finger://"
+    , "first-run-pen-experience://"
+    , "fish://"
+    , "fm://"
+    , "ftp://"
+    , "fuchsia-pkg://"
+    , "geo://"
+    , "gg://"
+    , "git://"
+    , "gitoid://"
+    , "gizmoproject://"
+    , "go://"
+    , "gopher://"
+    , "graph://"
+    , "grd://"
+    , "gtalk://"
+    , "h323://"
+    , "ham://"
+    , "hcap://"
+    , "hcp://"
+    , "hs20://"
+    , "http://"
+    , "https://"
+    , "hxxp://"
+    , "hxxps://"
+    , "hydrazone://"
+    , "hyper://"
+    , "iax://"
+    , "icap://"
+    , "icon://"
+    , "im://"
+    , "imap://"
+    , "info://"
+    , "iotdisco://"
+    , "ipfs://"
+    , "ipn://"
+    , "ipns://"
+    , "ipp://"
+    , "ipps://"
+    , "irc://"
+    , "irc6://"
+    , "ircs://"
+    , "iris://"
+    , "iris.beep://"
+    , "iris.lwz://"
+    , "iris.xpc://"
+    , "iris.xpcs://"
+    , "isostore://"
+    , "itms://"
+    , "jabber://"
+    , "jar://"
+    , "jms://"
+    , "keyparc://"
+    , "lastfm://"
+    , "lbry://"
+    , "ldap://"
+    , "ldaps://"
+    , "leaptofrogans://"
+    , "lid://"
+    , "lorawan://"
+    , "lpa://"
+    , "lvlt://"
+    , "machineProvisioningProgressReporter://"
+    , "magnet://"
+    , "mailserver://"
+    , "mailto://"
+    , "maps://"
+    , "market://"
+    , "matrix://"
+    , "message://"
+    , "microsoft.windows.camera://"
+    , "microsoft.windows.camera.multipicker://"
+    , "microsoft.windows.camera.picker://"
+    , "mid://"
+    , "mms://"
+    , "modem://"
+    , "mongodb://"
+    , "moz://"
+    , "ms-access://"
+    , "ms-appinstaller://"
+    , "ms-browser-extension://"
+    , "ms-calculator://"
+    , "ms-drive-to://"
+    , "ms-enrollment://"
+    , "ms-excel://"
+    , "ms-eyecontrolspeech://"
+    , "ms-gamebarservices://"
+    , "ms-gamingoverlay://"
+    , "ms-getoffice://"
+    , "ms-help://"
+    , "ms-infopath://"
+    , "ms-inputapp://"
+    , "ms-launchremotedesktop://"
+    , "ms-lockscreencomponent-config://"
+    , "ms-media-stream-id://"
+    , "ms-meetnow://"
+    , "ms-mixedrealitycapture://"
+    , "ms-mobileplans://"
+    , "ms-newsandinterests://"
+    , "ms-officeapp://"
+    , "ms-people://"
+    , "ms-project://"
+    , "ms-powerpoint://"
+    , "ms-publisher://"
+    , "ms-recall://"
+    , "ms-remotedesktop://"
+    , "ms-remotedesktop-launch://"
+    , "ms-restoretabcompanion://"
+    , "ms-screenclip://"
+    , "ms-screensketch://"
+    , "ms-search://"
+    , "ms-search-repair://"
+    , "ms-secondary-screen-controller://"
+    , "ms-secondary-screen-setup://"
+    , "ms-settings://"
+    , "ms-settings-airplanemode://"
+    , "ms-settings-bluetooth://"
+    , "ms-settings-camera://"
+    , "ms-settings-cellular://"
+    , "ms-settings-cloudstorage://"
+    , "ms-settings-connectabledevices://"
+    , "ms-settings-displays-topology://"
+    , "ms-settings-emailandaccounts://"
+    , "ms-settings-language://"
+    , "ms-settings-location://"
+    , "ms-settings-lock://"
+    , "ms-settings-nfctransactions://"
+    , "ms-settings-notifications://"
+    , "ms-settings-power://"
+    , "ms-settings-privacy://"
+    , "ms-settings-proximity://"
+    , "ms-settings-screenrotation://"
+    , "ms-settings-wifi://"
+    , "ms-settings-workplace://"
+    , "ms-spd://"
+    , "ms-stickers://"
+    , "ms-sttoverlay://"
+    , "ms-transit-to://"
+    , "ms-useractivityset://"
+    , "ms-virtualtouchpad://"
+    , "ms-visio://"
+    , "ms-walk-to://"
+    , "ms-whiteboard://"
+    , "ms-whiteboard-cmd://"
+    , "ms-word://"
+    , "msnim://"
+    , "msrp://"
+    , "msrps://"
+    , "mss://"
+    , "mt://"
+    , "mtqp://"
+    , "mumble://"
+    , "mupdate://"
+    , "mvn://"
+    , "mvrp://"
+    , "mvrps://"
+    , "news://"
+    , "nfs://"
+    , "ni://"
+    , "nih://"
+    , "nntp://"
+    , "notes://"
+    , "num://"
+    , "ocf://"
+    , "oid://"
+    , "onenote://"
+    , "onenote-cmd://"
+    , "opaquelocktoken://"
+    , "openid://"
+    , "openpgp4fpr://"
+    , "otpauth://"
+    , "p1://"
+    , "pack://"
+    , "palm://"
+    , "paparazzi://"
+    , "payment://"
+    , "payto://"
+    , "pkcs11://"
+    , "platform://"
+    , "pop://"
+    , "pres://"
+    , "prospero://"
+    , "proxy://"
+    , "pwid://"
+    , "psyc://"
+    , "pttp://"
+    , "qb://"
+    , "query://"
+    , "quic-transport://"
+    , "redis://"
+    , "rediss://"
+    , "reload://"
+    , "res://"
+    , "resource://"
+    , "rmi://"
+    , "rsync://"
+    , "rtmfp://"
+    , "rtmp://"
+    , "rtsp://"
+    , "rtsps://"
+    , "rtspu://"
+    , "sarif://"
+    , "secondlife://"
+    , "secret-token://"
+    , "service://"
+    , "session://"
+    , "sftp://"
+    , "sgn://"
+    , "shc://"
+    , "shttp://"
+    , "sieve://"
+    , "simpleledger://"
+    , "simplex://"
+    , "sip://"
+    , "sips://"
+    , "skype://"
+    , "smb://"
+    , "smp://"
+    , "sms://"
+    , "smtp://"
+    , "snews://"
+    , "snmp://"
+    , "soap.beep://"
+    , "soap.beeps://"
+    , "soldat://"
+    , "spiffe://"
+    , "spotify://"
+    , "ssb://"
+    , "ssh://"
+    , "starknet://"
+    , "steam://"
+    , "stun://"
+    , "stuns://"
+    , "submit://"
+    , "svn://"
+    , "swh://"
+    , "swid://"
+    , "swidpath://"
+    , "tag://"
+    , "taler://"
+    , "teamspeak://"
+    , "tel://"
+    , "teliaeid://"
+    , "telnet://"
+    , "tftp://"
+    , "things://"
+    , "thismessage://"
+    , "tip://"
+    , "tn3270://"
+    , "tool://"
+    , "turn://"
+    , "turns://"
+    , "tv://"
+    , "udp://"
+    , "unreal://"
+    , "upt://"
+    , "urn://"
+    , "ut2004://"
+    , "uuid-in-package://"
+    , "v-event://"
+    , "vemmi://"
+    , "ventrilo://"
+    , "ves://"
+    , "videotex://"
+    , "vnc://"
+    , "view-source://"
+    , "vscode://"
+    , "vscode-insiders://"
+    , "vsls://"
+    , "w3://"
+    , "wais://"
+    , "web3://"
+    , "wcr://"
+    , "webcal://"
+    , "web+ap://"
+    , "wifi://"
+    , "wpid://"
+    , "ws://"
+    , "wss://"
+    , "wtai://"
+    , "wyciwyg://"
+    , "xcon://"
+    , "xcon-userid://"
+    , "xfire://"
+    , "xmlrpc.beep://"
+    , "xmlrpc.beeps://"
+    , "xmpp://"
+    , "xftp://"
+    , "xrcp://"
+    , "xri://"
+    , "ymsgr://"
+    , "z39.50://"
+    , "z39.50r://"
+    , "z39.50s://"
+};
+/* clang-format on */
+
+static bool is_url(const char *const str)
+{
-#define MATCH(str, prefix) \
- do { \
- if (str && (strlen(str) > strlen(prefix)) \
- && (0 == strncasecmp(str, prefix, strlen(prefix)))) { \
- bRet = true; \
- goto done; \
- } \
+#define MATCH(str, prefix) \
+ do { \
+ if (str && (strlen(str) > strlen(prefix)) && (0 == strncasecmp(str, prefix, strlen(prefix)))) { \
+ bRet = true; \
+ goto done; \
+ } \
} while (0);
bool bRet = false;
+ size_t i;
- MATCH(str, "https://");
- MATCH(str, "http://");
- MATCH(str, "ftp://");
+ for (i = 0; i < sizeof(URI_LIST) / sizeof(URI_LIST[0]); i++) {
+ MATCH(str, URI_LIST[i]);
+ }
done:
return bRet;
#undef MATCH
}
-static void save_urls(cli_ctx * ctx, tag_arguments_t * hrefs) {
- int i = 0;
+static void save_urls(cli_ctx *ctx, tag_arguments_t *hrefs)
+{
+ int i = 0;
bool haveOne = false;
if (NULL == hrefs) {
return;
@@ -2121,28 +2507,27 @@ static void save_urls(cli_ctx * ctx, tag_arguments_t * hrefs) {
return;
}
- for (i = 0; i < hrefs->count; i++){
- if (is_url((const char *) hrefs->value[i])) {
+ for (i = 0; i < hrefs->count; i++) {
+ if (is_url((const char *)hrefs->value[i])) {
haveOne = true;
break;
}
}
-
- if (!haveOne){
+
+ if (!haveOne) {
return;
}
- json_object *ary = cli_jsonarray(ctx->wrkproperty, HTML_URLS_JSON_KEY );
+ json_object *ary = cli_jsonarray(ctx->wrkproperty, HTML_URLS_JSON_KEY);
if (ary) {
- for (i = 0; i < hrefs->count; i++){
- if (is_url((const char *) hrefs->value[i])){
- cli_jsonstr(ary, NULL, (const char *) hrefs->value[i]);
+ for (i = 0; i < hrefs->count; i++) {
+ if (is_url((const char *)hrefs->value[i])) {
+ cli_jsonstr(ary, NULL, (const char *)hrefs->value[i]);
}
}
} else {
- cli_dbgmsg("[cli_scanhtml] Failed to add \"%s\" entry JSON array\n", HTML_URLS_JSON_KEY );
+ cli_dbgmsg("[cli_scanhtml] Failed to add \"%s\" entry JSON array\n", HTML_URLS_JSON_KEY);
}
-
}
static cl_error_t cli_scanhtml(cli_ctx *ctx)
@@ -2179,7 +2564,7 @@ static cl_error_t cli_scanhtml(cli_ctx *ctx)
/* Output JSON Summary Information */
if (STORE_HTML_URLS && SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) {
tag_arguments_t hrefs = {0};
- hrefs.scanContents = 1;
+ hrefs.scanContents = 1;
(void)html_normalise_map(ctx, map, tempname, &hrefs, ctx->dconf);
save_urls(ctx, &hrefs);
} else {
@@ -4283,9 +4668,9 @@ static inline bool result_should_goto_done(cli_ctx *ctx, cl_error_t result_in, c
cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type)
{
- cl_error_t ret = CL_CLEAN;
+ cl_error_t ret = CL_CLEAN;
cl_error_t cache_check_result = CL_VIRUS;
- bool cache_enabled = true;
+ bool cache_enabled = true;
cl_error_t verdict_at_this_level;
cli_file_t dettype = 0;
uint8_t typercg = 1;