diff --git a/libclamav/htmlnorm.c b/libclamav/htmlnorm.c index d0360abb7b..b9199dd2a5 100644 --- a/libclamav/htmlnorm.c +++ b/libclamav/htmlnorm.c @@ -818,6 +818,7 @@ static bool cli_html_normalise(cli_ctx *ctx, int fd, m_area_t *m_area, const cha while (*ptr && isspace(*ptr)) { ptr++; } +// fprintf(stderr, "%s::%d::'%s'\n", __FUNCTION__, __LINE__, ptr); while (*ptr) { if (!binary && *ptr == '\n') { /* Convert it to a space and re-process */ @@ -893,7 +894,8 @@ static bool cli_html_normalise(cli_ctx *ctx, int fd, m_area_t *m_area, const cha case HTML_NORM: if (*ptr == '<') { ptrend = ptr; /* for use by scanContents */ -//fprintf(stderr, "%s::%d::'%s'\n", __FUNCTION__, __LINE__, ptrend); +fprintf(stderr, "%s::%d::'%s'\n", __FUNCTION__, __LINE__, ptrend); +//this is the only place I am seeing them; html_output_c(file_buff_o2, '<'); if (in_tag == TAG_DONT_EXTRACT && !text_space_written) { html_output_c(file_buff_text, ' '); @@ -1319,6 +1321,7 @@ static bool cli_html_normalise(cli_ctx *ctx, int fd, m_area_t *m_area, const cha href_contents_begin = ptr; if (strcmp(tag, "a") == 0) { arg_value = html_tag_arg_value(&tag_args, "href"); + fprintf(stderr, "%s::%d::'%s'\n", __FUNCTION__, __LINE__, arg_value); if (arg_value && strlen((const char *)arg_value) > 0) { if (hrefs->scanContents) { char *arg_value_title = html_tag_arg_value(&tag_args, "title");