diff --git a/Cargo.lock b/Cargo.lock index c6b59aa0ad..3da9db77f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -170,6 +170,7 @@ dependencies = [ "tempfile", "thiserror", "transpose", + "unicode-segmentation", ] [[package]] @@ -1170,6 +1171,12 @@ version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" +[[package]] +name = "unicode-segmentation" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" + [[package]] name = "unicode-width" version = "0.1.10" diff --git a/libclamav_rust/Cargo.toml b/libclamav_rust/Cargo.toml index 8805858f44..a5f162b08a 100644 --- a/libclamav_rust/Cargo.toml +++ b/libclamav_rust/Cargo.toml @@ -18,6 +18,7 @@ transpose = "0.2" num-traits = "0.2" base64 = "0.21.0" sha1 = "0.10.5" +unicode-segmentation = "1.10.1" [lib] crate-type = ["staticlib"] diff --git a/libclamav_rust/src/cdiff.rs b/libclamav_rust/src/cdiff.rs index bee2f64efe..693187c812 100644 --- a/libclamav_rust/src/cdiff.rs +++ b/libclamav_rust/src/cdiff.rs @@ -1032,7 +1032,7 @@ fn process_line(ctx: &mut Context, line: &[u8]) -> Result<(), InputError> { cmd_unlink(ctx, unlink_op) } _ => Err(InputError::UnknownCommand( - String::from_utf8_lossy(&cmd).to_string(), + String::from_utf8_lossy(cmd).to_string(), )), } } diff --git a/libclamav_rust/src/css_image_extract.rs b/libclamav_rust/src/css_image_extract.rs index 463bb5848c..8abd5dbf52 100644 --- a/libclamav_rust/src/css_image_extract.rs +++ b/libclamav_rust/src/css_image_extract.rs @@ -25,6 +25,7 @@ use std::{ffi::CStr, mem::ManuallyDrop, os::raw::c_char}; use base64::{engine::general_purpose as base64_engine_standard, Engine as _}; use log::{debug, error, warn}; use thiserror::Error; +use unicode_segmentation::UnicodeSegmentation; use crate::sys; @@ -73,12 +74,12 @@ impl<'a> CssImageExtractor<'a> { }; // Skip whitespace until we find '(' - for (pos, c) in self.remaining.chars().enumerate() { - if c == '(' { + for (pos, c) in self.remaining.grapheme_indices(true) { + if c == "(" { // Found left-paren. (_, self.remaining) = self.remaining.split_at(pos + 1); break; - } else if char::is_whitespace(c) { + } else if c.contains(char::is_whitespace) { // Skipping whitespace. continue; } else { @@ -90,11 +91,11 @@ impl<'a> CssImageExtractor<'a> { // Find closing ')' let mut depth = 1; let mut url_parameter: Option<&str> = None; - for (pos, c) in self.remaining.chars().enumerate() { - if c == '(' { + for (pos, c) in self.remaining.grapheme_indices(true) { + if c == "(" { // Found nested left-paren. depth += 1; - } else if c == ')' { + } else if c == ")" { if depth > 1 { // Found nested right-paren. depth -= 1; @@ -121,8 +122,8 @@ impl<'a> CssImageExtractor<'a> { // Strip optional whitespace and quotes from front and back. // Trim off whitespace at beginning - for (pos, c) in url_parameter.chars().enumerate() { - if char::is_whitespace(c) { + for (pos, c) in url_parameter.grapheme_indices(true) { + if c.contains(char::is_whitespace) { // Skipping whitespace before url contents. continue; } else { @@ -132,8 +133,8 @@ impl<'a> CssImageExtractor<'a> { } // Trim off whitespace at end - for (pos, c) in url_parameter.chars().rev().enumerate() { - if char::is_whitespace(c) { + for (pos, c) in url_parameter.graphemes(true).rev().enumerate() { + if c.contains(char::is_whitespace) { // Skipping whitespace after url contents. continue; } else { @@ -143,24 +144,24 @@ impl<'a> CssImageExtractor<'a> { } // Trim off " at beginning. - let c = url_parameter.chars().next(); + let c = url_parameter.graphemes(true).next(); if let Some(c) = c { - if c == '"' { + if c == "\"" { (_, url_parameter) = url_parameter.split_at(1); } }; // Trim off " at end. - let c = url_parameter.chars().rev().next(); + let c = url_parameter.graphemes(true).rev().next(); if let Some(c) = c { - if c == '"' { + if c == "\"" { (url_parameter, _) = url_parameter.split_at(url_parameter.len() - 1); } }; // Trim off whitespace at beginning. - for (pos, c) in url_parameter.chars().enumerate() { - if char::is_whitespace(c) { + for (pos, c) in url_parameter.grapheme_indices(true) { + if c.contains(char::is_whitespace) { // Skipping whitespace before url contents. continue; } else { @@ -170,8 +171,8 @@ impl<'a> CssImageExtractor<'a> { } // Trim off whitespace at end. - for (pos, c) in url_parameter.chars().rev().enumerate() { - if char::is_whitespace(c) { + for (pos, c) in url_parameter.graphemes(true).rev().enumerate() { + if c.contains(char::is_whitespace) { // Skipping whitespace after url contents. continue; } else { @@ -203,12 +204,12 @@ impl<'a> CssImageExtractor<'a> { }; // Skip whitespace until we find a 'b' (starting "base64") - for (pos, c) in url_parameter.chars().enumerate() { - if c == 'b' { + for (pos, c) in url_parameter.grapheme_indices(true) { + if c == "b" { // Found 'b'. (_, url_parameter) = url_parameter.split_at(pos + 1); break; - } else if char::is_whitespace(c) { + } else if c.contains(char::is_whitespace) { // Skipping whitespace. continue; } else { @@ -227,12 +228,12 @@ impl<'a> CssImageExtractor<'a> { (_, url_parameter) = url_parameter.split_at("ase64".len()); // Skip whitespace until we find ',' - for (pos, c) in url_parameter.chars().enumerate() { - if c == ',' { + for (pos, c) in url_parameter.grapheme_indices(true) { + if c == "," { // Found ','. (_, url_parameter) = url_parameter.split_at(pos + 1); break; - } else if char::is_whitespace(c) { + } else if c.contains(char::is_whitespace) { // Skipping whitespace. continue; } else { @@ -242,8 +243,8 @@ impl<'a> CssImageExtractor<'a> { } // Trim off whitespace at beginning. - for (pos, c) in url_parameter.chars().enumerate() { - if char::is_whitespace(c) { + for (pos, c) in url_parameter.grapheme_indices(true) { + if c.contains(char::is_whitespace) { // Skipping whitespace before url contents. continue; } else {