Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle ClickHouse queries with other statements being invalid #58

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
75 changes: 70 additions & 5 deletions src/sql_injection/detect_sql_injection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,31 @@ pub fn detect_sql_injection_str(query: &str, userinput: &str, dialect: i32) -> b

// Tokenize query :
let tokens = tokenize_with_fallback(query, dialect);

// Tokens are empty, this means that the query is invalid
if tokens.len() <= 0 {
// Tokens are empty, probably a parsing issue with original query, return false.
if dialect == 3 && extra_statement_was_created_by_user_input(query, userinput, dialect) {
// Clickhouse does not support multiple statements
// The first statement will still be executed if the other statements are invalid
// If the query with user input replaced is valid, we'll assume it's an injection because it created a new statement
return true;
}

// If the query is invalid, we can't determine if it's an injection
return false;
}

// Remove leading and trailing spaces from userinput :
let trimmed_userinput = userinput.trim_matches(SPACE_CHAR);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

trimmed userinput is now only used for length check? everything else moved to replace_user_input_with_safe_str


// Tokenize query without user input :
if trimmed_userinput.len() <= 1 {
// If the trimmed userinput is one character or empty, no injection took place.
return false;
}

// Replace user input with string of equal length and tokenize again :
let safe_replace_str = "a".repeat(trimmed_userinput.len());
let query_without_input: &str = &query.replace(trimmed_userinput, safe_replace_str.as_str());
let tokens_without_input = tokenize_with_fallback(query_without_input, dialect);
let query_without_input = replace_user_input_with_safe_str(query, userinput);
let tokens_without_input = tokenize_with_fallback(query_without_input.as_str(), dialect);

// Check delta for both comment tokens and all tokens in general :
if diff_in_vec_len!(tokens, tokens_without_input) {
Expand All @@ -56,6 +63,64 @@ pub fn detect_sql_injection_str(query: &str, userinput: &str, dialect: i32) -> b
return false;
}

fn extra_statement_was_created_by_user_input(query: &str, userinput: &str, dialect: i32) -> bool {
if !has_multiple_statements(query, dialect) {
return false;
}

let query_without_input = replace_user_input_with_safe_str(query, userinput);
let tokens_without_input = tokenize_with_fallback(query_without_input.as_str(), dialect);

if tokens_without_input.len() <= 0 {
// Invalid query without user input
return false;
}

return is_single_statement(&tokens_without_input);
}

fn is_single_statement(tokens: &Vec<Token>) -> bool {
let has_semicolon = tokens.iter().any(|x| matches!(x, Token::SemiColon));
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: Count them


if !has_semicolon {
return true;
}

matches!(tokens.last(), Some(Token::SemiColon)) && has_semicolon
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

has_semicolon is not necessary

}

/// Check if the query has multiple statements
/// The other statements can be invalid
fn has_multiple_statements(query: &str, dialect: i32) -> bool {
if !query.contains(';') {
return false;
}

// Find the first valid statement and see if there's anything left
for (i, c) in query.char_indices() {
if c == ';' {
let statement = &query[..i + 1];
let tokens = tokenize_with_fallback(statement, dialect);
if tokens.len() > 0 {
if let Some(Token::SemiColon) = tokens.last() {
let remaining = &query[i + 1..];

return remaining.trim().len() > 0;
}
}
}
}

return false;
}

fn replace_user_input_with_safe_str(query: &str, user_input: &str) -> String {
let trimmed_user_input = user_input.trim_matches(SPACE_CHAR);
let safe_replace_str = "a".repeat(trimmed_user_input.len());

query.replace(trimmed_user_input, safe_replace_str.as_str())
}

fn tokenize_with_fallback(query: &str, dialect: i32) -> Vec<Token> {
let query_tokens = tokenize_query(query, dialect);
if query_tokens.len() <= 0 && dialect != 0 {
Expand Down
105 changes: 87 additions & 18 deletions src/sql_injection/detect_sql_injection_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,34 +4,40 @@ mod tests {

macro_rules! is_injection {
($query:expr, $input:expr) => {
assert!(detect_sql_injection_str(
&$query.to_lowercase(),
&$input.to_lowercase(),
0
))
assert!(
detect_sql_injection_str(&$query.to_lowercase(), &$input.to_lowercase(), 0),
"should be an injection\nquery: {}\ninput: {}",
$query,
$input
)
};
($query:expr, $input:expr, $dialect:expr) => {
assert!(detect_sql_injection_str(
&$query.to_lowercase(),
&$input.to_lowercase(),
assert!(
detect_sql_injection_str(&$query.to_lowercase(), &$input.to_lowercase(), $dialect),
"should be an injection\nquery: {}\ninput: {}\ndialect: {}\n",
$query,
$input,
$dialect
))
)
};
}
macro_rules! not_injection {
($query:expr, $input:expr) => {
assert!(!detect_sql_injection_str(
&$query.to_lowercase(),
&$input.to_lowercase(),
0
))
assert!(
!detect_sql_injection_str(&$query.to_lowercase(), &$input.to_lowercase(), 0),
"should not be an injection\nquery: {}\ninput: {}",
$query,
$input
)
};
($query:expr, $input:expr, $dialect:expr) => {
assert!(!detect_sql_injection_str(
&$query.to_lowercase(),
&$input.to_lowercase(),
assert!(
!detect_sql_injection_str(&$query.to_lowercase(), &$input.to_lowercase(), $dialect),
"should not be an injection\nquery: {}\ninput: {}\ndialect: {}\n",
$query,
$input,
$dialect
))
)
};
}

Expand All @@ -40,6 +46,7 @@ mod tests {
"mysql" => 8,
"postgresql" => 9,
"sqlite" => 12,
"clickhouse" => 3,
_ => panic!("Unknown dialect"),
}
}
Expand Down Expand Up @@ -718,4 +725,66 @@ mod tests {
dialect("postgresql")
);
}

#[test]
fn test_clickhouse_semicolon() {
// Since clickhouse doesn't support multiple statements, this is an injection
// Only the first statement is valid in this case, the second statement is not
is_injection!(
"insert into cats_table (id, petname) values (null, 'foo'||version());');",
"foo'||version());",
dialect("clickhouse")
);
is_injection!(
"select '1; abc' from table_name WHERE id = 1; abc",
"1; abc",
dialect("clickhouse")
);
is_injection!(
"select '1;' from table_name WHERE id = 1;",
"1;",
dialect("clickhouse")
);
is_injection!(
"select /* 1; */ from table_name WHERE id = 1;",
"1;",
dialect("clickhouse")
);
is_injection!(
"select 'a;' from table_name WHERE id = a; abc",
"a;",
dialect("clickhouse")
);
is_injection!(
"select 'a;' from table_name WHERE id = a; select 'a;",
"a; select 'a;",
dialect("clickhouse")
);

not_injection!(
"insert into cats_table (id, petname) values (null, 'foo||version());')",
"foo||version());",
dialect("clickhouse")
);
not_injection!(
"select 'a;' from table_name WHERE id = 1",
"a;",
dialect("clickhouse")
);
not_injection!(
"select * from table_name WHERE id = 1;",
"table_name",
dialect("clickhouse")
);
not_injection!(
"select /* 'abc'; */ from table_name WHERE id = 1;",
"'abc';",
dialect("clickhouse")
);
not_injection!(
"select 'a;' from table_name WHERE id = 'a;'; select 'a;';",
"a;",
dialect("clickhouse")
);
}
}
Loading