-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Handle ClickHouse queries with other statements being invalid #58
Open
hansott
wants to merge
12
commits into
main
Choose a base branch
from
clickhouse-multiple-statements
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+157
−23
Open
Changes from all commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
e64ac24
Handle Clickhouse queries with other statements being invalid
hansott f041dec
Move back
hansott 014e8cd
Revert change to improve diff
hansott ddb7d7a
Improve safety and simplicity
hansott f423af2
Simplify
hansott 324f489
Fix comment
hansott 2451986
Simplify
hansott 1ba7364
Improve comments
hansott 5b04955
Improve check for extra statement
hansott e063c96
Format query
hansott c3f2436
Fix lint
hansott 602e000
Update comment
hansott File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,24 +21,31 @@ pub fn detect_sql_injection_str(query: &str, userinput: &str, dialect: i32) -> b | |
|
||
// Tokenize query : | ||
let tokens = tokenize_with_fallback(query, dialect); | ||
|
||
// Tokens are empty, this means that the query is invalid | ||
if tokens.len() <= 0 { | ||
// Tokens are empty, probably a parsing issue with original query, return false. | ||
if dialect == 3 && extra_statement_was_created_by_user_input(query, userinput, dialect) { | ||
// Clickhouse does not support multiple statements | ||
// The first statement will still be executed if the other statements are invalid | ||
// If the query with user input replaced is valid, we'll assume it's an injection because it created a new statement | ||
return true; | ||
} | ||
|
||
// If the query is invalid, we can't determine if it's an injection | ||
return false; | ||
} | ||
|
||
// Remove leading and trailing spaces from userinput : | ||
let trimmed_userinput = userinput.trim_matches(SPACE_CHAR); | ||
|
||
// Tokenize query without user input : | ||
if trimmed_userinput.len() <= 1 { | ||
// If the trimmed userinput is one character or empty, no injection took place. | ||
return false; | ||
} | ||
|
||
// Replace user input with string of equal length and tokenize again : | ||
let safe_replace_str = "a".repeat(trimmed_userinput.len()); | ||
let query_without_input: &str = &query.replace(trimmed_userinput, safe_replace_str.as_str()); | ||
let tokens_without_input = tokenize_with_fallback(query_without_input, dialect); | ||
let query_without_input = replace_user_input_with_safe_str(query, userinput); | ||
let tokens_without_input = tokenize_with_fallback(query_without_input.as_str(), dialect); | ||
|
||
// Check delta for both comment tokens and all tokens in general : | ||
if diff_in_vec_len!(tokens, tokens_without_input) { | ||
|
@@ -56,6 +63,64 @@ pub fn detect_sql_injection_str(query: &str, userinput: &str, dialect: i32) -> b | |
return false; | ||
} | ||
|
||
fn extra_statement_was_created_by_user_input(query: &str, userinput: &str, dialect: i32) -> bool { | ||
if !has_multiple_statements(query, dialect) { | ||
return false; | ||
} | ||
|
||
let query_without_input = replace_user_input_with_safe_str(query, userinput); | ||
let tokens_without_input = tokenize_with_fallback(query_without_input.as_str(), dialect); | ||
|
||
if tokens_without_input.len() <= 0 { | ||
// Invalid query without user input | ||
return false; | ||
} | ||
|
||
return is_single_statement(&tokens_without_input); | ||
} | ||
|
||
fn is_single_statement(tokens: &Vec<Token>) -> bool { | ||
let has_semicolon = tokens.iter().any(|x| matches!(x, Token::SemiColon)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TODO: Count them |
||
|
||
if !has_semicolon { | ||
return true; | ||
} | ||
|
||
matches!(tokens.last(), Some(Token::SemiColon)) && has_semicolon | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
} | ||
|
||
/// Check if the query has multiple statements | ||
/// The other statements can be invalid | ||
fn has_multiple_statements(query: &str, dialect: i32) -> bool { | ||
if !query.contains(';') { | ||
return false; | ||
} | ||
|
||
// Find the first valid statement and see if there's anything left | ||
for (i, c) in query.char_indices() { | ||
if c == ';' { | ||
let statement = &query[..i + 1]; | ||
let tokens = tokenize_with_fallback(statement, dialect); | ||
if tokens.len() > 0 { | ||
if let Some(Token::SemiColon) = tokens.last() { | ||
let remaining = &query[i + 1..]; | ||
|
||
return remaining.trim().len() > 0; | ||
} | ||
} | ||
} | ||
} | ||
|
||
return false; | ||
} | ||
|
||
fn replace_user_input_with_safe_str(query: &str, user_input: &str) -> String { | ||
let trimmed_user_input = user_input.trim_matches(SPACE_CHAR); | ||
let safe_replace_str = "a".repeat(trimmed_user_input.len()); | ||
|
||
query.replace(trimmed_user_input, safe_replace_str.as_str()) | ||
} | ||
|
||
fn tokenize_with_fallback(query: &str, dialect: i32) -> Vec<Token> { | ||
let query_tokens = tokenize_query(query, dialect); | ||
if query_tokens.len() <= 0 && dialect != 0 { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
trimmed userinput is now only used for length check? everything else moved to replace_user_input_with_safe_str