From 9e891568164093e62168601290ab6498b58937b6 Mon Sep 17 00:00:00 2001 From: SimonVandeVyver Date: Fri, 13 Sep 2024 10:27:18 +0200 Subject: [PATCH 1/9] add functionality to search only tryptic matches --- sa-index/src/peptide_search.rs | 22 +++++-- sa-index/src/sa_searcher.rs | 104 ++++++++++++++++++++++++--------- sa-server/src/main.rs | 6 +- 3 files changed, 98 insertions(+), 34 deletions(-) diff --git a/sa-index/src/peptide_search.rs b/sa-index/src/peptide_search.rs index 55d629f..b3958c7 100644 --- a/sa-index/src/peptide_search.rs +++ b/sa-index/src/peptide_search.rs @@ -38,6 +38,7 @@ impl From<&Protein> for ProteinInfo { /// * `equate_il` - Boolean indicating if we want to equate I and L during search /// * `clean_taxa` - Boolean indicating if we want to filter out proteins that are invalid in the /// taxonomy +/// * `tryptic` - Boolean indicating if we only want tryptic matches. /// /// # Returns /// @@ -50,7 +51,8 @@ pub fn search_proteins_for_peptide<'a>( searcher: &'a Searcher, peptide: &str, cutoff: usize, - equate_il: bool + equate_il: bool, + tryptic: bool ) -> Option<(bool, Vec<&'a Protein>)> { let peptide = peptide.trim_end().to_uppercase(); @@ -59,7 +61,7 @@ pub fn search_proteins_for_peptide<'a>( return None; } - let suffix_search = searcher.search_matching_suffixes(peptide.as_bytes(), cutoff, equate_il); + let suffix_search = searcher.search_matching_suffixes(peptide.as_bytes(), cutoff, equate_il, tryptic); let (suffixes, cutoff_used) = match suffix_search { SearchAllSuffixesResult::MaxMatches(matched_suffixes) => Some((matched_suffixes, true)), SearchAllSuffixesResult::SearchResult(matched_suffixes) => Some((matched_suffixes, false)), @@ -71,8 +73,14 @@ pub fn search_proteins_for_peptide<'a>( Some((cutoff_used, proteins)) } -pub fn search_peptide(searcher: &Searcher, peptide: &str, cutoff: usize, equate_il: bool) -> Option { - let (cutoff_used, proteins) = search_proteins_for_peptide(searcher, peptide, cutoff, equate_il)?; +pub fn search_peptide( + searcher: &Searcher, + peptide: &str, + cutoff: usize, + equate_il: bool, + tryptic: bool +) -> Option { + let (cutoff_used, proteins) = search_proteins_for_peptide(searcher, peptide, cutoff, equate_il, tryptic)?; Some(SearchResult { sequence: peptide.to_string(), @@ -91,6 +99,7 @@ pub fn search_peptide(searcher: &Searcher, peptide: &str, cutoff: usize, equate_ /// * `equate_il` - Boolean indicating if we want to equate I and L during search /// * `clean_taxa` - Boolean indicating if we want to filter out proteins that are invalid in the /// taxonomy +/// * `tryptic` - Boolean indicating if we only want tryptic matches. /// /// # Returns /// @@ -99,11 +108,12 @@ pub fn search_all_peptides( searcher: &Searcher, peptides: &Vec, cutoff: usize, - equate_il: bool + equate_il: bool, + tryptic: bool ) -> Vec { peptides .par_iter() - .filter_map(|peptide| search_peptide(searcher, peptide, cutoff, equate_il)) + .filter_map(|peptide| search_peptide(searcher, peptide, cutoff, equate_il, tryptic)) .collect() } diff --git a/sa-index/src/sa_searcher.rs b/sa-index/src/sa_searcher.rs index d09c704..87ab494 100644 --- a/sa-index/src/sa_searcher.rs +++ b/sa-index/src/sa_searcher.rs @@ -1,6 +1,6 @@ use std::{cmp::min, ops::Deref}; -use sa_mappings::proteins::{Protein, Proteins}; +use sa_mappings::proteins::{Protein, Proteins, SEPARATION_CHARACTER, TERMINATION_CHARACTER}; use crate::{ sa_searcher::BoundSearch::{Maximum, Minimum}, @@ -297,6 +297,7 @@ impl Searcher { /// * `max_matches` - The maximum amount of matches processed, if more matches are found we /// don't process them /// * `equate_il` - True if we want to equate I and L during search, otherwise false + /// * `tryptic` - Boolean indicating if we only want tryptic matches. /// /// # Returns /// @@ -306,7 +307,8 @@ impl Searcher { &self, search_string: &[u8], max_matches: usize, - equate_il: bool + equate_il: bool, + tryptic: bool ) -> SearchAllSuffixesResult { let mut matching_suffixes: Vec = vec![]; let mut il_locations = vec![]; @@ -334,32 +336,41 @@ impl Searcher { let mut sa_index = min_bound; while sa_index < max_bound { let suffix = self.sa.get(sa_index) as usize; - // filter away matches where I was wrongfully equalized to L, and check the - // unmatched prefix when I and L equalized, we only need to - // check the prefix, not the whole match, when the prefix is 0, we don't need to - // check at all - if suffix >= skip - && ((skip == 0 + + if suffix >= skip { + let match_start = suffix - skip; + let match_end = suffix + search_string.len() - skip; + + // filter away matches where I was wrongfully equalized to L, and check the + // unmatched prefix when I and L equalized, we only need to + // check the prefix, not the whole match, when the prefix is 0, we don't need to + // check at all + if (skip == 0 || Self::check_prefix( current_search_string_prefix, - &self.proteins.input_string[suffix - skip..suffix], + &self.proteins.input_string[match_start..suffix], equate_il )) && Self::check_suffix( skip, il_locations_current_suffix, current_search_string_suffix, - &self.proteins.input_string[suffix..suffix + search_string.len() - skip], + &self.proteins.input_string[suffix..match_end], equate_il - )) - { - matching_suffixes.push((suffix - skip) as i64); - - // return if max number of matches is reached - if matching_suffixes.len() >= max_matches { - return SearchAllSuffixesResult::MaxMatches(matching_suffixes); + ) + && (!tryptic + || ((self.check_start_of_protein(match_start) || self.check_tryptic_cut(match_start)) + && (self.check_end_of_protein(match_end) || self.check_tryptic_cut(match_end)))) + { + matching_suffixes.push((suffix - skip) as i64); + + // return if max number of matches is reached + if matching_suffixes.len() >= max_matches { + return SearchAllSuffixesResult::MaxMatches(matching_suffixes); + } } } + sa_index += 1; } } @@ -373,6 +384,47 @@ impl Searcher { } } + /// Check if a cut is the start of a protein. + /// + /// # Arguments + /// * `cut_index` - The index of the cut in the text of proteins. + /// + /// # Returns + /// + /// Returns true if the cut is at the start of a protein. + #[inline] + fn check_start_of_protein(&self, cut_index: usize) -> bool { + cut_index == 0 || self.proteins.input_string[cut_index - 1] == SEPARATION_CHARACTER + } + + /// Check if a cut is the end of a protein. + /// + /// # Arguments + /// * `cut_index` - The index of the cut in the text of proteins. + /// + /// # Returns + /// + /// Returns true if the cut is at the end of a protein. + #[inline] + fn check_end_of_protein(&self, cut_index: usize) -> bool { + self.proteins.input_string[cut_index] == TERMINATION_CHARACTER + || self.proteins.input_string[cut_index] == SEPARATION_CHARACTER + } + + /// Check if a cut is a tryptic cut, so check if the amino acid preceding the cut is K or R and the amino acid at the cut is not P. + /// + /// # Arguments + /// * `cut_index` - The index of the cut in the text of proteins. + /// + /// # Returns + /// + /// Returns true if the cut is a tryptic cut. + #[inline] + fn check_tryptic_cut(&self, cut_index: usize) -> bool { + (self.proteins.input_string[cut_index - 1] == b'K' || self.proteins.input_string[cut_index - 1] == b'R') + && self.proteins.input_string[cut_index] != b'P' + } + /// Returns true of the prefixes are the same /// if `equate_il` is set to true, L and I are considered the same /// @@ -545,11 +597,11 @@ mod tests { let searcher = Searcher::new(sa, proteins, Box::new(suffix_index_to_protein)); // search suffix 'VAA' - let found_suffixes = searcher.search_matching_suffixes(&[b'V', b'A', b'A'], usize::MAX, false); + let found_suffixes = searcher.search_matching_suffixes(&[b'V', b'A', b'A'], usize::MAX, false, false); assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![7])); // search suffix 'AC' - let found_suffixes = searcher.search_matching_suffixes(&[b'A', b'C'], usize::MAX, false); + let found_suffixes = searcher.search_matching_suffixes(&[b'A', b'C'], usize::MAX, false, false); assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![5, 11])); } @@ -578,11 +630,11 @@ mod tests { let searcher = Searcher::new(sa, proteins, Box::new(suffix_index_to_protein)); // search bounds 'RIZ' with equal I and L - let found_suffixes = searcher.search_matching_suffixes(&[b'R', b'I', b'Z'], usize::MAX, true); + let found_suffixes = searcher.search_matching_suffixes(&[b'R', b'I', b'Z'], usize::MAX, true, false); assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![16])); // search bounds 'RIZ' without equal I and L - let found_suffixes = searcher.search_matching_suffixes(&[b'R', b'I', b'Z'], usize::MAX, false); + let found_suffixes = searcher.search_matching_suffixes(&[b'R', b'I', b'Z'], usize::MAX, false, false); assert_eq!(found_suffixes, SearchAllSuffixesResult::NoMatches); } @@ -605,7 +657,7 @@ mod tests { let searcher = Searcher::new(sparse_sa, proteins, Box::new(suffix_index_to_protein)); // search bounds 'IM' with equal I and L - let found_suffixes = searcher.search_matching_suffixes(&[b'I', b'M'], usize::MAX, true); + let found_suffixes = searcher.search_matching_suffixes(&[b'I', b'M'], usize::MAX, true, false); assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![0])); } @@ -626,7 +678,7 @@ mod tests { let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string); let searcher = Searcher::new(sparse_sa, proteins, Box::new(suffix_index_to_protein)); - let found_suffixes = searcher.search_matching_suffixes(&[b'I'], usize::MAX, true); + let found_suffixes = searcher.search_matching_suffixes(&[b'I'], usize::MAX, true, false); assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![2, 3, 4, 5])); } @@ -647,7 +699,7 @@ mod tests { let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string); let searcher = Searcher::new(sparse_sa, proteins, Box::new(suffix_index_to_protein)); - let found_suffixes = searcher.search_matching_suffixes(&[b'I', b'I'], usize::MAX, true); + let found_suffixes = searcher.search_matching_suffixes(&[b'I', b'I'], usize::MAX, true, false); assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![0, 1, 2, 3, 4])); } @@ -670,7 +722,7 @@ mod tests { // search all places where II is in the string IIIILL, but with a sparse SA // this way we check if filtering the suffixes works as expected - let found_suffixes = searcher.search_matching_suffixes(&[b'I', b'I'], usize::MAX, false); + let found_suffixes = searcher.search_matching_suffixes(&[b'I', b'I'], usize::MAX, false, false); assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![0, 1, 2])); } @@ -692,7 +744,7 @@ mod tests { let searcher = Searcher::new(sparse_sa, proteins, Box::new(suffix_index_to_protein)); // search bounds 'IM' with equal I and L - let found_suffixes = searcher.search_matching_suffixes(&[b'I', b'I'], usize::MAX, true); + let found_suffixes = searcher.search_matching_suffixes(&[b'I', b'I'], usize::MAX, true, false); assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![0, 1, 2, 3, 4])); } } diff --git a/sa-server/src/main.rs b/sa-server/src/main.rs index 5284546..1a1cedf 100644 --- a/sa-server/src/main.rs +++ b/sa-server/src/main.rs @@ -58,7 +58,9 @@ struct InputData { cutoff: usize, #[serde(default = "bool::default")] // default value is false // TODO: maybe default should be true? - equate_il: bool + equate_il: bool, + #[serde(default = "bool::default")] // default false + tryptic: bool } #[tokio::main] @@ -83,7 +85,7 @@ async fn search( State(searcher): State>, data: Json ) -> Result>, StatusCode> { - let search_result = search_all_peptides(&searcher, &data.peptides, data.cutoff, data.equate_il); + let search_result = search_all_peptides(&searcher, &data.peptides, data.cutoff, data.equate_il, data.tryptic); Ok(Json(search_result)) } From fa13d60c4355c1c81e86576612177b1c7b59e833 Mon Sep 17 00:00:00 2001 From: SimonVandeVyver Date: Fri, 13 Sep 2024 10:45:59 +0200 Subject: [PATCH 2/9] Add test for tryptic search --- sa-index/src/sa_searcher.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/sa-index/src/sa_searcher.rs b/sa-index/src/sa_searcher.rs index 87ab494..0be7d17 100644 --- a/sa-index/src/sa_searcher.rs +++ b/sa-index/src/sa_searcher.rs @@ -747,4 +747,28 @@ mod tests { let found_suffixes = searcher.search_matching_suffixes(&[b'I', b'I'], usize::MAX, true, false); assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![0, 1, 2, 3, 4])); } + + #[test] + fn test_tryptic_search() { + let text = "PAA-AAKPKAPAA$".to_string().into_bytes(); + + let proteins = Proteins { + input_string: text, + proteins: vec![Protein { + uniprot_id: String::new(), + taxon_id: 0, + functional_annotations: vec![] + }] + }; + + let sparse_sa = SuffixArray::Original(vec![13, 3, 12, 11, 1, 4, 2, 5, 9, 8, 6, 10, 0, 7], 1); + let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string); + let searcher = Searcher::new(sparse_sa, proteins, Box::new(suffix_index_to_protein)); + + let found_suffixes_1 = searcher.search_matching_suffixes(&[b'P', b'A', b'A'], usize::MAX, false, true); + assert_eq!(found_suffixes_1, SearchAllSuffixesResult::SearchResult(vec![0])); + + let found_suffixes_2 = searcher.search_matching_suffixes(&[b'A', b'P', b'A', b'A'], usize::MAX, false, true); + assert_eq!(found_suffixes_2, SearchAllSuffixesResult::SearchResult(vec![9])); + } } From 757426ea4a109d5319f0eb77ee6cedfa3ce76504 Mon Sep 17 00:00:00 2001 From: Pieter Verschaffelt Date: Thu, 19 Sep 2024 11:17:51 +0200 Subject: [PATCH 3/9] Run formatter --- sa-index/src/sa_searcher.rs | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/sa-index/src/sa_searcher.rs b/sa-index/src/sa_searcher.rs index 2687de0..dab8577 100644 --- a/sa-index/src/sa_searcher.rs +++ b/sa-index/src/sa_searcher.rs @@ -179,10 +179,8 @@ impl Searcher { while index_in_search_string < search_string.len() && index_in_suffix < self.proteins.text.len() && (search_string[index_in_search_string] == self.proteins.text.get(index_in_suffix) - || (search_string[index_in_search_string] == b'L' - && self.proteins.text.get(index_in_suffix) == b'I') - || (search_string[index_in_search_string] == b'I' - && self.proteins.text.get(index_in_suffix) == b'L')) + || (search_string[index_in_search_string] == b'L' && self.proteins.text.get(index_in_suffix) == b'I') + || (search_string[index_in_search_string] == b'I' && self.proteins.text.get(index_in_suffix) == b'L')) { index_in_suffix += 1; index_in_search_string += 1; @@ -348,20 +346,20 @@ impl Searcher { // check at all if (skip == 0 || Self::check_prefix( - current_search_string_prefix, - ProteinTextSlice::new(&self.proteins.text, match_start, suffix), - equate_il - )) + current_search_string_prefix, + ProteinTextSlice::new(&self.proteins.text, match_start, suffix), + equate_il + )) && Self::check_suffix( - skip, - il_locations_current_suffix, - current_search_string_suffix, - ProteinTextSlice::new(&self.proteins.text, suffix, match_end), - equate_il - ) + skip, + il_locations_current_suffix, + current_search_string_suffix, + ProteinTextSlice::new(&self.proteins.text, suffix, match_end), + equate_il + ) && (!tryptic - || ((self.check_start_of_protein(match_start) || self.check_tryptic_cut(match_start)) - && (self.check_end_of_protein(match_end) || self.check_tryptic_cut(match_end)))) + || ((self.check_start_of_protein(match_start) || self.check_tryptic_cut(match_start)) + && (self.check_end_of_protein(match_end) || self.check_tryptic_cut(match_end)))) { matching_suffixes.push((suffix - skip) as i64); From e3cc70fa5c5cdd91d21287a8d7b2e2897979c34e Mon Sep 17 00:00:00 2001 From: Pieter Verschaffelt Date: Thu, 19 Sep 2024 11:47:37 +0200 Subject: [PATCH 4/9] Add missing character to error messages --- text-compression/src/lib.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/text-compression/src/lib.rs b/text-compression/src/lib.rs index 4866a6c..d56d4b4 100644 --- a/text-compression/src/lib.rs +++ b/text-compression/src/lib.rs @@ -58,7 +58,7 @@ impl ProteinText { let mut bit_array = BitArray::with_capacity(input_string.len(), 5); for (i, c) in input_string.chars().enumerate() { - let char_5bit: u8 = *char_to_5bit.get(&(c as u8)).expect("Input character not in alphabet"); + let char_5bit: u8 = *char_to_5bit.get(&(c as u8)).expect(&format!("Input character '{}' not in alphabet", c)); bit_array.set(i, char_5bit as u64); } @@ -79,7 +79,7 @@ impl ProteinText { let mut bit_array = BitArray::with_capacity(input_vec.len(), 5); for (i, e) in input_vec.iter().enumerate() { - let char_5bit: u8 = *char_to_5bit.get(e).expect("Input character not in alphabet"); + let char_5bit: u8 = *char_to_5bit.get(&(e as u8)).expect(&format!("Input character '{}' not in alphabet", e)); bit_array.set(i, char_5bit as u64); } @@ -131,7 +131,7 @@ impl ProteinText { /// * `index` - The index of the character to change. /// * `value` - The character to fill in as `u8`. pub fn set(&mut self, index: usize, value: u8) { - let char_5bit: u8 = *self.char_to_5bit.get(&value).expect("Input character not in alphabet"); + let char_5bit: u8 = *self.char_to_5bit.get(&value).expect(&format!("Input character '{}' not in alphabet", value)); self.bit_array.set(index, char_5bit as u64); } @@ -477,7 +477,7 @@ mod tests { let mut bit_array = BitArray::with_capacity(input_string.len(), 5); for (i, c) in input_string.chars().enumerate() { - let char_5bit: u8 = *char_to_5bit.get(&(c as u8)).expect("Input character not in alphabet"); + let char_5bit: u8 = *char_to_5bit.get(&(c as u8)).expect(&format!("Input character '{}' not in alphabet", c)); bit_array.set(i, char_5bit as u64); } From 8fdf7155c6315adc13d0c6ec9343623c352b6951 Mon Sep 17 00:00:00 2001 From: Pieter Verschaffelt Date: Thu, 19 Sep 2024 11:49:05 +0200 Subject: [PATCH 5/9] Fix compilation error --- text-compression/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text-compression/src/lib.rs b/text-compression/src/lib.rs index d56d4b4..4c8ecf9 100644 --- a/text-compression/src/lib.rs +++ b/text-compression/src/lib.rs @@ -79,7 +79,7 @@ impl ProteinText { let mut bit_array = BitArray::with_capacity(input_vec.len(), 5); for (i, e) in input_vec.iter().enumerate() { - let char_5bit: u8 = *char_to_5bit.get(&(e as u8)).expect(&format!("Input character '{}' not in alphabet", e)); + let char_5bit: u8 = *char_to_5bit.get(e).expect(&format!("Input character '{}' not in alphabet", e)); bit_array.set(i, char_5bit as u64); } From e308ed3b01e95f84e63d2964d4f04725c57c3e14 Mon Sep 17 00:00:00 2001 From: Pieter Verschaffelt Date: Thu, 19 Sep 2024 11:52:59 +0200 Subject: [PATCH 6/9] Add support for pseudo amino acids BOUZX --- text-compression/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/text-compression/src/lib.rs b/text-compression/src/lib.rs index 4c8ecf9..0f0300c 100644 --- a/text-compression/src/lib.rs +++ b/text-compression/src/lib.rs @@ -24,7 +24,7 @@ impl ProteinText { /// Returns the hashmap fn create_char_to_5bit_hashmap() -> HashMap { let mut hashmap = HashMap::::new(); - for (i, c) in "ACDEFGHIKLMNPQRSTVWY-$".chars().enumerate() { + for (i, c) in "ABCDEFGHIKLMNOPQRSTUVWXYZ-$".chars().enumerate() { hashmap.insert(c as u8, i as u8); } @@ -38,7 +38,7 @@ impl ProteinText { /// Returns the vector fn create_bit5_to_char() -> Vec { let mut vec = Vec::::new(); - for c in "ACDEFGHIKLMNPQRSTVWY-$".chars() { + for c in "ABCDEFGHIKLMNOPQRSTUVWXYZ-$".chars() { vec.push(c as u8); } vec @@ -445,7 +445,7 @@ mod tests { let char_to_5bit = ProteinText::create_char_to_5bit_hashmap(); let bit5_to_char = ProteinText::create_bit5_to_char(); - for c in "ACDEFGHIKLMNPQRSTVWY-$".chars() { + for c in "ABCDEFGHIKLMNOPQRSTUVWXYZ-$".chars() { let char_5bit = char_to_5bit.get(&(c as u8)).unwrap(); assert_eq!(c as u8, bit5_to_char[*char_5bit as usize]); } From dc7e8b4eee95eb08d853cfff58b72153a955cb31 Mon Sep 17 00:00:00 2001 From: Pieter Verschaffelt Date: Thu, 19 Sep 2024 14:07:34 +0200 Subject: [PATCH 7/9] Run linter --- text-compression/src/lib.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/text-compression/src/lib.rs b/text-compression/src/lib.rs index 0f0300c..6fa7227 100644 --- a/text-compression/src/lib.rs +++ b/text-compression/src/lib.rs @@ -58,7 +58,8 @@ impl ProteinText { let mut bit_array = BitArray::with_capacity(input_string.len(), 5); for (i, c) in input_string.chars().enumerate() { - let char_5bit: u8 = *char_to_5bit.get(&(c as u8)).expect(&format!("Input character '{}' not in alphabet", c)); + let char_5bit: u8 = + *char_to_5bit.get(&(c as u8)).expect(&format!("Input character '{}' not in alphabet", c)); bit_array.set(i, char_5bit as u64); } @@ -131,7 +132,8 @@ impl ProteinText { /// * `index` - The index of the character to change. /// * `value` - The character to fill in as `u8`. pub fn set(&mut self, index: usize, value: u8) { - let char_5bit: u8 = *self.char_to_5bit.get(&value).expect(&format!("Input character '{}' not in alphabet", value)); + let char_5bit: u8 = + *self.char_to_5bit.get(&value).expect(&format!("Input character '{}' not in alphabet", value)); self.bit_array.set(index, char_5bit as u64); } @@ -477,7 +479,8 @@ mod tests { let mut bit_array = BitArray::with_capacity(input_string.len(), 5); for (i, c) in input_string.chars().enumerate() { - let char_5bit: u8 = *char_to_5bit.get(&(c as u8)).expect(&format!("Input character '{}' not in alphabet", c)); + let char_5bit: u8 = + *char_to_5bit.get(&(c as u8)).expect(&format!("Input character '{}' not in alphabet", c)); bit_array.set(i, char_5bit as u64); } From 5094fbf0c156d5bfe96f644ca7166431bd639e53 Mon Sep 17 00:00:00 2001 From: Pieter Verschaffelt Date: Thu, 19 Sep 2024 14:11:59 +0200 Subject: [PATCH 8/9] Fix warnings generated by clippy --- text-compression/src/lib.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/text-compression/src/lib.rs b/text-compression/src/lib.rs index 6fa7227..337c6a6 100644 --- a/text-compression/src/lib.rs +++ b/text-compression/src/lib.rs @@ -59,7 +59,7 @@ impl ProteinText { let mut bit_array = BitArray::with_capacity(input_string.len(), 5); for (i, c) in input_string.chars().enumerate() { let char_5bit: u8 = - *char_to_5bit.get(&(c as u8)).expect(&format!("Input character '{}' not in alphabet", c)); + *char_to_5bit.get(&(c as u8)).unwrap_or_else(|| panic!("Input character '{}' not in alphabet", c)); bit_array.set(i, char_5bit as u64); } @@ -80,7 +80,8 @@ impl ProteinText { let mut bit_array = BitArray::with_capacity(input_vec.len(), 5); for (i, e) in input_vec.iter().enumerate() { - let char_5bit: u8 = *char_to_5bit.get(e).expect(&format!("Input character '{}' not in alphabet", e)); + let char_5bit: u8 = + *char_to_5bit.get(e).unwrap_or_else(|| panic!("Input character '{}' not in alphabet", e)); bit_array.set(i, char_5bit as u64); } @@ -132,8 +133,10 @@ impl ProteinText { /// * `index` - The index of the character to change. /// * `value` - The character to fill in as `u8`. pub fn set(&mut self, index: usize, value: u8) { - let char_5bit: u8 = - *self.char_to_5bit.get(&value).expect(&format!("Input character '{}' not in alphabet", value)); + let char_5bit: u8 = *self + .char_to_5bit + .get(&value) + .unwrap_or_else(|| panic!("Input character '{}' not in alphabet", value)); self.bit_array.set(index, char_5bit as u64); } @@ -480,7 +483,7 @@ mod tests { let mut bit_array = BitArray::with_capacity(input_string.len(), 5); for (i, c) in input_string.chars().enumerate() { let char_5bit: u8 = - *char_to_5bit.get(&(c as u8)).expect(&format!("Input character '{}' not in alphabet", c)); + *char_to_5bit.get(&(c as u8)).unwrap_or_else(|| panic!("Input character '{}' not in alphabet", c)); bit_array.set(i, char_5bit as u64); } From 9a50fcae96792af35b5bc80fbf1d72b10e292ea1 Mon Sep 17 00:00:00 2001 From: Pieter Verschaffelt Date: Thu, 19 Sep 2024 14:28:52 +0200 Subject: [PATCH 9/9] Fixed tests because of added symbols --- text-compression/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text-compression/src/lib.rs b/text-compression/src/lib.rs index 337c6a6..338e234 100644 --- a/text-compression/src/lib.rs +++ b/text-compression/src/lib.rs @@ -598,7 +598,7 @@ mod tests { let mut reader = std::io::BufReader::new(&data[..]); let compressed_text = load_compressed_text(&mut reader).unwrap(); - for (i, c) in "CDEFGHIKLM".chars().enumerate() { + for (i, c) in "BCDEFGHIKL".chars().enumerate() { assert_eq!(compressed_text.get(i), c as u8); } }