From 3871af9c210d77712ecbf266e34996bd8d8e32d4 Mon Sep 17 00:00:00 2001 From: Tobias Dorra Date: Wed, 18 Dec 2024 00:20:57 +0100 Subject: [PATCH] fix: as_las_string_lossy ignores anything after the first null character (#100) I just came across a LAS file with the following Extra-Attributes VLR: ``` Vlr { user_id: "LASF_Spec\0\0\0\0\0\u{18}\0", record_id: 4, ... } ``` The user_id is not properly filled with null characters (see the `\u{18}` hidden in there...). In this case, `as_las_string_lossy` currently returns the full string including all null characters. This results in the extra-bytes VLR not being detected correctly in my own code. Most other tools like LAStools or CloudCompare seem to have no problem with this and just ignore anything after the first null character. I changed `as_las_string_lossy` to work this way, too. Now `as_las_string_lossy` correctly parses the above user ID as "LASF_Spec". Co-authored-by: Tobias Dorra --- src/utils.rs | 11 ++++++++++- src/vlr.rs | 16 +++++----------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/utils.rs b/src/utils.rs index 4f0a0303..01346eff 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -41,7 +41,10 @@ impl AsLasStr for &'_ [u8] { fn as_las_string_lossy(&self) -> String { match self.as_las_str() { Ok(s) => s.to_string(), - Err(_) => String::from_utf8_lossy(self).to_string(), + Err(_) => { + let len = self.iter().position(|c| *c == 0).unwrap_or(self.len()); + String::from_utf8_lossy(&self[..len]).to_string() + } } } } @@ -101,4 +104,10 @@ mod tests { let mut bytes = [0; 5]; assert!(bytes.as_mut().from_las_str("Beer!!").is_err()); } + + #[test] + fn lossy_from_not_null_filled() { + let bytes = [65, 66, 67, 0, 68]; + assert_eq!("ABC", bytes.as_ref().as_las_string_lossy()); + } } diff --git a/src/vlr.rs b/src/vlr.rs index 6efee6c5..298a8db9 100644 --- a/src/vlr.rs +++ b/src/vlr.rs @@ -191,29 +191,23 @@ mod tests { #[test] fn allow_non_ascii_user_id() { let raw_vlr = raw::Vlr { - user_id: [0, 42, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + user_id: [194, 174, 0, 0, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0], ..Default::default() }; let vlr = Vlr::new(raw_vlr); - assert_eq!( - "\u{0}*\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}", - vlr.user_id - ); + assert_eq!("®", vlr.user_id); } #[test] fn allow_non_ascii_description() { let raw_vlr = raw::Vlr { description: [ - 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, + 194, 174, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, ], ..Default::default() }; let vlr = Vlr::new(raw_vlr); - assert_eq!( - "\u{0}*\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}*\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}", - vlr.description - ); + assert_eq!("®", vlr.description); } }