apache · himadripal · Feb 23, 2025
diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs
@@ -590,6 +590,7 @@ where
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::parse::parse_decimal;
 
     #[test]
     fn test_parse_string_to_decimal_native() -> Result<(), ArrowError> {
@@ -598,7 +599,20 @@ mod tests {
             0_i128
         );
         assert_eq!(
+            parse_decimal::<Decimal128Type>("0", 38, 0)?,
+            parse_string_to_decimal_native::<Decimal128Type>("0", 0)?,
+            "value is {}",
+            0_i128
+        );
+
+        assert_eq!(
+            parse_string_to_decimal_native::<Decimal128Type>("0", 5)?,
+            0_i128
+        );
+        assert_eq!(
+            parse_decimal::<Decimal128Type>("0", 38, 5)?,
             parse_string_to_decimal_native::<Decimal128Type>("0", 5)?,
+            "value is {}",
             0_i128
         );
 
@@ -607,7 +621,20 @@ mod tests {
             123_i128
         );
         assert_eq!(
+            parse_decimal::<Decimal128Type>("123", 38, 0)?,
+            parse_string_to_decimal_native::<Decimal128Type>("123", 0)?,
+            "value is {}",
+            123_i128
+        );
+
+        assert_eq!(
+            parse_string_to_decimal_native::<Decimal128Type>("123", 5)?,
+            12300000_i128
+        );
+        assert_eq!(
+            parse_decimal::<Decimal128Type>("123", 38, 5)?,
             parse_string_to_decimal_native::<Decimal128Type>("123", 5)?,
+            "value is {}",
             12300000_i128
         );
 
@@ -616,7 +643,20 @@ mod tests {
             123_i128
         );
         assert_eq!(
+            parse_decimal::<Decimal128Type>("123.45", 38, 0)?,
+            parse_string_to_decimal_native::<Decimal128Type>("123.45", 0)?,
+            "value is {}",
+            123_i128
+        );
+
+        assert_eq!(
+            parse_string_to_decimal_native::<Decimal128Type>("123.45", 5)?,
+            12345000_i128
+        );
+        assert_eq!(
+            parse_decimal::<Decimal128Type>("123.45", 38, 5)?,
             parse_string_to_decimal_native::<Decimal128Type>("123.45", 5)?,
+            "value is {}",
             12345000_i128
         );
 
@@ -625,7 +665,20 @@ mod tests {
             123_i128
         );
         assert_eq!(
+            parse_decimal::<Decimal128Type>("123.4567891", 38, 0)?,
+            parse_string_to_decimal_native::<Decimal128Type>("123.4567891", 0)?,
+            "value is {}",
+            123_i128
+        );
+
+        assert_eq!(
+            parse_string_to_decimal_native::<Decimal128Type>("123.4567891", 5)?,
+            12345679_i128
+        );
+        assert_eq!(
+            parse_decimal::<Decimal128Type>("123.4567891", 38, 5)?,
             parse_string_to_decimal_native::<Decimal128Type>("123.4567891", 5)?,
+            "value is {}",
             12345679_i128
         );
         Ok(())

diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs
@@ -850,7 +850,16 @@ fn parse_e_notation<T: DecimalType>(
     }
 
     if exp < 0 {
-        result = result.div_wrapping(base.pow_wrapping(-exp as _));
+        let result_with_scale = result.div_wrapping(base.pow_wrapping(-exp as _));
+        let result_with_one_scale_up =
+            result.div_wrapping(base.pow_wrapping(-exp.add_wrapping(1) as _));
+        let rounding_digit =
+            result_with_one_scale_up.sub_wrapping(result_with_scale.mul_wrapping(base));
+        if rounding_digit >= T::Native::usize_as(5) {
+            result = result_with_scale.add_wrapping(T::Native::usize_as(1));
+        } else {
+            result = result_with_scale;
+        }
     } else {
         result = result.mul_wrapping(base.pow_wrapping(exp as _));
     }
@@ -868,6 +877,7 @@ pub fn parse_decimal<T: DecimalType>(
     let mut result = T::Native::usize_as(0);
     let mut fractionals: i8 = 0;
     let mut digits: u8 = 0;
+    let mut rounding_digit = -1; // first fractional digit beyond `scale`, used for rounding; -1 means none captured yet
     let base = T::Native::usize_as(10);
 
     let bs = s.as_bytes();
@@ -897,6 +907,13 @@ pub fn parse_decimal<T: DecimalType>(
                     // Ignore leading zeros.
                     continue;
                 }
+                if fractionals == scale && scale != 0 {
+                    // Capture the rounding digit once
+                    if rounding_digit < 0 {
+                        rounding_digit = (b - b'0') as i8;
+                    }
+                    continue;
+                }
                 digits += 1;
                 result = result.mul_wrapping(base);
                 result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
@@ -925,11 +942,17 @@ pub fn parse_decimal<T: DecimalType>(
                             "can't parse the string value {s} to decimal"
                         )));
                     }
-                    if fractionals == scale && scale != 0 {
+                    if fractionals == scale {
+                        // Capture the rounding digit once
+                        if rounding_digit < 0 {
+                            rounding_digit = (b - b'0') as i8;
+                        }
                         // We have processed all the digits that we need. All that
                         // is left is to validate that the rest of the string contains
                         // valid digits.
-                        continue;
+                        if scale != 0 {
+                            continue;
+                        }
                     }
                     fractionals += 1;
                     digits += 1;
@@ -986,6 +1009,13 @@ pub fn parse_decimal<T: DecimalType>(
                 "parse decimal overflow ({s})"
             )));
         }
+        if scale == 0 {
+            result = result.div_wrapping(base.pow_wrapping(fractionals as u32))
+        }
+        // Add one when the captured rounding digit is >= 5.
+        if rounding_digit >= 5 {
+            result = result.add_wrapping(T::Native::usize_as(1));
+        }
     }
 
     Ok(if negative {

diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs
@@ -1286,7 +1286,7 @@ mod tests {
         assert_eq!("53.002666", lat.value_as_string(1));
         assert_eq!("52.412811", lat.value_as_string(2));
         assert_eq!("51.481583", lat.value_as_string(3));
-        assert_eq!("12.123456", lat.value_as_string(4));
+        assert_eq!("12.123457", lat.value_as_string(4));
         assert_eq!("50.760000", lat.value_as_string(5));
         assert_eq!("0.123000", lat.value_as_string(6));
         assert_eq!("123.000000", lat.value_as_string(7));

diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs
@@ -1181,7 +1181,7 @@ mod tests {
         assert!(col1.is_null(5));
         assert_eq!(
             col1.values(),
-            &[100, 200, 204, 1103420, 0, 0].map(T::Native::usize_as)
+            &[100, 200, 205, 1103420, 0, 0].map(T::Native::usize_as)
         );
 
         let col2 = batches[0].column(1).as_primitive::<T>();
@@ -1201,7 +1201,7 @@ mod tests {
         assert!(col3.is_null(5));
         assert_eq!(
             col3.values(),
-            &[3830, 12345, 0, 0, 0, 0].map(T::Native::usize_as)
+            &[3830, 12346, 0, 0, 0, 0].map(T::Native::usize_as)
         );
     }