Truncate negative millisecond fractions (#71)

sydney-runkle · web-flow · commit 27b17f768fe1 · 2024-07-03T10:55:38.000-05:00
diff --git a/benches/main.rs b/benches/main.rs
@@ -248,3 +248,25 @@ fn format_date_time(bench: &mut Bencher) {
         black_box(date.to_string());
     })
 }
+
+/// Benchmarks `DateTime::parse_str` on positive and negative unix timestamps, with and without fractions.
+#[bench]
+fn parse_timestamp_str(bench: &mut Bencher) {
+    let inputs = black_box([
+        "1654646400",
+        "-1654646400",
+        "1654646404",
+        "-1654646404",
+        "1654646404.5",
+        "1654646404.123456",
+        "1654646404000.5",
+        "1654646404123.456",
+        "-1654646404.123456",
+        "-1654646404000.123",
+    ]);
+    bench.iter(|| {
+        inputs.iter().for_each(|&input| {
+            black_box(DateTime::parse_str(black_box(input)).unwrap());
+        });
+    });
+}
diff --git a/src/datetime.rs b/src/datetime.rs
@@ -1,5 +1,7 @@
 use crate::date::MS_WATERSHED;
-use crate::{int_parse_bytes, MicrosecondsPrecisionOverflowBehavior, TimeConfigBuilder};
+use crate::{
+    float_parse_bytes, numbers::decimal_digits, IntFloat, MicrosecondsPrecisionOverflowBehavior, TimeConfigBuilder,
+};
 use crate::{time::TimeConfig, Date, ParseError, Time};
 use std::cmp::Ordering;
 use std::fmt;
@@ -339,50 +341,41 @@ impl DateTime {
     pub fn parse_bytes_with_config(bytes: &[u8], config: &TimeConfig) -> Result<Self, ParseError> {
         match Self::parse_bytes_rfc3339_with_config(bytes, config) {
             Ok(d) => Ok(d),
-            Err(e) => {
-                let mut split = bytes.splitn(2, |&b| b == b'.');
-                let Some(timestamp) =
-                    int_parse_bytes(split.next().expect("splitn always returns at least one element"))
-                else {
-                    return Err(e);
-                };
-                let float_fraction = split.next();
-                debug_assert!(split.next().is_none()); // at most two elements
-                match float_fraction {
-                    // If fraction exists but is empty (i.e. trailing `.`), allow for backwards compatibility;
-                    // TODO might want to reconsider this later?
-                    Some(b"") | None => Self::from_timestamp_with_config(timestamp, 0, config),
-                    Some(fract) => {
-                        // fraction is either:
-                        // - up to 3 digits of millisecond fractions, i.e. microseconds
-                        // - or up to 6 digits of second fractions, i.e. milliseconds
-                        let max_digits = if timestamp > MS_WATERSHED { 3 } else { 6 };
-                        let Some(fract_integers) = int_parse_bytes(fract) else {
-                            return Err(e);
-                        };
-                        if config.microseconds_precision_overflow_behavior
-                            == MicrosecondsPrecisionOverflowBehavior::Error
-                            && fract.len() > max_digits
-                        {
-                            return Err(if timestamp > MS_WATERSHED {
-                                ParseError::MillisecondFractionTooLong
-                            } else {
-                                ParseError::SecondFractionTooLong
-                            });
+            Err(e) => match float_parse_bytes(bytes) {
+                IntFloat::Int(int) => Self::from_timestamp_with_config(int, 0, config),
+                IntFloat::Float(float) => {
+                    let timestamp_in_milliseconds = float.abs() > MS_WATERSHED as f64;
+
+                    if config.microseconds_precision_overflow_behavior == MicrosecondsPrecisionOverflowBehavior::Error {
+                        let decimal_digits_count = decimal_digits(bytes);
+
+                        // Reject fractions finer than one microsecond when the overflow behavior is `Error`:
+                        // a timestamp in milliseconds may carry at most 3 decimal digits and a timestamp in
+                        // seconds at most 6, so both limits correspond to microsecond precision.
+                        if timestamp_in_milliseconds && decimal_digits_count > 3 {
+                            return Err(ParseError::MillisecondFractionTooLong);
+                        } else if !timestamp_in_milliseconds && decimal_digits_count > 6 {
+                            return Err(ParseError::SecondFractionTooLong);
                         }
-                        // TODO: Technically this is rounding, but this is what the existing
-                        // behaviour already did. Probably this is always better than "truncating"
-                        // so we might want to change MicrosecondsPrecisionOverflowBehavior and
-                        // make other uses also round / deprecate truncating.
-                        let multiple = 10f64.powf(max_digits as f64 - fract.len() as f64);
-                        Self::from_timestamp_with_config(
-                            timestamp,
-                            (fract_integers as f64 * multiple).round() as u32,
-                            config,
-                        )
                     }
+
+                    let timestamp_normalized: f64 = if timestamp_in_milliseconds {
+                        float / 1_000f64
+                    } else {
+                        float
+                    };
+
+                    // `floor` rounds towards negative infinity, so a negative value such as -6.25 becomes -7 seconds.
+                    // The remainder (`timestamp_normalized - seconds`) is then non-negative: 0.75s, i.e. 750_000
+                    // microseconds, which adds back up to -6.25. This is the equivalent of doing (seconds - 1)
+                    // and (microseconds + 1_000_000), like we do in `Date::timestamp_watershed`.
+                    let seconds = timestamp_normalized.floor() as i64;
+                    let microseconds = ((timestamp_normalized - seconds as f64) * 1_000_000f64).round() as u32;
+
+                    Self::from_timestamp_with_config(seconds, microseconds, config)
                 }
-            }
+                IntFloat::Err => Err(e),
+            },
         }
     }
 
diff --git a/src/numbers.rs b/src/numbers.rs
@@ -115,3 +115,13 @@ pub fn float_parse_bytes(s: &[u8]) -> IntFloat {
         IntFloat::Int(int_part)
     }
 }
+
+/// Return how many bytes follow the first `.` in `bytes`, or zero when there is no `.`.
+/// Caution: the input is not validated, so the result is only a true count of
+/// decimal places when `bytes` holds a well-formed number.
+pub(crate) fn decimal_digits(bytes: &[u8]) -> usize {
+    bytes
+        .iter()
+        .position(|&b| b == b'.')
+        .map_or(0, |dot| bytes.len() - dot - 1)
+}
diff --git a/tests/main.rs b/tests/main.rs
@@ -863,10 +863,14 @@ param_tests! {
     dt_underscore: ok => "2020-01-01_12:13:14,123z", "2020-01-01T12:13:14.123000Z";
     dt_unix1: ok => "1654646400", "2022-06-08T00:00:00";
     dt_unix2: ok => "1654646404", "2022-06-08T00:00:04";
+    dt_unix_1_neg: ok => "-1654646400", "1917-07-27T00:00:00";
+    dt_unix_2_neg: ok => "-1654646404", "1917-07-26T23:59:56";
     dt_unix_float: ok => "1654646404.5", "2022-06-08T00:00:04.500000";
     dt_unix_float_limit: ok => "1654646404.123456", "2022-06-08T00:00:04.123456";
     dt_unix_float_ms: ok => "1654646404000.5", "2022-06-08T00:00:04.000500";
     dt_unix_float_ms_limit: ok => "1654646404123.456", "2022-06-08T00:00:04.123456";
+    dt_unix_float_ms_neg: ok => "-1654646404.123456", "1917-07-26T23:59:55.876544";
+    dt_unix_float_ms_neg_limit: ok => "-1654646404000.123", "1917-07-26T23:59:55.999877";
     dt_unix_float_empty: ok => "1654646404.", "2022-06-08T00:00:04";
     dt_unix_float_ms_empty: ok => "1654646404000.", "2022-06-08T00:00:04";
     dt_unix_float_too_long: err => "1654646404.1234567", SecondFractionTooLong;

Original file line number	Diff line number	Diff line change
`@@ -115,3 +115,13 @@ pub fn float_parse_bytes(s: &[u8]) -> IntFloat {`
`115`	`115`	`IntFloat::Int(int_part)`
`116`	`116`	`}`
`117`	`117`	`}`
	`118`	`+`
	`119`	`+/// Count the number of decimal places in a byte slice.`
	`120`	`+/// Caution: does not verify the integrity of the input,`
	`121`	`+/// so it may return incorrect results for invalid inputs.`
	`122`	`+pub(crate) fn decimal_digits(bytes: &[u8]) -> usize {`
	`123`	`+ match bytes.splitn(2, \|&b\| b == b'.').nth(1) {`
	`124`	`+ Some(b"") \| None => 0,`
	`125`	`+ Some(fraction) => fraction.len(),`
	`126`	`+ }`
	`127`	`+}`