Skip to content

Commit 27b17f7

Browse files
Truncate negative millisecond fractions (#71)
1 parent a036efb commit 27b17f7

File tree

4 files changed

+71
-42
lines changed

4 files changed

+71
-42
lines changed

benches/main.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,3 +248,25 @@ fn format_date_time(bench: &mut Bencher) {
248248
black_box(date.to_string());
249249
})
250250
}
251+
252+
#[bench]
253+
fn parse_timestamp_str(bench: &mut Bencher) {
254+
let timestamps = black_box([
255+
"1654646400",
256+
"-1654646400",
257+
"1654646404",
258+
"-1654646404",
259+
"1654646404.5",
260+
"1654646404.123456",
261+
"1654646404000.5",
262+
"1654646404123.456",
263+
"-1654646404.123456",
264+
"-1654646404000.123",
265+
]);
266+
267+
bench.iter(|| {
268+
for timestamp in &timestamps {
269+
black_box(DateTime::parse_str(black_box(*timestamp)).unwrap());
270+
}
271+
});
272+
}

src/datetime.rs

Lines changed: 35 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
use crate::date::MS_WATERSHED;
2-
use crate::{int_parse_bytes, MicrosecondsPrecisionOverflowBehavior, TimeConfigBuilder};
2+
use crate::{
3+
float_parse_bytes, numbers::decimal_digits, IntFloat, MicrosecondsPrecisionOverflowBehavior, TimeConfigBuilder,
4+
};
35
use crate::{time::TimeConfig, Date, ParseError, Time};
46
use std::cmp::Ordering;
57
use std::fmt;
@@ -339,50 +341,41 @@ impl DateTime {
339341
pub fn parse_bytes_with_config(bytes: &[u8], config: &TimeConfig) -> Result<Self, ParseError> {
340342
match Self::parse_bytes_rfc3339_with_config(bytes, config) {
341343
Ok(d) => Ok(d),
342-
Err(e) => {
343-
let mut split = bytes.splitn(2, |&b| b == b'.');
344-
let Some(timestamp) =
345-
int_parse_bytes(split.next().expect("splitn always returns at least one element"))
346-
else {
347-
return Err(e);
348-
};
349-
let float_fraction = split.next();
350-
debug_assert!(split.next().is_none()); // at most two elements
351-
match float_fraction {
352-
// If fraction exists but is empty (i.e. trailing `.`), allow for backwards compatibility;
353-
// TODO might want to reconsider this later?
354-
Some(b"") | None => Self::from_timestamp_with_config(timestamp, 0, config),
355-
Some(fract) => {
356-
// fraction is either:
357-
// - up to 3 digits of millisecond fractions, i.e. microseconds
358-
// - or up to 6 digits of second fractions, i.e. milliseconds
359-
let max_digits = if timestamp > MS_WATERSHED { 3 } else { 6 };
360-
let Some(fract_integers) = int_parse_bytes(fract) else {
361-
return Err(e);
362-
};
363-
if config.microseconds_precision_overflow_behavior
364-
== MicrosecondsPrecisionOverflowBehavior::Error
365-
&& fract.len() > max_digits
366-
{
367-
return Err(if timestamp > MS_WATERSHED {
368-
ParseError::MillisecondFractionTooLong
369-
} else {
370-
ParseError::SecondFractionTooLong
371-
});
344+
Err(e) => match float_parse_bytes(bytes) {
345+
IntFloat::Int(int) => Self::from_timestamp_with_config(int, 0, config),
346+
IntFloat::Float(float) => {
347+
let timestamp_in_milliseconds = float.abs() > MS_WATERSHED as f64;
348+
349+
if config.microseconds_precision_overflow_behavior == MicrosecondsPrecisionOverflowBehavior::Error {
350+
let decimal_digits_count = decimal_digits(bytes);
351+
352+
// If the number of decimal digits exceeds the maximum allowed for the timestamp precision,
353+
// return an error. For timestamps in milliseconds, the maximum is 3, for timestamps in seconds,
354+
// the maximum is 6. Either way, the finest precision accepted is one microsecond.
355+
if timestamp_in_milliseconds && decimal_digits_count > 3 {
356+
return Err(ParseError::MillisecondFractionTooLong);
357+
} else if !timestamp_in_milliseconds && decimal_digits_count > 6 {
358+
return Err(ParseError::SecondFractionTooLong);
372359
}
373-
// TODO: Technically this is rounding, but this is what the existing
374-
// behaviour already did. Probably this is always better than "truncating"
375-
// so we might want to change MicrosecondsPrecisionOverflowBehavior and
376-
// make other uses also round / deprecate truncating.
377-
let multiple = 10f64.powf(max_digits as f64 - fract.len() as f64);
378-
Self::from_timestamp_with_config(
379-
timestamp,
380-
(fract_integers as f64 * multiple).round() as u32,
381-
config,
382-
)
383360
}
361+
362+
let timestamp_normalized: f64 = if timestamp_in_milliseconds {
363+
float / 1_000f64
364+
} else {
365+
float
366+
};
367+
368+
// if the timestamp is negative, we round the seconds down (left on the number line), so -6.25 -> -7
369+
// which allows for a positive number of microseconds to compensate back up to -6.25
370+
// which is the equivalent of doing (seconds - 1) and (microseconds + 1_000_000)
371+
// like we do in Date::timestamp_watershed
372+
let seconds = timestamp_normalized.floor() as i64;
373+
let microseconds = ((timestamp_normalized - seconds as f64) * 1_000_000f64).round() as u32;
374+
375+
Self::from_timestamp_with_config(seconds, microseconds, config)
384376
}
385-
}
377+
IntFloat::Err => Err(e),
378+
},
386379
}
387380
}
388381

src/numbers.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,3 +115,13 @@ pub fn float_parse_bytes(s: &[u8]) -> IntFloat {
115115
IntFloat::Int(int_part)
116116
}
117117
}
118+
119+
/// Count the number of decimal places in a byte slice, i.e. the number of bytes after the first `.`.
120+
/// Caution: does not verify the integrity of the input,
121+
/// so it may return incorrect results for invalid inputs.
122+
pub(crate) fn decimal_digits(bytes: &[u8]) -> usize {
123+
match bytes.splitn(2, |&b| b == b'.').nth(1) {
124+
Some(b"") | None => 0,
125+
Some(fraction) => fraction.len(),
126+
}
127+
}

tests/main.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -863,10 +863,14 @@ param_tests! {
863863
dt_underscore: ok => "2020-01-01_12:13:14,123z", "2020-01-01T12:13:14.123000Z";
864864
dt_unix1: ok => "1654646400", "2022-06-08T00:00:00";
865865
dt_unix2: ok => "1654646404", "2022-06-08T00:00:04";
866+
dt_unix_1_neg: ok => "-1654646400", "1917-07-27T00:00:00";
867+
dt_unix_2_neg: ok => "-1654646404", "1917-07-26T23:59:56";
866868
dt_unix_float: ok => "1654646404.5", "2022-06-08T00:00:04.500000";
867869
dt_unix_float_limit: ok => "1654646404.123456", "2022-06-08T00:00:04.123456";
868870
dt_unix_float_ms: ok => "1654646404000.5", "2022-06-08T00:00:04.000500";
869871
dt_unix_float_ms_limit: ok => "1654646404123.456", "2022-06-08T00:00:04.123456";
872+
dt_unix_float_ms_neg: ok => "-1654646404.123456", "1917-07-26T23:59:55.876544";
873+
dt_unix_float_ms_neg_limit: ok => "-1654646404000.123", "1917-07-26T23:59:55.999877";
870874
dt_unix_float_empty: ok => "1654646404.", "2022-06-08T00:00:04";
871875
dt_unix_float_ms_empty: ok => "1654646404000.", "2022-06-08T00:00:04";
872876
dt_unix_float_too_long: err => "1654646404.1234567", SecondFractionTooLong;

0 commit comments

Comments
 (0)