Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 31 additions & 2 deletions datafusion/functions/src/datetime/to_date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@
// under the License.

use crate::datetime::common::*;
use arrow::compute::cast_with_options;
use arrow::datatypes::DataType;
use arrow::datatypes::DataType::*;
use arrow::error::ArrowError::ParseError;
use arrow::{array::types::Date32Type, compute::kernels::cast_utils::Parser};
use datafusion_common::format::DEFAULT_CAST_OPTIONS;
use datafusion_common::{arrow_err, exec_err, internal_datafusion_err, Result};
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
Expand All @@ -30,7 +32,7 @@ use std::any::Any;
#[user_doc(
doc_section(label = "Time and Date Functions"),
description = r"Converts a value to a date (`YYYY-MM-DD`).
Supports strings, integer and double types as input.
Supports strings, integer, double and timestamp types as input.
Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided.
Integers and doubles are interpreted as days since the unix epoch (`1970-01-01T00:00:00Z`).
Returns the corresponding date.
Expand Down Expand Up @@ -144,9 +146,36 @@ impl ScalarUDFImpl for ToDateFunc {
}

match args[0].data_type() {
Int32 | Int64 | Null | Float64 | Date32 | Date64 => {
Null | Int32 | Int64 | Date32 | Date64 | Timestamp(_, _) => {
args[0].cast_to(&Date32, None)
}
UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 => {
// Arrow cast doesn't support direct casting of these types to date32
// as it only supports Int32 and Int64. To work around that limitation,
// use cast_with_options to cast to Int32 and then cast the result of
// that to Date32.
match &args[0] {
ColumnarValue::Array(array) => {
Ok(ColumnarValue::Array(cast_with_options(
&cast_with_options(&array, &Int32, &DEFAULT_CAST_OPTIONS)?,
&Date32,
&DEFAULT_CAST_OPTIONS,
)?))
}
ColumnarValue::Scalar(scalar) => {
let sv =
scalar.cast_to_with_options(&Int32, &DEFAULT_CAST_OPTIONS)?;
Ok(ColumnarValue::Scalar(
sv.cast_to_with_options(&Date32, &DEFAULT_CAST_OPTIONS)?,
))
}
}
}
Float16 | Float32 | Float64 => {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Float16 | Float32 | Float64 => {
Float16 | Float32 | Float64 | Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _) => {

The description can then be simplified to:

Supports strings, numeric and timestamp types as input.

// The only way this makes sense is to get the Int64 value of the float
// and then cast that to Date32.
args[0].cast_to(&Int64, None)?.cast_to(&Date32, None)
}
Utf8View | LargeUtf8 | Utf8 => self.to_date(&args),
other => {
exec_err!("Unsupported data type {} for function to_date", other)
Expand Down
72 changes: 72 additions & 0 deletions datafusion/sqllogictest/test_files/dates.slt
Original file line number Diff line number Diff line change
Expand Up @@ -164,12 +164,84 @@ SELECT to_date('21311111');
statement error DataFusion error: Arrow error:
SELECT to_date('213111111');

# verify date cast with tinyint input
query DDDDDD
SELECT to_date(null::tinyint), to_date(0::tinyint), to_date(19::tinyint), to_date(1::tinyint), to_date(-1::tinyint), to_date((0-1)::tinyint)
----
NULL 1970-01-01 1970-01-20 1970-01-02 1969-12-31 1969-12-31

# verify date cast with smallint input
query DDDDDD
SELECT to_date(null::smallint), to_date(0::smallint), to_date(19234::smallint), to_date(1::smallint), to_date(-1::smallint), to_date((0-1)::smallint)
----
NULL 1970-01-01 2022-08-30 1970-01-02 1969-12-31 1969-12-31

# verify date cast with integer input
query DDDDDD
SELECT to_date(null), to_date(0), to_date(19266320), to_date(1), to_date(-1), to_date(0-1)
----
NULL 1970-01-01 +54719-05-25 1970-01-02 1969-12-31 1969-12-31

# verify date cast with bigint input
query DDDDDD
SELECT to_date(null::bigint), to_date(0::bigint), to_date(191234::bigint), to_date(1::bigint), to_date(-1::bigint), to_date((0-1)::bigint)
----
NULL 1970-01-01 2493-07-31 1970-01-02 1969-12-31 1969-12-31

# verify date cast with unsigned tinyint input
query DDDD
SELECT to_date(null::tinyint unsigned), to_date(0::tinyint unsigned), to_date(192::tinyint unsigned), to_date(1::tinyint unsigned)
----
NULL 1970-01-01 1970-07-12 1970-01-02

# verify date cast with unsigned smallint input
query DDDD
SELECT to_date(null::smallint unsigned), to_date(0::smallint unsigned), to_date(19260::smallint unsigned), to_date(1::smallint unsigned)
----
NULL 1970-01-01 2022-09-25 1970-01-02

# verify date cast with unsigned int input
query DDDD
SELECT to_date(null::int unsigned), to_date(0::int unsigned), to_date(19260::int unsigned), to_date(1::int unsigned)
----
NULL 1970-01-01 2022-09-25 1970-01-02

# verify date cast with unsigned bigint input
query DDDD
SELECT to_date(null::bigint unsigned), to_date(0::bigint unsigned), to_date(19260000::bigint unsigned), to_date(1::bigint unsigned)
----
NULL 1970-01-01 +54702-02-03 1970-01-02

# verify date cast with real input (float32)
query DDDDDD
SELECT to_date(null::real), to_date(0.0::real), to_date(19260.1::real), to_date(1.1::real), to_date(-1.1::real), to_date(0-1.1::real)
----
NULL 1970-01-01 2022-09-25 1970-01-02 1969-12-31 1969-12-31

# verify date cast with double input (float64)
query DDDDDD
SELECT to_date(null::double), to_date(0.0::double), to_date(19260.1::double), to_date(1.1::double), to_date(-1.1::double), to_date(0-1.1::double)
----
NULL 1970-01-01 2022-09-25 1970-01-02 1969-12-31 1969-12-31

# verify date cast with date input
query DDDD
SELECT to_date('2024-12-1'::date), to_date('1920-01-12'::date), to_date('1970-01-01'::date), to_date('-0200-07-22'::date)
----
2024-12-01 1920-01-12 1970-01-01 -0200-07-22

# verify date cast with date64 input
query DDDD
SELECT to_date(arrow_cast('2024-12-1', 'Date64')), to_date(arrow_cast('1920-01-12', 'Date64')), to_date(arrow_cast('1970-01-01', 'Date64')), to_date(arrow_cast(-863999913600000, 'Date64'))
----
2024-12-01 1920-01-12 1970-01-01 -25410-12-07

# verify date cast with timestamp input
query DD
SELECT to_date('2024-12-01T00:32:45'::timestamp), to_date('1677-12-01T00:32:45'::timestamp)
----
2024-12-01 1677-12-01

# verify date output types
query TTT
SELECT arrow_typeof(to_date(1)), arrow_typeof(to_date(null)), arrow_typeof(to_date('2023-01-10 12:34:56.000'))
Expand Down
2 changes: 1 addition & 1 deletion docs/source/user-guide/sql/scalar_functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -2675,7 +2675,7 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo
### `to_date`

Converts a value to a date (`YYYY-MM-DD`).
Supports strings, integer and double types as input.
Supports strings, integer, double and timestamp types as input.
Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided.
Integers and doubles are interpreted as days since the unix epoch (`1970-01-01T00:00:00Z`).
Returns the corresponding date.
Expand Down