Skip to content

Commit 62bf411

Browse files
committed
fix: coerce polars.Array into a suitable Arrow list type
Ideally polars wouldn't be giving us `item` as the list field name, but it's more important to Just Work ™️ than to be pedantic about these things. Fixes #3566. Signed-off-by: R. Tyler Croy <[email protected]>
1 parent bb8728e commit 62bf411

File tree

3 files changed

+54
-2
lines changed

3 files changed

+54
-2
lines changed

crates/core/src/kernel/schema/cast/mod.rs

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
//!
33
use arrow_array::cast::AsArray;
44
use arrow_array::{
5-
new_null_array, Array, ArrayRef, GenericListArray, MapArray, OffsetSizeTrait, RecordBatch,
6-
RecordBatchOptions, StructArray,
5+
new_null_array, Array, ArrayRef, FixedSizeListArray, GenericListArray, MapArray,
6+
OffsetSizeTrait, RecordBatch, RecordBatchOptions, StructArray,
77
};
88
use arrow_cast::{cast_with_options, CastOptions};
99
use arrow_schema::{ArrowError, DataType, FieldRef, Fields, SchemaRef as ArrowSchemaRef};
@@ -104,6 +104,24 @@ fn cast_field(
104104
add_missing,
105105
)?) as ArrayRef)
106106
}
107+
(DataType::FixedSizeList(_, _), DataType::FixedSizeList(child_fields, _)) => {
108+
let to_type =
109+
DataType::new_list(child_fields.data_type().clone(), child_fields.is_nullable());
110+
let col = arrow::compute::kernels::cast(
111+
col.as_any()
112+
.downcast_ref::<FixedSizeListArray>()
113+
.ok_or_else(|| {
114+
ArrowError::CastError(format!(
115+
"Failed to convert a FixedSizeList into a new list {} ({col_type})",
116+
field.name()
117+
))
118+
})?,
119+
&to_type,
120+
)?;
121+
// Once the FixedSizeList has been converted to a regular list, go through the usual
122+
// list casting code
123+
cast_field(&col, field, cast_options, add_missing)
124+
}
107125
(DataType::List(_), DataType::List(child_fields)) => Ok(Arc::new(cast_list(
108126
col.as_any()
109127
.downcast_ref::<GenericListArray<i32>>()

python/deltalake/writer/_conversion.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,15 @@ def list_to_delta_dtype(
6060
if DataType.is_large_list(dtype):
6161
return DataType.large_list(inner_field_casted)
6262
elif DataType.is_fixed_size_list(dtype):
63+
# Fixed-size lists can come in from polars via its Array type.
64+
# These may carry array field names of "item" rather than "element"
65+
# which is expected everywhere else. Converting the field name and
66+
# then passing the field through for further casting in Rust will
67+
# accommodate this
68+
#
69+
# See also: <https://github.com/delta-io/delta-rs/issues/3566>
70+
if inner_field_casted.name == "item":
71+
inner_field_casted = inner_field_casted.with_name("element")
6372
return DataType.list(inner_field_casted, dtype.list_size)
6473
elif DataType.is_large_list_view(dtype):
6574
return DataType.large_list_view(inner_field_casted)

python/tests/test_writer.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2478,3 +2478,28 @@ def test_write_binary_col_with_dssc(tmp_path: pathlib.Path):
24782478
assert stats["null_count.z.z"].to_pylist() == [1]
24792479
assert stats["min.z.z"].to_pylist() == [101]
24802480
assert stats["max.z.z"].to_pylist() == [104]
2481+
2482+
2483+
@pytest.mark.polars
2484+
@pytest.mark.pyarrow
2485+
def test_polars_write_array(tmp_path: pathlib.Path):
2486+
"""
2487+
https://github.com/delta-io/delta-rs/issues/3566
2488+
"""
2489+
import polars as pl
2490+
2491+
from deltalake import DeltaTable, write_deltalake
2492+
2493+
df = pl.DataFrame(
2494+
{"array": [[-5, -4], [0, 0], [5, 9]]}, schema={"array": pl.Array(pl.Int32, 2)}
2495+
)
2496+
DeltaTable.create(
2497+
tmp_path,
2498+
df.to_arrow().schema,
2499+
mode="overwrite",
2500+
)
2501+
write_deltalake(
2502+
tmp_path,
2503+
df,
2504+
mode="overwrite",
2505+
)

0 commit comments

Comments
 (0)