Skip to content

Commit d04a600

Browse files
authored
Merge pull request #209 from JanKaul/truncate-tranform
Add integer truncate function for arrow
2 parents b582b34 + 55821be commit d04a600

File tree

1 file changed

+79
-2
lines changed

1 file changed

+79
-2
lines changed

iceberg-rust/src/arrow/transform.rs

Lines changed: 79 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,11 @@
1414
use std::sync::Arc;
1515

1616
use arrow::{
17-
array::{as_primitive_array, Array, ArrayRef},
17+
array::{as_primitive_array, Array, ArrayRef, PrimitiveArray},
1818
compute::{binary, cast, date_part, unary, DatePart},
19-
datatypes::{DataType, Date32Type, Int32Type, Int64Type, TimeUnit, TimestampMicrosecondType},
19+
datatypes::{
20+
DataType, Date32Type, Int16Type, Int32Type, Int64Type, TimeUnit, TimestampMicrosecondType,
21+
},
2022
error::ArrowError,
2123
};
2224

@@ -97,6 +99,21 @@ pub fn transform_arrow(array: ArrayRef, transform: &Transform) -> Result<ArrayRe
9799
datepart_to_years,
98100
)))
99101
}
102+
(DataType::Int16, Transform::Truncate(m)) => Ok(Arc::<PrimitiveArray<Int16Type>>::new(
103+
unary(as_primitive_array::<Int16Type>(&array), |i| {
104+
i - i.rem_euclid(*m as i16)
105+
}),
106+
)),
107+
(DataType::Int32, Transform::Truncate(m)) => Ok(Arc::<PrimitiveArray<Int32Type>>::new(
108+
unary(as_primitive_array::<Int32Type>(&array), |i| {
109+
i - i.rem_euclid(*m as i32)
110+
}),
111+
)),
112+
(DataType::Int64, Transform::Truncate(m)) => Ok(Arc::<PrimitiveArray<Int64Type>>::new(
113+
unary(as_primitive_array::<Int64Type>(&array), |i| {
114+
i - i.rem_euclid(*m as i64)
115+
}),
116+
)),
100117
_ => Err(ArrowError::ComputeError(
101118
"Failed to perform transform for datatype".to_string(),
102119
)),
@@ -245,6 +262,66 @@ mod tests {
245262
assert_eq!(&expected, &result);
246263
}
247264

265+
/// `Truncate(10)` on an `Int16` array rounds every value down to a multiple of 10
/// (negative values move away from zero) and keeps null slots null.
#[test]
fn test_int16_truncate_transform() {
    use arrow::array::Int16Array;

    let input: ArrayRef = Arc::new(Int16Array::from(vec![
        Some(17),
        Some(23),
        Some(-15),
        Some(5),
        None,
    ]));
    let want: ArrayRef = Arc::new(Int16Array::from(vec![
        Some(10),  // 17 - 17.rem_euclid(10) = 17 - 7
        Some(20),  // 23 - 23.rem_euclid(10) = 23 - 3
        Some(-20), // (-15).rem_euclid(10) = 5, so -15 - 5; plain `%` would yield -10
        Some(0),   // 5 - 5.rem_euclid(10) = 5 - 5
        None,      // nulls pass through unchanged
    ]));

    let got = transform_arrow(input, &Transform::Truncate(10)).unwrap();

    assert_eq!(&want, &got);
}
284+
285+
/// `Truncate(100)` on an `Int32` array rounds every value down to a multiple of 100
/// (negative values move away from zero) and keeps null slots null.
#[test]
fn test_int32_truncate_transform() {
    use arrow::array::Int32Array;

    let input: ArrayRef = Arc::new(Int32Array::from(vec![
        Some(127),
        Some(234),
        Some(-156),
        Some(50),
        None,
    ]));
    let want: ArrayRef = Arc::new(Int32Array::from(vec![
        Some(100),  // 127 - 127.rem_euclid(100) = 127 - 27
        Some(200),  // 234 - 234.rem_euclid(100) = 234 - 34
        Some(-200), // (-156).rem_euclid(100) = 44, so -156 - 44; plain `%` would yield -100
        Some(0),    // 50 - 50.rem_euclid(100) = 50 - 50
        None,       // nulls pass through unchanged
    ]));

    let got = transform_arrow(input, &Transform::Truncate(100)).unwrap();

    assert_eq!(&want, &got);
}
304+
305+
/// `Truncate(1000)` on an `Int64` array rounds every value down to a multiple of 1000
/// (negative values move away from zero) and keeps null slots null.
#[test]
fn test_int64_truncate_transform() {
    use arrow::array::Int64Array;

    let input: ArrayRef = Arc::new(Int64Array::from(vec![
        Some(1275),
        Some(2348),
        Some(-1567),
        Some(500),
        None,
    ]));
    let want: ArrayRef = Arc::new(Int64Array::from(vec![
        Some(1000),  // 1275 - 1275.rem_euclid(1000) = 1275 - 275
        Some(2000),  // 2348 - 2348.rem_euclid(1000) = 2348 - 348
        Some(-2000), // (-1567).rem_euclid(1000) = 433, so -1567 - 433; plain `%` would yield -1000
        Some(0),     // 500 - 500.rem_euclid(1000) = 500 - 500
        None,        // nulls pass through unchanged
    ]));

    let got = transform_arrow(input, &Transform::Truncate(1000)).unwrap();

    assert_eq!(&want, &got);
}
324+
248325
#[test]
249326
fn test_unsupported_transform() {
250327
let array = Arc::new(arrow::array::StringArray::from(vec!["a", "b", "c"])) as ArrayRef;

0 commit comments

Comments
 (0)