Skip to content

Commit e4a11a9

Browse files
committed
check if parquet column has min/max set
1 parent 93bdf79 commit e4a11a9

File tree

1 file changed

+112
-110
lines changed

1 file changed

+112
-110
lines changed

iceberg-rust/src/file_format/parquet.rs

Lines changed: 112 additions & 110 deletions
Original file line number | Diff line number | Diff line change
@@ -95,123 +95,125 @@ pub fn parquet_to_datafile(
9595
.ok_or_else(|| Error::Schema(column_name.to_string(), "".to_string()))?
9696
.field_type;
9797

98-
if let Type::Primitive(_) = &data_type {
99-
let new = Value::try_from_bytes(statistics.min_bytes(), data_type)?;
100-
match lower_bounds.entry(id) {
101-
Entry::Occupied(mut entry) => {
102-
let entry = entry.get_mut();
103-
match (&entry, &new) {
104-
(Value::Int(current), Value::Int(new_val)) => {
105-
if *current > *new_val {
106-
*entry = new
107-
}
108-
}
109-
(Value::LongInt(current), Value::LongInt(new_val)) => {
110-
if *current > *new_val {
111-
*entry = new
112-
}
113-
}
114-
(Value::Float(current), Value::Float(new_val)) => {
115-
if *current > *new_val {
116-
*entry = new
117-
}
98+
if statistics.has_min_max_set() {
99+
if let Type::Primitive(_) = &data_type {
100+
let new = Value::try_from_bytes(statistics.min_bytes(), data_type)?;
101+
match lower_bounds.entry(id) {
102+
Entry::Occupied(mut entry) => {
103+
let entry = entry.get_mut();
104+
match (&entry, &new) {
105+
(Value::Int(current), Value::Int(new_val)) => {
106+
if *current > *new_val {
107+
*entry = new
108+
}
109+
}
110+
(Value::LongInt(current), Value::LongInt(new_val)) => {
111+
if *current > *new_val {
112+
*entry = new
113+
}
114+
}
115+
(Value::Float(current), Value::Float(new_val)) => {
116+
if *current > *new_val {
117+
*entry = new
118+
}
119+
}
120+
(Value::Double(current), Value::Double(new_val)) => {
121+
if *current > *new_val {
122+
*entry = new
123+
}
124+
}
125+
(Value::Date(current), Value::Date(new_val)) => {
126+
if *current > *new_val {
127+
*entry = new
128+
}
129+
}
130+
(Value::Time(current), Value::Time(new_val)) => {
131+
if *current > *new_val {
132+
*entry = new
133+
}
134+
}
135+
(Value::Timestamp(current), Value::Timestamp(new_val)) => {
136+
if *current > *new_val {
137+
*entry = new
138+
}
139+
}
140+
(Value::TimestampTZ(current), Value::TimestampTZ(new_val)) => {
141+
if *current > *new_val {
142+
*entry = new
143+
}
144+
}
145+
_ => (),
118146
}
119-
(Value::Double(current), Value::Double(new_val)) => {
120-
if *current > *new_val {
121-
*entry = new
122-
}
123-
}
124-
(Value::Date(current), Value::Date(new_val)) => {
125-
if *current > *new_val {
126-
*entry = new
127-
}
128-
}
129-
(Value::Time(current), Value::Time(new_val)) => {
130-
if *current > *new_val {
131-
*entry = new
132-
}
133-
}
134-
(Value::Timestamp(current), Value::Timestamp(new_val)) => {
135-
if *current > *new_val {
136-
*entry = new
137-
}
138-
}
139-
(Value::TimestampTZ(current), Value::TimestampTZ(new_val)) => {
140-
if *current > *new_val {
141-
*entry = new
142-
}
143-
}
144-
_ => (),
147+
}
148+
Entry::Vacant(entry) => {
149+
entry.insert(new);
145150
}
146151
}
147-
Entry::Vacant(entry) => {
148-
entry.insert(new);
149-
}
150-
}
151-
let new = Value::try_from_bytes(statistics.max_bytes(), data_type)?;
152-
match upper_bounds.entry(id) {
153-
Entry::Occupied(mut entry) => {
154-
let entry = entry.get_mut();
155-
match (&entry, &new) {
156-
(Value::Int(current), Value::Int(new_val)) => {
157-
if *current < *new_val {
158-
*entry = new
159-
}
152+
let new = Value::try_from_bytes(statistics.max_bytes(), data_type)?;
153+
match upper_bounds.entry(id) {
154+
Entry::Occupied(mut entry) => {
155+
let entry = entry.get_mut();
156+
match (&entry, &new) {
157+
(Value::Int(current), Value::Int(new_val)) => {
158+
if *current < *new_val {
159+
*entry = new
160+
}
161+
}
162+
(Value::LongInt(current), Value::LongInt(new_val)) => {
163+
if *current < *new_val {
164+
*entry = new
165+
}
166+
}
167+
(Value::Float(current), Value::Float(new_val)) => {
168+
if *current < *new_val {
169+
*entry = new
170+
}
171+
}
172+
(Value::Double(current), Value::Double(new_val)) => {
173+
if *current < *new_val {
174+
*entry = new
175+
}
176+
}
177+
(Value::Date(current), Value::Date(new_val)) => {
178+
if *current < *new_val {
179+
*entry = new
180+
}
181+
}
182+
(Value::Time(current), Value::Time(new_val)) => {
183+
if *current < *new_val {
184+
*entry = new
185+
}
186+
}
187+
(Value::Timestamp(current), Value::Timestamp(new_val)) => {
188+
if *current < *new_val {
189+
*entry = new
190+
}
191+
}
192+
(Value::TimestampTZ(current), Value::TimestampTZ(new_val)) => {
193+
if *current < *new_val {
194+
*entry = new
195+
}
196+
}
197+
_ => (),
160198
}
161-
(Value::LongInt(current), Value::LongInt(new_val)) => {
162-
if *current < *new_val {
163-
*entry = new
164-
}
165-
}
166-
(Value::Float(current), Value::Float(new_val)) => {
167-
if *current < *new_val {
168-
*entry = new
169-
}
170-
}
171-
(Value::Double(current), Value::Double(new_val)) => {
172-
if *current < *new_val {
173-
*entry = new
174-
}
175-
}
176-
(Value::Date(current), Value::Date(new_val)) => {
177-
if *current < *new_val {
178-
*entry = new
179-
}
180-
}
181-
(Value::Time(current), Value::Time(new_val)) => {
182-
if *current < *new_val {
183-
*entry = new
184-
}
185-
}
186-
(Value::Timestamp(current), Value::Timestamp(new_val)) => {
187-
if *current < *new_val {
188-
*entry = new
189-
}
190-
}
191-
(Value::TimestampTZ(current), Value::TimestampTZ(new_val)) => {
192-
if *current < *new_val {
193-
*entry = new
194-
}
195-
}
196-
_ => (),
199+
}
200+
Entry::Vacant(entry) => {
201+
entry.insert(new);
197202
}
198203
}
199-
Entry::Vacant(entry) => {
200-
entry.insert(new);
201-
}
202-
}
203204

204-
if let Some(partition_value) = partition.get_mut(column_name) {
205-
if partition_value.is_none() {
206-
let transform = transforms
207-
.get(column_name)
208-
.ok_or_else(|| Error::InvalidFormat("transform".to_string()))?;
209-
let min = Value::try_from_bytes(statistics.min_bytes(), data_type)?
210-
.tranform(transform)?;
211-
let max = Value::try_from_bytes(statistics.max_bytes(), data_type)?
212-
.tranform(transform)?;
213-
if min == max {
214-
*partition_value = Some(min)
205+
if let Some(partition_value) = partition.get_mut(column_name) {
206+
if partition_value.is_none() {
207+
let transform = transforms
208+
.get(column_name)
209+
.ok_or_else(|| Error::InvalidFormat("transform".to_string()))?;
210+
let min = Value::try_from_bytes(statistics.min_bytes(), data_type)?
211+
.tranform(transform)?;
212+
let max = Value::try_from_bytes(statistics.max_bytes(), data_type)?
213+
.tranform(transform)?;
214+
if min == max {
215+
*partition_value = Some(min)
216+
}
215217
}
216218
}
217219
}

0 commit comments

Comments
 (0)