@@ -79,11 +79,15 @@ pub fn parquet_to_datafile(
79
79
. or_insert ( row_group. num_rows ( ) ) ;
80
80
81
81
if let Some ( statistics) = column. statistics ( ) {
82
+ let mut only_nulls = false ;
82
83
null_value_counts
83
84
. entry ( id)
84
85
. and_modify ( |x| * x += statistics. null_count ( ) as i64 )
85
86
. or_insert ( statistics. null_count ( ) as i64 ) ;
86
87
if let Some ( distinct_count) = statistics. distinct_count ( ) {
88
+ if statistics. null_count ( ) == distinct_count {
89
+ only_nulls = true
90
+ }
87
91
distinct_counts
88
92
. entry ( id)
89
93
. and_modify ( |x| * x += distinct_count as i64 )
@@ -95,123 +99,125 @@ pub fn parquet_to_datafile(
95
99
. ok_or_else ( || Error :: Schema ( column_name. to_string ( ) , "" . to_string ( ) ) ) ?
96
100
. field_type ;
97
101
98
- if let Type :: Primitive ( _) = & data_type {
99
- let new = Value :: try_from_bytes ( statistics. min_bytes ( ) , data_type) ?;
100
- match lower_bounds. entry ( id) {
101
- Entry :: Occupied ( mut entry) => {
102
- let entry = entry. get_mut ( ) ;
103
- match ( & entry, & new) {
104
- ( Value :: Int ( current) , Value :: Int ( new_val) ) => {
105
- if * current > * new_val {
106
- * entry = new
107
- }
108
- }
109
- ( Value :: LongInt ( current) , Value :: LongInt ( new_val) ) => {
110
- if * current > * new_val {
111
- * entry = new
112
- }
113
- }
114
- ( Value :: Float ( current) , Value :: Float ( new_val) ) => {
115
- if * current > * new_val {
116
- * entry = new
117
- }
118
- }
119
- ( Value :: Double ( current) , Value :: Double ( new_val) ) => {
120
- if * current > * new_val {
121
- * entry = new
122
- }
123
- }
124
- ( Value :: Date ( current) , Value :: Date ( new_val) ) => {
125
- if * current > * new_val {
126
- * entry = new
127
- }
102
+ if !only_nulls {
103
+ if let Type :: Primitive ( _) = & data_type {
104
+ let new = Value :: try_from_bytes ( statistics. min_bytes ( ) , data_type) ?;
105
+ match lower_bounds. entry ( id) {
106
+ Entry :: Occupied ( mut entry) => {
107
+ let entry = entry. get_mut ( ) ;
108
+ match ( & entry, & new) {
109
+ ( Value :: Int ( current) , Value :: Int ( new_val) ) => {
110
+ if * current > * new_val {
111
+ * entry = new
112
+ }
113
+ }
114
+ ( Value :: LongInt ( current) , Value :: LongInt ( new_val) ) => {
115
+ if * current > * new_val {
116
+ * entry = new
117
+ }
118
+ }
119
+ ( Value :: Float ( current) , Value :: Float ( new_val) ) => {
120
+ if * current > * new_val {
121
+ * entry = new
122
+ }
123
+ }
124
+ ( Value :: Double ( current) , Value :: Double ( new_val) ) => {
125
+ if * current > * new_val {
126
+ * entry = new
127
+ }
128
+ }
129
+ ( Value :: Date ( current) , Value :: Date ( new_val) ) => {
130
+ if * current > * new_val {
131
+ * entry = new
132
+ }
133
+ }
134
+ ( Value :: Time ( current) , Value :: Time ( new_val) ) => {
135
+ if * current > * new_val {
136
+ * entry = new
137
+ }
138
+ }
139
+ ( Value :: Timestamp ( current) , Value :: Timestamp ( new_val) ) => {
140
+ if * current > * new_val {
141
+ * entry = new
142
+ }
143
+ }
144
+ ( Value :: TimestampTZ ( current) , Value :: TimestampTZ ( new_val) ) => {
145
+ if * current > * new_val {
146
+ * entry = new
147
+ }
148
+ }
149
+ _ => ( ) ,
128
150
}
129
- ( Value :: Time ( current) , Value :: Time ( new_val) ) => {
130
- if * current > * new_val {
131
- * entry = new
132
- }
133
- }
134
- ( Value :: Timestamp ( current) , Value :: Timestamp ( new_val) ) => {
135
- if * current > * new_val {
136
- * entry = new
137
- }
138
- }
139
- ( Value :: TimestampTZ ( current) , Value :: TimestampTZ ( new_val) ) => {
140
- if * current > * new_val {
141
- * entry = new
142
- }
143
- }
144
- _ => ( ) ,
151
+ }
152
+ Entry :: Vacant ( entry) => {
153
+ entry. insert ( new) ;
145
154
}
146
155
}
147
- Entry :: Vacant ( entry) => {
148
- entry. insert ( new) ;
149
- }
150
- }
151
- let new = Value :: try_from_bytes ( statistics. max_bytes ( ) , data_type) ?;
152
- match upper_bounds. entry ( id) {
153
- Entry :: Occupied ( mut entry) => {
154
- let entry = entry. get_mut ( ) ;
155
- match ( & entry, & new) {
156
- ( Value :: Int ( current) , Value :: Int ( new_val) ) => {
157
- if * current < * new_val {
158
- * entry = new
159
- }
160
- }
161
- ( Value :: LongInt ( current) , Value :: LongInt ( new_val) ) => {
162
- if * current < * new_val {
163
- * entry = new
164
- }
165
- }
166
- ( Value :: Float ( current) , Value :: Float ( new_val) ) => {
167
- if * current < * new_val {
168
- * entry = new
169
- }
156
+ let new = Value :: try_from_bytes ( statistics. max_bytes ( ) , data_type) ?;
157
+ match upper_bounds. entry ( id) {
158
+ Entry :: Occupied ( mut entry) => {
159
+ let entry = entry. get_mut ( ) ;
160
+ match ( & entry, & new) {
161
+ ( Value :: Int ( current) , Value :: Int ( new_val) ) => {
162
+ if * current < * new_val {
163
+ * entry = new
164
+ }
165
+ }
166
+ ( Value :: LongInt ( current) , Value :: LongInt ( new_val) ) => {
167
+ if * current < * new_val {
168
+ * entry = new
169
+ }
170
+ }
171
+ ( Value :: Float ( current) , Value :: Float ( new_val) ) => {
172
+ if * current < * new_val {
173
+ * entry = new
174
+ }
175
+ }
176
+ ( Value :: Double ( current) , Value :: Double ( new_val) ) => {
177
+ if * current < * new_val {
178
+ * entry = new
179
+ }
180
+ }
181
+ ( Value :: Date ( current) , Value :: Date ( new_val) ) => {
182
+ if * current < * new_val {
183
+ * entry = new
184
+ }
185
+ }
186
+ ( Value :: Time ( current) , Value :: Time ( new_val) ) => {
187
+ if * current < * new_val {
188
+ * entry = new
189
+ }
190
+ }
191
+ ( Value :: Timestamp ( current) , Value :: Timestamp ( new_val) ) => {
192
+ if * current < * new_val {
193
+ * entry = new
194
+ }
195
+ }
196
+ ( Value :: TimestampTZ ( current) , Value :: TimestampTZ ( new_val) ) => {
197
+ if * current < * new_val {
198
+ * entry = new
199
+ }
200
+ }
201
+ _ => ( ) ,
170
202
}
171
- ( Value :: Double ( current) , Value :: Double ( new_val) ) => {
172
- if * current < * new_val {
173
- * entry = new
174
- }
175
- }
176
- ( Value :: Date ( current) , Value :: Date ( new_val) ) => {
177
- if * current < * new_val {
178
- * entry = new
179
- }
180
- }
181
- ( Value :: Time ( current) , Value :: Time ( new_val) ) => {
182
- if * current < * new_val {
183
- * entry = new
184
- }
185
- }
186
- ( Value :: Timestamp ( current) , Value :: Timestamp ( new_val) ) => {
187
- if * current < * new_val {
188
- * entry = new
189
- }
190
- }
191
- ( Value :: TimestampTZ ( current) , Value :: TimestampTZ ( new_val) ) => {
192
- if * current < * new_val {
193
- * entry = new
194
- }
195
- }
196
- _ => ( ) ,
203
+ }
204
+ Entry :: Vacant ( entry) => {
205
+ entry. insert ( new) ;
197
206
}
198
207
}
199
- Entry :: Vacant ( entry) => {
200
- entry. insert ( new) ;
201
- }
202
- }
203
208
204
- if let Some ( partition_value) = partition. get_mut ( column_name) {
205
- if partition_value. is_none ( ) {
206
- let transform = transforms
207
- . get ( column_name)
208
- . ok_or_else ( || Error :: InvalidFormat ( "transform" . to_string ( ) ) ) ?;
209
- let min = Value :: try_from_bytes ( statistics. min_bytes ( ) , data_type) ?
210
- . tranform ( transform) ?;
211
- let max = Value :: try_from_bytes ( statistics. max_bytes ( ) , data_type) ?
212
- . tranform ( transform) ?;
213
- if min == max {
214
- * partition_value = Some ( min)
209
+ if let Some ( partition_value) = partition. get_mut ( column_name) {
210
+ if partition_value. is_none ( ) {
211
+ let transform = transforms
212
+ . get ( column_name)
213
+ . ok_or_else ( || Error :: InvalidFormat ( "transform" . to_string ( ) ) ) ?;
214
+ let min = Value :: try_from_bytes ( statistics. min_bytes ( ) , data_type) ?
215
+ . tranform ( transform) ?;
216
+ let max = Value :: try_from_bytes ( statistics. max_bytes ( ) , data_type) ?
217
+ . tranform ( transform) ?;
218
+ if min == max {
219
+ * partition_value = Some ( min)
220
+ }
215
221
}
216
222
}
217
223
}
0 commit comments