@@ -24,7 +24,7 @@ use datafusion_proto::bytes::{
24
24
logical_plan_from_bytes_with_extension_codec, logical_plan_to_bytes_with_extension_codec,
25
25
} ;
26
26
use deltalake_core:: delta_datafusion:: DeltaScan ;
27
- use deltalake_core:: kernel:: { DataType , MapType , PrimitiveType , StructField , StructType } ;
27
+ use deltalake_core:: kernel:: { Add , DataType , MapType , PrimitiveType , StructField , StructType } ;
28
28
use deltalake_core:: operations:: create:: CreateBuilder ;
29
29
use deltalake_core:: protocol:: SaveMode ;
30
30
use deltalake_core:: writer:: { DeltaWriter , RecordBatchWriter } ;
@@ -429,8 +429,6 @@ mod local {
429
429
let _result = collect ( plan. execute ( 0 , task_ctx) ?) . await ?;
430
430
visit_execution_plan ( & plan, & mut metrics) . unwrap ( ) ;
431
431
} else {
432
- // if scan produces no output from ParquetExec, we still want to visit DeltaScan
433
- // to check its metrics
434
432
visit_execution_plan ( scan. as_ref ( ) , & mut metrics) . unwrap ( ) ;
435
433
}
436
434
@@ -859,6 +857,108 @@ mod local {
859
857
Ok ( ( ) )
860
858
}
861
859
860
+ async fn get_scan_metrics_with_files (
861
+ table : & DeltaTable ,
862
+ state : & SessionState ,
863
+ files : Option < Vec < Add > > ,
864
+ e : & [ Expr ] ,
865
+ ) -> Result < ExecutionMetricsCollector > {
866
+ use deltalake_core:: delta_datafusion:: { DeltaScanConfig , DeltaTableProvider } ;
867
+
868
+ let mut provider = DeltaTableProvider :: try_new (
869
+ table. snapshot ( ) . unwrap ( ) . clone ( ) ,
870
+ table. log_store ( ) ,
871
+ DeltaScanConfig :: default ( ) ,
872
+ ) ?;
873
+
874
+ if let Some ( f) = files {
875
+ provider = provider. with_files ( f) ;
876
+ }
877
+
878
+ let mut metrics = ExecutionMetricsCollector :: default ( ) ;
879
+ let scan = provider. scan ( state, None , e, None ) . await ?;
880
+
881
+ if scan. properties ( ) . output_partitioning ( ) . partition_count ( ) > 0 {
882
+ let plan = CoalescePartitionsExec :: new ( scan) ;
883
+ let task_ctx = Arc :: new ( TaskContext :: from ( state) ) ;
884
+ let _result = collect ( plan. execute ( 0 , task_ctx) ?) . await ?;
885
+ visit_execution_plan ( & plan, & mut metrics) . unwrap ( ) ;
886
+ } else {
887
+ visit_execution_plan ( scan. as_ref ( ) , & mut metrics) . unwrap ( ) ;
888
+ }
889
+
890
+ Ok ( metrics)
891
+ }
892
+
893
+ #[ tokio:: test]
894
+ async fn test_files_scanned_with_files ( ) -> Result < ( ) > {
895
+ use datafusion:: prelude:: * ;
896
+ let ctx = SessionContext :: new ( ) ;
897
+ let state = ctx. state ( ) ;
898
+
899
+ let batch1 = create_all_types_batch ( 3 , 0 , 0 ) ; // values 0-2
900
+ let batch2 = create_all_types_batch ( 3 , 0 , 4 ) ; // values 4-6
901
+ let batch3 = create_all_types_batch ( 3 , 0 , 7 ) ; // values 7-9
902
+
903
+ let ( _tmp, mut table) = prepare_table ( vec ! [ batch1] , SaveMode :: Overwrite , vec ! [ ] ) . await ;
904
+ let files_before_1 = table. snapshot ( ) . unwrap ( ) . file_actions ( ) . unwrap ( ) ;
905
+
906
+ table = DeltaOps ( table)
907
+ . write ( vec ! [ batch2] )
908
+ . with_save_mode ( SaveMode :: Append )
909
+ . await
910
+ . unwrap ( ) ;
911
+ let files_before_2 = table. snapshot ( ) . unwrap ( ) . file_actions ( ) . unwrap ( ) ;
912
+
913
+ table = DeltaOps ( table)
914
+ . write ( vec ! [ batch3] )
915
+ . with_save_mode ( SaveMode :: Append )
916
+ . await
917
+ . unwrap ( ) ;
918
+ let all_files = table. snapshot ( ) . unwrap ( ) . file_actions ( ) . unwrap ( ) ;
919
+
920
+ assert_eq ! ( all_files. len( ) , 3 ) ;
921
+
922
+ let file_0_2 = files_before_1[ 0 ] . clone ( ) ;
923
+ let file_4_6 = files_before_2
924
+ . iter ( )
925
+ . find ( |f| !files_before_1. iter ( ) . any ( |f1| f1. path == f. path ) )
926
+ . unwrap ( )
927
+ . clone ( ) ;
928
+ let file_7_9 = all_files
929
+ . iter ( )
930
+ . find ( |f| !files_before_2. iter ( ) . any ( |f2| f2. path == f. path ) )
931
+ . unwrap ( )
932
+ . clone ( ) ;
933
+
934
+ // Test without with_files (normal snapshot pruning)
935
+ let e = col ( "int64" ) . eq ( lit ( 5i64 ) ) ;
936
+ let metrics = get_scan_metrics ( & table, & state, & [ e] ) . await ?;
937
+ assert_eq ! ( metrics. num_scanned_files( ) , 1 ) ;
938
+ assert_eq ! ( metrics. skip_count, 2 ) ;
939
+
940
+ // Test with with_files providing all files (should behave the same)
941
+ let e = col ( "int64" ) . eq ( lit ( 5i64 ) ) ;
942
+ let metrics =
943
+ get_scan_metrics_with_files ( & table, & state, Some ( all_files. clone ( ) ) , & [ e] ) . await ?;
944
+ assert_eq ! ( metrics. num_scanned_files( ) , 1 ) ;
945
+ assert_eq ! ( metrics. skip_count, 2 ) ;
946
+
947
+ let subset_files = vec ! [ file_0_2. clone( ) , file_7_9. clone( ) ] ;
948
+ let e = col ( "int64" ) . gt ( lit ( 6i64 ) ) ;
949
+ let metrics = get_scan_metrics_with_files ( & table, & state, Some ( subset_files) , & [ e] ) . await ?;
950
+ assert_eq ! ( metrics. num_scanned_files( ) , 1 ) ;
951
+ assert_eq ! ( metrics. skip_count, 1 ) ;
952
+
953
+ let subset_files = vec ! [ file_0_2. clone( ) , file_4_6. clone( ) ] ;
954
+ let e = col ( "int64" ) . gt ( lit ( 6i64 ) ) ;
955
+ let metrics = get_scan_metrics_with_files ( & table, & state, Some ( subset_files) , & [ e] ) . await ?;
956
+ assert_eq ! ( metrics. num_scanned_files( ) , 0 ) ;
957
+ assert_eq ! ( metrics. skip_count, 2 ) ;
958
+
959
+ Ok ( ( ) )
960
+ }
961
+
862
962
#[ tokio:: test]
863
963
async fn test_datafusion_partitioned_types ( ) -> Result < ( ) > {
864
964
let ctx = SessionContext :: new ( ) ;
0 commit comments