Skip to content

Commit 76ac5c5

Browse files
author
Hongdan Zhu
committed
HIVE-28655: Implement HMS Related Drop Stats Changes, Reset COLUMN_STATS_ACCURATE After Dropping
1 parent 89e7d4a commit 76ac5c5

File tree

12 files changed

+213
-35
lines changed

12 files changed

+213
-35
lines changed

itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/cache/TestCachedStoreUpdateUsingEvents.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -829,7 +829,16 @@ private String getValidWriteIds(String dbName, String tblName) throws Throwable
829829
private void validateTablePara(String dbName, String tblName) throws Throwable {
830830
Table tblRead = rawStore.getTable(DEFAULT_CATALOG_NAME, dbName, tblName);
831831
Table tblRead1 = sharedCache.getTableFromCache(DEFAULT_CATALOG_NAME, dbName, tblName);
832-
Assert.assertEquals(tblRead.getParameters(), tblRead1.getParameters());
832+
// Prepare both the expected and actual table parameters
833+
Map<String, String> expected = new HashMap<>(tblRead.getParameters());
834+
Map<String, String> actual = new HashMap<>(tblRead1.getParameters());
835+
836+
// Exclude COLUMN_STATS_ACCURATE from both maps before comparing, because that parameter is now removed completely when stats are dropped
837+
expected.remove("COLUMN_STATS_ACCURATE");
838+
actual.remove("COLUMN_STATS_ACCURATE");
839+
840+
// Now assert equality without the COLUMN_STATS_ACCURATE key
841+
Assert.assertEquals(expected, actual);
833842
}
834843

835844
private void validatePartPara(String dbName, String tblName, String partName) throws Throwable {

ql/src/test/results/clientpositive/llap/acid_stats4.q.out

Lines changed: 159 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -567,36 +567,137 @@ POSTHOOK: Output: default@stats_part@p=104
567567
PREHOOK: query: explain select count(key) from stats_part where p = 101
568568
PREHOOK: type: QUERY
569569
PREHOOK: Input: default@stats_part
570+
PREHOOK: Input: default@stats_part@p=101
570571
#### A masked pattern was here ####
571572
POSTHOOK: query: explain select count(key) from stats_part where p = 101
572573
POSTHOOK: type: QUERY
573574
POSTHOOK: Input: default@stats_part
575+
POSTHOOK: Input: default@stats_part@p=101
574576
#### A masked pattern was here ####
575577
STAGE DEPENDENCIES:
576-
Stage-0 is a root stage
578+
Stage-1 is a root stage
579+
Stage-0 depends on stages: Stage-1
577580

578581
STAGE PLANS:
582+
Stage: Stage-1
583+
Tez
584+
#### A masked pattern was here ####
585+
Edges:
586+
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
587+
#### A masked pattern was here ####
588+
Vertices:
589+
Map 1
590+
Map Operator Tree:
591+
TableScan
592+
alias: stats_part
593+
filterExpr: (p = 101) (type: boolean)
594+
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
595+
Select Operator
596+
expressions: key (type: int)
597+
outputColumnNames: key
598+
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
599+
Group By Operator
600+
aggregations: count(key)
601+
minReductionHashAggr: 0.4
602+
mode: hash
603+
outputColumnNames: _col0
604+
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
605+
Reduce Output Operator
606+
null sort order:
607+
sort order:
608+
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
609+
value expressions: _col0 (type: bigint)
610+
Execution mode: vectorized, llap
611+
LLAP IO: may be used (ACID table)
612+
Reducer 2
613+
Execution mode: vectorized, llap
614+
Reduce Operator Tree:
615+
Group By Operator
616+
aggregations: count(VALUE._col0)
617+
mode: mergepartial
618+
outputColumnNames: _col0
619+
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
620+
File Output Operator
621+
compressed: false
622+
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
623+
table:
624+
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
625+
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
626+
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
627+
579628
Stage: Stage-0
580629
Fetch Operator
581-
limit: 1
630+
limit: -1
582631
Processor Tree:
583632
ListSink
584633

585634
PREHOOK: query: explain select count(key) from stats_part
586635
PREHOOK: type: QUERY
587636
PREHOOK: Input: default@stats_part
637+
PREHOOK: Input: default@stats_part@p=101
638+
PREHOOK: Input: default@stats_part@p=103
639+
PREHOOK: Input: default@stats_part@p=104
588640
#### A masked pattern was here ####
589641
POSTHOOK: query: explain select count(key) from stats_part
590642
POSTHOOK: type: QUERY
591643
POSTHOOK: Input: default@stats_part
644+
POSTHOOK: Input: default@stats_part@p=101
645+
POSTHOOK: Input: default@stats_part@p=103
646+
POSTHOOK: Input: default@stats_part@p=104
592647
#### A masked pattern was here ####
593648
STAGE DEPENDENCIES:
594-
Stage-0 is a root stage
649+
Stage-1 is a root stage
650+
Stage-0 depends on stages: Stage-1
595651

596652
STAGE PLANS:
653+
Stage: Stage-1
654+
Tez
655+
#### A masked pattern was here ####
656+
Edges:
657+
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
658+
#### A masked pattern was here ####
659+
Vertices:
660+
Map 1
661+
Map Operator Tree:
662+
TableScan
663+
alias: stats_part
664+
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
665+
Select Operator
666+
expressions: key (type: int)
667+
outputColumnNames: key
668+
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
669+
Group By Operator
670+
aggregations: count(key)
671+
minReductionHashAggr: 0.6666666
672+
mode: hash
673+
outputColumnNames: _col0
674+
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
675+
Reduce Output Operator
676+
null sort order:
677+
sort order:
678+
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
679+
value expressions: _col0 (type: bigint)
680+
Execution mode: vectorized, llap
681+
LLAP IO: may be used (ACID table)
682+
Reducer 2
683+
Execution mode: vectorized, llap
684+
Reduce Operator Tree:
685+
Group By Operator
686+
aggregations: count(VALUE._col0)
687+
mode: mergepartial
688+
outputColumnNames: _col0
689+
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
690+
File Output Operator
691+
compressed: false
692+
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
693+
table:
694+
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
695+
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
696+
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
697+
597698
Stage: Stage-0
598699
Fetch Operator
599-
limit: 1
700+
limit: -1
600701
Processor Tree:
601702
ListSink
602703

@@ -721,18 +822,70 @@ STAGE PLANS:
721822
PREHOOK: query: explain select count(value) from stats_part
722823
PREHOOK: type: QUERY
723824
PREHOOK: Input: default@stats_part
825+
PREHOOK: Input: default@stats_part@p=101
826+
PREHOOK: Input: default@stats_part@p=103
827+
PREHOOK: Input: default@stats_part@p=104
724828
#### A masked pattern was here ####
725829
POSTHOOK: query: explain select count(value) from stats_part
726830
POSTHOOK: type: QUERY
727831
POSTHOOK: Input: default@stats_part
832+
POSTHOOK: Input: default@stats_part@p=101
833+
POSTHOOK: Input: default@stats_part@p=103
834+
POSTHOOK: Input: default@stats_part@p=104
728835
#### A masked pattern was here ####
729836
STAGE DEPENDENCIES:
730-
Stage-0 is a root stage
837+
Stage-1 is a root stage
838+
Stage-0 depends on stages: Stage-1
731839

732840
STAGE PLANS:
841+
Stage: Stage-1
842+
Tez
843+
#### A masked pattern was here ####
844+
Edges:
845+
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
846+
#### A masked pattern was here ####
847+
Vertices:
848+
Map 1
849+
Map Operator Tree:
850+
TableScan
851+
alias: stats_part
852+
Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE
853+
Select Operator
854+
expressions: value (type: string)
855+
outputColumnNames: value
856+
Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE
857+
Group By Operator
858+
aggregations: count(value)
859+
minReductionHashAggr: 0.6666666
860+
mode: hash
861+
outputColumnNames: _col0
862+
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
863+
Reduce Output Operator
864+
null sort order:
865+
sort order:
866+
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
867+
value expressions: _col0 (type: bigint)
868+
Execution mode: llap
869+
LLAP IO: may be used (ACID table)
870+
Reducer 2
871+
Execution mode: vectorized, llap
872+
Reduce Operator Tree:
873+
Group By Operator
874+
aggregations: count(VALUE._col0)
875+
mode: mergepartial
876+
outputColumnNames: _col0
877+
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
878+
File Output Operator
879+
compressed: false
880+
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
881+
table:
882+
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
883+
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
884+
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
885+
733886
Stage: Stage-0
734887
Fetch Operator
735-
limit: 1
888+
limit: -1
736889
Processor Tree:
737890
ListSink
738891

ql/src/test/results/clientpositive/llap/alter_table_column_stats.q.out

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,7 +1105,6 @@ Retention: 0
11051105
#### A masked pattern was here ####
11061106
Table Type: MANAGED_TABLE
11071107
Table Parameters:
1108-
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
11091108
bucketing_version 2
11101109
#### A masked pattern was here ####
11111110
numFiles 2
@@ -1146,7 +1145,6 @@ Database: statsdb1
11461145
Table: testpart1
11471146
#### A masked pattern was here ####
11481147
Partition Parameters:
1149-
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
11501148
numFiles 1
11511149
numRows 10
11521150
rawDataSize 154
@@ -1238,7 +1236,6 @@ Database: statsdb1
12381236
Table: testpart1
12391237
#### A masked pattern was here ####
12401238
Partition Parameters:
1241-
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
12421239
numFiles 1
12431240
numRows 20
12441241
rawDataSize 312
@@ -1343,7 +1340,6 @@ Retention: 0
13431340
#### A masked pattern was here ####
13441341
Table Type: MANAGED_TABLE
13451342
Table Parameters:
1346-
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
13471343
bucketing_version 2
13481344
#### A masked pattern was here ####
13491345
numFiles 2
@@ -1384,7 +1380,6 @@ Database: statsdb1
13841380
Table: testpart1
13851381
#### A masked pattern was here ####
13861382
Partition Parameters:
1387-
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}}
13881383
numFiles 1
13891384
numRows 10
13901385
rawDataSize 154
@@ -1476,7 +1471,7 @@ Database: statsdb1
14761471
Table: testpart1
14771472
#### A masked pattern was here ####
14781473
Partition Parameters:
1479-
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}}
1474+
COLUMN_STATS_ACCURATE {}
14801475
numFiles 1
14811476
numRows 20
14821477
rawDataSize 312
@@ -1581,7 +1576,6 @@ Retention: 0
15811576
#### A masked pattern was here ####
15821577
Table Type: MANAGED_TABLE
15831578
Table Parameters:
1584-
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
15851579
bucketing_version 2
15861580
#### A masked pattern was here ####
15871581
numFiles 2
@@ -1622,7 +1616,6 @@ Database: statsdb1
16221616
Table: testpart1
16231617
#### A masked pattern was here ####
16241618
Partition Parameters:
1625-
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
16261619
numFiles 1
16271620
numRows 10
16281621
rawDataSize 154
@@ -1714,7 +1707,7 @@ Database: statsdb1
17141707
Table: testpart1
17151708
#### A masked pattern was here ####
17161709
Partition Parameters:
1717-
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
1710+
COLUMN_STATS_ACCURATE {}
17181711
numFiles 1
17191712
numRows 20
17201713
rawDataSize 312
@@ -1819,7 +1812,6 @@ Retention: 0
18191812
#### A masked pattern was here ####
18201813
Table Type: MANAGED_TABLE
18211814
Table Parameters:
1822-
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
18231815
bucketing_version 2
18241816
#### A masked pattern was here ####
18251817
numFiles 2
@@ -3102,7 +3094,6 @@ Retention: 0
31023094
#### A masked pattern was here ####
31033095
Table Type: MANAGED_TABLE
31043096
Table Parameters:
3105-
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
31063097
bucketing_version 2
31073098
#### A masked pattern was here ####
31083099
numFiles 2
@@ -3143,7 +3134,6 @@ Database: statsdb1
31433134
Table: testpart1
31443135
#### A masked pattern was here ####
31453136
Partition Parameters:
3146-
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
31473137
numFiles 1
31483138
numRows 10
31493139
rawDataSize 154
@@ -3235,7 +3225,6 @@ Database: statsdb1
32353225
Table: testpart1
32363226
#### A masked pattern was here ####
32373227
Partition Parameters:
3238-
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
32393228
numFiles 1
32403229
numRows 20
32413230
rawDataSize 312
@@ -3340,7 +3329,6 @@ Retention: 0
33403329
#### A masked pattern was here ####
33413330
Table Type: MANAGED_TABLE
33423331
Table Parameters:
3343-
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
33443332
bucketing_version 2
33453333
#### A masked pattern was here ####
33463334
numFiles 2
@@ -3381,7 +3369,6 @@ Database: statsdb1
33813369
Table: testpart1
33823370
#### A masked pattern was here ####
33833371
Partition Parameters:
3384-
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}}
33853372
numFiles 1
33863373
numRows 10
33873374
rawDataSize 154
@@ -3473,7 +3460,7 @@ Database: statsdb1
34733460
Table: testpart1
34743461
#### A masked pattern was here ####
34753462
Partition Parameters:
3476-
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}}
3463+
COLUMN_STATS_ACCURATE {}
34773464
numFiles 1
34783465
numRows 20
34793466
rawDataSize 312
@@ -3578,7 +3565,6 @@ Retention: 0
35783565
#### A masked pattern was here ####
35793566
Table Type: MANAGED_TABLE
35803567
Table Parameters:
3581-
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
35823568
bucketing_version 2
35833569
#### A masked pattern was here ####
35843570
numFiles 2
@@ -3619,7 +3605,6 @@ Database: statsdb1
36193605
Table: testpart1
36203606
#### A masked pattern was here ####
36213607
Partition Parameters:
3622-
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
36233608
numFiles 1
36243609
numRows 10
36253610
rawDataSize 154
@@ -3711,7 +3696,7 @@ Database: statsdb1
37113696
Table: testpart1
37123697
#### A masked pattern was here ####
37133698
Partition Parameters:
3714-
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}}
3699+
COLUMN_STATS_ACCURATE {}
37153700
numFiles 1
37163701
numRows 20
37173702
rawDataSize 312
@@ -3816,7 +3801,6 @@ Retention: 0
38163801
#### A masked pattern was here ####
38173802
Table Type: MANAGED_TABLE
38183803
Table Parameters:
3819-
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
38203804
bucketing_version 2
38213805
#### A masked pattern was here ####
38223806
numFiles 2

0 commit comments

Comments
 (0)