Skip to content

Commit d0afa49

Browse files
author
Hongdan Zhu
committed
HIVE-28655: Implement HMS Related Drop Stats Changes, Reset COLUMN_STATS_ACCURATE After Dropping
1 parent ba1b83d commit d0afa49

File tree

4 files changed

+30
-1
lines changed

4 files changed

+30
-1
lines changed

standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7491,6 +7491,8 @@ public boolean delete_column_statistics_req(DeleteColumnStatisticsRequest req) t
74917491
.collect(Collectors.toList()) : colNames) {
74927492
for (String partName : partNames) {
74937493
List<String> partVals = getPartValsFromName(table, partName);
7494+
Partition partition = rawStore.getPartition(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName, partVals);
7495+
Map<String, String> partParams = partition.getParameters();
74947496
if (transactionalListeners != null && !transactionalListeners.isEmpty()) {
74957497
MetaStoreListenerNotifier.notifyEvent(transactionalListeners, eventType,
74967498
new DeletePartitionColumnStatEvent(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName,
@@ -7502,6 +7504,15 @@ public boolean delete_column_statistics_req(DeleteColumnStatisticsRequest req) t
75027504
}
75037505
}
75047506
}
7507+
// At the table level, update the COLUMN_STATS_ACCURATE parameter so it no longer lists the deleted columns
7508+
if (colNames == null || colNames.isEmpty()){
7509+
// remove all column names in parameter COLUMN_STATS_ACCURATE
7510+
StatsSetupConst.clearColumnStatsState(table.getParameters());
7511+
} else {
7512+
// remove the deleted column names in parameter COLUMN_STATS_ACCURATE
7513+
StatsSetupConst.removeColumnStatsState(table.getParameters(), colNames);
7514+
}
7515+
rawStore.alterTable(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName, table, null);
75057516
committed = rawStore.commitTransaction();
75067517
} finally {
75077518
if (!committed) {

standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ public static <T> String getIdListForIn(Collection<T> objectIds) throws MetaExce
183183

184184
// Table names with schema name, if necessary
185185
@TableName
186-
private String DBS, TBLS, PARTITIONS, DATABASE_PARAMS, PARTITION_PARAMS, SORT_COLS, SD_PARAMS,
186+
private String DBS, TBLS, PARTITIONS, DATABASE_PARAMS, TABLE_PARAMS, PARTITION_PARAMS, SORT_COLS, SD_PARAMS,
187187
SDS, SERDES, SKEWED_STRING_LIST_VALUES, SKEWED_VALUES, BUCKETING_COLS, SKEWED_COL_NAMES,
188188
SKEWED_COL_VALUE_LOC_MAP, COLUMNS_V2, PARTITION_KEYS, SERDE_PARAMS, PART_COL_STATS, KEY_CONSTRAINTS,
189189
TAB_COL_STATS, PARTITION_KEY_VALS, PART_PRIVS, PART_COL_PRIVS, SKEWED_STRING_LIST, CDS,

standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10260,6 +10260,8 @@ public boolean deletePartitionColumnStatistics(String catName, String dbName, St
1026010260
}
1026110261
dbName = org.apache.commons.lang3.StringUtils.defaultString(dbName, Warehouse.DEFAULT_DATABASE_NAME);
1026210262
catName = normalizeIdentifier(catName);
10263+
// use directSql to completely delete the parameter COLUMN_STATS_ACCURATE on partition level
10264+
directSql.deleteColumnStatsState(getTable(catName, dbName, tableName).getId());
1026310265
return new GetHelper<Boolean>(catName, dbName, tableName, true, true) {
1026410266
@Override
1026510267
protected String describeResult() {

standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1846,6 +1846,11 @@ public void testColumnStatistics() throws Throwable {
18461846
List<ColumnStatisticsObj> stats = client.getTableColumnStatistics(
18471847
dbName, tblName, Lists.newArrayList(colName[1]), ENGINE);
18481848
assertTrue("stats are not empty: " + stats, stats.isEmpty());
1849+
// test if all columns are deleted from parameter COLUMN_STATS_ACCURATE
1850+
Map<String, String> tableParams = client.getTable(dbName, tblName).getParameters();
1851+
String table_column_stats_accurate = tableParams.get("COLUMN_STATS_ACCURATE");
1852+
assertTrue("parameter COLUMN_STATS_ACCURATE is not accurate in " + tblName, table_column_stats_accurate == null ||
1853+
(!table_column_stats_accurate.contains(colName[0]) && !table_column_stats_accurate.contains(colName[1])));
18491854

18501855
colStats.setStatsDesc(statsDesc);
18511856
colStats.setStatsObj(statsObjs);
@@ -1863,6 +1868,11 @@ public void testColumnStatistics() throws Throwable {
18631868
// multiple columns
18641869
request.setCol_names(Arrays.asList(colName));
18651870
assertTrue(client.deleteColumnStatistics(request));
1871+
// test if the columns in colName array are deleted from parameter COLUMN_STATS_ACCURATE
1872+
tableParams = client.getTable(dbName, tblName).getParameters();
1873+
table_column_stats_accurate = tableParams.get("COLUMN_STATS_ACCURATE");
1874+
assertTrue("parameter COLUMN_STATS_ACCURATE is not accurate in " + tblName, table_column_stats_accurate == null ||
1875+
(!table_column_stats_accurate.contains(colName[0]) && !table_column_stats_accurate.contains(colName[1])));
18661876
colStats3 = client.getTableColumnStatistics(
18671877
dbName, tblName, Lists.newArrayList(colName), ENGINE);
18681878
assertTrue("stats are not empty: " + colStats3, colStats3.isEmpty());
@@ -1958,6 +1968,12 @@ public void testColumnStatistics() throws Throwable {
19581968
Lists.newArrayList(partitions.get(0), partitions.get(1), partitions.get(2)), Lists.newArrayList(colName), ENGINE);
19591969
assertEquals(1, stats2.size());
19601970
assertEquals(2, stats2.get(partitions.get(2)).size());
1971+
// test if all columns are deleted from the partition-level parameter COLUMN_STATS_ACCURATE
1972+
Partition partition_0 = client.getPartition(dbName, tblName, partitions.get(0));
1973+
Map<String, String> partitionParams = partition_0.getParameters();
1974+
String partition_column_stats_accurate = partitionParams.get("COLUMN_STATS_ACCURATE");
1975+
assertTrue("parameter COLUMN_STATS_ACCURATE is not accurate in " + partitions.get(0),partition_column_stats_accurate == null ||
1976+
(!table_column_stats_accurate.contains(colName[0]) && !table_column_stats_accurate.contains(colName[1])));
19611977

19621978
// no partition or column name is set
19631979
request.unsetPart_names();

0 commit comments

Comments
 (0)