
Commit 04336cd

HIVE-29042: Performance degradation for add columns cascade (#5895)
1 parent 13a2313 commit 04336cd

2 files changed (+12, -13 lines)


standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/DirectSqlUpdatePart.java

Lines changed: 6 additions & 2 deletions
@@ -744,7 +744,7 @@ private void updateStorageDescriptorInBatch(Map<Long, StorageDescriptor> idToSd)
       throws MetaException {
     Map<Long, Long> sdIdToCdId = new HashMap<>();
     Map<Long, Long> sdIdToSerdeId = new HashMap<>();
-    List<Long> cdIds = new ArrayList<>();
+    Set<Long> cdIds = new HashSet<>();
     List<Long> validSdIds = filterIdsByNonNullValue(new ArrayList<>(idToSd.keySet()), idToSd);
     Batchable.runBatched(maxBatchSize, validSdIds, new Batchable<Long, Void>() {
       @Override
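
Why the List-to-Set change above helps: after an ALTER TABLE ... ADD COLUMNS ... CASCADE, many storage descriptors (typically one per partition) reference the same column descriptor (CD), so collecting CD ids into a List produced one entry per storage descriptor and the shared CD was reprocessed repeatedly, while a HashSet keeps each CD id once. A minimal standalone sketch of the effect; the 10,000-partition count and CD id 42 are made-up values for illustration only:

    import java.util.*;

    public class CdIdDedupSketch {
      public static void main(String[] args) {
        List<Long> cdIdsAsList = new ArrayList<>();
        Set<Long> cdIdsAsSet = new HashSet<>();
        // Hypothetical: 10,000 partitions whose storage descriptors all share CD id 42.
        for (int sd = 0; sd < 10_000; sd++) {
          cdIdsAsList.add(42L); // old behaviour: one entry per storage descriptor
          cdIdsAsSet.add(42L);  // new behaviour: the shared CD id is kept once
        }
        // Prints "10000 vs 1": the batched CD update now sees each distinct CD only once.
        System.out.println(cdIdsAsList.size() + " vs " + cdIdsAsSet.size());
      }
    }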
@@ -793,7 +793,7 @@ public List<Void> run(List<Long> input) throws Exception {
     updateBucketColsInBatch(idToBucketCols, validSdIds);
     updateSortColsInBatch(idToSortCols, validSdIds);
     updateSkewedInfoInBatch(idToSkewedInfo, validSdIds);
-    Map<Long, Long> sdIdToNewCdId = updateCDInBatch(cdIds, validSdIds, sdIdToCdId, sdIdToNewColumns);
+    Map<Long, Long> sdIdToNewCdId = updateCDInBatch(cdIds.stream().toList(), validSdIds, sdIdToCdId, sdIdToNewColumns);
     updateSerdeInBatch(serdeIds, serdeIdToSerde);
     updateParamTableInBatch("\"SERDE_PARAMS\"", "\"SERDE_ID\"", serdeIds, serdeParamsOpt);

@@ -1158,6 +1158,10 @@ public List<Void> run(List<Long> input) throws Exception {
     Map<Long, List<Pair<Integer, Integer>>> oldCdIdToColIdxPairs = new HashMap<>();
     for (Long sdId : sdIds) {
       Long cdId = sdIdToCdId.get(sdId);
+      if (oldCdIdToNewCdId.containsKey(cdId)) {
+        sdIdToNewCdId.put(sdId, oldCdIdToNewCdId.get(cdId));
+        continue;
+      }
       List<Pair<Integer, FieldSchema>> cols = cdIdToColIdxPair.get(cdId);
       // Placeholder to avoid IndexOutOfBoundsException.
       List<FieldSchema> oldCols = new ArrayList<>(Collections.nCopies(cols.size(), null));
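
The four added lines above are the other half of the same fix: once a new column descriptor has been created for a given old CD id (oldCdIdToNewCdId is presumably populated further down in the same loop, outside this hunk), every other storage descriptor pointing at that old CD can reuse the existing mapping instead of rebuilding the column list again. A small sketch of the reuse pattern, assuming a hypothetical createNewCd function standing in for the expensive per-CD work; this illustrates the pattern rather than reproducing the metastore code:

    import java.util.*;
    import java.util.function.Function;

    final class CdReuseSketch {
      // createNewCd stands in for the costly "build and persist a new column descriptor" step.
      static Map<Long, Long> mapSdsToNewCds(List<Long> sdIds, Map<Long, Long> sdIdToCdId,
                                            Function<Long, Long> createNewCd) {
        Map<Long, Long> oldCdIdToNewCdId = new HashMap<>();
        Map<Long, Long> sdIdToNewCdId = new HashMap<>();
        for (Long sdId : sdIds) {
          Long cdId = sdIdToCdId.get(sdId);
          // Fast path: reuse the new CD id already created for this old CD, if any.
          Long newCdId = oldCdIdToNewCdId.computeIfAbsent(cdId, createNewCd);
          sdIdToNewCdId.put(sdId, newCdId);
        }
        return sdIdToNewCdId;
      }
    }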

standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java

Lines changed: 6 additions & 11 deletions
@@ -420,35 +420,30 @@ public List<Void> run(List<Partition> input) throws Exception {
           req.setCatName(catName);
           req.setMaxParts((short) -1);
           parts = handler.get_partitions_req(req).getPartitions();
-          Table finalOldt = oldt;
+          Table table = oldt;
           int partitionBatchSize = MetastoreConf.getIntVar(handler.getConf(),
-              MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX);
+                  MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX);
           Batchable.runBatched(partitionBatchSize, parts, new Batchable<Partition, Void>() {
             @Override
             public List<Void> run(List<Partition> input) throws Exception {
               List<Partition> oldParts = new ArrayList<>(input.size());
               List<List<String>> partVals = input.stream().map(Partition::getValues).collect(Collectors.toList());
-              // update changed properties (stats)
               for (Partition part : input) {
                 Partition oldPart = new Partition(part);
                 List<FieldSchema> oldCols = part.getSd().getCols();
                 part.getSd().setCols(newt.getSd().getCols());
                 List<ColumnStatistics> colStats = updateOrGetPartitionColumnStats(msdb, catalogName, databaseName,
-                    tableName, part.getValues(), oldCols, finalOldt, part, null, null);
+                    tableName, part.getValues(), oldCols, table, part, null, null);
                 assert (colStats.isEmpty());
                 if (!cascade) {
+                  // update changed properties (stats)
                   oldPart.setParameters(part.getParameters());
                   oldParts.add(oldPart);
                 }
               }
               Deadline.checkTimeout();
-              if (cascade) {
-                msdb.alterPartitions(catalogName, databaseName, tableName, partVals, input, newt.getWriteId(),
-                    writeIdList);
-              } else {
-                msdb.alterPartitions(catalogName, newDbName, newTblName, partVals, oldParts, newt.getWriteId(),
-                    writeIdList);
-              }
+              msdb.alterPartitions(catalogName, databaseName, tableName,
+                  partVals, (cascade) ? input : oldParts, newt.getWriteId(), writeIdList);
               return Collections.emptyList();
             }
           });
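
The final part of the change collapses two nearly identical msdb.alterPartitions calls into one. The old non-cascade branch passed newDbName and newTblName; the consolidated call uses databaseName and tableName in both cases, and the only argument that still depends on cascade is the list of partitions to persist: the modified input partitions (carrying the new column list) when cascading, otherwise the oldPart copies carrying only parameter changes. A generic, self-contained sketch of that refactoring shape; the names and values below are illustrative and not Hive API:

    import java.util.*;

    final class BranchConsolidationSketch {
      static void alter(String table, List<String> parts) {
        System.out.println(table + " <- " + parts);
      }

      public static void main(String[] args) {
        boolean cascade = true;
        List<String> input = List.of("p1+newCols", "p2+newCols");  // partitions updated in place
        List<String> oldParts = List.of("p1 params", "p2 params"); // copies with parameter-only changes
        // Before: if (cascade) { alter(t, input); } else { alter(t, oldParts); }
        // After: one call, with the single differing argument chosen inline.
        alter("tbl", cascade ? input : oldParts);
      }
    }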
