Skip to content

Commit 0d378ba

Browse files
committedNov 1, 2024
Fix a leak of open Blob files (#13106)
Summary: An earlier change (b34cef5) removed apparently unused functionality where an obsolete blob file number is passed for removal from TableCache, which manages SST files. This was actually relying on broken/fragile abstractions wherein TableCache and BlobFileCache share the same Cache and using the TableCache interface to manipulate blob file caching. No unit test was actually checking for removal of obsolete blob files from the cache (which is somewhat tricky to check and a second order correctness requirement). Here we fix the leak and add a DEBUG+ASAN-only check in DB::Close() that no obsolete files are lingering in the table/blob file cache. Fixes #13066 Important follow-up (FIXME): The added check discovered some apparent cases of leaked (into table_cache) SST file readers that would stick around until DB::Close(). Need to enable that check, diagnose, and fix. Pull Request resolved: #13106 Test Plan: added a check that is called during DB::Close in ASAN builds (to minimize paying the cost in all unit tests). Without the fix, the check failed in at least these tests: ``` db_blob_basic_test DBBlobBasicTest.DynamicallyWarmCacheDuringFlush db_blob_compaction_test DBBlobCompactionTest.CompactionReadaheadMerge db_blob_compaction_test DBBlobCompactionTest.MergeBlobWithBase db_blob_compaction_test DBBlobCompactionTest.CompactionDoNotFillCache db_blob_compaction_test DBBlobCompactionTest.SkipUntilFilter db_blob_compaction_test DBBlobCompactionTest.CompactionFilter db_blob_compaction_test DBBlobCompactionTest.CompactionReadaheadFilter db_blob_compaction_test DBBlobCompactionTest.CompactionReadaheadGarbageCollection ``` Reviewed By: ltamasi Differential Revision: D65296123 Pulled By: pdillinger fbshipit-source-id: 2276d76482beb2c75c9010bc1bec070bb23a24c0
1 parent 520436a commit 0d378ba

File tree

10 files changed

+88
-11
lines changed

10 files changed

+88
-11
lines changed
 

‎db/blob/blob_file_cache.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ Status BlobFileCache::GetBlobFileReader(
4242
assert(blob_file_reader);
4343
assert(blob_file_reader->IsEmpty());
4444

45+
// NOTE: sharing same Cache with table_cache
4546
const Slice key = GetSliceForKey(&blob_file_number);
4647

4748
assert(cache_);
@@ -98,4 +99,13 @@ Status BlobFileCache::GetBlobFileReader(
9899
return Status::OK();
99100
}
100101

102+
void BlobFileCache::Evict(uint64_t blob_file_number) {
103+
// NOTE: sharing same Cache with table_cache
104+
const Slice key = GetSliceForKey(&blob_file_number);
105+
106+
assert(cache_);
107+
108+
cache_.get()->Erase(key);
109+
}
110+
101111
} // namespace ROCKSDB_NAMESPACE

‎db/blob/blob_file_cache.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,15 @@ class BlobFileCache {
3636
uint64_t blob_file_number,
3737
CacheHandleGuard<BlobFileReader>* blob_file_reader);
3838

39+
// Called when a blob file is obsolete to ensure it is removed from the cache
40+
// to avoid effectively leaking the open file and assicated memory
41+
void Evict(uint64_t blob_file_number);
42+
43+
// Used to identify cache entries for blob files (not normally useful)
44+
static const Cache::CacheItemHelper* GetHelper() {
45+
return CacheInterface::GetBasicHelper();
46+
}
47+
3948
private:
4049
using CacheInterface =
4150
BasicTypedCacheInterface<BlobFileReader, CacheEntryRole::kMisc>;

‎db/column_family.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,7 @@ class ColumnFamilyData {
401401
SequenceNumber earliest_seq);
402402

403403
TableCache* table_cache() const { return table_cache_.get(); }
404+
BlobFileCache* blob_file_cache() const { return blob_file_cache_.get(); }
404405
BlobSource* blob_source() const { return blob_source_.get(); }
405406

406407
// See documentation in compaction_picker.h

‎db/db_impl/db_impl.cc

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -659,15 +659,19 @@ Status DBImpl::CloseHelper() {
659659
// We need to release them before the block cache is destroyed. The block
660660
// cache may be destroyed inside versions_.reset(), when column family data
661661
// list is destroyed, so leaving handles in table cache after
662-
// versions_.reset() may cause issues.
663-
// Here we clean all unreferenced handles in table cache.
662+
// versions_.reset() may cause issues. Here we clean all unreferenced handles
663+
// in table cache, and (for certain builds/conditions) assert that no obsolete
664+
// files are hanging around unreferenced (leak) in the table/blob file cache.
664665
// Now we assume all user queries have finished, so only version set itself
665666
// can possibly hold the blocks from block cache. After releasing unreferenced
666667
// handles here, only handles held by version set left and inside
667668
// versions_.reset(), we will release them. There, we need to make sure every
668669
// time a handle is released, we erase it from the cache too. By doing that,
669670
// we can guarantee that after versions_.reset(), table cache is empty
670671
// so the cache can be safely destroyed.
672+
#ifndef NDEBUG
673+
TEST_VerifyNoObsoleteFilesCached(/*db_mutex_already_held=*/true);
674+
#endif // !NDEBUG
671675
table_cache_->EraseUnRefEntries();
672676

673677
for (auto& txn_entry : recovered_transactions_) {

‎db/db_impl/db_impl.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1241,9 +1241,14 @@ class DBImpl : public DB {
12411241
static Status TEST_ValidateOptions(const DBOptions& db_options) {
12421242
return ValidateOptions(db_options);
12431243
}
1244-
12451244
#endif // NDEBUG
12461245

1246+
// In certain configurations, verify that the table/blob file cache only
1247+
// contains entries for live files, to check for effective leaks of open
1248+
// files. This can only be called when purging of obsolete files has
1249+
// "settled," such as during parts of DB Close().
1250+
void TEST_VerifyNoObsoleteFilesCached(bool db_mutex_already_held) const;
1251+
12471252
// persist stats to column family "_persistent_stats"
12481253
void PersistStats();
12491254

‎db/db_impl/db_impl_debug.cc

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
#ifndef NDEBUG
1111

12+
#include "db/blob/blob_file_cache.h"
1213
#include "db/column_family.h"
1314
#include "db/db_impl/db_impl.h"
1415
#include "db/error_handler.h"
@@ -328,5 +329,49 @@ size_t DBImpl::TEST_EstimateInMemoryStatsHistorySize() const {
328329
InstrumentedMutexLock l(&const_cast<DBImpl*>(this)->stats_history_mutex_);
329330
return EstimateInMemoryStatsHistorySize();
330331
}
332+
333+
void DBImpl::TEST_VerifyNoObsoleteFilesCached(
334+
bool db_mutex_already_held) const {
335+
// This check is somewhat expensive and obscure to make a part of every
336+
// unit test in every build variety. Thus, we only enable it for ASAN builds.
337+
if (!kMustFreeHeapAllocations) {
338+
return;
339+
}
340+
341+
std::optional<InstrumentedMutexLock> l;
342+
if (db_mutex_already_held) {
343+
mutex_.AssertHeld();
344+
} else {
345+
l.emplace(&mutex_);
346+
}
347+
348+
std::vector<uint64_t> live_files;
349+
for (auto cfd : *versions_->GetColumnFamilySet()) {
350+
if (cfd->IsDropped()) {
351+
continue;
352+
}
353+
// Sneakily add both SST and blob files to the same list
354+
cfd->current()->AddLiveFiles(&live_files, &live_files);
355+
}
356+
std::sort(live_files.begin(), live_files.end());
357+
358+
auto fn = [&live_files](const Slice& key, Cache::ObjectPtr, size_t,
359+
const Cache::CacheItemHelper* helper) {
360+
if (helper != BlobFileCache::GetHelper()) {
361+
// Skip non-blob files for now
362+
// FIXME: diagnose and fix the leaks of obsolete SST files revealed in
363+
// unit tests.
364+
return;
365+
}
366+
// See TableCache and BlobFileCache
367+
assert(key.size() == sizeof(uint64_t));
368+
uint64_t file_number;
369+
GetUnaligned(reinterpret_cast<const uint64_t*>(key.data()), &file_number);
370+
// Assert file is in sorted live_files
371+
assert(
372+
std::binary_search(live_files.begin(), live_files.end(), file_number));
373+
};
374+
table_cache_->ApplyToAllEntries(fn, {});
375+
}
331376
} // namespace ROCKSDB_NAMESPACE
332377
#endif // NDEBUG

‎db/table_cache.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ Status TableCache::GetTableReader(
164164
}
165165

166166
Cache::Handle* TableCache::Lookup(Cache* cache, uint64_t file_number) {
167+
// NOTE: sharing same Cache with BlobFileCache
167168
Slice key = GetSliceForFileNumber(&file_number);
168169
return cache->Lookup(key);
169170
}
@@ -179,6 +180,7 @@ Status TableCache::FindTable(
179180
size_t max_file_size_for_l0_meta_pin, Temperature file_temperature) {
180181
PERF_TIMER_GUARD_WITH_CLOCK(find_table_nanos, ioptions_.clock);
181182
uint64_t number = file_meta.fd.GetNumber();
183+
// NOTE: sharing same Cache with BlobFileCache
182184
Slice key = GetSliceForFileNumber(&number);
183185
*handle = cache_.Lookup(key);
184186
TEST_SYNC_POINT_CALLBACK("TableCache::FindTable:0",

‎db/version_builder.cc

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <vector>
2525

2626
#include "cache/cache_reservation_manager.h"
27+
#include "db/blob/blob_file_cache.h"
2728
#include "db/blob/blob_file_meta.h"
2829
#include "db/dbformat.h"
2930
#include "db/internal_stats.h"
@@ -744,12 +745,9 @@ class VersionBuilder::Rep {
744745
return Status::Corruption("VersionBuilder", oss.str());
745746
}
746747

747-
// Note: we use C++11 for now but in C++14, this could be done in a more
748-
// elegant way using generalized lambda capture.
749-
VersionSet* const vs = version_set_;
750-
const ImmutableCFOptions* const ioptions = ioptions_;
751-
752-
auto deleter = [vs, ioptions](SharedBlobFileMetaData* shared_meta) {
748+
auto deleter = [vs = version_set_, ioptions = ioptions_,
749+
bc = cfd_ ? cfd_->blob_file_cache()
750+
: nullptr](SharedBlobFileMetaData* shared_meta) {
753751
if (vs) {
754752
assert(ioptions);
755753
assert(!ioptions->cf_paths.empty());
@@ -758,6 +756,9 @@ class VersionBuilder::Rep {
758756
vs->AddObsoleteBlobFile(shared_meta->GetBlobFileNumber(),
759757
ioptions->cf_paths.front().path);
760758
}
759+
if (bc) {
760+
bc->Evict(shared_meta->GetBlobFileNumber());
761+
}
761762

762763
delete shared_meta;
763764
};
@@ -766,7 +767,7 @@ class VersionBuilder::Rep {
766767
blob_file_number, blob_file_addition.GetTotalBlobCount(),
767768
blob_file_addition.GetTotalBlobBytes(),
768769
blob_file_addition.GetChecksumMethod(),
769-
blob_file_addition.GetChecksumValue(), deleter);
770+
blob_file_addition.GetChecksumValue(), std::move(deleter));
770771

771772
mutable_blob_file_metas_.emplace(
772773
blob_file_number, MutableBlobFileMetaData(std::move(shared_meta)));

‎db/version_set.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1514,7 +1514,6 @@ class VersionSet {
15141514
void GetLiveFilesMetaData(std::vector<LiveFileMetaData>* metadata);
15151515

15161516
void AddObsoleteBlobFile(uint64_t blob_file_number, std::string path) {
1517-
// TODO: Erase file from BlobFileCache?
15181517
obsolete_blob_files_.emplace_back(blob_file_number, std::move(path));
15191518
}
15201519

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
* Fix a leak of obsolete blob files left open until DB::Close(). This bug was introduced in version 9.4.0.

0 commit comments

Comments
 (0)
Please sign in to comment.