Skip to content

Commit ab13d43

Browse files
anand76facebook-github-bot
authored andcommitted
Pass a timeout to FileSystem for random reads (facebook#6751)
Summary: Calculate ```IOOptions::timeout``` using ```ReadOptions::deadline``` and pass it to ```FileSystem::Read/FileSystem::MultiRead```. This allows us to impose a tighter bound on the time taken by Get/MultiGet on FileSystems/Envs that support IO timeouts. Even on those that don't support them, check in ```RandomAccessFileReader::Read``` and ```MultiRead``` and return ```Status::TimedOut()``` if the deadline is exceeded. For now, TableReader creation, which might do file opens and reads, is not covered. It will be implemented in another PR. Tests: Update existing unit tests to verify that the correct timeout value is being passed. Pull Request resolved: facebook#6751 Reviewed By: riversand963 Differential Revision: D21285631 Pulled By: anand1976 fbshipit-source-id: d89af843e5a91ece866e87aa29438b52a65a8567
1 parent eecd8fb commit ab13d43

24 files changed

+196
-113
lines changed

db/db_basic_test.cc

Lines changed: 47 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2387,17 +2387,22 @@ class DBBasicTestMultiGetDeadline : public DBBasicTestMultiGet {
23872387

23882388
class DeadlineRandomAccessFile : public FSRandomAccessFileWrapper {
23892389
public:
2390-
DeadlineRandomAccessFile(DeadlineFS& fs,
2390+
DeadlineRandomAccessFile(DeadlineFS& fs, SpecialEnv* env,
23912391
std::unique_ptr<FSRandomAccessFile>& file)
23922392
: FSRandomAccessFileWrapper(file.get()),
23932393
fs_(fs),
2394-
file_(std::move(file)) {}
2394+
file_(std::move(file)),
2395+
env_(env) {}
23952396

23962397
IOStatus Read(uint64_t offset, size_t len, const IOOptions& opts,
23972398
Slice* result, char* scratch, IODebugContext* dbg) const override {
23982399
int delay;
2400+
const std::chrono::microseconds deadline = fs_.GetDeadline();
2401+
if (deadline.count()) {
2402+
AssertDeadline(deadline, opts);
2403+
}
23992404
if (fs_.ShouldDelay(&delay)) {
2400-
Env::Default()->SleepForMicroseconds(delay);
2405+
env_->SleepForMicroseconds(delay);
24012406
}
24022407
return FSRandomAccessFileWrapper::Read(offset, len, opts, result, scratch,
24032408
dbg);
@@ -2406,22 +2411,37 @@ class DBBasicTestMultiGetDeadline : public DBBasicTestMultiGet {
24062411
IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs,
24072412
const IOOptions& options, IODebugContext* dbg) override {
24082413
int delay;
2414+
const std::chrono::microseconds deadline = fs_.GetDeadline();
2415+
if (deadline.count()) {
2416+
AssertDeadline(deadline, options);
2417+
}
24092418
if (fs_.ShouldDelay(&delay)) {
2410-
Env::Default()->SleepForMicroseconds(delay);
2419+
env_->SleepForMicroseconds(delay);
24112420
}
24122421
return FSRandomAccessFileWrapper::MultiRead(reqs, num_reqs, options, dbg);
24132422
}
24142423

24152424
private:
2425+
void AssertDeadline(const std::chrono::microseconds deadline,
2426+
const IOOptions& opts) const {
2427+
// NOTE: the check below is an exact comparison with no leeway, so it relies
2428+
// on env_'s clock not advancing between the timeout computation and this call
2429+
std::chrono::microseconds now =
2430+
std::chrono::microseconds(env_->NowMicros());
2431+
ASSERT_EQ(deadline - now, opts.timeout);
2432+
}
24162433
DeadlineFS& fs_;
24172434
std::unique_ptr<FSRandomAccessFile> file_;
2435+
SpecialEnv* env_;
24182436
};
24192437

24202438
class DeadlineFS : public FileSystemWrapper {
24212439
public:
2422-
DeadlineFS()
2423-
: FileSystemWrapper(FileSystem::Default()),
2424-
delay_idx_(0) {}
2440+
DeadlineFS(SpecialEnv* env)
2441+
: FileSystemWrapper(FileSystem::Default()),
2442+
delay_idx_(0),
2443+
deadline_(std::chrono::microseconds::zero()),
2444+
env_(env) {}
24252445
~DeadlineFS() = default;
24262446

24272447
IOStatus NewRandomAccessFile(const std::string& fname,
@@ -2432,13 +2452,14 @@ class DBBasicTestMultiGetDeadline : public DBBasicTestMultiGet {
24322452
IOStatus s;
24332453

24342454
s = target()->NewRandomAccessFile(fname, opts, &file, dbg);
2435-
result->reset(new DeadlineRandomAccessFile(*this, file));
2455+
result->reset(new DeadlineRandomAccessFile(*this, env_, file));
24362456
return s;
24372457
}
24382458

24392459
// Set a vector of {IO counter, delay in microseconds} pairs that control
24402460
// when to inject a delay and duration of the delay
2441-
void SetDelaySequence(const std::vector<std::pair<int, int>>&& seq) {
2461+
void SetDelaySequence(const std::chrono::microseconds deadline,
2462+
const std::vector<std::pair<int, int>>&& seq) {
24422463
int total_delay = 0;
24432464
for (auto& seq_iter : seq) {
24442465
// Ensure no individual delay is > 500ms
@@ -2451,6 +2472,7 @@ class DBBasicTestMultiGetDeadline : public DBBasicTestMultiGet {
24512472
delay_seq_ = seq;
24522473
delay_idx_ = 0;
24532474
io_count_ = 0;
2475+
deadline_ = deadline;
24542476
}
24552477

24562478
// Increment the IO counter and return a delay in microseconds
@@ -2464,10 +2486,14 @@ class DBBasicTestMultiGetDeadline : public DBBasicTestMultiGet {
24642486
return false;
24652487
}
24662488

2489+
const std::chrono::microseconds GetDeadline() { return deadline_; }
2490+
24672491
private:
24682492
std::vector<std::pair<int, int>> delay_seq_;
24692493
size_t delay_idx_;
24702494
int io_count_;
2495+
std::chrono::microseconds deadline_;
2496+
SpecialEnv* env_;
24712497
};
24722498

24732499
inline void CheckStatus(std::vector<Status>& statuses, size_t num_ok) {
@@ -2483,8 +2509,10 @@ class DBBasicTestMultiGetDeadline : public DBBasicTestMultiGet {
24832509

24842510
TEST_F(DBBasicTestMultiGetDeadline, MultiGetDeadlineExceeded) {
24852511
std::shared_ptr<DBBasicTestMultiGetDeadline::DeadlineFS> fs(
2486-
new DBBasicTestMultiGetDeadline::DeadlineFS());
2487-
std::unique_ptr<Env> env = NewCompositeEnv(fs);
2512+
new DBBasicTestMultiGetDeadline::DeadlineFS(env_));
2513+
std::unique_ptr<Env> env(new CompositeEnvWrapper(env_, fs));
2514+
env_->no_slowdown_ = true;
2515+
env_->time_elapse_only_sleep_.store(true);
24882516
Options options = CurrentOptions();
24892517

24902518
std::shared_ptr<Cache> cache = NewLRUCache(1048576);
@@ -2509,13 +2537,13 @@ TEST_F(DBBasicTestMultiGetDeadline, MultiGetDeadlineExceeded) {
25092537
cfs[i] = handles_[i];
25102538
keys[i] = Slice(key_str[i].data(), key_str[i].size());
25112539
}
2512-
// Delay the first IO by 200ms
2513-
fs->SetDelaySequence({{0, 200000}});
25142540

25152541
ReadOptions ro;
25162542
ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
2543+
// Delay the first IO by 20ms
2544+
fs->SetDelaySequence(ro.deadline, {{0, 20000}});
2545+
25172546
std::vector<Status> statuses = dbfull()->MultiGet(ro, cfs, keys, &values);
2518-
std::cout << "Non-batched MultiGet";
25192547
// The first key is successful because we check after the lookup, but
25202548
// subsequent keys fail due to deadline exceeded
25212549
CheckStatus(statuses, 1);
@@ -2537,10 +2565,9 @@ TEST_F(DBBasicTestMultiGetDeadline, MultiGetDeadlineExceeded) {
25372565
cfs[i] = handles_[i / 2];
25382566
keys[i] = Slice(key_str[i].data(), key_str[i].size());
25392567
}
2540-
fs->SetDelaySequence({{1, 200000}});
25412568
ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
2569+
fs->SetDelaySequence(ro.deadline, {{1, 20000}});
25422570
statuses = dbfull()->MultiGet(ro, cfs, keys, &values);
2543-
std::cout << "Non-batched 2";
25442571
CheckStatus(statuses, 3);
25452572

25462573
// Test batched MultiGet with an IO delay in the first data block read.
@@ -2552,11 +2579,10 @@ TEST_F(DBBasicTestMultiGetDeadline, MultiGetDeadlineExceeded) {
25522579
cache->SetCapacity(1048576);
25532580
statuses.clear();
25542581
statuses.resize(keys.size());
2555-
fs->SetDelaySequence({{0, 200000}});
25562582
ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
2583+
fs->SetDelaySequence(ro.deadline, {{0, 20000}});
25572584
dbfull()->MultiGet(ro, keys.size(), cfs.data(), keys.data(),
25582585
pin_values.data(), statuses.data());
2559-
std::cout << "Batched 1";
25602586
CheckStatus(statuses, 2);
25612587

25622588
// Similar to the previous one, but an IO delay in the third CF data block
@@ -2568,11 +2594,10 @@ TEST_F(DBBasicTestMultiGetDeadline, MultiGetDeadlineExceeded) {
25682594
cache->SetCapacity(1048576);
25692595
statuses.clear();
25702596
statuses.resize(keys.size());
2571-
fs->SetDelaySequence({{2, 200000}});
25722597
ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
2598+
fs->SetDelaySequence(ro.deadline, {{2, 20000}});
25732599
dbfull()->MultiGet(ro, keys.size(), cfs.data(), keys.data(),
25742600
pin_values.data(), statuses.data());
2575-
std::cout << "Batched 2";
25762601
CheckStatus(statuses, 6);
25772602

25782603
// Similar to the previous one, but an IO delay in the last but one CF
@@ -2583,11 +2608,10 @@ TEST_F(DBBasicTestMultiGetDeadline, MultiGetDeadlineExceeded) {
25832608
cache->SetCapacity(1048576);
25842609
statuses.clear();
25852610
statuses.resize(keys.size());
2586-
fs->SetDelaySequence({{3, 200000}});
25872611
ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
2612+
fs->SetDelaySequence(ro.deadline, {{3, 20000}});
25882613
dbfull()->MultiGet(ro, keys.size(), cfs.data(), keys.data(),
25892614
pin_values.data(), statuses.data());
2590-
std::cout << "Batched 3";
25912615
CheckStatus(statuses, 8);
25922616

25932617
// Test batched MultiGet with single CF and lots of keys. Inject delay
@@ -2610,11 +2634,10 @@ TEST_F(DBBasicTestMultiGetDeadline, MultiGetDeadlineExceeded) {
26102634
}
26112635
statuses.clear();
26122636
statuses.resize(keys.size());
2613-
fs->SetDelaySequence({{1, 200000}});
26142637
ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
2638+
fs->SetDelaySequence(ro.deadline, {{1, 20000}});
26152639
dbfull()->MultiGet(ro, handles_[0], keys.size(), keys.data(),
26162640
pin_values.data(), statuses.data());
2617-
std::cout << "Batched single CF";
26182641
CheckStatus(statuses, 64);
26192642
Close();
26202643
}

db/db_impl/db_impl.cc

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1527,11 +1527,6 @@ Status DBImpl::GetImpl(const ReadOptions& read_options, const Slice& key,
15271527
GetImplOptions& get_impl_options) {
15281528
assert(get_impl_options.value != nullptr ||
15291529
get_impl_options.merge_operands != nullptr);
1530-
// We will eventually support deadline for Get requests too, but safeguard
1531-
// for now
1532-
if (read_options.deadline != std::chrono::microseconds::zero()) {
1533-
return Status::NotSupported("ReadOptions deadline is not supported");
1534-
}
15351530

15361531
#ifndef NDEBUG
15371532
assert(get_impl_options.column_family);

file/file_prefetch_buffer.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,7 @@ Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader,
8888

8989
Slice result;
9090
size_t read_len = static_cast<size_t>(roundup_len - chunk_len);
91-
s = reader->Read(rounddown_offset + chunk_len,
92-
read_len, &result,
91+
s = reader->Read(IOOptions(), rounddown_offset + chunk_len, read_len, &result,
9392
buffer_.BufferStart() + chunk_len, nullptr, for_compaction);
9493
#ifndef NDEBUG
9594
if (!s.ok() || result.size() < read_len) {

file/file_util.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,20 @@ extern Status DeleteDBFile(const ImmutableDBOptions* db_options,
3030

3131
extern bool IsWalDirSameAsDBPath(const ImmutableDBOptions* db_options);
3232

33+
// Converts the absolute ReadOptions::deadline into a relative
// IOOptions::timeout measured from the current time of `env`.
//
// ro:   read options; a zero deadline means no deadline was requested, in
//       which case `opts` is left untouched.
// env:  clock source; falls back to Env::Default() when null.
// opts: receives the remaining time budget in opts.timeout.
//
// Returns IOStatus::TimedOut() if the deadline has already been reached,
// IOStatus::OK() otherwise.
inline IOStatus PrepareIOFromReadOptions(const ReadOptions& ro, Env* env,
                                         IOOptions& opts) {
  if (!env) {
    env = Env::Default();
  }

  if (ro.deadline.count()) {
    const std::chrono::microseconds now(env->NowMicros());
    // Use >= rather than >: when now == deadline the remaining budget would
    // be zero, and a zero opts.timeout is treated downstream as "no timeout
    // specified", which would silently turn an expired read into an
    // unbounded one. Guarantee at least 1us of budget or fail here.
    if (now >= ro.deadline) {
      return IOStatus::TimedOut("Deadline exceeded");
    }
    opts.timeout = ro.deadline - now;
  }
  return IOStatus::OK();
}
48+
3349
} // namespace ROCKSDB_NAMESPACE

file/random_access_file_reader.cc

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,16 @@
1515
#include "monitoring/histogram.h"
1616
#include "monitoring/iostats_context_imp.h"
1717
#include "port/port.h"
18+
#include "table/format.h"
1819
#include "test_util/sync_point.h"
1920
#include "util/random.h"
2021
#include "util/rate_limiter.h"
2122

2223
namespace ROCKSDB_NAMESPACE {
2324

24-
Status RandomAccessFileReader::Read(uint64_t offset, size_t n, Slice* result,
25-
char* scratch, AlignedBuf* aligned_buf,
25+
Status RandomAccessFileReader::Read(const IOOptions& opts, uint64_t offset,
26+
size_t n, Slice* result, char* scratch,
27+
AlignedBuf* aligned_buf,
2628
bool for_compaction) const {
2729
(void)aligned_buf;
2830
Status s;
@@ -62,10 +64,16 @@ Status RandomAccessFileReader::Read(uint64_t offset, size_t n, Slice* result,
6264
start_ts = std::chrono::system_clock::now();
6365
orig_offset = aligned_offset + buf.CurrentSize();
6466
}
67+
6568
{
6669
IOSTATS_CPU_TIMER_GUARD(cpu_read_nanos, env_);
67-
s = file_->Read(aligned_offset + buf.CurrentSize(), allowed,
68-
IOOptions(), &tmp, buf.Destination(), nullptr);
70+
// Only user reads are expected to specify a timeout. And user reads
71+
// are not subjected to rate_limiter and should go through only
72+
// one iteration of this loop, so we don't need to check and adjust
73+
// the opts.timeout before calling file_->Read
74+
assert(!opts.timeout.count() || allowed == read_size);
75+
s = file_->Read(aligned_offset + buf.CurrentSize(), allowed, opts,
76+
&tmp, buf.Destination(), nullptr);
6977
}
7078
if (ShouldNotifyListeners()) {
7179
auto finish_ts = std::chrono::system_clock::now();
@@ -116,9 +124,15 @@ Status RandomAccessFileReader::Read(uint64_t offset, size_t n, Slice* result,
116124
start_ts = std::chrono::system_clock::now();
117125
}
118126
#endif
127+
119128
{
120129
IOSTATS_CPU_TIMER_GUARD(cpu_read_nanos, env_);
121-
s = file_->Read(offset + pos, allowed, IOOptions(), &tmp_result,
130+
// Only user reads are expected to specify a timeout. And user reads
131+
// are not subjected to rate_limiter and should go through only
132+
// one iteration of this loop, so we don't need to check and adjust
133+
// the opts.timeout before calling file_->Read
134+
assert(!opts.timeout.count() || allowed == n);
135+
s = file_->Read(offset + pos, allowed, opts, &tmp_result,
122136
scratch + pos, nullptr);
123137
}
124138
#ifndef ROCKSDB_LITE
@@ -186,7 +200,8 @@ bool TryMerge(FSReadRequest* dest, const FSReadRequest& src) {
186200
return true;
187201
}
188202

189-
Status RandomAccessFileReader::MultiRead(FSReadRequest* read_reqs,
203+
Status RandomAccessFileReader::MultiRead(const IOOptions& opts,
204+
FSReadRequest* read_reqs,
190205
size_t num_reqs,
191206
AlignedBuf* aligned_buf) const {
192207
(void)aligned_buf; // suppress warning of unused variable in LITE mode
@@ -244,9 +259,10 @@ Status RandomAccessFileReader::MultiRead(FSReadRequest* read_reqs,
244259
start_ts = std::chrono::system_clock::now();
245260
}
246261
#endif // ROCKSDB_LITE
262+
247263
{
248264
IOSTATS_CPU_TIMER_GUARD(cpu_read_nanos, env_);
249-
s = file_->MultiRead(fs_reqs, num_fs_reqs, IOOptions(), nullptr);
265+
s = file_->MultiRead(fs_reqs, num_fs_reqs, opts, nullptr);
250266
}
251267

252268
#ifndef ROCKSDB_LITE

file/random_access_file_reader.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,15 +114,16 @@ class RandomAccessFileReader {
114114
// 2. Otherwise, scratch is not used and can be null, the aligned_buf owns
115115
// the internally allocated buffer on return, and the result refers to a
116116
// region in aligned_buf.
117-
Status Read(uint64_t offset, size_t n, Slice* result, char* scratch,
118-
AlignedBuf* aligned_buf, bool for_compaction = false) const;
117+
Status Read(const IOOptions& opts, uint64_t offset, size_t n, Slice* result,
118+
char* scratch, AlignedBuf* aligned_buf,
119+
bool for_compaction = false) const;
119120

120121
// REQUIRES:
121122
// num_reqs > 0, reqs do not overlap, and offsets in reqs are increasing.
122123
// In non-direct IO mode, aligned_buf should be null;
123124
// In direct IO mode, aligned_buf stores the aligned buffer allocated inside
124125
// MultiRead, the result Slices in reqs refer to aligned_buf.
125-
Status MultiRead(FSReadRequest* reqs, size_t num_reqs,
126+
Status MultiRead(const IOOptions& opts, FSReadRequest* reqs, size_t num_reqs,
126127
AlignedBuf* aligned_buf) const;
127128

128129
Status Prefetch(uint64_t offset, size_t n) const {
@@ -134,5 +135,7 @@ class RandomAccessFileReader {
134135
std::string file_name() const { return file_name_; }
135136

136137
bool use_direct_io() const { return file_->use_direct_io(); }
138+
139+
Env* env() const { return env_; }
137140
};
138141
} // namespace ROCKSDB_NAMESPACE

file/random_access_file_reader_test.cc

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,8 @@ TEST_F(RandomAccessFileReaderTest, ReadDirectIO) {
106106
Slice result;
107107
AlignedBuf buf;
108108
for (bool for_compaction : {true, false}) {
109-
ASSERT_OK(r->Read(offset, len, &result, nullptr, &buf, for_compaction));
109+
ASSERT_OK(r->Read(IOOptions(), offset, len, &result, nullptr, &buf,
110+
for_compaction));
110111
ASSERT_EQ(result.ToString(), content.substr(offset, len));
111112
}
112113
}
@@ -152,7 +153,8 @@ TEST_F(RandomAccessFileReaderTest, MultiReadDirectIO) {
152153
reqs.push_back(std::move(r0));
153154
reqs.push_back(std::move(r1));
154155
AlignedBuf aligned_buf;
155-
ASSERT_OK(r->MultiRead(reqs.data(), reqs.size(), &aligned_buf));
156+
ASSERT_OK(
157+
r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf));
156158

157159
AssertResult(content, reqs);
158160
}
@@ -189,7 +191,8 @@ TEST_F(RandomAccessFileReaderTest, MultiReadDirectIO) {
189191
reqs.push_back(std::move(r1));
190192
reqs.push_back(std::move(r2));
191193
AlignedBuf aligned_buf;
192-
ASSERT_OK(r->MultiRead(reqs.data(), reqs.size(), &aligned_buf));
194+
ASSERT_OK(
195+
r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf));
193196

194197
AssertResult(content, reqs);
195198
}
@@ -226,7 +229,8 @@ TEST_F(RandomAccessFileReaderTest, MultiReadDirectIO) {
226229
reqs.push_back(std::move(r1));
227230
reqs.push_back(std::move(r2));
228231
AlignedBuf aligned_buf;
229-
ASSERT_OK(r->MultiRead(reqs.data(), reqs.size(), &aligned_buf));
232+
ASSERT_OK(
233+
r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf));
230234

231235
AssertResult(content, reqs);
232236
}
@@ -255,7 +259,8 @@ TEST_F(RandomAccessFileReaderTest, MultiReadDirectIO) {
255259
reqs.push_back(std::move(r0));
256260
reqs.push_back(std::move(r1));
257261
AlignedBuf aligned_buf;
258-
ASSERT_OK(r->MultiRead(reqs.data(), reqs.size(), &aligned_buf));
262+
ASSERT_OK(
263+
r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf));
259264

260265
AssertResult(content, reqs);
261266
}

0 commit comments

Comments
 (0)