Skip to content

Commit 663afef

Browse files
rynelisiying
authored andcommitted
Add EnvLibrados - RocksDB Env of RADOS (facebook#1222)
EnvLibrados is a customized RocksDB Env that uses RADOS as the backend file system of RocksDB. It overrides all file-system-related APIs of the default Env. The easiest way to use it is as follows: std::string db_name = "test_db"; std::string config_path = "path/to/ceph/config"; DB* db; Options options; options.env = EnvLibrados(db_name, config_path); Status s = DB::Open(options, kDBPath, &db); EnvLibrados will then forward all file read/write operations to the RADOS cluster specified by config_path. The default pool is db_name+"_pool". There are some options that users can set for EnvLibrados. - write_buffer_size. The maximum buffer size for a WritableFile. Once the buffer reaches write_buffer_size, EnvLibrados syncs the buffer content to RADOS and then clears the buffer. - db_pool. Rather than using the default pool, users can set their own DB pool name. - wal_dir. The directory for WAL files. Because RocksDB only has a 2-level structure (dir_name/file_name), the format of wal_dir is "/dir_name" (it CAN'T be "/dir1/dir2"). The default wal_dir is "/wal". - wal_pool. The corresponding pool name for WAL files. The default value is db_name+"_wal_pool". An example of setting these options looks like the following: db_name = "test_db"; db_pool = db_name+"_pool"; wal_dir = "/wal"; wal_pool = db_name+"_wal_pool"; write_buffer_size = 1 << 20; env_ = new EnvLibrados(db_name, config, db_pool, wal_dir, wal_pool, write_buffer_size); DB* db; Options options; options.env = env_; // The last-level dir name should match the dir name in prefix_pool_map options.wal_dir = "/tmp/wal"; // open DB Status s = DB::Open(options, kDBPath, &db); Librados is required to compile EnvLibrados. Use "$ make ROCKSDB_USE_LIBRADOS=1" to compile RocksDB with it (ROCKSDB_USE_LIBRADOS is the variable the Makefile checks). If you only want to compile the EnvLibrados test, just run "$ make env_librados_test ROCKSDB_USE_LIBRADOS=1". To run env_librados_test, you need a running RADOS cluster with its configuration file located at "../ceph/src/ceph.conf" relative to "rocksdb/".
1 parent 32604e6 commit 663afef

File tree

5 files changed

+2962
-1
lines changed

5 files changed

+2962
-1
lines changed

Makefile

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,12 @@ am__v_AR_ = $(am__v_AR_$(AM_DEFAULT_VERBOSITY))
130130
am__v_AR_0 = @echo " AR " $@;
131131
am__v_AR_1 =
132132

133-
AM_LINK = $(AM_V_CCLD)$(CXX) $^ $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
133+
# Optional RADOS support: when ROCKSDB_USE_LIBRADOS is defined, add the
# EnvLibrados source file to LIB_SOURCES and -lrados to LDFLAGS.
# NOTE(review): this conditional is evaluated before the platform-detection
# step further down, so the variable presumably has to be set on the make
# command line or in the environment -- confirm.
ifdef ROCKSDB_USE_LIBRADOS
134+
LIB_SOURCES += utilities/env_librados.cc
135+
LDFLAGS += -lrados
136+
endif
134137

138+
AM_LINK = $(AM_V_CCLD)$(CXX) $^ $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
135139
# detect what platform we're building on
136140
dummy := $(shell (export ROCKSDB_ROOT="$(CURDIR)"; "$(CURDIR)/build_tools/build_detect_platform" "$(CURDIR)/make_config.mk"))
137141
# this file is generated by the previous line to set build flags and sources
@@ -997,6 +1001,11 @@ spatial_db_test: utilities/spatialdb/spatial_db_test.o $(LIBOBJECTS) $(TESTHARNE
9971001
env_mirror_test: utilities/env_mirror_test.o $(LIBOBJECTS) $(TESTHARNESS)
9981002
$(AM_LINK)
9991003

1004+
# Test binary for EnvLibrados; the target only exists when
# ROCKSDB_USE_LIBRADOS is defined. The recipe below is textually the same
# command as the AM_LINK definition used by the neighbouring test targets.
ifdef ROCKSDB_USE_LIBRADOS
1005+
env_librados_test: utilities/env_librados_test.o $(LIBOBJECTS) $(TESTHARNESS)
1006+
$(AM_V_CCLD)$(CXX) $^ $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
1007+
endif
1008+
10001009
env_registry_test: utilities/env_registry_test.o $(LIBOBJECTS) $(TESTHARNESS)
10011010
$(AM_LINK)
10021011

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2+
// vim: ts=8 sw=2 smarttab
3+
#ifndef ROCKSDB_UTILITIES_ENV_LIBRADOS_H
4+
#define ROCKSDB_UTILITIES_ENV_LIBRADOS_H
5+
6+
#include <memory>
7+
#include <string>
8+
9+
#include "rocksdb/status.h"
10+
#include "rocksdb/utilities/env_mirror.h"
11+
12+
#include <rados/librados.hpp>
13+
14+
namespace rocksdb {
15+
// Forward declaration; EnvLibrados below declares this class as a friend.
class LibradosWritableFile;
16+
17+
// EnvLibrados is an Env (derived from EnvWrapper) that declares the file-system
// operations of Env and keeps a librados::Rados client together with one IoCtx
// for the DB pool and one IoCtx for the WAL pool.
//
// NOTE(review): this header uses std::vector and uint64_t without including
// <vector> / <cstdint> directly; it relies on other headers to provide them.
// NOTE(review): none of the member functions below is marked `override`, so
// the compiler will not diagnose a signature that drifts from the base class.
class EnvLibrados : public EnvWrapper {
18+
public:
19+
// Create a brand new sequentially-readable file with the specified name.
20+
// On success, stores a pointer to the new file in *result and returns OK.
21+
// On failure stores nullptr in *result and returns non-OK. If the file does
22+
// not exist, returns a non-OK status.
23+
//
24+
// The returned file will only be accessed by one thread at a time.
25+
Status NewSequentialFile(
26+
const std::string& fname,
27+
std::unique_ptr<SequentialFile>* result,
28+
const EnvOptions& options);
29+
30+
// Create a brand new random access read-only file with the
31+
// specified name. On success, stores a pointer to the new file in
32+
// *result and returns OK. On failure stores nullptr in *result and
33+
// returns non-OK. If the file does not exist, returns a non-OK
34+
// status.
35+
//
36+
// The returned file may be concurrently accessed by multiple threads.
37+
Status NewRandomAccessFile(
38+
const std::string& fname,
39+
std::unique_ptr<RandomAccessFile>* result,
40+
const EnvOptions& options);
41+
42+
// Create an object that writes to a new file with the specified
43+
// name. Deletes any existing file with the same name and creates a
44+
// new file. On success, stores a pointer to the new file in
45+
// *result and returns OK. On failure stores nullptr in *result and
46+
// returns non-OK.
47+
//
48+
// The returned file will only be accessed by one thread at a time.
49+
Status NewWritableFile(
50+
const std::string& fname,
51+
std::unique_ptr<WritableFile>* result,
52+
const EnvOptions& options);
53+
54+
// Reuse an existing file by renaming it and opening it as writable.
55+
Status ReuseWritableFile(
56+
const std::string& fname,
57+
const std::string& old_fname,
58+
std::unique_ptr<WritableFile>* result,
59+
const EnvOptions& options);
60+
61+
// Create an object that represents a directory. Will fail if directory
62+
// doesn't exist. If the directory exists, it will open the directory
63+
// and create a new Directory object.
64+
//
65+
// On success, stores a pointer to the new Directory in
66+
// *result and returns OK. On failure stores nullptr in *result and
67+
// returns non-OK.
68+
Status NewDirectory(
69+
const std::string& name,
70+
std::unique_ptr<Directory>* result);
71+
72+
// Returns OK if the named file exists.
73+
// NotFound if the named file does not exist,
74+
// the calling process does not have permission to determine
75+
// whether this file exists, or if the path is invalid.
76+
// IOError if an IO Error was encountered
77+
Status FileExists(const std::string& fname);
78+
79+
// Store in *result the names of the children of the specified directory.
80+
// The names are relative to "dir".
81+
// Original contents of *result are dropped.
82+
Status GetChildren(const std::string& dir,
83+
std::vector<std::string>* result);
84+
85+
// Delete the named file.
86+
Status DeleteFile(const std::string& fname);
87+
88+
// Create the specified directory. Returns error if directory exists.
89+
Status CreateDir(const std::string& dirname);
90+
91+
// Creates the directory if missing. Returns OK if it already exists or was
92+
// created successfully.
93+
Status CreateDirIfMissing(const std::string& dirname);
94+
95+
// Delete the specified directory.
96+
Status DeleteDir(const std::string& dirname);
97+
98+
// Store the size of fname in *file_size.
99+
Status GetFileSize(const std::string& fname, uint64_t* file_size);
100+
101+
// Store the last modification time of fname in *file_mtime.
102+
Status GetFileModificationTime(const std::string& fname,
103+
uint64_t* file_mtime);
104+
// Rename file src to target.
105+
Status RenameFile(const std::string& src,
106+
const std::string& target);
107+
// Hard Link file src to target.
108+
Status LinkFile(const std::string& src, const std::string& target);
109+
110+
// Lock the specified file. Used to prevent concurrent access to
111+
// the same db by multiple processes. On failure, stores nullptr in
112+
// *lock and returns non-OK.
113+
//
114+
// On success, stores a pointer to the object that represents the
115+
// acquired lock in *lock and returns OK. The caller should call
116+
// UnlockFile(*lock) to release the lock. If the process exits,
117+
// the lock will be automatically released.
118+
//
119+
// If somebody else already holds the lock, finishes immediately
120+
// with a failure. I.e., this call does not wait for existing locks
121+
// to go away.
122+
//
123+
// May create the named file if it does not already exist.
124+
Status LockFile(const std::string& fname, FileLock** lock);
125+
126+
// Release the lock acquired by a previous successful call to LockFile.
127+
// REQUIRES: lock was returned by a successful LockFile() call
128+
// REQUIRES: lock has not already been unlocked.
129+
Status UnlockFile(FileLock* lock);
130+
131+
// Get full directory name for this db.
132+
Status GetAbsolutePath(const std::string& db_path,
133+
std::string* output_path);
134+
135+
// Generate unique id
136+
std::string GenerateUniqueId();
137+
138+
// Get default EnvLibrados
139+
static EnvLibrados* Default();
140+
141+
// Construct an EnvLibrados from a DB name, the path of the Ceph config file
142+
// and the name of the pool used for DB files.
143+
// NOTE(review): the remaining settings (client/cluster name, flags, WAL dir,
144+
// WAL pool, write buffer size) are presumably given defaults in
145+
// env_librados.cc -- confirm there.
146+
explicit EnvLibrados(const std::string& db_name,
142+
const std::string& config_path,
143+
const std::string& db_pool);
144+
145+
// Fully-specified constructor: RADOS client settings (client_name,
146+
// cluster_name, flags) followed by the DB name, config file path, DB pool
147+
// name, WAL directory, WAL pool name and the WritableFile buffer max size.
148+
explicit EnvLibrados(const std::string& client_name, // first 3 parameters are for RADOS client init
146+
const std::string& cluster_name,
147+
const uint64_t flags,
148+
const std::string& db_name,
149+
const std::string& config_path,
150+
const std::string& db_pool,
151+
const std::string& wal_dir,
152+
const std::string& wal_pool,
153+
const uint64_t write_buffer_size);
154+
// Shuts down the RADOS client when this Env is destroyed.
~EnvLibrados() {
155+
_rados.shutdown();
156+
}
157+
private:
158+
std::string _client_name;
159+
std::string _cluster_name;
160+
uint64_t _flags;
161+
std::string _db_name; // get from user, readable string; Also used as db_id for db metadata
162+
std::string _config_path;
163+
librados::Rados _rados; // RADOS client
164+
std::string _db_pool_name;
165+
librados::IoCtx _db_pool_ioctx; // IoCtx for connecting db_pool
166+
std::string _wal_dir; // WAL dir path
167+
std::string _wal_pool_name;
168+
librados::IoCtx _wal_pool_ioctx; // IoCtx for connecting wal_pool
169+
uint64_t _write_buffer_size; // WritableFile buffer max size
170+
171+
/* private function to communicate with rados */
// NOTE(review): these helpers are only declared here. Judging by their names
// and signatures they presumably maintain a file-name -> fid mapping (create,
// look up, rename, add, delete), list the file names under a directory, and
// pick the IoCtx matching a path prefix -- confirm against env_librados.cc.
172+
std::string _CreateFid();
173+
Status _GetFid(const std::string& fname, std::string& fid);
174+
Status _GetFid(const std::string& fname, std::string& fid, int fid_len);
175+
Status _RenameFid(const std::string& old_fname, const std::string& new_fname);
176+
Status _AddFid(const std::string& fname, const std::string& fid);
177+
Status _DelFid(const std::string& fname);
178+
Status _GetSubFnames(
179+
const std::string& dirname,
180+
std::vector<std::string> * result
181+
);
182+
librados::IoCtx* _GetIoctx(const std::string& prefix);
183+
// Grants LibradosWritableFile access to the private members and helpers above.
friend class LibradosWritableFile;
184+
};
185+
}
186+
#endif

0 commit comments

Comments
 (0)