Skip to content

Commit 85f250e

Browse files
authored
MAINT: apply new joblib store API (#9)
* MAINT: apply new joblib store API
1 parent ba03e94 commit 85f250e

File tree

4 files changed

+37
-54
lines changed

4 files changed

+37
-54
lines changed

README.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,9 @@ Using joblib-hadoop on a Hadoop cluster
6464
if __name__ == '__main__':
6565
register_hdfs_store_backend()
6666
67-
mem = Memory(location='joblib_cache_hdfs',
68-
backend='hdfs', host='namenode', port=8020, user='test',
69-
verbose=100, compress=True)
67+
mem = Memory(location='joblib_cache_hdfs', backend='hdfs',
68+
verbose=100, compress=True,
69+
backend_options=dict(host='namenode', port=8020, user='test'))
7070
7171
multiply = mem.cache(np.multiply)
7272
array1 = np.arange(10000)

examples/joblib_hdfs_multiply.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
mem = Memory(location='joblib_cache_hdfs', backend='hdfs',
1111
verbose=100, compress=True,
12-
store_options=dict(host='namenode', port=9000, user='test'))
12+
backend_options=dict(host='namenode', port=9000, user='test'))
1313
mem.clear()
1414
multiply = mem.cache(np.multiply)
1515
array1 = np.arange(1000)

joblibhadoop/hdfs/backend.py

Lines changed: 19 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77
from joblib._store_backends import (StoreBackendBase, StoreBackendMixin,
88
CacheItemInfo)
99

10+
DEFAULT_BACKEND_OPTIONS = dict(host='localhost', port=9000, user=None,
11+
ticket_cache=None, token=None, pars=None,
12+
connect=True)
13+
1014

1115
class HDFSStoreBackend(StoreBackendBase, StoreBackendMixin):
1216
"""A StoreBackend for Hadoop storage file system (HDFS)."""
@@ -32,11 +36,11 @@ def get_items(self):
3236
"""Return the whole list of items available in cache."""
3337
cache_items = []
3438
try:
35-
self.storage.ls(self._location)
39+
self.storage.ls(self.location)
3640
except IOError:
3741
return []
3842

39-
for path in self.storage.walk(self._location):
43+
for path in self.storage.walk(self.location):
4044
is_cache_hash_dir = re.match('[a-f0-9]{32}$',
4145
os.path.basename(path))
4246

@@ -58,50 +62,29 @@ def get_items(self):
5862

5963
return cache_items
6064

61-
def _prepare_options(self, store_options):
62-
if 'host' not in store_options:
63-
store_options['host'] = 'localhost'
64-
65-
if 'port' not in store_options:
66-
store_options['port'] = 9000
67-
68-
if 'user' not in store_options:
69-
store_options['user'] = None
70-
71-
if 'ticket_cache' not in store_options:
72-
store_options['ticket_cache'] = None
73-
74-
if 'token' not in store_options:
75-
store_options['token'] = None
76-
77-
if 'pars' not in store_options:
78-
store_options['pars'] = None
79-
80-
if 'connect' not in store_options:
81-
store_options['connect'] = True
65+
def _check_options(self, options):
66+
for k, v in DEFAULT_BACKEND_OPTIONS.items():
67+
if k not in options:
68+
options[k] = v
8269

83-
return store_options
70+
return options
8471

8572
def configure(self, location, verbose=0,
86-
store_options=dict(host='localhost', port=9000, user=None,
87-
ticket_cache=None, token=None, pars=None,
88-
connect=True)):
73+
backend_options=DEFAULT_BACKEND_OPTIONS):
8974
"""Configure the store backend."""
9075

91-
store_options = self._prepare_options(store_options)
76+
options = self._check_options(backend_options.copy())
9277
self.storage = hdfs3.HDFileSystem(
93-
host=store_options['host'], port=store_options['port'],
94-
user=store_options['user'],
95-
ticket_cache=store_options['ticket_cache'],
96-
token=store_options['token'], pars=store_options['pars'],
97-
connect=store_options['connect'])
78+
host=options['host'], port=options['port'], user=options['user'],
79+
ticket_cache=options['ticket_cache'], token=options['token'],
80+
pars=options['pars'], connect=options['connect'])
9881
if location.startswith('/'):
9982
location = location[1:]
100-
self._location = location
101-
self.storage.mkdir(self._location)
83+
self.location = location
84+
self.storage.mkdir(self.location)
10285

10386
# computation results can be stored compressed for faster I/O
104-
self.compress = store_options['compress']
87+
self.compress = options['compress']
10588

10689
# Memory map mode is not supported
10790
self.mmap_mode = None

joblibhadoop/hdfs/tests/test_hdfs_backend.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,10 @@ def func(arg):
3434

3535
mem = Memory(location=tmpdir.strpath[1:], backend='hdfs',
3636
verbose=0, compress=compress,
37-
store_options=dict(host=NAMENODE, user='test'))
37+
backend_options=dict(host=NAMENODE, user='test'))
3838

39-
assert mem.store_backend._location == os.path.join(tmpdir.strpath[1:],
40-
"joblib")
39+
assert mem.store_backend.location == os.path.join(tmpdir.strpath[1:],
40+
"joblib")
4141

4242
func = mem.cache(func)
4343

@@ -72,10 +72,10 @@ def test_root_location_replacement(tmpdir):
7272
register_hdfs_store_backend()
7373

7474
mem = Memory(location=location, backend='hdfs', verbose=100,
75-
store_options=dict(host=NAMENODE, user='test'))
75+
backend_options=dict(host=NAMENODE, user='test'))
7676

77-
assert mem.store_backend._location == os.path.join(tmpdir.strpath[1:],
78-
"joblib")
77+
assert mem.store_backend.location == os.path.join(tmpdir.strpath[1:],
78+
"joblib")
7979

8080

8181
def test_passing_backend_base_to_memory(tmpdir):
@@ -84,15 +84,15 @@ def test_passing_backend_base_to_memory(tmpdir):
8484
register_hdfs_store_backend()
8585

8686
mem = Memory(location=tmpdir.strpath, backend='hdfs', verbose=100,
87-
store_options=dict(host=NAMENODE, user='test'))
87+
backend_options=dict(host=NAMENODE, user='test'))
8888

89-
assert mem.store_backend._location == os.path.join(tmpdir.strpath[1:],
90-
"joblib")
89+
assert mem.store_backend.location == os.path.join(tmpdir.strpath[1:],
90+
"joblib")
9191

9292
mem2 = Memory(location=mem.store_backend, backend='hdfs', verbose=100,
93-
store_options=dict(host=NAMENODE, user='test'))
93+
backend_options=dict(host=NAMENODE, user='test'))
9494

95-
assert mem2.store_backend._location == mem.store_backend._location
95+
assert mem2.store_backend.location == mem.store_backend.location
9696

9797

9898
def test_clear_cache(tmpdir):
@@ -106,13 +106,13 @@ def func(arg):
106106

107107
mem = Memory(location=tmpdir.strpath, backend='hdfs',
108108
verbose=100, compress=False,
109-
store_options=dict(host=NAMENODE, user='test'))
109+
backend_options=dict(host=NAMENODE, user='test'))
110110
cached_func = mem.cache(func)
111111
cached_func("test")
112112

113113
mem.clear()
114114

115-
assert not mem.store_backend._item_exists(mem.store_backend._location)
115+
assert not mem.store_backend._item_exists(mem.store_backend.location)
116116

117117

118118
def test_get_items(tmpdir):
@@ -125,7 +125,7 @@ def func(arg):
125125

126126
mem = Memory(location=tmpdir.strpath, backend='hdfs',
127127
verbose=100, compress=False,
128-
store_options=dict(host=NAMENODE, user='test'))
128+
backend_options=dict(host=NAMENODE, user='test'))
129129
assert not mem.store_backend.get_items()
130130

131131
cached_func = mem.cache(func)

0 commit comments

Comments
 (0)