Skip to content

Commit 88cf1f4

Browse files
authored
Do not allow exceptions in ManifestStore (#683)
* Do not allow exceptions in ManifestStore
* Remove try/except
* Make synthetic manifests more realistic
* Improve HTTPStore handling
* Simplify and remove some DMRPP tests
* Use real data in dmrpp tests
1 parent 64125c5 commit 88cf1f4

File tree

6 files changed

+412
-413
lines changed

6 files changed

+412
-413
lines changed

conftest.py

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,8 @@ def zarr_store_scalar(tmpdir):
8787
def _generate_chunk_entries(
8888
shape: tuple[int, ...],
8989
chunks: tuple[int, ...],
90-
entry_generator: Callable[[tuple[int, ...]], dict[str, Any]],
90+
itemsize: int,
91+
entry_generator: Callable[[tuple[int, ...], tuple[int, ...], int], dict[str, Any]],
9192
) -> dict[str, dict[str, Any]]:
9293
"""
9394
Generate chunk entries for a manifest based on shape and chunks.
@@ -112,30 +113,29 @@ def _generate_chunk_entries(
112113
)
113114

114115
if chunk_grid_shape == ():
115-
return {"0": entry_generator((0,))}
116+
return {"0": entry_generator((0,), (0,), itemsize)}
116117

117118
all_possible_combos = itertools.product(
118119
*[range(length) for length in chunk_grid_shape]
119120
)
120-
return {join(ind): entry_generator(ind) for ind in all_possible_combos}
121-
122-
123-
def _offset_from_chunk_key(ind: tuple[int, ...]) -> int:
124-
"""Generate an offset value from chunk indices."""
125-
return sum(ind) * 10
121+
return {
122+
join(ind): entry_generator(ind, chunks, itemsize) for ind in all_possible_combos
123+
}
126124

127125

128-
def _length_from_chunk_key(ind: tuple[int, ...]) -> int:
126+
def _length_from_chunk_key(chunks: tuple[int, ...], itemsize: int) -> int:
129127
"""Generate a length value from chunk indices."""
130-
return sum(ind) + 5
128+
return int(np.prod(chunks) * itemsize)
131129

132130

133-
def _entry_from_chunk_key(ind: tuple[int, ...]) -> dict[str, str | int]:
131+
def _entry_from_chunk_key(
132+
ind: tuple[int, ...], chunks: tuple[int, ...], itemsize: int
133+
) -> dict[str, str | int]:
134134
"""Generate a (somewhat) unique manifest entry from a given chunk key."""
135135
entry = {
136136
"path": f"/foo.{str(join(ind))}.nc",
137-
"offset": _offset_from_chunk_key(ind),
138-
"length": _length_from_chunk_key(ind),
137+
"offset": 0,
138+
"length": _length_from_chunk_key(chunks, itemsize),
139139
}
140140
return entry # type: ignore[return-value]
141141

@@ -150,7 +150,9 @@ def _generate_chunk_manifest(
150150
"""Generate a chunk manifest with sequential offsets for each chunk."""
151151
current_offset = [offset] # Use list to allow mutation in closure
152152

153-
def sequential_entry_generator(ind: tuple[int, ...]) -> dict[str, Any]:
153+
def sequential_entry_generator(
154+
ind: tuple[int, ...], chunks: tuple[int, ...], itemsize: int
155+
) -> dict[str, Any]:
154156
entry = {
155157
"path": netcdf4_file,
156158
"offset": current_offset[0],
@@ -159,7 +161,7 @@ def sequential_entry_generator(ind: tuple[int, ...]) -> dict[str, Any]:
159161
current_offset[0] += length
160162
return entry
161163

162-
entries = _generate_chunk_entries(shape, chunks, sequential_entry_generator)
164+
entries = _generate_chunk_entries(shape, chunks, 32, sequential_entry_generator)
163165
return ChunkManifest(entries)
164166

165167

@@ -369,7 +371,9 @@ def _manifest_array(
369371
codecs=codecs,
370372
dimension_names=dimension_names,
371373
)
372-
entries = _generate_chunk_entries(shape, chunks, _entry_from_chunk_key)
374+
entries = _generate_chunk_entries(
375+
shape, chunks, data_type.itemsize, _entry_from_chunk_key
376+
)
373377
chunkmanifest = ChunkManifest(entries=entries)
374378
return ManifestArray(chunkmanifest=chunkmanifest, metadata=metadata)
375379

virtualizarr/manifests/store.py

Lines changed: 12 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,6 @@
3232
__all__ = ["ManifestStore"]
3333

3434

35-
_ALLOWED_EXCEPTIONS: tuple[type[Exception], ...] = (
36-
FileNotFoundError,
37-
IsADirectoryError,
38-
NotADirectoryError,
39-
)
40-
41-
4235
@dataclass
4336
class StoreRequest:
4437
"""Dataclass for matching a key to the store instance"""
@@ -259,28 +252,26 @@ async def get(
259252
f"Could not find a store to use for {path} in the store registry"
260253
)
261254

262-
# Truncate path to match Obstore expectations
263-
key = urlparse(path).path
255+
path_in_store = urlparse(path).path
264256
if hasattr(store, "prefix") and store.prefix:
265-
# strip the prefix from key
266-
key = key.removeprefix(str(store.prefix))
267-
257+
prefix = str(store.prefix).lstrip("/")
258+
path_in_store = path_in_store.lstrip("/").removeprefix(prefix).lstrip("/")
259+
elif hasattr(store, "url"):
260+
prefix = urlparse(store.url).path.lstrip("/")
261+
path_in_store = path_in_store.lstrip("/").removeprefix(prefix).lstrip("/")
268262
# Transform the input byte range to account for the chunk location in the file
269263
chunk_end_exclusive = offset + length
270264
byte_range = _transform_byte_range(
271265
byte_range, chunk_start=offset, chunk_end_exclusive=chunk_end_exclusive
272266
)
273267

274268
# Actually get the bytes
275-
try:
276-
bytes = await store.get_range_async(
277-
key,
278-
start=byte_range.start,
279-
end=byte_range.end,
280-
)
281-
return prototype.buffer.from_bytes(bytes) # type: ignore[arg-type]
282-
except _ALLOWED_EXCEPTIONS:
283-
return None
269+
bytes = await store.get_range_async(
270+
path_in_store,
271+
start=byte_range.start,
272+
end=byte_range.end,
273+
)
274+
return prototype.buffer.from_bytes(bytes) # type: ignore[arg-type]
284275

285276
async def get_partial_values(
286277
self,

virtualizarr/tests/test_manifests/test_array.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -138,9 +138,9 @@ def test_broadcast_existing_axis(self, manifest_array):
138138
assert expanded.shape == (3, 2)
139139
assert expanded.chunks == (1, 2)
140140
assert expanded.manifest.dict() == {
141-
"0.0": {"path": "file:///foo.0.0.nc", "offset": 0, "length": 5},
142-
"1.0": {"path": "file:///foo.0.0.nc", "offset": 0, "length": 5},
143-
"2.0": {"path": "file:///foo.0.0.nc", "offset": 0, "length": 5},
141+
"0.0": {"path": "file:///foo.0.0.nc", "offset": 0, "length": 8},
142+
"1.0": {"path": "file:///foo.0.0.nc", "offset": 0, "length": 8},
143+
"2.0": {"path": "file:///foo.0.0.nc", "offset": 0, "length": 8},
144144
}
145145

146146
def test_broadcast_new_axis(self, manifest_array):
@@ -149,9 +149,9 @@ def test_broadcast_new_axis(self, manifest_array):
149149
assert expanded.shape == (1, 3)
150150
assert expanded.chunks == (1, 1)
151151
assert expanded.manifest.dict() == {
152-
"0.0": {"path": "file:///foo.0.nc", "offset": 0, "length": 5},
153-
"0.1": {"path": "file:///foo.1.nc", "offset": 10, "length": 6},
154-
"0.2": {"path": "file:///foo.2.nc", "offset": 20, "length": 7},
152+
"0.0": {"path": "file:///foo.0.nc", "offset": 0, "length": 4},
153+
"0.1": {"path": "file:///foo.1.nc", "offset": 0, "length": 4},
154+
"0.2": {"path": "file:///foo.2.nc", "offset": 0, "length": 4},
155155
}
156156

157157
def test_broadcast_scalar(self, manifest_array):
@@ -160,14 +160,14 @@ def test_broadcast_scalar(self, manifest_array):
160160
assert marr.shape == ()
161161
assert marr.chunks == ()
162162
assert marr.manifest.dict() == {
163-
"0": {"path": "file:///foo.0.nc", "offset": 0, "length": 5},
163+
"0": {"path": "file:///foo.0.nc", "offset": 0, "length": 0},
164164
}
165165

166166
expanded = np.broadcast_to(marr, shape=(1,))
167167
assert expanded.shape == (1,)
168168
assert expanded.chunks == (1,)
169169
assert expanded.manifest.dict() == {
170-
"0": {"path": "file:///foo.0.nc", "offset": 0, "length": 5},
170+
"0": {"path": "file:///foo.0.nc", "offset": 0, "length": 0},
171171
}
172172

173173
@pytest.mark.parametrize(

virtualizarr/tests/test_manifests/test_store.py

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from typing import TYPE_CHECKING
66

77
import numpy as np
8+
import obstore as obs
89
import pytest
910
from obstore.store import MemoryStore
1011
from zarr.abc.store import (
@@ -341,20 +342,28 @@ def test_single_group_to_dataset(
341342
expected_loadable_variables,
342343
):
343344
marr1 = manifest_array(
344-
shape=(3, 2, 5), chunks=(1, 2, 1), dimension_names=["x", "y", "t"]
345+
shape=(3, 2, 5),
346+
chunks=(1, 2, 1),
347+
dimension_names=["x", "y", "t"],
348+
codecs=None,
345349
)
346-
marr2 = manifest_array(shape=(3, 2), chunks=(1, 2), dimension_names=["x", "y"])
347-
marr3 = manifest_array(shape=(5,), chunks=(5,), dimension_names=["t"])
348-
349-
paths1 = list({v["path"] for v in marr1.manifest.values()})
350-
paths2 = list({v["path"] for v in marr2.manifest.values()})
351-
paths3 = list({v["path"] for v in marr2.manifest.values()})
352-
unique_paths = list(set(paths1 + paths2 + paths3))
353-
stores = {}
354-
for path in unique_paths:
355-
store = MemoryStore()
356-
stores[get_store_prefix(path)] = store
357-
store_registry = ObjectStoreRegistry(stores=stores)
350+
marr2 = manifest_array(
351+
shape=(3, 2), chunks=(1, 2), dimension_names=["x", "y"], codecs=None
352+
)
353+
marr3 = manifest_array(
354+
shape=(5,), chunks=(5,), dimension_names=["t"], codecs=None
355+
)
356+
357+
store = MemoryStore()
358+
for marr in [marr1, marr2, marr3]:
359+
unique_paths = list({v["path"] for v in marr.manifest.values()})
360+
for path in unique_paths:
361+
obs.put(
362+
store,
363+
path.split("/")[-1],
364+
np.ones(marr.chunks, dtype=marr.dtype).tobytes(),
365+
)
366+
store_registry = ObjectStoreRegistry({"file://": store})
358367

359368
manifest_group = ManifestGroup(
360369
arrays={

0 commit comments

Comments
 (0)