From bcd04f66eec05509c0d04866975c9c2f5211f6bb Mon Sep 17 00:00:00 2001 From: sharkinsspatial Date: Thu, 10 Jul 2025 09:24:54 -0400 Subject: [PATCH] Add ManifestStore support for empty chunks. --- virtualizarr/manifests/manifest.py | 4 +-- virtualizarr/manifests/store.py | 2 ++ .../tests/test_manifests/test_store.py | 35 +++++++++++++++++++ 3 files changed, 39 insertions(+), 2 deletions(-) diff --git a/virtualizarr/manifests/manifest.py b/virtualizarr/manifests/manifest.py index 1dd8761bf..0947075e1 100644 --- a/virtualizarr/manifests/manifest.py +++ b/virtualizarr/manifests/manifest.py @@ -48,8 +48,8 @@ def with_validation( """ # note: we can't just use `__init__` or a dataclass' `__post_init__` because we need `fs_root` to be an optional kwarg - - path = validate_and_normalize_path_to_uri(path, fs_root=fs_root) + if path != "": + path = validate_and_normalize_path_to_uri(path, fs_root=fs_root) validate_byte_range(offset=offset, length=length) return ChunkEntry(path=path, offset=offset, length=length) diff --git a/virtualizarr/manifests/store.py b/virtualizarr/manifests/store.py index 679cd7e44..c1917b7f7 100644 --- a/virtualizarr/manifests/store.py +++ b/virtualizarr/manifests/store.py @@ -253,6 +253,8 @@ async def get( key, marr.metadata.chunk_key_encoding.separator ) path = manifest._paths[*chunk_indexes] + if path == "": + return None offset = manifest._offsets[*chunk_indexes] length = manifest._lengths[*chunk_indexes] # Get the configured object store instance that matches the path diff --git a/virtualizarr/tests/test_manifests/test_store.py b/virtualizarr/tests/test_manifests/test_store.py index b5e3fed0d..109bf815f 100644 --- a/virtualizarr/tests/test_manifests/test_store.py +++ b/virtualizarr/tests/test_manifests/test_store.py @@ -126,6 +126,31 @@ def s3_store(minio_bucket): ) +@pytest.fixture() +def empty_memory_store(): + import obstore as obs + + store = obs.store.MemoryStore() + prefix = get_store_prefix("") + chunk_dict = { + "0.0": {"path": "", "offset": 0, "length": 4}, + } + manifest = ChunkManifest(entries=chunk_dict) + codecs = [{"configuration": {"endian": "little"}, "name": "bytes"}] + array_metadata = create_v3_array_metadata( + shape=(1, 1), + chunk_shape=(1, 1), + data_type=np.dtype("int32"), + codecs=codecs, + chunk_key_encoding={"name": "default", "separator": "."}, + fill_value=0, + ) + manifest_array = ManifestArray(metadata=array_metadata, chunkmanifest=manifest) + manifest_group = ManifestGroup(arrays={"foo": manifest_array}) + registry = ObjectStoreRegistry({prefix: store}) + return ManifestStore(store_registry=registry, group=manifest_group) + + @requires_obstore class TestManifestStore: def test_manifest_store_properties(self, local_store): @@ -135,6 +160,16 @@ def test_manifest_store_properties(self, local_store): assert not local_store.supports_writes assert not local_store.supports_partial_writes + @pytest.mark.asyncio + @pytest.mark.parametrize( + "manifest_store", + ["empty_memory_store"], + ) + async def test_get_empty_chunk(self, manifest_store, request): + store = request.getfixturevalue(manifest_store) + observed = await store.get("foo/c/0.0", prototype=default_buffer_prototype()) + assert observed is None + @pytest.mark.asyncio @pytest.mark.parametrize( "manifest_store",