Skip to content

Commit b8d0d38

Browse files
committed
Sync OpenHCS io
1 parent 15db1d5 commit b8d0d38

25 files changed

Lines changed: 4665 additions & 563 deletions

src/polystore/__init__.py

Lines changed: 120 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,85 +1,140 @@
11
"""
2-
Polystore - Framework-agnostic multi-backend storage abstraction.
3-
4-
Provides pluggable storage backends with multi-framework I/O support for
5-
NumPy, PyTorch, JAX, TensorFlow, CuPy, and Zarr.
2+
Polystore package exports.
63
"""
74

8-
__version__ = "0.1.2"
5+
import os
96

10-
# Core abstractions
7+
from .atomic import file_lock, atomic_write_json, atomic_update_json, FileLockError, FileLockTimeoutError
8+
from .backend_registry import (
9+
get_backend_instance,
10+
cleanup_backend_connections,
11+
cleanup_all_backends,
12+
register_cleanup_callback,
13+
STORAGE_BACKENDS,
14+
)
1115
from .base import (
16+
BackendBase,
1217
DataSink,
1318
DataSource,
14-
StorageBackend,
15-
VirtualBackend,
1619
ReadOnlyBackend,
20+
StorageBackend,
21+
storage_registry,
22+
reset_memory_backend,
23+
ensure_storage_registry,
24+
get_backend,
1725
)
18-
19-
# Concrete backends
20-
from .memory import MemoryBackend
21-
from .disk import DiskBackend
22-
23-
# Optional backends
24-
# Zarr is a required backend for this project. Import it directly so
25-
# missing dependency errors surface loudly during test/setup.
26-
from .zarr import ZarrBackend
27-
28-
# File manager
26+
from .constants import Backend, MemoryType, TransportMode
27+
from .disk import DiskStorageBackend
2928
from .filemanager import FileManager
30-
31-
# Registry
32-
from .backend_registry import BackendRegistry, create_storage_registry
33-
34-
# Atomic operations
35-
from .atomic import atomic_write, atomic_write_json
36-
37-
# Exceptions
38-
from .exceptions import (
39-
StorageError,
40-
StorageResolutionError,
41-
BackendNotFoundError,
42-
UnsupportedFormatError,
29+
from .formats import FileFormat, DEFAULT_IMAGE_EXTENSIONS
30+
from .memory import MemoryStorageBackend
31+
from .metadata_writer import (
32+
AtomicMetadataWriter,
33+
MetadataWriteError,
34+
METADATA_CONFIG,
35+
get_metadata_path,
36+
get_subdirectory_name,
37+
resolve_subdirectory_path,
4338
)
44-
45-
# Streaming (optional and lazy)
46-
# Don't import at module level - streaming is heavy and optional
47-
# Users can import manually if needed: from polystore.streaming import StreamingBackend
48-
try:
49-
from .streaming import StreamingBackend
50-
except ImportError:
51-
StreamingBackend = None
52-
# Streaming (optional and lazy)
53-
# Don't import at module level - streaming is heavy and optional
54-
# Users can import manually if needed: from polystore.streaming import StreamingBackend
55-
StreamingBackend = None
39+
from .metadata_migration import detect_legacy_format, migrate_legacy_metadata, migrate_plate_metadata
40+
from .pipeline_migration import detect_legacy_pipeline, migrate_pipeline_file, load_pipeline_with_migration
41+
from .roi import (
42+
ROI,
43+
PolygonShape,
44+
PolylineShape,
45+
MaskShape,
46+
PointShape,
47+
EllipseShape,
48+
extract_rois_from_labeled_mask,
49+
load_rois_from_json,
50+
load_rois_from_zip,
51+
materialize_rois,
52+
)
53+
from .streaming import StreamingBackend
54+
from .streaming_constants import StreamingDataType, NapariShapeType
5655

5756
__all__ = [
58-
# Version
59-
"__version__",
60-
# Core abstractions
57+
"Backend",
58+
"MemoryType",
59+
"TransportMode",
60+
"FileFormat",
61+
"DEFAULT_IMAGE_EXTENSIONS",
62+
"BackendBase",
6163
"DataSink",
6264
"DataSource",
63-
"StorageBackend",
64-
"VirtualBackend",
6565
"ReadOnlyBackend",
66-
# Backends
67-
"MemoryBackend",
68-
"DiskBackend",
69-
"ZarrBackend",
70-
# File manager
66+
"StorageBackend",
67+
"StreamingBackend",
68+
"storage_registry",
69+
"reset_memory_backend",
70+
"ensure_storage_registry",
71+
"get_backend",
72+
"get_backend_instance",
73+
"cleanup_backend_connections",
74+
"cleanup_all_backends",
75+
"register_cleanup_callback",
76+
"STORAGE_BACKENDS",
77+
"DiskStorageBackend",
78+
"MemoryStorageBackend",
7179
"FileManager",
72-
# Registry
73-
"BackendRegistry",
74-
# Atomic operations
75-
"atomic_write",
80+
"file_lock",
7681
"atomic_write_json",
77-
# Exceptions
78-
"StorageError",
79-
"StorageResolutionError",
80-
"BackendNotFoundError",
81-
"UnsupportedFormatError",
82-
# Streaming (optional)
83-
"StreamingBackend",
82+
"atomic_update_json",
83+
"FileLockError",
84+
"FileLockTimeoutError",
85+
"AtomicMetadataWriter",
86+
"MetadataWriteError",
87+
"METADATA_CONFIG",
88+
"get_metadata_path",
89+
"get_subdirectory_name",
90+
"resolve_subdirectory_path",
91+
"detect_legacy_format",
92+
"migrate_legacy_metadata",
93+
"migrate_plate_metadata",
94+
"detect_legacy_pipeline",
95+
"migrate_pipeline_file",
96+
"load_pipeline_with_migration",
97+
"ROI",
98+
"PolygonShape",
99+
"PolylineShape",
100+
"MaskShape",
101+
"PointShape",
102+
"EllipseShape",
103+
"extract_rois_from_labeled_mask",
104+
"load_rois_from_json",
105+
"load_rois_from_zip",
106+
"materialize_rois",
107+
"StreamingDataType",
108+
"NapariShapeType",
109+
"NapariStreamingBackend",
110+
"FijiStreamingBackend",
111+
"ZarrStorageBackend",
112+
"OMEROLocalBackend",
113+
"OMEROFileFormatRegistry",
84114
]
85115

116+
_LAZY_BACKEND_REGISTRY = {
117+
"NapariStreamingBackend": ("polystore.napari_stream", "NapariStreamingBackend"),
118+
"FijiStreamingBackend": ("polystore.fiji_stream", "FijiStreamingBackend"),
119+
"ZarrStorageBackend": ("polystore.zarr", "ZarrStorageBackend"),
120+
"OMEROLocalBackend": ("polystore.omero_local", "OMEROLocalBackend"),
121+
"OMEROFileFormatRegistry": ("polystore.omero_local", "OMEROFileFormatRegistry"),
122+
}
123+
124+
125+
def __getattr__(name):
126+
"""Lazy import of optional/extra backend classes."""
127+
if name not in _LAZY_BACKEND_REGISTRY:
128+
raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
129+
130+
if os.getenv("POLYSTORE_SUBPROCESS_NO_GPU") == "1":
131+
class PlaceholderBackend:
132+
pass
133+
PlaceholderBackend.__name__ = name
134+
PlaceholderBackend.__qualname__ = name
135+
return PlaceholderBackend
136+
137+
module_path, class_name = _LAZY_BACKEND_REGISTRY[name]
138+
import importlib
139+
module = importlib.import_module(module_path)
140+
return getattr(module, class_name)

src/polystore/async_init.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
"""
2+
Async initialization for I/O backends.
3+
4+
This module provides background loading of GPU-heavy dependencies
5+
to avoid blocking during import while ensuring they're ready when needed.
6+
"""
7+
import logging
8+
import threading
9+
10+
logger = logging.getLogger(__name__)
11+
12+
_init_complete = threading.Event()
13+
_init_thread = None
14+
15+
16+
def _background_init():
17+
"""
18+
Background thread target for async initialization.
19+
20+
NOTE: Due to Python's GIL, importing heavy modules in a background thread
21+
still blocks the main thread during imports. Therefore, we use lazy initialization
22+
for GPU libraries and storage backends - they'll be loaded on first use.
23+
24+
Currently this is a no-op placeholder. GPU registry initialization happens
25+
lazily on first use, not during startup.
26+
"""
27+
try:
28+
logger.info("Background I/O initialization complete (lazy mode - no-op)")
29+
except Exception as e:
30+
logger.error(f"Background I/O initialization failed: {e}")
31+
finally:
32+
_init_complete.set()
33+
34+
35+
def start_async_initialization():
36+
"""
37+
Start background initialization of GPU-heavy dependencies.
38+
39+
Call this during application startup (GUI/CLI) to pre-load
40+
GPU libraries without blocking. Safe to call multiple times.
41+
"""
42+
global _init_thread
43+
44+
if _init_thread is None:
45+
_init_thread = threading.Thread(
46+
target=_background_init,
47+
daemon=True,
48+
name="io-async-init"
49+
)
50+
_init_thread.start()
51+
logger.info("Started background I/O initialization")
52+
53+
54+
def wait_for_initialization(timeout: float = 60.0) -> bool:
55+
"""
56+
Wait for background initialization to complete.
57+
58+
Args:
59+
timeout: Maximum seconds to wait
60+
61+
Returns:
62+
True if completed, False if timeout
63+
"""
64+
return _init_complete.wait(timeout)
65+

src/polystore/atomic.py

Lines changed: 0 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -122,61 +122,6 @@ def _cleanup_lock(lock_fd: Optional[int], lock_path: Path) -> None:
122122
logger.warning(f"Error removing lock file {lock_path}: {e}")
123123

124124

125-
@contextmanager
126-
def atomic_write(file_path: Union[str, Path], mode: str = 'w', ensure_directory: bool = True):
127-
"""
128-
Context manager for atomic file writes.
129-
130-
Writes to a temporary file first, then renames to the target path.
131-
This ensures the operation is atomic - either fully succeeds or fails
132-
without leaving partial writes.
133-
134-
Args:
135-
file_path: Target file path
136-
mode: File mode ('w' for text, 'wb' for binary)
137-
ensure_directory: Create parent directory if it doesn't exist
138-
139-
Example:
140-
with atomic_write("output.txt") as f:
141-
f.write("data")
142-
"""
143-
file_path = Path(file_path)
144-
145-
if ensure_directory:
146-
file_path.parent.mkdir(parents=True, exist_ok=True)
147-
148-
# Create temporary file in same directory
149-
with tempfile.NamedTemporaryFile(
150-
mode=mode,
151-
dir=file_path.parent,
152-
prefix=f"{LOCK_CONFIG.TEMP_PREFIX}{file_path.name}",
153-
delete=False
154-
) as tmp_file:
155-
tmp_path = tmp_file.name
156-
try:
157-
yield tmp_file
158-
tmp_file.flush()
159-
os.fsync(tmp_file.fileno())
160-
except Exception:
161-
# Clean up temp file on error
162-
try:
163-
os.unlink(tmp_path)
164-
except Exception:
165-
pass
166-
raise
167-
168-
# Atomically replace target file
169-
try:
170-
os.replace(tmp_path, str(file_path))
171-
logger.debug(f"Atomically wrote to {file_path}")
172-
except Exception as e:
173-
try:
174-
os.unlink(tmp_path)
175-
except Exception:
176-
pass
177-
raise FileLockError(f"Atomic write failed for {file_path}: {e}") from e
178-
179-
180125
def atomic_write_json(
181126
file_path: Union[str, Path],
182127
data: Dict[str, Any],

0 commit comments

Comments
 (0)