# Source code for aquacal.datasets.loader
"""Example dataset loading and management."""
from __future__ import annotations
import json
from dataclasses import dataclass, field
from pathlib import Path
from aquacal.config.schema import CalibrationResult
from aquacal.datasets._manifest import get_dataset_info
from aquacal.datasets.download import download_and_extract
[docs]
@dataclass
class ExampleDataset:
    """Record describing one example calibration dataset fetched from Zenodo.

    Only ``name`` and ``type`` are required; the remaining fields default to
    "not available".

    Attributes:
        name: Identifier of the dataset, e.g. ``'real-rig'``.
        type: Category of the dataset, e.g. ``'real'``.
        reference_calibration: Reference calibration shipped with the
            dataset, or ``None`` when there is none. Note that
            ``load_example`` currently stores the parsed JSON content as-is
            here rather than a deserialized ``CalibrationResult``.
        metadata: Extra information about the dataset. Every instance gets
            its own empty dict unless one is supplied.
        cache_path: Location of the cached dataset files, or ``None`` when
            not set.
    """

    # Field order is part of the positional constructor signature.
    name: str
    type: str
    reference_calibration: CalibrationResult | None = None
    metadata: dict = field(default_factory=dict)
    cache_path: Path | None = None
[docs]
def load_example(name: str) -> ExampleDataset:
    """Load an example calibration dataset.

    Looks the dataset up in the manifest, obtains its files through
    ``download_and_extract`` (downloaded from Zenodo on first use and cached
    locally), then collects the reference calibration and camera list found
    in the dataset directory.

    Args:
        name: Dataset name. Available options:
            - 'real-rig': Real hardware calibration (Zenodo download)

    Returns:
        ExampleDataset whose ``cache_path`` is the dataset directory and whose
        ``metadata`` carries the keys ``description``, ``dataset_path``,
        ``camera_names`` and ``has_reference_calibration``.

    Raises:
        ValueError: If dataset name is not recognized (presumably raised by
            ``get_dataset_info`` -- confirm against the manifest module).

    Examples:
        >>> from aquacal.datasets import load_example
        >>> ds = load_example('real-rig')  # doctest: +SKIP
        >>> print(ds.cache_path)  # doctest: +SKIP
    """
    # Get dataset metadata from manifest
    dataset_info = get_dataset_info(name)

    # Download and extract (cached)
    cache_root = download_and_extract(name, dataset_info)

    # Handle nested directory structure (Zenodo archives often have a
    # top-level folder named after the dataset)
    nested_dir = cache_root / name
    dataset_dir = nested_dir if nested_dir.exists() else cache_root

    # Load reference calibration if available. The existence check is done
    # once so the metadata flag always agrees with what was actually loaded.
    ref_calib_file = dataset_dir / "reference_calibration.json"
    has_reference = ref_calib_file.exists()
    reference_calibration = None
    if has_reference:
        with open(ref_calib_file, encoding="utf-8") as f:
            # TODO: Deserialize CalibrationResult from JSON
            # For now, just store the raw parsed JSON
            reference_calibration = json.load(f)

    return ExampleDataset(
        name=name,
        type=dataset_info["type"],
        reference_calibration=reference_calibration,
        metadata={
            "description": dataset_info["description"],
            "dataset_path": str(dataset_dir),
            "camera_names": _read_camera_names(dataset_dir / "config.yaml"),
            "has_reference_calibration": has_reference,
        },
        cache_path=dataset_dir,
    )


def _read_camera_names(config_file: Path):
    """Return the ``cameras`` entry of a dataset config file, or ``[]``.

    An empty list is returned when the file does not exist, when it is an
    empty YAML document, or when ``cameras`` is absent or null.
    """
    if not config_file.exists():
        return []

    # Imported lazily so PyYAML is only needed when a config file is present.
    import yaml

    with open(config_file, encoding="utf-8") as f:
        # safe_load yields None for an empty document; fall back to an empty
        # mapping so the lookup below cannot fail on it.
        config_data = yaml.safe_load(f) or {}

    cameras = config_data.get("cameras")
    # A bare "cameras:" key parses to None; report that as "no cameras".
    return [] if cameras is None else cameras