Source code for aliby.io.dataset

#!/usr/bin/env python3
"""
Dataset is a group of classes to manage multiple types of experiments:
 - Remote experiments on an OMERO server (located in src/aliby/io/omero.py)
 - Local experiments in a multidimensional OME-TIFF image containing the metadata
 - Local experiments in a directory containing multiple positions in independent images with or without metadata
"""
import os
import shutil
import time
import typing as t
from abc import ABC, abstractproperty, abstractmethod
from pathlib import Path, PosixPath


from aliby.io.image import ImageLocalOME


def dispatch_dataset(expt_id: t.Union[int, str, os.PathLike], **kwargs):
    """
    Choose a subtype of dataset based on the identifier.

    Input:
    --------
    expt_id: int, str or path-like object serving as dataset identifier.
        An int selects an experiment handled by aliby.io.omero.Dataset;
        a str or path-like object is interpreted as a local path.
    **kwargs: forwarded to aliby.io.omero.Dataset only; the local dataset
        classes are built from the path alone.

    Returns:
    --------
    Callable Dataset instance, either network-dependent or local.

    Raises:
    --------
    Warning: if expt_id is neither an int nor a str/path-like object.
    """
    if isinstance(expt_id, int):
        # Is an experiment online. The import lives in this branch, so the
        # OMERO module is only loaded when an int identifier is given.
        from aliby.io.omero import Dataset

        return Dataset(expt_id, **kwargs)

    if isinstance(expt_id, (str, os.PathLike)):
        # Files or Dir
        expt_path = Path(expt_id)
        if expt_path.is_dir():
            return DatasetLocalDir(expt_path)
        # NOTE(review): DatasetLocalOME collects its images by globbing
        # inside the path it receives, so a path to a single file would
        # yield no images - confirm which inputs are expected to land here.
        return DatasetLocalOME(expt_path)

    # Exception type and message kept as-is so existing callers that catch
    # Warning keep working.
    raise Warning("Invalid expt_id")
class DatasetLocalABC(ABC):
    """
    Abstract Base class to fetch local files, either OME-XML or raw images.

    Subclasses must provide the ``date`` property and ``get_images``.
    Instances work as context managers: entering returns the instance,
    exiting does nothing and never suppresses an exception.
    """

    # Image file endings (without the dot) available to subclasses.
    _valid_suffixes = ("tiff", "png")
    # Endings (without the dot) of the metadata files collected by `files`.
    _valid_meta_suffixes = ("txt", "log")

    def __init__(self, dpath: t.Union[str, PosixPath], *args, **kwargs):
        """Store the dataset location as a ``Path``; extra arguments are ignored."""
        self.path = Path(dpath)

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        # Returning False lets any exception raised in the with-body propagate.
        return False

    @property
    def dataset(self):
        """Path of the dataset."""
        return self.path

    @property
    def name(self):
        """Final component of the dataset path."""
        return self.path.name

    @property
    def unique_name(self):
        """Same value as ``name``: the final component of the dataset path."""
        return self.path.name

    @property
    @abstractmethod
    def date(self):
        """Date of the dataset; must be implemented by subclasses."""

    @property
    def files(self):
        """
        Metadata files found anywhere below the dataset path.

        Returns a dict mapping each matching ``Path`` to itself. The search
        runs once; the result is cached on the instance as ``_files``.
        Matching is done on the trailing characters of the full path, so it
        is not strictly an extension check.
        """
        if not hasattr(self, "_files"):
            meta_endings = tuple(self._valid_meta_suffixes)
            self._files = {
                f: f
                for f in self.path.rglob("*")
                # Directories can match the endings too (e.g. a folder named
                # "log") but cannot be copied by cache_logs, so skip them.
                if f.is_file() and str(f).endswith(meta_endings)
            }
        return self._files

    def cache_logs(self, root_dir):
        """
        Copy metadata files to results folder.

        Every file in ``files`` is copied directly into ``root_dir`` under
        its own file name, so files sharing a name in different subfolders
        end up at the same destination.

        Input:
        --------
        root_dir: str or Path of the destination folder.

        Returns:
        --------
        True once all files have been copied.
        """
        destination = Path(root_dir)  # also accept plain strings
        for name, annotation in self.files.items():
            shutil.copy(annotation, destination / name.name)
        return True

    @abstractmethod
    def get_images(self):
        """Return a dictionary with the name of images and their unique identifiers."""
class DatasetLocalDir(DatasetLocalABC):
    """
    Organise an entire dataset, composed of multiple images, as a directory
    containing directories with individual files.

    It relies on ImageDir to manage images.
    """

    def __init__(self, dpath: t.Union[str, PosixPath], *args, **kwargs):
        """Initialise from the dataset folder; extra arguments are ignored."""
        super().__init__(dpath)

    @property
    def date(self):
        """
        Date of the dataset as a ``YYYYMMDD`` string.

        Uses the folder's last-modification time (local time zone), for
        cases where metadata is minimal.
        """
        # Convert the timestamp directly instead of formatting it with
        # time.ctime and parsing it back with time.strptime: that round trip
        # depends on the locale's day and month names.
        modified = os.path.getmtime(self.path)
        return time.strftime("%Y%m%d", time.localtime(modified))

    def get_images(self):
        """
        Find the sub-folders that contain at least one image file.

        Returns:
        --------
        Dictionary mapping folder name to folder ``Path`` for every direct
        sub-folder holding a file whose name ends in one of
        ``_valid_suffixes``.
        """
        return {
            folder.name: folder
            for folder in self.path.glob("*/")
            # The trailing slash only restricts the glob to directories on
            # newer Python versions, so check explicitly.
            if folder.is_dir()
            and any(
                True
                for suffix in self._valid_suffixes
                for _ in folder.glob(f"*.{suffix}")
            )
        }
class DatasetLocalOME(DatasetLocalABC):
    """Load a dataset from a folder

    We use a given image of a dataset to obtain the metadata,
    as we cannot expect folders to contain this information.

    It uses the standard OME-TIFF file format.
    """

    def __init__(self, dpath: t.Union[str, PosixPath], *args, **kwargs):
        """
        Initialise from the dataset folder; extra arguments are ignored.

        Raises:
        --------
        AssertionError: if no file with a valid image suffix is found
            directly inside the folder.
        """
        super().__init__(dpath)
        # Raised explicitly rather than via an assert statement so the check
        # still runs under `python -O`; the exception type is unchanged.
        if not self.get_images():
            raise AssertionError(
                f"No image files found in {self.path}; "
                f"valid suffixes are {self._valid_suffixes}"
            )

    @property
    def date(self):
        """Date read from the metadata of the first image found."""
        # Access the date from the metadata of the first position
        return ImageLocalOME(list(self.get_images().values())[0]).date

    def get_images(self):
        """
        Collect the image files located directly inside the dataset folder.

        Returns:
        --------
        Dictionary mapping file name (suffix included) to the file's path
        as a string, for every file matching one of ``_valid_suffixes``.
        """
        # Keys include the suffix, so files that differ only in suffix are
        # kept as separate entries.
        return {
            f.name: str(f)
            for suffix in self._valid_suffixes
            for f in self.path.glob(f"*.{suffix}")
        }