Source code for aliby.io.image

#!/usr/bin/env python3
"""
Image: Loads images and registers them.

Image instances loads images from a specified directory into an object that
also contains image properties such as name and metadata.  Pixels from images
are stored in dask arrays; the standard way is to store them in 5-dimensional
arrays: T(ime point), C(channel), Z(-stack), Y, X.

This module consists of a base Image class (BaseLocalImage).  ImageLocalOME
handles local OMERO images.  ImageDir handles cases in which images are split
into directories, with each time point and channel having its own image file.
ImageDummy is a dummy class for silent failure testing.
"""

import typing as t
from abc import ABC, abstractmethod, abstractproperty
from datetime import datetime
from importlib_resources import files
from pathlib import Path, PosixPath

import dask.array as da
import xmltodict
from dask.array.image import imread
from tifffile import TiffFile

from agora.io.metadata import dir_to_meta


[docs]def get_examples_dir(): """Get examples directory which stores dummy image for tiler""" return files("aliby").parent.parent / "examples" / "tiler"
[docs]def get_image_class(source: t.Union[str, int, t.Dict[str, str], PosixPath]): """ Wrapper to pick the appropiate Image class depending on the source of data. """ if isinstance(source, int): from aliby.io.omero import Image instatiator = Image elif isinstance(source, dict) or ( isinstance(source, (str, PosixPath)) and Path(source).is_dir() ): instatiator = ImageDir elif isinstance(source, str) and Path(source).is_file(): instatiator = ImageLocalOME else: raise Exception(f"Invalid data source at {source}") return instatiator
[docs]class BaseLocalImage(ABC): """ Base Image class to set path and provide context management method. """ _default_dimorder = "tczyx"
[docs] def __init__(self, path: t.Union[str, PosixPath]): # If directory, assume contents are naturally sorted self.path = Path(path)
def __enter__(self): return self def __exit__(self, *exc): for e in exc: if e is not None: print(e) return False def rechunk_data(self, img): # Format image using x and y size from metadata. self._rechunked_img = da.rechunk( img, chunks=( 1, 1, 1, self._meta["size_y"], self._meta["size_x"], ), ) return self._rechunked_img @abstractmethod def get_data_lazy(self) -> da.Array: pass @abstractproperty def name(self): pass @abstractproperty def dimorder(self): pass @property def data(self): return self.get_data_lazy() @property def metadata(self): return self._meta
[docs]class ImageDummy(BaseLocalImage): """ Dummy Image class. ImageDummy mimics the other Image classes in such a way that it is accepted by Tiler. The purpose of this class is for testing, in particular, identifying silent failures. If something goes wrong, we should be able to know whether it is because of bad parameters or bad input data. For the purposes of testing parameters, ImageDummy assumes that we already know the tiler parameters before Image instances are instantiated. This is true for a typical pipeline run. """
[docs] def __init__(self, tiler_parameters: dict): """Builds image instance Parameters ---------- tiler_parameters : dict Tiler parameters, in dict form. Following aliby.tile.tiler.TilerParameters, the keys are: "tile_size" (size of tile), "ref_channel" (reference channel for tiling), and "ref_z" (reference z-stack, 0 to choose a default). """ self.ref_channel = tiler_parameters["ref_channel"] self.ref_z = tiler_parameters["ref_z"]
# Goal: make Tiler happy.
[docs] @staticmethod def pad_array( image_array: da.Array, dim: int, n_empty_slices: int, image_position: int = 0, ): """Extends a dimension in a dask array and pads with zeros Extends a dimension in a dask array that has existing content, then pads with zeros. Parameters ---------- image_array : da.Array Input dask array dim : int Dimension in which to extend the dask array. n_empty_slices : int Number of empty slices to extend the dask array by, in the specified dimension/axis. image_position : int Position within the new dimension to place the input arary, default 0 (the beginning). Examples -------- ``` extended_array = pad_array( my_da_array, dim = 2, n_empty_slices = 4, image_position = 1) ``` Extends a dask array called `my_da_array` in the 3rd dimension (dimensions start from 0) by 4 slices, filled with zeros. And puts the original content in slice 1 of the 3rd dimension """ # Concats zero arrays with same dimensions as image_array, and puts # image_array as first element in list of arrays to be concatenated zeros_array = da.zeros_like(image_array) return da.concatenate( [ *([zeros_array] * image_position), image_array, *([zeros_array] * (n_empty_slices - image_position)), ], axis=dim, )
# Logic: We want to return a image instance
[docs] def get_data_lazy(self) -> da.Array: """Return 5D dask array. For lazy-loading multidimensional tiff files. Dummy image.""" examples_dir = get_examples_dir() # TODO: Make this robust to having multiple TIFF images, one for each z-section, # all falling under the same "pypipeline_unit_test_00_000001_Brightfield_*.tif" # naming scheme. The aim is to create a multidimensional dask array that stores # the z-stacks. img_filename = "pypipeline_unit_test_00_000001_Brightfield_003.tif" img_path = examples_dir / img_filename # img is a dask array has three dimensions: z, x, y # TODO: Write a test to confirm this: If everything worked well, # z = 1, x = 1200, y = 1200 img = imread(str(img_path)) # Adds t & c dimensions img = da.reshape( img, (1, 1, img.shape[-3], img.shape[-2], img.shape[-1]) ) # Pads t, c, and z dimensions img = self.pad_array( img, dim=0, n_empty_slices=199 ) # 200 timepoints total img = self.pad_array(img, dim=1, n_empty_slices=2) # 3 channels img = self.pad_array( img, dim=2, n_empty_slices=4, image_position=self.ref_z ) # 5 z-stacks return img
@property def name(self): pass @property def dimorder(self): pass
[docs]class ImageLocalOME(BaseLocalImage): """ Local OMERO Image class. This is a derivative Image class. It fetches an image from OMEXML data format, in which a multidimensional tiff image contains the metadata. """
[docs] def __init__(self, path: str, dimorder=None): super().__init__(path) self._id = str(path) meta = dict() try: with TiffFile(path) as f: self._meta = xmltodict.parse(f.ome_metadata)["OME"] for dim in self.dimorder: meta["size_" + dim.lower()] = int( self._meta["Image"]["Pixels"]["@Size" + dim] ) meta["channels"] = [ x["@Name"] for x in self._meta["Image"]["Pixels"]["Channel"] ] meta["name"] = self._meta["Image"]["@Name"] meta["type"] = self._meta["Image"]["Pixels"]["@Type"] except Exception as e: # Images not in OMEXML print("Warning:Metadata not found: {}".format(e)) print( f"Warning: No dimensional info provided. Assuming {self._default_dimorder}" ) # Mark non-existent dimensions for padding self.base = self._default_dimorder # self.ids = [self.index(i) for i in dimorder] self._dimorder = base self._meta = meta
@property def name(self): return self._meta["name"] @property def date(self): date_str = [ x for x in self._meta["StructuredAnnotations"]["TagAnnotation"] if x["Description"] == "Date" ][0]["Value"] return datetime.strptime(date_str, "%d-%b-%Y") @property def dimorder(self): """Order of dimensions in image""" if not hasattr(self, "_dimorder"): self._dimorder = self._meta["Image"]["Pixels"]["@DimensionOrder"] return self._dimorder @dimorder.setter def dimorder(self, order: str): self._dimorder = order return self._dimorder
[docs] def get_data_lazy(self) -> da.Array: """Return 5D dask array. For lazy-loading multidimensional tiff files""" if not hasattr(self, "formatted_img"): if not hasattr(self, "ids"): # Standard dimension order img = (imread(str(self.path))[0],) else: # Custom dimension order, we rearrange the axes for compatibility img = imread(str(self.path))[0] for i, d in enumerate(self._dimorder): self._meta["size_" + d.lower()] = img.shape[i] target_order = ( *self.ids, *[ i for i, d in enumerate(self.base) if d not in self.dimorder ], ) reshaped = da.reshape( img, shape=( *img.shape, *[1 for _ in range(5 - len(self.dimorder))], ), ) img = da.moveaxis( reshaped, range(len(reshaped.shape)), target_order ) return self.rechunk_data(img)
[docs]class ImageDir(BaseLocalImage): """ Image class for the case in which all images are split in one or multiple folders with time-points and channels as independent files. It inherits from BaseLocalImage so we only override methods that are critical. Assumptions: - One folders per position. - Images are flat. - Channel, Time, z-stack and the others are determined by filenames. - Provides Dimorder as it is set in the filenames, or expects order during instatiation """
[docs] def __init__(self, path: t.Union[str, PosixPath], **kwargs): super().__init__(path) self.image_id = str(self.path.stem) self._meta = dir_to_meta(self.path)
[docs] def get_data_lazy(self) -> da.Array: """Return 5D dask array. For lazy-loading local multidimensional tiff files""" img = imread(str(self.path / "*.tiff")) # If extra channels, pick the first stack of the last dimensions while len(img.shape) > 3: img = img[..., 0] if self._meta: self._meta["size_x"], self._meta["size_y"] = img.shape[-2:] # Reshape using metadata # img = da.reshape(img, (*self._meta, *img.shape[1:])) img = da.reshape(img, self._meta.values()) original_order = [ i[-1] for i in self._meta.keys() if i.startswith("size") ] # Swap axis to conform with normal order target_order = [ self._default_dimorder.index(x) for x in original_order ] img = da.moveaxis( img, list(range(len(original_order))), target_order, ) pixels = self.rechunk_data(img) return pixels
@property def name(self): return self.path.stem @property def dimorder(self): # Assumes only dimensions start with "size" return [ k.split("_")[-1] for k in self._meta.keys() if k.startswith("size") ]