Source code for agora.utils.merge

#!/usr/bin/env jupyter
"""
Functions to efficiently merge rows in DataFrames.
"""
import typing as t
from copy import copy

import numpy as np
import pandas as pd
from utils_find_1st import cmp_larger, find_1st

from agora.utils.association import validate_association


[docs]def apply_merges(data: pd.DataFrame, merges: np.ndarray):
    """Split data in two, one subset for rows relevant for merging and one
    without them. It uses an array of source tracklets and target tracklets
    to efficiently merge them.

    Parameters
    ----------
    data : pd.DataFrame
        Input DataFrame.
    merges : np.ndarray
        3-D ndarray where dimensions are (X,2,2): nmerges, source-target
        pair and single-cell identifiers, respectively.

    Examples
    --------
    FIXME: Add docs.

    """

    valid_merges, indices = validate_association(
        merges, np.array(list(data.index))
    )

    # Assign non-merged
    merged = data.loc[~indices]

    # Implement the merges and drop source rows.
    if valid_merges.any():
        to_merge = data.loc[indices]
        for target, source in merges[valid_merges]:
            target, source = tuple(target), tuple(source)
            to_merge.loc[target] = join_tracks_pair(
                to_merge.loc[target].values,
                to_merge.loc[source].values,
            )
            to_merge.drop(source, inplace=True)

        merged = pd.concat((merged, to_merge), names=data.index.names)
    return merged


[docs]def join_tracks_pair(target: np.ndarray, source: np.ndarray) -> np.ndarray:
    """
    Join two tracks and return the new value of the target.
    TODO replace this with arrays only.
    """
    target_copy = copy(target)
    end = find_1st(target_copy[::-1], 0, cmp_larger)
    target_copy[-end:] = source[-end:]
    return target_copy