# Source code for agora.utils.merge

#!/usr/bin/env jupyter
"""
Functions to efficiently merge rows in DataFrames.
"""
import typing as t
from copy import copy

import numpy as np
import pandas as pd
from utils_find_1st import cmp_larger, find_1st

from agora.utils.association import validate_association


def apply_merges(data: pd.DataFrame, merges: np.ndarray):
    """
    Merge pairs of rows (tracklets) in a DataFrame and drop the absorbed rows.

    The rows of `data` are split in two subsets: rows that take part in
    no merge, which are passed through unchanged, and rows that take part
    in at least one merge. For every valid merge the target row is
    overwritten with the result of ``join_tracks_pair(target, source)``
    and the source row is dropped. Merges are applied sequentially, in
    the order they appear in `merges`.

    Parameters
    ----------
    data : pd.DataFrame
        Input DataFrame. Its index entries are matched against the
        identifiers in `merges`.
    merges : np.ndarray
        3-D ndarray where dimensions are (X,2,2): nmerges, pair of
        tracklets and single-cell identifiers, respectively. Each pair is
        unpacked as ``(target, source)``.
        NOTE(review): earlier documentation described the pair as
        "source-target"; confirm the expected order against the callers.

    Returns
    -------
    pd.DataFrame
        The rows not involved in any merge followed by the merged rows
        (source rows removed). `data` itself is not modified.
    """
    # Presumably `valid_merges` masks the usable entries of `merges` and
    # `indices` masks the rows of `data` involved in them; both are used
    # as boolean masks below -- verify against validate_association.
    valid_merges, indices = validate_association(
        merges, np.array(list(data.index))
    )

    # Rows that take part in no merge pass through untouched.
    merged = data.loc[~indices]
    if not valid_merges.any():
        return merged

    # Take an explicit copy: the subset is mutated below, and assigning
    # into a bare boolean-mask selection is the pandas chained-assignment
    # pattern (SettingWithCopyWarning).
    to_merge = data.loc[indices].copy()
    for target, source in merges[valid_merges]:
        target, source = tuple(target), tuple(source)
        to_merge.loc[target] = join_tracks_pair(
            to_merge.loc[target].values,
            to_merge.loc[source].values,
        )
        # The source row has been absorbed into the target row.
        to_merge.drop(source, inplace=True)

    return pd.concat((merged, to_merge), names=data.index.names)
def join_tracks_pair(target: np.ndarray, source: np.ndarray) -> np.ndarray:
    """
    Join two tracks and return the new value of the target.

    The trailing run of `target` entries that are not greater than zero
    (zeros, negatives or NaN) is replaced with the corresponding trailing
    entries of `source`. Everything up to and including the last entry of
    `target` that is greater than zero is kept.

    Parameters
    ----------
    target : np.ndarray
        1-D track to extend. It is not modified.
    source : np.ndarray
        1-D track that provides the trailing values; expected to have the
        same length as `target`.

    Returns
    -------
    np.ndarray
        A copy of `target` with its empty tail filled in from `source`.
    """
    joined = np.array(target, copy=True)
    filled = np.flatnonzero(joined > 0)

    # Length of the trailing run with no value greater than zero. If
    # nothing in the target is greater than zero, the whole track is empty.
    if filled.size:
        n_trailing = len(joined) - 1 - int(filled[-1])
    else:
        n_trailing = len(joined)

    # Guard against n_trailing == 0: `joined[-0:]` is the *whole* array,
    # which would overwrite a complete target with the source.
    if n_trailing:
        joined[-n_trailing:] = source[-n_trailing:]
    return joined