Source code for Core.BandDepths.contour_banddepth

import numpy as np
from multiprocessing import get_context


[docs] def choose2(x): """ Helper function to compute x choose 2 Parameters: ----------- x : int number of elements Returns: -------- [] : int x choose 2 """ return int(1/8 * (2 * x -1 ) ** 2 - 1/8)
[docs] def get_combinations(n): """ returns (n_combination, 2) as list of pair of indices for n elements, get all 2-subsets Parameters: ----------- n : int number of elements Returns: ----------- combinations : np.ndarray array of shape (n_choose_2, 2) """ n_combinations = choose2(n) combinations = np.zeros((n_combinations,2), np.int32) idx = 0 for i in np.arange(n): for j in np.arange(i+1, n): combinations[idx] = (i, j) idx += 1 return combinations
def _epsilon_subset(A, B, eps, cardA=None): """ determine if two sets are epsilon-close (Order matters!) Parameters: ----------- A : np.ndarray binary array representing set A B : np.ndarray binary array representing set B eps : float tolerance level cardA : int, optional Pre-computed cardinality of A (for optimization) Returns: ----------- bool : True if A is an epsilon-subset of B, False otherwise """ if cardA is None: cardA = np.count_nonzero(A) if cardA == 0: return True # Fast path for eps=0 (exact subset check) if eps == 0: # A is subset of B if A & ~B is empty return np.count_nonzero(A & ~B) == 0 # General case return np.count_nonzero(A & ~B) / cardA <= eps def _portion_subset(A, B, cardA=None): """ determine if two sets are partial-overlapping (Order matters!) Parameters: ----------- A : np.ndarray binary array representing set A B : np.ndarray binary array representing set B cardA : int, optional Pre-computed cardinality of A (for optimization) Returns: ----------- float : portion of A not in B """ if cardA is None: cardA = np.count_nonzero(A) if cardA == 0: return 0 return np.count_nonzero(A & ~B) / cardA def _compute_depth_for_image_optimized(args): """ Helper function to compute depth for a single image (for multiprocessing). Optimized to avoid passing large arrays - uses indices instead. Parameters: ----------- args : tuple (tdx, target, cardA_target, binary_data, combination, allow_portion, eps) Returns: -------- tuple : (tdx, depth_value) """ tdx, target, cardA_target, binary_data, combination, allow_portion, eps = args depth = 0 for xdx, ydx in combination: intersection = binary_data[xdx] & binary_data[ydx] union = binary_data[xdx] | binary_data[ydx] if allow_portion: # Early termination: skip if intersection is empty cardA_intersection = np.count_nonzero(intersection) if cardA_intersection == 0: depth += _portion_subset(target, union, cardA_target) else: depth += _portion_subset(intersection, target, cardA_intersection) + _portion_subset(target, union, cardA_target) else: # Early termination: skip if intersection is empty if not np.any(intersection): continue if _epsilon_subset(intersection, target, eps) and _epsilon_subset(target, union, eps, cardA_target): depth += 1 return (tdx, depth) def _compute_depth_for_image(args): """ Helper function to compute depth for a single image (for multiprocessing). Parameters: ----------- args : tuple (tdx, target, cardA_target, intersections, unions, combination, allow_portion, eps) Returns: -------- tuple : (tdx, depth_value) """ tdx, target, cardA_target, intersections, unions, combination, allow_portion, eps = args depth = 0 for idx, (xdx, ydx) in enumerate(combination): intersection = intersections[idx] union = unions[idx] if allow_portion: # Early termination: skip if intersection is empty cardA_intersection = np.count_nonzero(intersection) if cardA_intersection == 0: depth += _portion_subset(target, union, cardA_target) else: depth += _portion_subset(intersection, target, cardA_intersection) + _portion_subset(target, union, cardA_target) else: # Early termination: skip if intersection is empty if not np.any(intersection): continue if _epsilon_subset(intersection, target, eps) and _epsilon_subset(target, union, eps, cardA_target): depth += 1 return (tdx, depth)
[docs] def contour_banddepth(data, combination = None, allow_portion=False, eps = 0, workers=12): """ Calculates the band depth of binary contour data using pairwise combinations. R. T. Whitaker, M. Mirzargar and R. M. Kirby, "Contour Boxplots: A Method for Characterizing Uncertainty in Feature Sets from Simulation Ensembles," in IEEE Transactions on Visualization and Computer Graphics, vol. 19, no. 12, pp. 2713-2722, Dec. 2013, doi: 10.1109/TVCG.2013.143 Parameters: ----------- data : np.ndarray Input data representing binary contours. Should be convertible to a boolean NumPy array. combination : list of tuples, optional List of index pairs (xdx, ydx) specifying which images to combine for band depth calculation. If None, combinations are generated automatically. allow_portion : bool, default False If True, uses a portion-based subset calculation for depth; otherwise, uses epsilon-based subset. eps : float, default 0 Epsilon tolerance for subset checks when `allow_portion` is False. workers : int, default 12 Number of parallel workers for multiprocessing. Set to 1 to disable parallel processing. Returns: -------- depths : np.ndarray Array of band depth values for each image in the input data. Raises ------ ValueError: If the input data cannot be converted to a boolean array. Notes ----- - The function expects binary contour data, where each image is represented as a boolean array. - Depth is computed by evaluating how each image fits within the intersection and union of pairs of images. - Helper functions `_portion_subset` and `_epsilon_subset` are used for subset checks. - Uses fork context for multiprocessing to ensure compatibility with macOS. """ ### data validation: if isinstance(data, np.ndarray): if data.dtype != np.bool_: binary_data = data.astype(np.bool_) else: binary_data = data else: try: binary_data = np.array(data, dtype=np.bool_) except Exception as e: raise ValueError("Input data could not be converted to a boolean array.") from e n_images = binary_data.shape[0] if combination is None: combination = get_combinations(n_images) # Pre-compute cardinalities for all target images (lightweight optimization) cardinalities = np.array([np.count_nonzero(img) for img in binary_data]) depths = np.zeros([n_images]) if workers == 1: # Sequential processing with pre-computed intersections/unions # Pre-compute all intersections and unions (major optimization) n_combinations = len(combination) intersections = np.empty((n_combinations,) + binary_data.shape[1:], dtype=np.bool_) unions = np.empty((n_combinations,) + binary_data.shape[1:], dtype=np.bool_) for idx, (xdx, ydx) in enumerate(combination): intersections[idx] = binary_data[xdx] & binary_data[ydx] unions[idx] = binary_data[xdx] | binary_data[ydx] for tdx in range(n_images): target = binary_data[tdx] cardA_target = cardinalities[tdx] for idx in range(n_combinations): intersection = intersections[idx] union = unions[idx] if allow_portion: # Early termination: skip if intersection is empty cardA_intersection = np.count_nonzero(intersection) if cardA_intersection == 0: depths[tdx] += _portion_subset(target, union, cardA_target) else: depths[tdx] += _portion_subset(intersection, target, cardA_intersection) + _portion_subset(target, union, cardA_target) else: # Early termination: skip if intersection is empty if not np.any(intersection): continue if _epsilon_subset(intersection, target, eps) and _epsilon_subset(target, union, eps, cardA_target): depths[tdx] += 1 else: # Parallel processing - compute intersections on-the-fly in each worker to avoid memory overhead # This trades some redundant computation for better parallelization ctx = get_context('fork') with ctx.Pool(processes=workers) as pool: # Prepare arguments for each image args_list = [ (tdx, binary_data[tdx], cardinalities[tdx], binary_data, combination, allow_portion, eps) for tdx in range(n_images) ] # Compute depths in parallel results = pool.map(_compute_depth_for_image_optimized, args_list) # Collect results for tdx, depth in results: depths[tdx] = depth return depths
__all__ = ["contour_banddepth", "choose2", "get_combinations"]