import { GraphExpander } from '../../interfaces/graph-expander.js'; import { DegreePrioritisedExpansionResult } from './degree-prioritised-expansion.js'; /** * Heterogeneity-Aware Bidirectional Expansion (HABE) * * **Novel Contribution**: Combines entropy-guided expansion with overlap-based * termination and path salience ranking to produce cross-domain paths that * traverse heterogeneous graph regions. * * **Key Innovation**: HABE integrates three thesis components: * 1. **EGE Priority**: π_HABE(v) = (1 / (H_local(v) + ε)) × log(deg(v) + 1) * - Low-entropy (homogeneous) neighborhoods expanded first * - High-entropy (heterogeneous) neighborhoods deferred * 2. **Transitive Connectivity Termination**: Terminates when overlap graph is connected * - Less strict than full pairwise, enables earlier termination * - Ensures all seeds are transitively connected * 3. **Path Salience Output**: Returns paths ranked by geometric-mean MI * - M(P) = exp((1/k) Σ log I(e)) * - Weak-link sensitivity without length bias * * **Design Motivation**: * In heterogeneous graphs (e.g., citation networks with multi-domain papers), * standard expansion may get trapped in homogeneous regions. HABE actively * defers high-entropy nodes, encouraging exploration of cross-domain connections. * * **Key Properties**: * 1. **Cross-domain sensitivity**: Prioritizes edges connecting different domains * 2. **Parameter-free termination**: Transitive connectivity, no arbitrary cutoffs * 3. **N-seed generalisation**: Single algorithm handles N=1, N=2, and N≥3 * 4. **Entropy-hub duality**: log(deg+1) maintains hub deferral from degree-prioritised * * **Algorithm**: * ``` * 1. Pre-compute local entropy H_local(v) for frontier nodes * 2. Initialize N frontiers, one per seed * 3. While any frontier is non-empty AND overlap graph not connected: * a. Select frontier with lowest π_HABE(v) node at front * b. Pop that node and expand its neighbours * c. For each new neighbour: * - Compute H_local asynchronously * - Check intersection with all other frontiers * - Record overlap events for termination check * d. If intersection found, record path between seeds * 4. Rank discovered paths by geometric-mean MI (Path Salience) * 5. Return sampled subgraph with ranked paths * ``` * * **Priority Function**: * π_HABE(v) = (1 / (H_local(v) + ε)) × log(deg(v) + 1) * * Where: * - H_local(v) = -Σ p(τ) log₂ p(τ) (Shannon entropy of neighbour types) * - p(τ) = proportion of neighbours with relationship type τ * - ε = 0.001 (prevents division by zero) * - deg(v) = total degree of node v * * **Complexity**: O(E log V) where E = edges explored, V = vertices * * @template T - Type of node data returned by expander * @example * ```typescript * const expansion = new HeterogeneityAwareExpansion(expander, ['seedA', 'seedB']); * const result = await expansion.run(); * console.log(`Found ${result.paths.length} cross-domain paths`); * console.log(`Sampled ${result.sampledNodes.size} nodes`); * ``` * * @see EntropyGuidedExpansion - Entropy-based priority without overlap termination * @see DegreePrioritisedExpansion - Base implementation using degree alone */ export declare class HeterogeneityAwareExpansion { private readonly expander; private readonly seeds; private readonly frontiers; private readonly paths; private readonly sampledEdges; private stats; /** Tracks when each node was first discovered (iteration number) */ private readonly nodeDiscoveryIteration; /** Track which frontier owns each node for O(1) intersection checking */ private readonly nodeToFrontierIndex; /** Track path signatures for O(1) deduplication */ private readonly pathSignatures; /** Overlap events for transitive connectivity termination */ private readonly overlapEvents; /** Epsilon value to prevent division by zero in entropy calculation */ private static readonly EPSILON; /** Cache of pre-computed local entropy values */ private readonly entropyCache; /** * Create a new heterogeneity-aware expansion. * * @param expander - Graph expander providing neighbour access * @param seeds - Array of seed node IDs (N ≥ 1) * @throws Error if no seeds provided */ constructor(expander: GraphExpander, seeds: readonly string[]); /** * Compute local entropy H_local(v) for a node based on neighbour type distribution. * * H_local(v) = -Σ p(τ) log₂ p(τ) * * Where p(τ) is the proportion of neighbours with relationship type τ. * * @param nodeId - Node to compute entropy for * @returns Shannon entropy of neighbour type distribution * @internal */ private computeLocalEntropy; /** * Calculate initial priority for seed nodes (degree-only, entropy computed lazily). * * @param nodeId - Node to calculate priority for * @returns Priority value * @internal */ private calculateInitialPriority; /** * Calculate HABE priority for a node (async version with entropy). * * π_HABE(v) = (1 / (H_local(v) + ε)) × log(deg(v) + 1) * * Lower priority = explored first (min-heap behaviour). * * @param nodeId - Node to calculate priority for * @returns Priority value (lower = higher priority) * @internal */ private calculateHABEPriority; /** * Check if expansion should terminate based on transitive connectivity. * * Terminates when overlap graph is connected (all seeds transitively connected). * * @returns true if expansion should terminate * @internal */ private shouldTerminate; /** * Check if an undirected graph is connected using BFS. * * @param adj - Adjacency list representation of graph * @param n - Number of nodes * @returns true if graph is connected * @internal */ private isConnected; /** * Run the expansion to completion. * * Terminates when: * - All frontiers are exhausted (no unexpanded nodes remain), OR * - Overlap graph is transitively connected (all seeds connected via overlaps) * * @returns Expansion results including paths and sampled subgraph */ run(): Promise; /** * Check if any frontier has unexpanded nodes. * @internal */ private hasNonEmptyFrontier; /** * Select the frontier with the lowest HABE-priority node at its front. * Returns -1 if all frontiers are empty. * @internal */ private selectLowestPriorityFrontier; /** * Reconstruct path from meeting point between two frontiers. * @param stateA * @param stateB * @param meetingNode * @internal */ private reconstructPath; /** * Create a unique signature for a path to enable O(1) deduplication. * Signature is bidirectional (A-B same as B-A). * @param fromSeed * @param toSeed * @param nodes * @internal */ private createPathSignature; /** * Record degree in distribution histogram. * @param degree * @internal */ private recordDegree; /** * Get histogram bucket for a degree value. * @param degree * @internal */ private getDegreeBucket; } //# sourceMappingURL=heterogeneity-aware-expansion.d.ts.map