import { Graph } from '../../../algorithms/graph/graph';
import { LoadedEdge, LoadedNode } from '../loaders/index';

/**
 * Node pair for testing path ranking algorithms.
 */
export interface TestNodePair {
    /** Source node ID (as it appears in the graph) */
    source: string;
    /** Target node ID (as it appears in the graph) */
    target: string;
    /** Optional description of this pair (e.g., "central characters", "peripheral nodes") */
    description?: string;
}

/**
 * Metadata for a benchmark dataset.
 */
export interface BenchmarkDatasetMeta {
    /** Human-readable name */
    name: string;
    /** Short identifier for tables/logs */
    id: string;
    /** Brief description */
    description: string;
    /** Whether edges are directed */
    directed: boolean;
    /** Expected node count (for validation) */
    expectedNodes: number;
    /** Expected edge count (for validation) */
    expectedEdges: number;
    /** Expected content size in bytes (for validation, helps detect remote changes) */
    expectedContentSize: number;
    /** File path relative to data/benchmarks/ */
    relativePath: string;
    /** Delimiter for parsing (regex or string) */
    delimiter: string | RegExp;
    /** Source/citation for the dataset */
    source: string;
    /**
     * Remote URL for downloading the dataset.
     * Used for browser environments or when local files aren't available.
     * Should point to a raw text file in edge list format.
     */
    remoteUrl?: string;
    /**
     * Representative node pairs for testing.
     * These are actual node IDs from the graph that can be used in path ranking tests.
     * Tests should use these rather than assuming generic IDs like "0", "1", etc.
     */
    testPairs?: TestNodePair[];
}

/**
 * A loaded benchmark dataset with graph and metadata.
 */
export interface LoadedBenchmark {
    /** The loaded graph */
    graph: Graph;
    /** Dataset metadata */
    meta: BenchmarkDatasetMeta;
    /** Actual node count after loading */
    nodeCount: number;
    /** Actual edge count after loading */
    edgeCount: number;
    /** Actual content size in bytes */
    contentSize: number;
}

/**
 * Cora citation network.
 *
 * A citation network of machine learning papers.
 * Nodes are papers, edges are citations.
 */
export declare const CORA: BenchmarkDatasetMeta;

/**
 * CiteSeer citation network.
 *
 * A citation network of computer science papers.
 * Nodes are papers, edges are citations.
 */
export declare const CITESEER: BenchmarkDatasetMeta;

/**
 * Facebook ego network.
 *
 * Combined ego networks from Facebook, representing friendships.
 * Nodes are users, edges are friendships (undirected).
 */
export declare const FACEBOOK: BenchmarkDatasetMeta;

/**
 * Zachary's Karate Club network.
 *
 * Classic social network of a university karate club.
 * Nodes are members, edges represent friendships outside the club.
 */
export declare const KARATE: BenchmarkDatasetMeta;

/**
 * Les Misérables character co-appearance network.
 *
 * Characters from Victor Hugo's novel connected by co-appearance.
 * Nodes are characters, edges weighted by number of co-appearances.
 */
export declare const LESMIS: BenchmarkDatasetMeta;

/**
 * DBLP co-authorship network.
 *
 * Large-scale collaboration network from computer science bibliography.
 * Nodes are authors, edges represent co-authorship on publications.
 * Note: This is a large dataset (300K+ nodes) and may take time to load.
 */
export declare const DBLP: BenchmarkDatasetMeta;

/**
 * All available benchmark datasets.
 */
export declare const BENCHMARK_DATASETS: BenchmarkDatasetMeta[];

/**
 * Map of dataset IDs to metadata.
 */
export declare const DATASETS_BY_ID: Map<string, BenchmarkDatasetMeta>;

/**
 * Resolve the path to a benchmark dataset file.
 *
 * @param meta - Dataset metadata
 * @param benchmarksRoot - Optional root directory for benchmarks (defaults to repo data/benchmarks/)
 * @returns Absolute path to the dataset file
 */
export declare const resolveBenchmarkPath: (meta: BenchmarkDatasetMeta, benchmarksRoot?: string) => string;

/**
 * Load a benchmark dataset.
 *
 * Uses remote URL with caching if configured, otherwise loads from local file.
 *
 * @param meta - Dataset metadata
 * @param benchmarksRoot - Optional root directory for benchmarks (only used if no remoteUrl)
 * @returns Loaded benchmark with graph and metadata
 * @throws Error if file not found or parsing fails
 */
export declare const loadBenchmark: (meta: BenchmarkDatasetMeta, benchmarksRoot?: string) => Promise<LoadedBenchmark>;

/**
 * Load a benchmark dataset by ID.
 *
 * @param id - Dataset identifier (e.g., 'cora', 'citeseer', 'facebook')
 * @param benchmarksRoot - Optional root directory for benchmarks
 * @returns Loaded benchmark with graph and metadata
 * @throws Error if dataset ID not found
 */
export declare const loadBenchmarkById: (id: string, benchmarksRoot?: string) => Promise<LoadedBenchmark>;

/**
 * Load all benchmark datasets.
 *
 * @param benchmarksRoot - Optional root directory for benchmarks
 * @returns Map of dataset ID to loaded benchmark
 */
export declare const loadAllBenchmarks: (benchmarksRoot?: string) => Promise<Map<string, LoadedBenchmark>>;

/**
 * Load a benchmark dataset from a URL.
 *
 * This function works in both browser and Node.js environments.
 * Automatically handles gzip-compressed files (.gz extension).
 *
 * @param url - URL to the edge list file (can be .txt or .txt.gz)
 * @param meta - Dataset metadata (for parsing configuration)
 * @returns Loaded benchmark with graph and metadata
 * @throws Error if fetch fails or parsing fails
 *
 * @example
 * ```typescript
 * // Plain text file
 * const benchmark = await loadBenchmarkFromUrl(
 *   'https://raw.githubusercontent.com/user/repo/main/data/karate.edges',
 *   KARATE
 * );
 *
 * // Gzip-compressed file (automatically decompressed)
 * const compressed = await loadBenchmarkFromUrl(
 *   'https://snap.stanford.edu/data/facebook_combined.txt.gz',
 *   FACEBOOK
 * );
 * ```
 */
export declare const loadBenchmarkFromUrl: (url: string, meta: BenchmarkDatasetMeta) => Promise<LoadedBenchmark>;

/**
 * Load a benchmark dataset by ID from a URL.
 *
 * If the dataset has a remoteUrl configured, uses that. Otherwise, you must provide a URL.
 * Works in both browser and Node.js environments.
 *
 * @param id - Dataset identifier (e.g., 'cora', 'karate')
 * @param url - Optional URL override (required if dataset has no remoteUrl)
 * @returns Loaded benchmark with graph and metadata
 * @throws Error if dataset ID not found or no URL available
 *
 * @example
 * ```typescript
 * // Using custom URL
 * const karate = await loadBenchmarkByIdFromUrl('karate',
 *   'https://example.com/datasets/karate.edges'
 * );
 * ```
 */
export declare const loadBenchmarkByIdFromUrl: (id: string, url?: string) => Promise<LoadedBenchmark>;

/**
 * Load a benchmark from edge list content string.
 *
 * This is the most flexible loader - works with any string content.
 * Useful when you've already fetched the data or have it embedded.
 *
 * @param content - Edge list content as string
 * @param meta - Dataset metadata (for parsing configuration)
 * @returns Loaded benchmark with graph and metadata
 *
 * @example
 * ```typescript
 * const content = "1 2\n2 3\n3 1";
 * const benchmark = loadBenchmarkFromContent(content, {
 *   ...KARATE,
 *   expectedNodes: 3,
 *   expectedEdges: 3
 * });
 * ```
 */
export declare const loadBenchmarkFromContent: (content: string, meta: BenchmarkDatasetMeta) => LoadedBenchmark;

/**
 * Create a custom benchmark metadata for ad-hoc datasets.
 *
 * Helper function to create metadata for datasets not in the standard list.
 *
 * @param options - Partial metadata (id and name are required)
 * @returns Complete benchmark metadata
 *
 * @example
 * ```typescript
 * const myDataset = createBenchmarkMeta({
 *   id: 'my-graph',
 *   name: 'My Custom Graph',
 *   expectedNodes: 100,
 *   expectedEdges: 500,
 *   directed: false
 * });
 *
 * const benchmark = await loadBenchmarkFromUrl(
 *   'https://example.com/my-graph.edges',
 *   myDataset
 * );
 * ```
 */
export declare const createBenchmarkMeta: (options: Pick<BenchmarkDatasetMeta, 'id' | 'name'> & Partial<Omit<BenchmarkDatasetMeta, 'id' | 'name'>>) => BenchmarkDatasetMeta;

/**
 * Get summary statistics for a loaded benchmark.
 *
 * @param benchmark - Loaded benchmark
 * @returns Summary string
 */
export declare const getBenchmarkSummary: (benchmark: LoadedBenchmark) => string;

/**
 * Validate that a loaded benchmark matches expected properties.
 *
 * @param benchmark - Loaded benchmark
 * @param tolerance - Allowed percentage difference (default 5%)
 * @returns Validation result with any warnings
 */
export declare const validateBenchmark: (benchmark: LoadedBenchmark, tolerance?: number) => {
    valid: boolean;
    warnings: string[];
};

/**
 * Get test node pairs for a benchmark dataset.
 *
 * Returns representative node pairs that can be used in path ranking tests.
 * These pairs use actual node IDs from the graph, not generic indices.
 *
 * @param benchmarkId - Dataset identifier (e.g., 'citeseer', 'lesmis')
 * @param pairIndex - Index of the pair to return (defaults to 0)
 * @returns Node pair with source and target IDs
 * @throws Error if benchmark not found or has no test pairs defined
 *
 * @example
 * ```typescript
 * const { source, target } = getTestNodePair('lesmis'); // { source: "Valjean", target: "Javert" }
 * const { source, target } = getTestNodePair('lesmis', 1); // { source: "Myriel", target: "Cosette" }
 * ```
 */
export declare const getTestNodePair: (benchmarkId: string, pairIndex?: number) => TestNodePair;
//# sourceMappingURL=benchmark-datasets.d.ts.map