/**
 * @fileoverview Base Extractor - Abstract base class for language-specific code extractors
 * @module @nahisaho/musubix-security/extractors/base-extractor
 * @trace TSK-001, REQ-SEC-LANG-001, REQ-SEC-LANG-002, REQ-SEC-LANG-003, REQ-SEC-LANG-004
 */
import type { SourceLocation } from '../types/vulnerability.js';

/**
 * Supported programming languages
 * @trace REQ-SEC-LANG-001, REQ-SEC-LANG-002, REQ-SEC-LANG-003, REQ-SEC-LANG-004
 */
export type SupportedLanguage =
  | 'typescript'
  | 'javascript'
  | 'python'
  | 'php'
  | 'go'
  | 'java'
  | 'ruby'
  | 'rust'
  | 'kotlin'
  | 'swift';

/**
 * File extension to language mapping
 *
 * NOTE(review): the type arguments were missing from this declaration; a
 * string-keyed map to SupportedLanguage is assumed - confirm against the
 * implementation file.
 */
export declare const EXTENSION_TO_LANGUAGE: Record<string, SupportedLanguage>;

/**
 * AST Node type
 */
export interface ASTNode {
  /** Unique node ID */
  id: string;
  /** Node type (e.g., 'FunctionDeclaration', 'CallExpression') */
  type: string;
  /** Source location */
  location: SourceLocation;
  /**
   * Node-specific properties
   *
   * NOTE(review): value type assumed to be `unknown` - confirm.
   */
  properties: Record<string, unknown>;
  /** Child node IDs */
  children: string[];
  /** Parent node ID */
  parent?: string;
  /** Raw text content */
  text?: string;
  /**
   * Additional metadata
   *
   * NOTE(review): value type assumed to be `unknown` - confirm.
   */
  metadata?: Record<string, unknown>;
}

/**
 * AST Edge type
 */
export interface ASTEdge {
  /** Source node ID */
  from: string;
  /** Target node ID */
  to: string;
  /** Edge label (e.g., 'body', 'arguments', 'condition') */
  label: string;
}

/**
 * Data Flow Graph Node
 */
export interface DFGNode {
  /** Unique node ID */
  id: string;
  /** Corresponding AST node ID */
  astNodeId: string;
  /** Node type */
  nodeType: 'source' | 'sink' | 'transform' | 'sanitizer' | 'propagator';
  /** Taint label (if tainted) */
  taintLabel?: string;
  /** Expression text */
  expression: string;
  /** Source location */
  location: SourceLocation;
  /**
   * Additional properties
   *
   * NOTE(review): value type assumed to be `unknown` - confirm.
   */
  properties: Record<string, unknown>;
  /** Variable name (optional) */
  variable?: string;
  /** Operation type (optional) */
  operation?: 'read' | 'write' | 'call' | 'param' | 'return';
  /** Predecessor node IDs */
  predecessors?: string[];
  /** Successor node IDs */
  successors?: string[];
}

/**
 * Data Flow Graph Edge
 */
export interface DFGEdge {
  /** Source node ID */
  from: string;
  /** Target node ID */
  to: string;
  /** Edge type */
  edgeType: 'data' | 'control' | 'implicit';
  /**
   * Edge properties
   *
   * NOTE(review): value type assumed to be `unknown` - confirm.
   */
  properties: Record<string, unknown>;
  /** Legacy type field (alias for edgeType) */
  type?: 'data' | 'control' | 'call' | 'return';
  /** Variable name */
  variable?: string;
  /** Condition expression */
  condition?: string;
}

/**
 * Data Flow Graph
 */
export interface DataFlowGraph {
  /** All DFG nodes (presumably keyed by node ID - confirm) */
  nodes: Map<string, DFGNode>;
  /** All DFG edges */
  edges: DFGEdge[];
  /** Source node IDs */
  sources: string[];
  /** Sink node IDs */
  sinks: string[];
}

/**
 * Basic Block in CFG
 */
export interface BasicBlock {
  /** Unique block ID */
  id: string;
  /** AST node IDs in this block */
  statements: string[];
  /** Predecessor block IDs */
  predecessors: string[];
  /** Successor block IDs */
  successors: string[];
  /** Dominator block IDs (optional) */
  dominators?: string[];
  /** Is this a loop header? */
  loopHeader?: boolean;
  /** Is this an entry block? */
  isEntry?: boolean;
  /** Is this an exit block? */
  isExit?: boolean;
}

/**
 * CFG Edge
 */
export interface CFGEdge {
  /** Source block ID */
  from: string;
  /** Target block ID */
  to: string;
  /** Edge type */
  edgeType: 'normal' | 'true' | 'false' | 'exception' | 'finally' | 'break' | 'continue';
  /** Condition expression (for conditional edges) */
  condition?: string;
  /** Legacy type field (alias for edgeType) */
  type?: 'sequential' | 'conditional' | 'back' | 'exception';
}

/**
 * Control Flow Graph
 */
export interface ControlFlowGraph {
  /** All basic blocks (presumably keyed by block ID - confirm) */
  blocks: Map<string, BasicBlock>;
  /** All CFG edges */
  edges: CFGEdge[];
  /** Entry block IDs */
  entryBlocks: string[];
  /** Exit block IDs */
  exitBlocks: string[];
  /** Entry block ID (singular, for legacy compatibility) */
  entry?: string;
  /** Exit block ID (singular, for legacy compatibility) */
  exit?: string;
}

/**
 * Symbol kind
 */
export type SymbolKind =
  | 'function'
  | 'method'
  | 'class'
  | 'interface'
  | 'variable'
  | 'constant'
  | 'parameter'
  | 'property'
  | 'import'
  | 'export'
  | 'type'
  | 'enum';

/**
 * Symbol definition
 */
export interface Symbol {
  /** Symbol name */
  name: string;
  /** Symbol kind */
  kind: SymbolKind;
  /** Definition location */
  location: SourceLocation;
  /** Type (if known) */
  type?: string;
  /** Scope ID */
  scopeId: string;
  /** Is exported? */
  isExported?: boolean;
  /**
   * Additional properties
   *
   * NOTE(review): value type assumed to be `unknown` - confirm.
   */
  properties: Record<string, unknown>;
}

/**
 * Function symbol with additional info
 */
export interface FunctionSymbol extends Symbol {
  kind: 'function' | 'method';
  /** Parameters */
  parameters: ParameterInfo[];
  /** Return type */
  returnType?: string;
  /** Is async? */
  isAsync?: boolean;
  /** Is generator? */
  isGenerator?: boolean;
}

/**
 * Parameter info
 */
export interface ParameterInfo {
  /** Parameter name */
  name: string;
  /** Parameter index */
  index: number;
  /** Parameter type */
  type?: string;
  /** Has default value? */
  hasDefault?: boolean;
  /** Is rest parameter? */
  isRest?: boolean;
}

/**
 * Class symbol with additional info
 *
 * `properties` is omitted from the base Symbol because this interface
 * redeclares it as a string array, which is not assignable to the base
 * record type.
 * NOTE(review): the exact set of omitted keys is assumed - confirm.
 */
export interface ClassSymbol extends Omit<Symbol, 'properties'> {
  kind: 'class';
  /** Super class name */
  superClass?: string;
  /** Implemented interfaces */
  implements?: string[];
  /** Methods */
  methods: string[];
  /** Class properties (method names) */
  properties: string[];
  /**
   * Additional metadata
   *
   * NOTE(review): value type assumed to be `unknown` - confirm.
   */
  metadata?: Record<string, unknown>;
}

/**
 * Symbol Table
 */
export interface SymbolTable {
  /** All symbols by ID */
  symbols: Map<string, Symbol>;
  /** Function symbols */
  functions: Map<string, FunctionSymbol>;
  /** Class symbols */
  classes: Map<string, ClassSymbol>;
  /** Scopes */
  scopes: Map<string, ScopeInfo>;
  /** Global symbols (for legacy compatibility) */
  global?: Map<string, Symbol>;
  /** Package name (optional) */
  packageName?: string;
}

/**
 * Scope info
 */
export interface ScopeInfo {
  /** Scope ID */
  id: string;
  /** Parent scope ID */
  parentId?: string;
  /** Symbol IDs in this scope */
  symbols: string[];
  /** Scope kind */
  kind: 'global' | 'function' | 'block' | 'class' | 'module';
}

/**
 * Extraction error
 */
export interface ExtractionError {
  /** Error message */
  message: string;
  /** Error location */
  location?: SourceLocation;
  /** Error severity */
  severity: 'error' | 'warning' | 'info';
}

/**
 * Extraction metrics
 */
export interface ExtractionMetrics {
  /** Total lines of code */
  linesOfCode: number;
  /** Number of functions */
  functionCount: number;
  /** Number of classes */
  classCount: number;
  /** Number of AST nodes */
  astNodeCount: number;
  /** Extraction time in milliseconds */
  extractionTime: number;
}

/**
 * Extraction result
 * @trace REQ-SEC-DB-002, REQ-SEC-DB-003, REQ-SEC-DB-004
 */
export interface ExtractionResult {
  /** Language */
  language: SupportedLanguage;
  /** File path */
  filePath: string;
  /** AST root node */
  ast: ASTNode;
  /** All AST nodes (presumably keyed by node ID - confirm) */
  astNodes: Map<string, ASTNode>;
  /** AST edges */
  astEdges: ASTEdge[];
  /** Data flow graph */
  dfg: DataFlowGraph;
  /** Control flow graph */
  cfg: ControlFlowGraph;
  /** Symbol table */
  symbols: SymbolTable;
  /** Extraction errors */
  errors: ExtractionError[];
  /** Extraction metrics */
  metrics: ExtractionMetrics;
  /** Taint paths (optional) */
  taintPaths?: TaintPathInfo[];
}

/**
 * Taint path info (from extraction)
 */
export interface TaintPathInfo {
  /** Source node ID */
  sourceId: string;
  /** Sink node ID */
  sinkId: string;
  /** Intermediate node IDs */
  path: string[];
  /** Taint label */
  label: string;
}

/**
 * Extraction progress
 */
export interface ExtractionProgress {
  /** Current phase */
  phase: 'parsing' | 'ast' | 'dfg' | 'cfg' | 'symbols' | 'done';
  /** Progress percentage (0-100) */
  percentage: number;
  /** Current file */
  file?: string;
}

/**
 * Framework source definition
 */
export interface FrameworkSource {
  /** Pattern to match */
  pattern: RegExp;
  /** Source type */
  type: string;
  /** Description */
  description: string;
  /** Taint label */
  taintLabel: string;
}

/**
 * Framework sink definition
 */
export interface FrameworkSink {
  /** Pattern to match */
  pattern: RegExp;
  /** Sink type */
  type: string;
  /** Vulnerability type this leads to */
  vulnerabilityType: string;
  /** Severity */
  severity: 'critical' | 'high' | 'medium' | 'low';
}

/**
 * Framework sanitizer definition
 */
export interface FrameworkSanitizer {
  /** Pattern to match */
  pattern: RegExp;
  /** What this sanitizer sanitizes */
  sanitizes: string[];
}

/**
 * Framework model
 */
export interface FrameworkModel {
  /** Framework name */
  name: string;
  /** Supported languages */
  languages: SupportedLanguage[];
  /** Sources */
  sources: FrameworkSource[];
  /** Sinks */
  sinks: FrameworkSink[];
  /** Sanitizers */
  sanitizers: FrameworkSanitizer[];
}

/**
 * Extraction options
 */
export interface ExtractionOptions {
  /** Include AST in result */
  includeAST?: boolean;
  /** Include DFG in result */
  includeDFG?: boolean;
  /** Include CFG in result */
  includeCFG?: boolean;
  /** Include symbols in result */
  includeSymbols?: boolean;
  /** Framework models to apply */
  frameworkModels?: FrameworkModel[];
  /** Maximum AST depth */
  maxDepth?: number;
  /** Timeout in milliseconds */
  timeout?: number;
}

/**
 * Default extraction options
 *
 * NOTE(review): typed as the fully-populated form of ExtractionOptions;
 * the type argument was missing from this declaration - confirm.
 */
export declare const DEFAULT_EXTRACTION_OPTIONS: Required<ExtractionOptions>;

/**
 * Abstract base class for language-specific code extractors
 * @trace TSK-001
 */
export declare abstract class BaseExtractor {
  /**
   * Get the supported language
   */
  abstract readonly language: SupportedLanguage;

  /**
   * Get supported file extensions
   */
  abstract readonly extensions: string[];

  /**
   * Check if a file is supported by this extractor
   */
  supports(filePath: string): boolean;

  /**
   * Extract code information from source
   * @param source - Source code content
   * @param filePath - File path
   * @param options - Extraction options
   * @returns Extraction result
   */
  extract(source: string, filePath: string, options?: ExtractionOptions): Promise<ExtractionResult>;

  /**
   * Build AST from source code
   * @param source - Source code
   * @param filePath - File path
   * @returns AST root node and all nodes
   */
  protected abstract buildAST(
    source: string,
    filePath: string,
  ): Promise<{
    ast: ASTNode;
    astNodes: Map<string, ASTNode>;
    astEdges: ASTEdge[];
  }>;

  /**
   * Build Data Flow Graph from AST
   * @param astNodes - All AST nodes
   * @param astEdges - AST edges
   * @param frameworkModels - Framework models to apply
   * @returns Data Flow Graph
   */
  protected abstract buildDFG(
    astNodes: Map<string, ASTNode>,
    astEdges: ASTEdge[],
    frameworkModels: FrameworkModel[],
  ): Promise<DataFlowGraph>;

  /**
   * Build Control Flow Graph from AST
   * @param astNodes - All AST nodes
   * @param astEdges - AST edges
   * @returns Control Flow Graph
   */
  protected abstract buildCFG(
    astNodes: Map<string, ASTNode>,
    astEdges: ASTEdge[],
  ): Promise<ControlFlowGraph>;

  /**
   * Extract symbols from AST
   * @param astNodes - All AST nodes
   * @returns Symbol table
   */
  protected abstract extractSymbols(astNodes: Map<string, ASTNode>): Promise<SymbolTable>;

  /**
   * Get framework models supported by this extractor
   */
  abstract getFrameworkModels(): FrameworkModel[];

  /**
   * Generate unique node ID
   */
  protected generateNodeId(prefix?: string): string;

  /**
   * Create source location from line/column info
   */
  protected createLocation(
    file: string,
    startLine: number,
    startColumn: number,
    endLine: number,
    endColumn: number,
  ): SourceLocation;
}

/**
 * Factory function to create extractor
 */
export declare function createExtractor(_language: SupportedLanguage): BaseExtractor;

/**
 * Detect language from file path
 */
export declare function detectLanguage(filePath: string): SupportedLanguage | null;
//# sourceMappingURL=base-extractor.d.ts.map