/**
 * Federal Register API Extractor
 *
 * Extracts Executive Orders, proposed rules, and final rules
 * related to AI regulation and preemption
 *
 * @author Jason Pellerin AI Solutionist
 */

import { log } from 'apify';
import {
  FEDERAL_REGISTER_API,
  EXECUTIVE_ORDER_KEYWORDS,
  AI_REGULATORY_AGENCIES,
} from '../utils/constants.js';
import type {
  Input,
  ExtractionResult,
  PreemptionEvent,
  ExecutiveOrder,
  FederalRegisterDocument,
  FederalRegisterApiResponse,
  FederalRegisterResult,
  RagChunk,
} from '../utils/types.js';
import { calculatePreemptionRisk, generateRagChunks } from '../analysis/risk-scorer.js';

const DELAY_MS = 200; // Respectful rate limiting

/** Look-back window applied when the caller supplies no `dateRangeStart`. */
const DEFAULT_LOOKBACK_MONTHS = 6;

/** Upper bound on `per_page` for the Executive Order query. */
const MAX_EO_PER_PAGE = 100;

/** Upper bound on `per_page` for each rule / proposed-rule / notice query. */
const MAX_DOC_PER_PAGE = 50;

/** HTTP status counted as a rate-limit hit in the returned metrics. */
const HTTP_TOO_MANY_REQUESTS = 429;

/** Full-text search term used for rules, proposed rules and notices. */
const AI_SEARCH_TERM = 'artificial intelligence OR algorithmic OR automated decision';

/** Document type codes used by this module for `documentType`. */
type DocumentTypeCode = 'RULE' | 'PRORULE' | 'NOTICE' | 'PRESDOC';

/**
 * Maps the `type` value of an API result to a document type code.
 * Accepts both the request-style codes and the human-readable labels.
 * NOTE(review): the API appears to return labels such as "Proposed Rule" in
 * responses while accepting codes such as "PRORULE" in requests — confirm
 * against the Federal Register API documentation.
 */
const DOCUMENT_TYPE_CODES: ReadonlyMap<string, DocumentTypeCode> = new Map<string, DocumentTypeCode>([
  ['rule', 'RULE'],
  ['proposed rule', 'PRORULE'],
  ['prorule', 'PRORULE'],
  ['notice', 'NOTICE'],
  ['presidential document', 'PRESDOC'],
  ['presdoc', 'PRESDOC'],
  ['presdocu', 'PRESDOC'],
]);

/**
 * State detectors, in the order results are reported.
 * `name` is matched case-insensitively; `abbreviation` is matched in upper
 * case only, so ordinary words and prefixes ("co-regulation", "il", "ca.")
 * are not mistaken for a postal code.
 */
const STATE_MATCHERS: ReadonlyArray<{ code: string; name: RegExp; abbreviation: RegExp }> = [
  { code: 'CO', name: /\b(?:colorado|colo\.)/i, abbreviation: /\bCO\b/ },
  { code: 'CA', name: /\b(?:california|calif\.)/i, abbreviation: /\bCA\b/ },
  { code: 'CT', name: /\b(?:connecticut|conn\.)/i, abbreviation: /\bCT\b/ },
  { code: 'IL', name: /\b(?:illinois|ill\.)/i, abbreviation: /\bIL\b/ },
  { code: 'TX', name: /\b(?:texas|tex\.)/i, abbreviation: /\bTX\b/ },
  { code: 'NY', name: /\b(?:new york|n\.y\.)/i, abbreviation: /\bNY\b/ },
  // "West Virginia" is a different state and must not count as Virginia.
  { code: 'VA', name: /(?<!\bwest\s+)\bvirginia/i, abbreviation: /\bVA\b/ },
  { code: 'WA', name: /\b(?:washington state|wash\.)/i, abbreviation: /\bWA\b/ },
];

/** Text patterns and the provision label each one contributes, in output order. */
const PROVISION_RULES: ReadonlyArray<{ pattern: RegExp; label: string }> = [
  // Colorado AI Act provisions
  { pattern: /SB.?24.?205|Colorado AI Act|CAIA/i, label: 'Colorado SB 24-205' },
  { pattern: /algorithmic discrimination|disparate impact/i, label: 'Section 6-1-1602 (Algorithmic Discrimination)' },
  { pattern: /impact assessment|risk assessment/i, label: 'Section 6-1-1703 (Impact Assessments)' },
  { pattern: /consumer notification|consumer rights/i, label: 'Section 6-1-1703.4 (Consumer Rights)' },
  // California provisions
  { pattern: /SB.?1047|frontier AI/i, label: 'California SB 1047' },
];

/** Agency-name fragments (lower case) and the authority each maps to; first match wins. */
const AUTHORITY_RULES: ReadonlyArray<{ fragment: string; authority: import('../utils/types.js').FederalAuthority }> = [
  { fragment: 'justice', authority: 'doj' },
  { fragment: 'trade commission', authority: 'ftc' },
  { fragment: 'equal employment', authority: 'eeoc' },
  { fragment: 'securities', authority: 'sec' },
  { fragment: 'financial protection', authority: 'cfpb' },
  { fragment: 'health', authority: 'hhs' },
  { fragment: 'commerce', authority: 'commerce' },
];

/** Inauguration dates (newest first) used to attribute a document to a president. */
const PRESIDENTIAL_TERMS: ReadonlyArray<{ inaugurated: string; president: string }> = [
  { inaugurated: '2025-01-20', president: 'Trump' },
  { inaugurated: '2021-01-20', president: 'Biden' },
  { inaugurated: '2017-01-20', president: 'Trump' },
  { inaugurated: '2009-01-20', president: 'Obama' },
  { inaugurated: '2001-01-20', president: 'Bush' },
  { inaugurated: '1993-01-20', president: 'Clinton' },
];

/**
 * Extract Executive Orders related to AI.
 *
 * Issues a single request for presidential documents of type Executive Order,
 * transforms every result, and keeps those whose preemption risk score is at
 * least `input.riskScoreThreshold`.
 *
 * @param input - Actor input; reads `maxResults`, `dateRangeStart`,
 *   `dateRangeEnd`, `riskScoreThreshold` (other fields are used by the
 *   transform and risk-scoring helpers).
 * @returns Extraction result. Never rejects: any failure is logged, recorded
 *   in `errors`, and reported through `success: false`.
 */
export async function extractExecutiveOrders(input: Input): Promise<ExtractionResult<ExecutiveOrder>> {
  log.info('📜 Extracting Executive Orders from Federal Register...');

  const startTime = Date.now();
  const results: ExecutiveOrder[] = [];
  const errors: string[] = [];
  let requestCount = 0;
  let rateLimitHits = 0;

  try {
    // Get ALL recent Executive Orders - use correct API parameter
    // presidential_document_type_id: 2 = Executive Order, 3 = Proclamation
    // NOTE(review): fields such as executive_order_number, signing_date and
    // citation may only be returned when requested via `fields[]` — confirm
    // against the API documentation.
    const params = new URLSearchParams({
      'conditions[presidential_document_type_id][]': '2', // Executive Orders
      'per_page': toPerPage(input.maxResults * 2, MAX_EO_PER_PAGE),
      'order': 'newest',
    });

    // Add date range - default to 6 months for EOs
    appendDateRange(params, input);

    const url = `${FEDERAL_REGISTER_API}/documents.json?${params.toString()}`;
    log.info(`Fetching Executive Orders: ${url}`);

    const response = await fetch(url);
    requestCount++;

    if (!response.ok) {
      if (response.status === HTTP_TOO_MANY_REQUESTS) rateLimitHits++;
      throw new Error(`Federal Register API error: ${response.status}`);
    }

    const data = await response.json() as FederalRegisterApiResponse;
    log.info(`Found ${data.count ?? 0} Executive Orders`);

    if (data.results && Array.isArray(data.results)) {
      for (const doc of data.results) {
        const eo = transformToExecutiveOrder(doc, input);
        if (eo && eo.preemptionRisk.score >= input.riskScoreThreshold) {
          results.push(eo);
        }
      }
    } else {
      log.warning('No results array in Federal Register response');
    }
  } catch (error) {
    const msg = `Executive Order extraction failed: ${error}`;
    log.error(msg);
    errors.push(msg);
  }

  return {
    success: errors.length === 0,
    data: results,
    totalFound: results.length,
    extracted: results.length,
    errors,
    warnings: [],
    metrics: {
      durationMs: Date.now() - startTime,
      requestCount,
      rateLimitHits,
      retries: 0,
      deduplicatedCount: 0,
    },
  };
}

/**
 * Extract Federal Register documents (rules, proposed rules, notices).
 *
 * Runs one search per document type (notices always; proposed and final rules
 * when enabled in `input`), waiting `DELAY_MS` before each request. Results at
 * or above `input.riskScoreThreshold` are kept and then de-duplicated by
 * document number.
 *
 * @param input - Actor input; reads `includeProposedRules`,
 *   `includeFinalRules`, `maxResults`, `dateRangeStart`, `dateRangeEnd`,
 *   `riskScoreThreshold`.
 * @returns Extraction result. Never rejects. A non-OK response for one
 *   document type is recorded in `warnings` and the remaining types are still
 *   fetched; a thrown error is recorded in `errors`.
 */
export async function extractFederalRegisterDocs(input: Input): Promise<ExtractionResult<FederalRegisterDocument>> {
  log.info('📋 Extracting Federal Register rules and notices...');

  const startTime = Date.now();
  const results: FederalRegisterDocument[] = [];
  const errors: string[] = [];
  const warnings: string[] = [];
  let requestCount = 0;
  let rateLimitHits = 0;

  try {
    // Build document type conditions
    const docTypes: string[] = [];
    if (input.includeProposedRules) docTypes.push('PRORULE');
    if (input.includeFinalRules) docTypes.push('RULE');
    docTypes.push('NOTICE'); // Always include notices

    // Split the result budget evenly; the API needs a whole number of rows.
    const perPage = toPerPage(input.maxResults / docTypes.length, MAX_DOC_PER_PAGE);

    for (const docType of docTypes) {
      const params = new URLSearchParams({
        'conditions[type][]': docType,
        'conditions[term]': AI_SEARCH_TERM,
        'per_page': perPage,
        'order': 'newest',
      });

      // Add date range
      appendDateRange(params, input);

      const url = `${FEDERAL_REGISTER_API}/documents.json?${params.toString()}`;
      log.debug(`Fetching ${docType}: ${url}`);

      await delay(DELAY_MS);
      const response = await fetch(url);
      requestCount++;

      if (!response.ok) {
        if (response.status === HTTP_TOO_MANY_REQUESTS) rateLimitHits++;
        const warning = `Federal Register API error for ${docType}: ${response.status}`;
        log.warning(warning);
        // Surface the partial failure to the caller instead of only logging it.
        warnings.push(warning);
        continue;
      }

      const data = await response.json() as FederalRegisterApiResponse;
      log.info(`Found ${data.count ?? 0} ${docType} documents`);

      if (data.results && Array.isArray(data.results)) {
        for (const doc of data.results) {
          const frDoc = transformToFederalRegisterDoc(doc, input);
          if (frDoc && frDoc.preemptionRisk.score >= input.riskScoreThreshold) {
            results.push(frDoc);
          }
        }
      }
    }
  } catch (error) {
    const msg = `Federal Register extraction failed: ${error}`;
    log.error(msg);
    errors.push(msg);
  }

  // Deduplicate by document number
  const deduplicated = deduplicateByField(results, 'documentNumber');

  return {
    success: errors.length === 0,
    data: deduplicated,
    totalFound: results.length,
    extracted: deduplicated.length,
    errors,
    warnings,
    metrics: {
      durationMs: Date.now() - startTime,
      requestCount,
      rateLimitHits,
      retries: 0,
      deduplicatedCount: results.length - deduplicated.length,
    },
  };
}

/**
 * Transform Federal Register API result to ExecutiveOrder.
 *
 * @param doc - One entry of the API `results` array.
 * @param input - Actor input; `targetStates` drives state detection and the
 *   whole object is passed to the risk scorer.
 * @returns The populated Executive Order. The current implementation always
 *   returns an object; `null` is kept in the signature for callers that guard
 *   against it.
 */
function transformToExecutiveOrder(doc: FederalRegisterResult, input: Input): ExecutiveOrder | null {
  // Accept EOs with or without explicit EO number (some API results may not populate this)
  const eoNumber = doc.executive_order_number || extractEoNumberFromTitle(doc.title);
  const id = eoNumber ? `eo-${eoNumber}` : `eo-${doc.document_number}`;

  const abstract = doc.abstract || '';
  const affectedStates = detectAffectedStates(doc.title + ' ' + abstract, input.targetStates);

  const baseEvent: Partial<ExecutiveOrder> = {
    id,
    eventType: 'executive_order',
    title: doc.title,
    summary: doc.abstract || doc.title,
    source: 'federal_register',
    sourceUrl: doc.html_url,
    sourceId: doc.document_number,
    datePublished: doc.publication_date,
    dateEffective: doc.effective_on || doc.signing_date,
    dateDiscovered: new Date().toISOString(),
    federalAuthority: 'executive_branch',
    affectedStates,
    affectedProvisions: detectAffectedProvisions(abstract),
    eoNumber: eoNumber || undefined,
    president: extractPresident(doc),
    federalRegisterCitation: doc.citation || `${doc.document_number}`,
    directedAgencies: doc.agencies?.map(a => a.name) || [],
    implementationDeadlines: extractDeadlines(doc),
    rawData: doc,
    extractedAt: new Date().toISOString(),
  };

  // Calculate risk and generate RAG chunks
  const risk = calculatePreemptionRisk(baseEvent as PreemptionEvent, input);
  const ragChunks = generateRagChunks(baseEvent as PreemptionEvent);

  return {
    ...baseEvent,
    preemptionRisk: risk.risk,
    complianceImpact: risk.impact,
    ragChunks,
    citations: extractCitations(doc),
    relatedEvents: [],
  } as ExecutiveOrder;
}

/**
 * Transform Federal Register API result to FederalRegisterDocument.
 *
 * @param doc - One entry of the API `results` array.
 * @param input - Actor input; `targetStates` drives state detection and the
 *   whole object is passed to the risk scorer.
 * @returns The populated document. The current implementation always returns
 *   an object; `null` is kept in the signature for callers that guard
 *   against it.
 */
function transformToFederalRegisterDoc(doc: FederalRegisterResult, input: Input): FederalRegisterDocument | null {
  const typeMap: Partial<Record<DocumentTypeCode, FederalRegisterDocument['eventType']>> = {
    'RULE': 'federal_rule_final',
    'PRORULE': 'federal_rule_proposed',
    'NOTICE': 'agency_guidance',
  };

  // Accept both the request code ("PRORULE") and the display label ("Proposed Rule").
  const typeCode = normalizeDocumentType(doc.type);
  const eventType = (typeCode ? typeMap[typeCode] : undefined) || 'agency_guidance';
  const id = `fr-${doc.document_number}`;

  const abstract = doc.abstract || '';
  const affectedStates = detectAffectedStates(doc.title + ' ' + abstract, input.targetStates);
  const firstCfr = doc.cfr_references?.[0];

  const baseEvent: Partial<FederalRegisterDocument> = {
    id,
    eventType,
    title: doc.title,
    summary: doc.abstract || doc.title,
    source: 'federal_register',
    sourceUrl: doc.html_url,
    sourceId: doc.document_number,
    datePublished: doc.publication_date,
    dateEffective: doc.effective_on,
    dateDiscovered: new Date().toISOString(),
    federalAuthority: detectFederalAuthority(doc.agencies),
    affectedStates,
    affectedProvisions: detectAffectedProvisions(abstract),
    documentNumber: doc.document_number,
    // Unrecognised values are passed through unchanged, as before.
    documentType: typeCode ?? (doc.type as DocumentTypeCode),
    agencies: doc.agencies?.map(a => a.name) || [],
    cfr: firstCfr ? `${firstCfr.title} CFR ${firstCfr.part}` : undefined,
    rin: doc.regulation_id_numbers?.[0],
    rawData: doc,
    extractedAt: new Date().toISOString(),
  };

  // Add comment period if applicable
  if (doc.comments_close_on) {
    baseEvent.commentPeriod = {
      startDate: doc.publication_date,
      endDate: doc.comments_close_on,
      isOpen: new Date(doc.comments_close_on) > new Date(),
    };
  }

  // Calculate risk and generate RAG chunks
  const risk = calculatePreemptionRisk(baseEvent as PreemptionEvent, input);
  const ragChunks = generateRagChunks(baseEvent as PreemptionEvent);

  return {
    ...baseEvent,
    preemptionRisk: risk.risk,
    complianceImpact: risk.impact,
    ragChunks,
    citations: extractCitations(doc),
    relatedEvents: [],
  } as FederalRegisterDocument;
}

// ═══════════════════════════════════════════════════════════════════════════
// Helper Functions
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Convert a requested page size to the `per_page` query value: a whole number
 * between 1 and `max`. A non-finite request falls back to `max`.
 */
function toPerPage(requested: number, max: number): string {
  const whole = Number.isFinite(requested) ? Math.floor(requested) : max;
  return String(Math.min(Math.max(whole, 1), max));
}

/**
 * ISO date (YYYY-MM-DD, UTC) `DEFAULT_LOOKBACK_MONTHS` months before `now`.
 * The day of month is clamped to the target month's length so that e.g.
 * 31 August maps to the last day of February rather than rolling into March.
 */
function defaultStartDate(now: Date = new Date()): string {
  const target = new Date(Date.UTC(now.getUTCFullYear(), now.getUTCMonth() - DEFAULT_LOOKBACK_MONTHS, 1));
  // Day 0 of the following month is the last day of the target month.
  const lastDay = new Date(Date.UTC(target.getUTCFullYear(), target.getUTCMonth() + 1, 0)).getUTCDate();
  target.setUTCDate(Math.min(now.getUTCDate(), lastDay));
  return target.toISOString().slice(0, 10);
}

/**
 * Append publication-date conditions to `params`: the lower bound is
 * `input.dateRangeStart` or the default look-back date; the upper bound is
 * added only when `input.dateRangeEnd` is set.
 */
function appendDateRange(params: URLSearchParams, input: Input): void {
  params.append('conditions[publication_date][gte]', input.dateRangeStart || defaultStartDate());
  if (input.dateRangeEnd) {
    params.append('conditions[publication_date][lte]', input.dateRangeEnd);
  }
}

/** Resolve an API `type` value (code or display label) to a document type code, if recognised. */
function normalizeDocumentType(rawType: string | undefined): DocumentTypeCode | undefined {
  return DOCUMENT_TYPE_CODES.get((rawType ?? '').trim().toLowerCase());
}

/**
 * Determine which target states a document text concerns.
 *
 * - If any state is mentioned explicitly, returns the mentioned states that
 *   are in `targetStates` (or all mentioned states when `targetStates`
 *   contains 'ALL').
 * - Otherwise, if the text is about both AI and preemption/state law, returns
 *   every entry of `targetStates` except 'ALL'.
 * - Otherwise returns an empty array.
 */
function detectAffectedStates(text: string, targetStates: string[]): string[] {
  // Check for state-specific mentions
  const mentioned = STATE_MATCHERS
    .filter(({ name, abbreviation }) => name.test(text) || abbreviation.test(text))
    .map(({ code }) => code);

  if (mentioned.length > 0) {
    const allTargeted = targetStates.includes('ALL');
    return mentioned.filter(s => allTargeted || targetStates.includes(s));
  }

  // If no specific states mentioned but it's about AI regulation, assume all target states affected
  const isGeneralAI = /artificial intelligence|AI regulation|algorithmic|automated decision/i.test(text);
  const isPreemption = /preempt|state law|state regulation|federal.*state/i.test(text);

  if (isGeneralAI && isPreemption) {
    return targetStates.filter(s => s !== 'ALL');
  }

  return [];
}

/** Return the provision labels whose pattern occurs in `text`, in table order. */
function detectAffectedProvisions(text: string): string[] {
  return PROVISION_RULES
    .filter(({ pattern }) => pattern.test(text))
    .map(({ label }) => label);
}

/**
 * Classify the issuing authority from the agency list: no agencies maps to
 * 'executive_branch', more than one to 'multiple', and a single agency is
 * matched by name fragment (falling back to 'executive_branch').
 */
function detectFederalAuthority(agencies?: { name: string; id: number }[]): import('../utils/types.js').FederalAuthority {
  if (!agencies || agencies.length === 0) return 'executive_branch';
  if (agencies.length > 1) return 'multiple';

  const [only] = agencies;
  // Tolerate an entry without a name rather than throwing on it.
  const agencyName = (only?.name ?? '').toLowerCase();

  const rule = AUTHORITY_RULES.find(({ fragment }) => agencyName.includes(fragment));
  return rule ? rule.authority : 'executive_branch';
}

/**
 * Extract EO number from title if not in API field.
 *
 * @returns The digits following "Executive Order" / "E.O." / "EO", or `null`.
 */
function extractEoNumberFromTitle(title: string): string | null {
  // Match patterns like "Executive Order 14281" or "E.O. 14281".
  // The leading \b stops "EO" from matching inside words such as "Video 2024".
  const match = title.match(/\b(?:Executive Order|E\.?O\.?)\s*(\d+)/i);
  return match?.[1] ?? null;
}

/**
 * Infer the president from the signing date (or the publication date when no
 * signing date is present) using the inauguration table.
 *
 * @returns The president's surname, or 'Unknown' when the date is missing,
 *   unparseable, or earlier than the oldest term in the table.
 */
function extractPresident(doc: FederalRegisterResult): string {
  // Infer from signing date or publication date
  const signedAt = new Date(doc.signing_date || doc.publication_date).getTime();
  if (Number.isNaN(signedAt)) return 'Unknown';

  const term = PRESIDENTIAL_TERMS.find(({ inaugurated }) => signedAt >= new Date(inaugurated).getTime());
  return term ? term.president : 'Unknown';
}

/**
 * Build compliance deadlines from the document's effective date and
 * comment-close date, when present. Both are marked as hard deadlines.
 */
function extractDeadlines(doc: FederalRegisterResult): import('../utils/types.js').ComplianceDeadline[] {
  const deadlines: import('../utils/types.js').ComplianceDeadline[] = [];
  const source = `Federal Register ${doc.document_number}`;

  if (doc.effective_on) {
    deadlines.push({
      description: 'Effective date',
      date: doc.effective_on,
      source,
      isHard: true,
    });
  }

  if (doc.comments_close_on) {
    deadlines.push({
      description: 'Comment period closes',
      date: doc.comments_close_on,
      source,
      isHard: true,
    });
  }

  return deadlines;
}

/**
 * Build citations for a document: its own Federal Register citation (when
 * present) followed by one entry per CFR reference.
 */
function extractCitations(doc: FederalRegisterResult): import('../utils/types.js').Citation[] {
  const citations: import('../utils/types.js').Citation[] = [];

  if (doc.citation) {
    citations.push({
      text: doc.citation,
      citation: doc.citation,
      type: doc.executive_order_number ? 'executive_order' : 'regulation',
      url: doc.html_url,
      verified: true,
    });
  }

  for (const cfr of doc.cfr_references ?? []) {
    citations.push({
      text: `${cfr.title} CFR Part ${cfr.part}`,
      citation: `${cfr.title} C.F.R. § ${cfr.part}`,
      type: 'regulation',
      verified: true,
    });
  }

  return citations;
}

/**
 * Return `items` with later duplicates removed, where two items are
 * duplicates when their `field` values are equal under `Set` semantics.
 * The first occurrence of each value is kept and order is preserved.
 */
function deduplicateByField<T>(items: readonly T[], field: keyof T): T[] {
  const seen = new Set<unknown>();
  return items.filter(item => {
    const value = item[field];
    if (seen.has(value)) return false;
    seen.add(value);
    return true;
  });
}

/** Resolve after `ms` milliseconds. */
function delay(ms: number): Promise<void> {
  return new Promise(resolve => setTimeout(resolve, ms));
}