import type {
  BrowserSession,
  CrawlerJob,
  CrawlerResult,
  CrawlerSchedule,
  CrawlerSiteConfig,
  NewsMonitor,
  NewsSummary,
} from "../_internal/types.gen";
import type { RequestOptions } from "../base-client";
import { RequestBuilder } from "../request-builder";

/**
 * Attributes accepted when creating a NewsMonitor via the admin SDK (B11
 * `:news_monitoring` capability). Typed explicitly — passing extra
 * properties is rejected server-side (additionalProperties: false).
 *
 * Use `frequency: "daily"` only — `"hourly"` and `"twice_daily"`
 * are tier-gated by `ValidateMonitorFrequencyAllowed` and rejected on
 * standard-tier applications until the premium frequency capability ships.
 */
export type CreateNewsMonitorAttributes = {
  name: string;
  topic: string;
  source_urls: string[];
  frequency?: "daily" | "twice_daily" | "hourly";
  summary_model?: "default" | "balanced" | "accurate";
  tags?: string[];
  /** Free-form key/value map stored alongside the monitor. */
  metadata?: Record<string, unknown>;
};

/**
 * Attributes accepted when updating a NewsMonitor (PATCH semantics).
 *
 * Same tier-gating note as create: pass `frequency: "daily"` only —
 * `"hourly"` and `"twice_daily"` are gated by
 * `ValidateMonitorFrequencyAllowed` and rejected on standard-tier
 * applications until the premium frequency capability ships.
 */
export type UpdateNewsMonitorAttributes = {
  name?: string;
  topic?: string;
  source_urls?: string[];
  frequency?: "daily" | "twice_daily" | "hourly";
  summary_model?: "default" | "balanced" | "accurate";
  tags?: string[];
  /** Free-form key/value map stored alongside the monitor. */
  metadata?: Record<string, unknown>;
};

/**
 * Attributes accepted when creating a crawl job via the admin SDK.
 * Mirrors `crawl_job.ex :create` accept list — passing extra properties
 * is rejected server-side (additionalProperties: false).
 */
export type AdminCreateCrawlerJobAttributes = {
  workspace_id: string;
  url: string;
  mode?: "single" | "site" | "sitemap";
  strategy?: "native" | "sidecar" | "auto";
  depth?: number;
  max_pages?: number;
  include_patterns?: string[];
  exclude_patterns?: string[];
  output_format?: "markdown" | "html" | "text" | "snapshot";
  callback_url?: string;
  schedule_id?: string;
  /** Free-form key/value map stored alongside the job. */
  metadata?: Record<string, unknown>;
  /** Ordered list of browser step descriptors; each step is a free-form map. */
  browser_steps?: Array<Record<string, unknown>>;
};

/**
 * Attributes accepted when creating a crawl schedule via the admin SDK.
 * Mirrors `crawl_schedule.ex :create` accept list.
 */
export type AdminCreateCrawlerScheduleAttributes = {
  workspace_id: string;
  name: string;
  url: string;
  mode?: "single" | "site" | "sitemap";
  frequency?: "hourly" | "daily" | "weekly" | "monthly";
  cron_expression?: string;
  /** Free-form crawl configuration map. */
  crawl_config?: Record<string, unknown>;
  notify_on_change?: boolean;
  next_run_at?: string;
  news_monitor_id?: string;
};

/**
 * Attributes accepted when updating a crawl schedule via the admin SDK.
 * Mirrors `crawl_schedule.ex :update` accept list. To pause/resume use
 * `enable()` / `disable()` — `enabled` is not part of the update accept.
 */
export type AdminUpdateCrawlerScheduleAttributes = {
  name?: string;
  url?: string;
  mode?: "single" | "site" | "sitemap";
  frequency?: "hourly" | "daily" | "weekly" | "monthly";
  cron_expression?: string;
  /** Free-form crawl configuration map. */
  crawl_config?: Record<string, unknown>;
  notify_on_change?: boolean;
  news_monitor_id?: string;
};

/**
 * Attributes accepted when creating a SiteConfig via the admin SDK.
 * Mirrors `site_config.ex :create` accept list. Includes the auth-aware
 * `browser_auth_session_id` + `browser_fallback_strategy` fields.
 */
export type AdminCreateSiteConfigAttributes = {
  workspace_id: string;
  domain: string;
  rate_limit_rpm?: number;
  preferred_strategy?: "native" | "sidecar" | "auto" | "browser_session";
  requires_js?: boolean;
  /** Custom header map, keyed by header name. */
  custom_headers?: Record<string, unknown>;
  user_agent?: string;
  respect_robots?: boolean;
  browser_auth_session_id?: string;
  browser_fallback_strategy?: "native" | "sidecar";
};

/**
 * Attributes accepted when updating a SiteConfig via the admin SDK.
 * Mirrors `site_config.ex :update` accept list.
 */
export type AdminUpdateSiteConfigAttributes = {
  rate_limit_rpm?: number;
  preferred_strategy?: "native" | "sidecar" | "auto" | "browser_session";
  requires_js?: boolean;
  /** Custom header map, keyed by header name. */
  custom_headers?: Record<string, unknown>;
  user_agent?: string;
  respect_robots?: boolean;
  browser_auth_session_id?: string;
  browser_fallback_strategy?: "native" | "sidecar";
};

/**
 * Attributes accepted when creating a BrowserSession via the admin SDK.
 * Mirrors `browser_session.ex :create` accept list. `session_state` is
 * encrypted at rest via `EncryptSessionState` change.
 */
export type AdminCreateBrowserSessionAttributes = {
  workspace_id: string;
  domain: string;
  session_state: string;
  expires_at?: string;
};

/**
 * Attributes accepted when refreshing a BrowserSession. Mirrors
 * `browser_session.ex :refresh` accept list — only `session_state` may
 * be rotated; the refresh action re-encrypts and stamps `last_used_at`.
 */
export type AdminRefreshBrowserSessionAttributes = {
  session_state: string;
};

/**
 * Admin crawler namespace — full crawl lifecycle management including site configs.
 *
 * Extends the client crawler namespace with SiteConfig CRUD (per-domain rate limits,
 * strategy preferences, custom headers) which is admin-only.
 *
 * @example
 * ```typescript
 * const admin = new GptAdmin({ apiKey: 'sk_srv_...' });
 *
 * // List all crawl jobs
 * const jobs = await admin.crawler.jobs.list();
 *
 * // Configure per-domain crawl settings
 * const config = await admin.crawler.siteConfigs.create({
 *   workspace_id: 'ws_abc',
 *   domain: 'docs.example.com',
 *   rate_limit_rpm: 30,
 * });
 * ```
 */
export declare function createCrawlerNamespace(rb: RequestBuilder): {
  /** Crawl job management — create, cancel, delete jobs. */
  jobs: {
    /**
     * List crawl jobs across all workspaces visible to the admin actor.
     *
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to an array of {@link CrawlerJob} records.
     *
     * @example
     * ```typescript
     * const jobs = await admin.crawler.jobs.list();
     * console.log(`${jobs.length} jobs across all workspaces`);
     * ```
     */
    list: (options?: RequestOptions) => Promise<CrawlerJob[]>;
    /**
     * Fetch a single crawl job by id.
     *
     * @param id - The crawl job id.
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to the matching {@link CrawlerJob}.
     */
    get: (id: string, options?: RequestOptions) => Promise<CrawlerJob>;
    /**
     * Start a new crawl job (admin context — may target any workspace).
     *
     * @param attributes - Crawl job attributes; must include `workspace_id`
     *   and `url`. Extra properties are dropped by the destructure helper
     *   before they reach the server.
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to the newly created {@link CrawlerJob}.
     *
     * @example
     * ```typescript
     * const job = await admin.crawler.jobs.create({
     *   workspace_id: 'ws_abc',
     *   url: 'https://docs.example.com',
     *   mode: 'site',
     *   depth: 2,
     * });
     * ```
     */
    create: (
      attributes: AdminCreateCrawlerJobAttributes,
      options?: RequestOptions,
    ) => Promise<CrawlerJob>;
    /**
     * Cancel an in-progress crawl job. Sends a PATCH with no attributes.
     *
     * @param id - The crawl job id.
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to the updated {@link CrawlerJob}
     *   with `status: 'cancelled'`.
     *
     * @example
     * ```typescript
     * await admin.crawler.jobs.cancel('cjob_abc');
     * ```
     */
    cancel: (id: string, options?: RequestOptions) => Promise<CrawlerJob>;
    /**
     * Permanently delete a crawl job and its associated results.
     *
     * @param id - The crawl job id.
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to `true` on successful deletion.
     *
     * @example
     * ```typescript
     * await admin.crawler.jobs.delete('cjob_abc');
     * ```
     */
    delete: (id: string, options?: RequestOptions) => Promise<true>;
  };
  /** Crawl schedule management — CRUD + enable/disable/trigger. */
  schedules: {
    /**
     * List crawl schedules across all workspaces visible to the admin actor.
     *
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to an array of {@link CrawlerSchedule} records.
     *
     * @example
     * ```typescript
     * const schedules = await admin.crawler.schedules.list();
     * ```
     */
    list: (options?: RequestOptions) => Promise<CrawlerSchedule[]>;
    /**
     * Fetch a single crawl schedule by id.
     *
     * @param id - The schedule id.
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to the matching {@link CrawlerSchedule}.
     *
     * @example
     * ```typescript
     * const schedule = await admin.crawler.schedules.get('csched_abc');
     * ```
     */
    get: (id: string, options?: RequestOptions) => Promise<CrawlerSchedule>;
    /**
     * Create a new recurring crawl schedule. To pause/resume use
     * `enable()` / `disable()` — `enabled` is not part of the accept list.
     *
     * @param attributes - Schedule attributes; must include `workspace_id`,
     *   `name`, and `url`.
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to the newly created {@link CrawlerSchedule}.
     *
     * @example
     * ```typescript
     * const schedule = await admin.crawler.schedules.create({
     *   workspace_id: 'ws_abc',
     *   name: 'Daily docs',
     *   url: 'https://docs.example.com',
     *   frequency: 'daily',
     * });
     * ```
     */
    create: (
      attributes: AdminCreateCrawlerScheduleAttributes,
      options?: RequestOptions,
    ) => Promise<CrawlerSchedule>;
    /**
     * Update an existing crawl schedule (PATCH semantics).
     *
     * @param id - The schedule id.
     * @param attributes - Attributes to change.
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to the updated {@link CrawlerSchedule}.
     *
     * @example
     * ```typescript
     * await admin.crawler.schedules.update('csched_abc', {
     *   frequency: 'weekly',
     * });
     * ```
     */
    update: (
      id: string,
      attributes: AdminUpdateCrawlerScheduleAttributes,
      options?: RequestOptions,
    ) => Promise<CrawlerSchedule>;
    /**
     * Enable a paused crawl schedule. Sends a PATCH with no attributes.
     *
     * @param id - The schedule id.
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to the updated {@link CrawlerSchedule}.
     *
     * @example
     * ```typescript
     * await admin.crawler.schedules.enable('csched_abc');
     * ```
     */
    enable: (id: string, options?: RequestOptions) => Promise<CrawlerSchedule>;
    /**
     * Disable a crawl schedule (pause without deleting). Sends a PATCH
     * with no attributes.
     *
     * @param id - The schedule id.
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to the updated {@link CrawlerSchedule}.
     *
     * @example
     * ```typescript
     * await admin.crawler.schedules.disable('csched_abc');
     * ```
     */
    disable: (id: string, options?: RequestOptions) => Promise<CrawlerSchedule>;
    /**
     * Trigger a scheduled crawl immediately. Sends a PATCH with no
     * attributes; stamps `last_run_at` and enqueues a job.
     *
     * @param id - The schedule id.
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to the updated {@link CrawlerSchedule}.
     *
     * @example
     * ```typescript
     * await admin.crawler.schedules.trigger('csched_abc');
     * ```
     */
    trigger: (id: string, options?: RequestOptions) => Promise<CrawlerSchedule>;
    /**
     * Permanently delete a crawl schedule.
     *
     * @param id - The schedule id.
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to `true` on successful deletion.
     *
     * @example
     * ```typescript
     * await admin.crawler.schedules.delete('csched_abc');
     * ```
     */
    delete: (id: string, options?: RequestOptions) => Promise<true>;
  };
  /** Crawl result access — read extracted page content. */
  results: {
    /**
     * List crawl results across all jobs visible to the admin actor.
     *
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to an array of {@link CrawlerResult} records.
     *
     * @example
     * ```typescript
     * const results = await admin.crawler.results.list();
     * ```
     */
    list: (options?: RequestOptions) => Promise<CrawlerResult[]>;
    /**
     * Fetch a single crawl result by id, including the extracted page
     * content (URL, title, Markdown body, metadata).
     *
     * @param id - The crawl result id.
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to the matching {@link CrawlerResult}.
     *
     * @example
     * ```typescript
     * const result = await admin.crawler.results.get('res_abc');
     * console.log(result.markdown);
     * ```
     */
    get: (id: string, options?: RequestOptions) => Promise<CrawlerResult>;
  };
  /** Site configuration — per-domain crawl settings (admin-only). */
  siteConfigs: {
    /**
     * List site configurations across all workspaces.
     *
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to an array of {@link CrawlerSiteConfig} records.
     */
    list: (options?: RequestOptions) => Promise<CrawlerSiteConfig[]>;
    /**
     * Fetch a single site configuration by id.
     *
     * @param id - The site config id.
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to the matching {@link CrawlerSiteConfig}.
     */
    get: (id: string, options?: RequestOptions) => Promise<CrawlerSiteConfig>;
    /**
     * Create per-domain crawl settings (rate limit, strategy, custom
     * headers, robots.txt preference, browser-auth session binding).
     *
     * @param attributes - Site config attributes; must include
     *   `workspace_id` and `domain`.
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to the newly created {@link CrawlerSiteConfig}.
     *
     * @example
     * ```typescript
     * const config = await admin.crawler.siteConfigs.create({
     *   workspace_id: 'ws_abc',
     *   domain: 'docs.example.com',
     *   rate_limit_rpm: 30,
     *   preferred_strategy: 'auto',
     * });
     * ```
     */
    create: (
      attributes: AdminCreateSiteConfigAttributes,
      options?: RequestOptions,
    ) => Promise<CrawlerSiteConfig>;
    /**
     * Update an existing site configuration (PATCH semantics).
     *
     * @param id - The site config id.
     * @param attributes - Site config attributes to change.
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to the updated {@link CrawlerSiteConfig}.
     *
     * @example
     * ```typescript
     * await admin.crawler.siteConfigs.update('sc_abc', {
     *   rate_limit_rpm: 60,
     * });
     * ```
     */
    update: (
      id: string,
      attributes: AdminUpdateSiteConfigAttributes,
      options?: RequestOptions,
    ) => Promise<CrawlerSiteConfig>;
    /**
     * Permanently delete a site configuration.
     *
     * @param id - The site config id.
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to `true` on successful deletion.
     *
     * @example
     * ```typescript
     * await admin.crawler.siteConfigs.delete('sc_abc');
     * ```
     */
    delete: (id: string, options?: RequestOptions) => Promise<true>;
  };
  /**
   * News Monitors — workspace-scoped news monitoring configurations
   * (B11, `:news_monitoring` capability). Same surface as the client SDK
   * — Admin mounts the same routes under `/admin/crawler/news-monitors`.
   *
   * Mutating endpoints return HTTP 403 with code
   * `news_monitoring_capability_disabled` when the workspace's
   * Application lacks the capability.
   */
  newsMonitors: {
    /**
     * List NewsMonitors for the current workspace context, with offset
     * pagination support. Mirrors the equivalent `newsMonitors.list`
     * method on the client SDK.
     *
     * @param options - Pagination (`page`, `pageSize`) + request options.
     * @returns A `Promise<NewsMonitor[]>` resolving to the monitors page.
     *
     * @example
     * ```ts
     * const monitors = await admin.crawler.newsMonitors.list({ page: 1, pageSize: 25 });
     * ```
     */
    list: (
      options?: {
        page?: number;
        pageSize?: number;
      } & RequestOptions,
    ) => Promise<NewsMonitor[]>;
    /**
     * Get a NewsMonitor by id.
     *
     * @param id - NewsMonitor UUID.
     * @param options - Request options.
     * @returns A `Promise<NewsMonitor>`.
     */
    get: (id: string, options?: RequestOptions) => Promise<NewsMonitor>;
    /**
     * Create a NewsMonitor. Capability-gated on `:news_monitoring` —
     * returns HTTP 403 `news_monitoring_capability_disabled` when the
     * workspace's Application lacks the capability.
     *
     * @param attributes - Create attributes (typed; extra properties rejected server-side).
     * @param options - Request options.
     * @returns A `Promise<NewsMonitor>`.
     *
     * @example
     * ```ts
     * const monitor = await admin.crawler.newsMonitors.create({
     *   name: "policy-monitor",
     *   topic: "policy updates",
     *   source_urls: ["https://example.com/news"],
     *   frequency: "daily",
     * });
     * ```
     */
    create: (
      attributes: CreateNewsMonitorAttributes,
      options?: RequestOptions,
    ) => Promise<NewsMonitor>;
    /**
     * Update a NewsMonitor (PATCH). Capability-gated on
     * `:news_monitoring`.
     *
     * @param id - NewsMonitor UUID.
     * @param attributes - Update attributes (PATCH semantics — only
     *   provided fields are modified).
     * @param options - Request options.
     * @returns A `Promise<NewsMonitor>`.
     *
     * @example
     * ```ts
     * await admin.crawler.newsMonitors.update(id, { topic: "new topic" });
     * ```
     */
    update: (
      id: string,
      attributes: UpdateNewsMonitorAttributes,
      options?: RequestOptions,
    ) => Promise<NewsMonitor>;
    /**
     * Pause a NewsMonitor — sets status to `:paused`. The scheduler
     * skips paused monitors.
     *
     * @param id - NewsMonitor UUID.
     * @param options - Request options.
     * @returns A `Promise<NewsMonitor>` with `status: "paused"`.
     *
     * @example
     * ```ts
     * await admin.crawler.newsMonitors.pause(monitorId);
     * ```
     */
    pause: (id: string, options?: RequestOptions) => Promise<NewsMonitor>;
    /**
     * Resume a paused NewsMonitor — sets status back to `:active`.
     *
     * @param id - NewsMonitor UUID.
     * @param options - Request options.
     * @returns A `Promise<NewsMonitor>` with `status: "active"`.
     *
     * @example
     * ```ts
     * await admin.crawler.newsMonitors.resume(monitorId);
     * ```
     */
    resume: (id: string, options?: RequestOptions) => Promise<NewsMonitor>;
    /**
     * Archive a NewsMonitor — sets status to `:archived` (terminal).
     * Archived monitors are excluded from scheduler sweeps.
     *
     * @param id - NewsMonitor UUID.
     * @param options - Request options.
     * @returns A `Promise<NewsMonitor>` with `status: "archived"`.
     *
     * @example
     * ```ts
     * await admin.crawler.newsMonitors.archive(monitorId);
     * ```
     */
    archive: (id: string, options?: RequestOptions) => Promise<NewsMonitor>;
    /**
     * Permanently delete a NewsMonitor.
     *
     * @param id - NewsMonitor UUID.
     * @param options - Request options.
     * @returns A `Promise<true>` on success.
     *
     * @example
     * ```ts
     * await admin.crawler.newsMonitors.delete(monitorId);
     * ```
     */
    delete: (id: string, options?: RequestOptions) => Promise<true>;
  };
  /**
   * News Summaries — read-only access to AI-generated summaries produced
   * by NewsMonitor runs.
   */
  newsSummaries: {
    /**
     * List NewsSummaries for the current workspace context, with offset
     * pagination support. Mirrors the client SDK list shape.
     *
     * @param options - Pagination (`page`, `pageSize`) + request options.
     * @returns A `Promise<NewsSummary[]>`.
     *
     * @example
     * ```typescript
     * const summaries = await admin.crawler.newsSummaries.list({ pageSize: 50 });
     * ```
     */
    list: (
      options?: {
        page?: number;
        pageSize?: number;
      } & RequestOptions,
    ) => Promise<NewsSummary[]>;
    /**
     * Get a NewsSummary by id.
     *
     * @param id - NewsSummary UUID.
     * @param options - Request options.
     * @returns A `Promise<NewsSummary>`.
     *
     * @example
     * ```typescript
     * const summary = await admin.crawler.newsSummaries.get('sum_abc');
     * ```
     */
    get: (id: string, options?: RequestOptions) => Promise<NewsSummary>;
  };
  /**
   * Browser Sessions — encrypted, per-domain authenticated session
   * state used by `:browser_session` strategy crawls. `session_state`
   * is encrypted at rest via `EncryptSessionState` and is NEVER
   * returned in API responses (marked sensitive on the resource).
   *
   * ISVs configure these to support authenticated crawling without
   * repeating the login flow on every scheduled run.
   */
  browserSessions: {
    /**
     * List browser sessions across all workspaces visible to the admin
     * actor.
     *
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to an array of {@link BrowserSession} records.
     */
    list: (options?: RequestOptions) => Promise<BrowserSession[]>;
    /**
     * Fetch a single browser session by id.
     *
     * @param id - The browser session id.
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to the matching {@link BrowserSession}.
     */
    get: (id: string, options?: RequestOptions) => Promise<BrowserSession>;
    /**
     * Create a new browser session for a domain. `session_state` is
     * encrypted via the `EncryptSessionState` change before persistence.
     *
     * @param attributes - Session attributes; must include `workspace_id`,
     *   `domain`, and `session_state`.
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to the newly created {@link BrowserSession}.
     *
     * @example
     * ```typescript
     * const session = await admin.crawler.browserSessions.create({
     *   workspace_id: 'ws_abc',
     *   domain: 'example.com',
     *   session_state: JSON.stringify({ cookies: [...] }),
     *   expires_at: '2026-06-01T00:00:00Z',
     * });
     * ```
     */
    create: (
      attributes: AdminCreateBrowserSessionAttributes,
      options?: RequestOptions,
    ) => Promise<BrowserSession>;
    /**
     * Refresh a browser session by rotating `session_state`. Re-encrypts
     * the new state and stamps `last_used_at`. Only `session_state` may
     * be modified via this endpoint.
     *
     * @param id - The browser session id.
     * @param attributes - Refresh attributes — only `session_state`.
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to the refreshed {@link BrowserSession}.
     *
     * @example
     * ```typescript
     * await admin.crawler.browserSessions.refresh(sessionId, {
     *   session_state: JSON.stringify({ cookies: [...] }),
     * });
     * ```
     */
    refresh: (
      id: string,
      attributes: AdminRefreshBrowserSessionAttributes,
      options?: RequestOptions,
    ) => Promise<BrowserSession>;
    /**
     * Permanently delete a browser session.
     *
     * @param id - The browser session id.
     * @param options - Optional request-level overrides.
     * @returns A promise resolving to `true` on successful deletion.
     *
     * @example
     * ```typescript
     * await admin.crawler.browserSessions.delete(sessionId);
     * ```
     */
    delete: (id: string, options?: RequestOptions) => Promise<true>;
  };
};

/** Public shape of the admin crawler namespace returned by {@link createCrawlerNamespace}. */
export type AdminCrawlerAPI = ReturnType<typeof createCrawlerNamespace>;
//# sourceMappingURL=crawler.d.ts.map