import { AssetLibrary } from "./asset";
import { Chat } from "./chat";
import { CompletionsAPI } from "./completions";
import { FineTuningClient } from "./tune";
/**
 * Handle for an inference that is executed asynchronously on the server, for
 * endpoints that support server-side async inferences.
 *
 * @remarks
 * This interface is used in conjunction with the {@link Client.inferAsync}
 * method.  Please reference that method's remarks for more information.
 * {@link Client.inferAsync} returns an InferenceFuture, which can then be
 * used with {@link Client.isFutureReady} to see the status.  Once it returns
 * `true`, you can use {@link Client.getFutureResult} to get the response
 * for your InferenceFuture.
 */
export interface InferenceFuture {
    /** A unique URL to poll for the status of the inference. */
    poll_url: string;
    /** Unique identifier for the inference. */
    response_id: string;
}
/**
 * Shape of the response returned from the `poll_url` of an
 * {@link InferenceFuture}.
 *
 * @remarks
 * This interface is used in conjunction with the {@link Client.inferAsync}
 * method.  Please reference that method's remarks for more information.
 */
export interface InferenceFutureResponse {
    /** Inference status: pending, running, or completed. */
    status: string;
    /** URL to get the results from once they are ready. */
    response_url: string;
}
/**
 * A client that allows inferences from existing OctoAI endpoints.  Sets
 * various headers, establishes clients for {@link Chat} under `Client.chat`,
 * {@link AssetLibrary} under `Client.asset`, {@link FineTuningClient} under
 * `Client.tune`, {@link CompletionsAPI} under `Client.completions`, and will
 * check for `OCTOAI_TOKEN` from the environment if no token is provided.
 *
 * @throws {@link OctoAIClientError} - For client-side failures (throttled, no token)
 * @throws {@link OctoAIServerError} - For server-side failures (unreachable, etc)
 *
 * @remarks
 * You can create an OctoAI API token by following the guide at
 * {@link https://docs.octoai.cloud/docs/how-to-create-an-octoai-access-token |
 * How to Create an OctoAI Access Token}
 */
export declare class Client {
    /**
     * Headers used to interact with OctoAI servers.  Communicates authorization
     * and request type.
     */
    readonly headers: {
        Authorization: string;
        "Content-Type": string;
        "User-Agent": string;
        "X-OctoAI-Async": string;
        Accept: string;
    };
    /**
     * The {@link Chat} client, accessible with `Client.chat`.
     */
    readonly chat: Chat;
    /**
     * The {@link AssetLibrary} client, accessible with `Client.asset`.
     */
    readonly asset: AssetLibrary;
    /**
     * The {@link FineTuningClient}, accessible with `Client.tune`.
     */
    readonly tune: FineTuningClient;
    /**
     * The {@link CompletionsAPI} client, accessible with `Client.completions`.
     */
    readonly completions: CompletionsAPI;
    /**
     * `true` when the SecureLink API is used instead of the public API.
     */
    readonly secureLink: boolean;
    /**
     * Constructor for the Client class.
     *
     * @param token - OctoAI token.  If none is set, checks for an `OCTOAI_TOKEN`
     * environment variable, or will default to null.
     * @param secureLink - Set to true to use SecureLink API instead of public API
     */
    constructor(token?: string | null, secureLink?: boolean);
    /**
     * Send a request to the given endpoint with inputs as request body.
     * For LLaMA2 LLMs, this requires `"stream": false` in the inputs.  To stream
     * for LLMs, please see the {@link Client.inferStream} method.
     *
     * @typeParam T - Shape the caller expects the JSON outputs to have.
     * @param endpointUrl - Target URL to run inference
     * @param inputs - Necessary inputs for the endpointURL to run inference
     *
     * @returns JSON outputs from the endpoint
     */
    infer<T>(endpointUrl: string, inputs: Record<string, any>): Promise<T>;
    /**
     * Stream text event response body for supporting endpoints.  This is an
     * alternative to loading all response body into memory at once.  Recommended
     * for use with LLM models.  Requires `"stream": true` in the inputs for
     * LLaMA2 LLMs.
     *
     * @param endpointUrl - Target URL to run inference
     * @param inputs - Necessary inputs for the endpointURL to run inference
     * @returns The `Response` whose `body` stream can be read with `getReader()`.
     *
     * @remarks
     * This allows you to stream back tokens from the LLMs.  Below is an example
     * on how to do this with a LLaMA2 LLM using a completions style API.
     *
     * HuggingFace style APIs will usually use the variable `done` below to
     * indicate the end of the stream.  OpenAI style APIs will often send a
     * string in the stream `"data: [DONE]\n"` to indicate the stream is complete.
     *
     * This example concatenates all values from the tokens into a single text
     * variable.  How you choose to use the tokens will likely be different, so
     * please modify the code.
     *
     * This example assumes:
     * 1) You've followed the guide at
     * {@link https://docs.octoai.cloud/docs/how-to-create-an-octoai-access-token |
     * How to Create an OctoAI Access Token} to create and set your OctoAI access
     * token
     * 2) Either that you will set this token as an `OCTOAI_TOKEN` environment
     * variable or edit the snippet to pass it as a value to the `Client`
     * constructor.
     * 3) You have assigned your endpoint URL and inputs into variables named
     * llamaEndpoint and streamInputs.
     *
     * ```ts
     * const client = new Client();
     * const response = await client.inferStream(llamaEndpoint, streamInputs);
     * // getReader() lives on the Response body stream, which may be null.
     * if (response.body === null) {
     *   throw new Error("Response has no body to stream");
     * }
     * let text = ``;
     * const streamReader = response.body.getReader();
     * for (
     *   let { value, done } = await streamReader.read();
     *   !done;
     *   { value, done } = await streamReader.read()
     * ) {
     *   const decoded = new TextDecoder().decode(value);
     *   if (
     *     decoded === "data: [DONE]\n" ||
     *     decoded.includes('"finish_reason": "')
     *   ) {
     *     break;
     *   }
     *   const token = JSON.parse(decoded.substring(5));
     *   if (token.object === "chat.completion.chunk") {
     *     text += token.choices[0].delta.content;
     *   }
     * }
     * console.log(text);
     * ```
     * The `const token = JSON.parse(decoded.substring(5))` line strips the
     * leading `data:` prefix from the returned text/event-stream chunk, then
     * parses the token as an object.
     */
    inferStream(endpointUrl: string, inputs: Record<string, any>): Promise<Response>;
    // Private member; its type is omitted from this declaration.
    private coldStartWarning;
    /**
     * Check health of an endpoint using a get request.  Try until timeout.
     *
     * @param endpointUrl - Target URL to run the health check.
     * @param timeoutMS - Milliseconds before request times out.  Default is 15
     * minutes.
     * @param intervalMS - Milliseconds to wait between health check queries.
     * @returns HTTP status code.
     *
     * @remarks
     * The default timeout is set to 15 minutes to allow for potential cold start.
     *
     * For custom containers, please follow
     * {@link https://docs.octoai.cloud/docs/health-check-paths-in-custom-containers
     * | Health Check Paths in Custom Containers} to set a health check endpoint.
     *
     * Information about health check endpoint URLs is available on relevant
     * QuickStart Templates.
     */
    healthCheck(endpointUrl: string, timeoutMS?: number, // 15 minutes for cold start
    intervalMS?: number): Promise<number>;
    /**
     * Execute an inference in the background on the server.
     *
     * @param endpointUrl - Target URL to send inference request.
     * @param inputs - Contains necessary inputs for endpoint to run inference.
     * @returns Future that allows checking if results are ready, then accessing
     * them.
     *
     * @remarks
     * Please read the {@link https://docs.octoai.cloud/reference/inference |
     * Async Inference Reference} for more information.
     * {@link Client.inferAsync} returns an {@link InferenceFuture},
     * which can then be used with {@link Client.isFutureReady} to see the
     * status.  Once it returns `true`, you can use
     * {@link Client.getFutureResult} to get the response for your
     * InferenceFuture.
     *
     * Assuming you have a variable with your target endpoint URL and the inputs
     * the model needs, and an `OCTOAI_TOKEN` set as an environment variable, you
     * can run a server-side asynchronous inference from
     * {@link https://docs.octoai.cloud/docs/welcome-to-the-octoai-compute-service-copy | QuickStart Template}
     * endpoints with something like the below.
     *
     * ```ts
     *  const client = new Client();
     *  const future = await client.inferAsync(url, inputs);
     *  if (await client.isFutureReady(future) === true) {
     *    return await client.getFutureResult(future);
     *  }
     * ```
     */
    inferAsync(endpointUrl: string, inputs: Record<string, any>): Promise<InferenceFuture>;
    // Private member; its type is omitted from this declaration.
    private pollFuture;
    /**
     * Return whether the {@link InferenceFuture} generated from
     * {@link Client.inferAsync} has been computed and can return results.
     *
     * @param future - Created from {@link Client.inferAsync}.
     * @returns True if the {@link InferenceFuture} inference is completed, so
     * that {@link Client.getFutureResult} can be used.  Else returns false.
     */
    isFutureReady(future: InferenceFuture): Promise<boolean>;
    /**
     * Return the result of an {@link InferenceFuture} generated from
     * {@link Client.inferAsync} as long as {@link Client.isFutureReady} returned
     * `true`.
     *
     * @param future - An {@link InferenceFuture} generated from
     * {@link Client.inferAsync}
     * @returns JSON outputs from the endpoint.
     */
    getFutureResult(future: InferenceFuture): Promise<Record<string, any>>;
}