import { Stream, StreamIO } from "../data";
import { BooleanType, DictType, EastFunction, EastType, FloatType, IntegerType, StringType, StructType, Variable } from "../east";
import { PipelineBuilder, TabularPipelineBuilder } from '../pipeline';
import { Builder, ModulePath, Template } from "../template";
import { ModuleBuilder } from '../template/ModuleBuilder';

// NOTE(review): several declarations below use type parameters (e.g. `T`, `M`,
// `Name`, `Features`, `Output`) that are never declared, and some contain
// unbalanced `>` / `}>>>` fragments. It looks like the generic parameter lists
// (`<...>`) were lost from this copy of the file - confirm against the
// generated declaration output before relying on these signatures.

/** The machine learning model types that can be used for a {@link FloatMLModel}. */
export type FloatMLModelType = "boosted_trees" | "constant" | "gaussian_process" | "linear" | "neural_network";

/** The machine learning model types that can be used for a {@link StringMLModel}. */
export type StringMLModelType = "boosted_trees_string" | "neural_network_string";

/** The machine learning model types that can be used for a {@link MLModel}. */
export type MLModelType = FloatMLModelType | StringMLModelType;

/**
 * The noise modelled in the {@link FloatMLModel} output, i.e. the output
 * distribution that is sampled to create predictions (the posterior
 * distribution). A maximum likelihood estimate is provided by `"none"`.
 */
export type FloatMLModelNoise = "none" | "gaussian";

/** The noise modelled in the {@link StringMLModel} output. */
export type StringMLModelNoise = "maximum_likelihood" | "weighted";

/** The machine learning model applicable to {@link FloatType} outputs. */
export type FloatMLModel = {
    /** Which float model to use. */
    type: FloatMLModelType;
    /** The noise modelled in the output. */
    noise: FloatMLModelNoise;
};

/** The machine learning model applicable to {@link StringType} outputs. */
export type StringMLModel = {
    /** Which string model to use. */
    type: StringMLModelType;
    /** The noise modelled in the output. */
    noise: StringMLModelNoise;
};

/**
 * Extra configuration that can modify ML evaluation.
 *
 * For a {@link FloatType} output this is a pair of `min` / `max`
 * {@link EastFunction}s; for any other output type it is a plain `Record`.
 *
 * NOTE(review): presumably `min` / `max` bound the predicted value - confirm
 * against the implementation.
 */
export type MLPredictionConfiguration = T extends FloatType ? {
    min: EastFunction;
    max: EastFunction;
} : Record;

/**
 * NOTE(review): presumably returns the default
 * {@link MLPredictionConfiguration} for the given output type; the
 * implementation is not visible from this declaration - confirm.
 *
 * @param type the {@link EastType} of the model output
 */
export declare function defaultMLPredictionConfiguration(type: EastType): Record;

/**
 * The training configuration, selected by the model type `M`:
 *
 * - `"boosted_trees_string"`: `"softprob"` loss, with `train_iterations`,
 *   `tune_iterations`, `max_samples` and `max_depth`
 * - `"neural_network_string"`: `"softprob"` loss, with `train_iterations`,
 *   `max_samples`, `shape` and `learning_rate`
 * - `"neural_network"`: `"L2"` loss, with `train_iterations`, `max_samples`,
 *   `shape` and `learning_rate`
 * - `"gaussian_process"`: `"L2"` or `"LogLikelihood"` loss, with
 *   `tune_iterations` and `max_samples`
 * - `"boosted_trees"`: `"L2"` or `"L1"` loss, with `train_iterations`,
 *   `tune_iterations`, `max_samples` and `max_depth`
 * - any other model type (the fall-through branch): `"L2"` loss with
 *   `max_samples` only
 */
export type MLTrainingConfiguration = M extends "boosted_trees_string" ? {
    loss: "softprob";
    train_iterations: number;
    tune_iterations: number;
    max_samples: number;
    max_depth: number;
} : M extends "neural_network_string" ? {
    loss: "softprob";
    train_iterations: number;
    max_samples: number;
    shape: number[];
    learning_rate: number;
} : M extends "neural_network" ? {
    loss: "L2";
    train_iterations: number;
    max_samples: number;
    shape: number[];
    learning_rate: number;
} : M extends "gaussian_process" ? {
    loss: "L2" | "LogLikelihood";
    tune_iterations: number;
    max_samples: number;
} : M extends "boosted_trees" ? {
    loss: "L2" | "L1";
    train_iterations: number;
    tune_iterations: number;
    max_samples: number;
    max_depth: number;
} : {
    loss: "L2";
    max_samples: number;
};

/**
 * Any supported model: either a {@link FloatMLModel} or a {@link StringMLModel}.
 *
 * @internal
 */
export type MLModel = FloatMLModel | StringMLModel;

/**
 * Compare two models, returning a boolean.
 *
 * NOTE(review): presumably true when both `type` and `noise` match - the
 * implementation is not visible here, confirm.
 *
 * @internal
 */
export declare function mlModelEqual(model1: MLModel, model2: MLModel): boolean;

/**
 * The description of an `'ml_training'` task: the model and its features, the
 * name of the output field, the training configuration, the input stream and
 * the `model` / `training` / `validation` output streams.
 *
 * @internal
 */
export type MLTrainingTaskDescription = {
    task_type: 'ml_training';
    module: ModulePath;
    name: string;
    model: MLModel;
    features: Record;
    output_name: string;
    training_configuration: MLTrainingConfiguration;
    inputs: {
        input: StreamIO>;
    };
    outputs: {
        model: StreamIO;
        training: StreamIO;
        validation: StreamIO;
    };
};

/**
 * A builder to define (and optionally train) a machine learning model.
 *
 * Features are declared with {@link MLModelBuilder.feature}; calling
 * {@link MLModelBuilder.output} then yields a {@link FloatMLBuilder} or a
 * {@link StringMLBuilder}, on which the model and training data are chosen.
 *
 * @category ML
 *
 * @example
 * ```typescript
 * // use a DictType stream
 * const training_stream = Stream(
 *     "My Stream",
 *     DictType(
 *         StringType,
 *         StructType({
 *             x: FloatType,
 *             y: FloatType,
 *         })
 *     )
 * );
 *
 * // create a gradient boosted tree
 * const ml_model = new MLModelBuilder("ML Training")
 *     .feature("x", FloatType)
 *     .output(FloatType)
 *     .model({ type: "boosted_trees", noise: "none" })
 *     .trainFromStream({
 *         output_name: "y",
 *         input: training_stream,
 *     });
 * ```
 */
export declare class MLModelBuilder = {}> {
    private name;
    private features;
    module: ModulePath;
    /**
     * Construct a new ML model with a given name.
     *
     * @param name the name of the ML model
     * @param module (optional) a {@link ModuleBuilder} or {@link ModulePath};
     *   presumably the module the model belongs to - TODO confirm
     *
     * @category ML
     *
     * @example
     * ```typescript
     * // use a DictType stream
     * const training_stream = Stream(
     *     "My Stream",
     *     DictType(
     *         StringType,
     *         StructType({
     *             x: FloatType,
     *             y: FloatType,
     *         })
     *     )
     * );
     *
     * // create a gradient boosted tree
     * const ml_model = new MLModelBuilder("ML Training")
     *     .feature("x", FloatType)
     *     .output(FloatType)
     *     .model({ type: "boosted_trees", noise: "none" })
     *     .trainFromStream({
     *         output_name: "y",
     *         input: training_stream,
     *     });
     * ```
     */
    constructor(name: Name, module?: ModuleBuilder | ModulePath);
    /** @internal */
    constructor(name: Name, module: ModulePath, features: Features);
    /**
     * Define an input feature for the model.
     *
     * @param name the name of the feature
     * @param type the {@link EastType} of the feature
     * @returns an {@link MLModelBuilder}, so that calls can be chained
     *
     * @category ML
     *
     * @example
     * ```typescript
     * // create a gradient boosted tree
     * const ml_model = new MLModelBuilder("ML Training")
     *     .feature("x", FloatType)
     *     .output(FloatType)
     * ```
     */
    feature(name: FeatureName, type: T): MLModelBuilder;
    /**
     * Define the EastType the model will predict.
     *
     * @param type the {@link EastType} of the output
     * @returns a {@link FloatMLBuilder} when `type` is {@link FloatType},
     *   otherwise a {@link StringMLBuilder}
     *
     * @category ML
     *
     * @example
     * ```typescript
     * // create a gradient boosted tree
     * const ml_model = new MLModelBuilder("ML Training")
     *     .output(FloatType)
     * ```
     */
    output(type: T): T extends FloatType ? FloatMLBuilder : StringMLBuilder;
}

/**
 * The abstract base class extended by {@link FloatMLBuilder} and
 * {@link StringMLBuilder}. It holds the model name, features, output type and
 * model type, and requires subclasses to expose the model and training streams.
 *
 * @internal
 */
export declare abstract class AbstractMLBuilder, T extends EastType, Model extends MLModel> extends Builder {
    protected name: Name;
    protected features: Features;
    protected output_type: T;
    protected model_type: Model;
    constructor(name: Name, module: ModulePath, features: Features, output_type: T, model_type: Model);
    /**
     * Return the datastream containing the parameters of the trained ML model.
     *
     * @returns The parameter {@link Stream}
     *
     * @category ML
     */
    abstract modelStream(): Stream;
    /**
     * Return a datastream containing the training data and associated predictions.
     *
     * @returns The training {@link Stream}
     *
     * @category ML
     */
    abstract trainingStream(): Stream; output: T; train: BooleanType; test: BooleanType; }>>>;
}

/**
 * {@inheritDoc MLModelBuilder}
 *
 * @category ML
 */
export declare class FloatMLBuilder, Model extends FloatMLModel> extends AbstractMLBuilder {
    private training?;
    /**
     * Construct a builder for a model with a {@link FloatType} output.
     * {@link MLModelBuilder.output} returns instances of this class.
     *
     * @param name the name of the ML model
     * @param module the {@link ModulePath} of the model
     * @param features the input features of the model
     * @param model_type the {@link FloatMLModel} to apply
     * @param training (optional) the training source, either a `"stream"` or
     *   a `"pipeline"`, with the output field name and training configuration
     */
    constructor(name: Name, module: ModulePath, features: Features, model_type: Model, training?: {
        type: "stream";
        output_name: string;
        input: Stream>;
        config: MLTrainingConfiguration;
    } | {
        type: "pipeline";
        output_name: string;
        input: TabularPipelineBuilder, Record>;
        config: MLTrainingConfiguration;
    } | undefined);
    /**
     * Define which `MLModel` to use.
     *
     * @param model_type the {@link FloatMLModel} to apply
     * @returns a {@link FloatMLBuilder}, so that calls can be chained
     *
     * @category ML
     *
     * @example
     * ```typescript
     * // create a gradient boosted tree
     * const boosted_tree = new MLModelBuilder("ML Training")
     *     .feature("x", FloatType)
     *     .output(FloatType)
     *     .model({ type: "boosted_trees", noise: "none" })
     *
     * // create a gaussian process
     * const gaussian_process = new MLModelBuilder("ML Training")
     *     .feature("x", FloatType)
     *     .output(FloatType)
     *     .model({ type: "gaussian_process", noise: "gaussian" })
     * ```
     */
    model(model_type: M): FloatMLBuilder;
    /**
     * Create a training task based on some input data in an existing datastream.
     *
     * The options below are passed as fields of the single `config` argument.
     *
     * @param output_name the name of the output field in the input
     * @param input the input {@link Stream} containing training data
     * @param config (optional) the training configuration {@link MLTrainingConfiguration}
     * @returns a {@link FloatMLBuilder}, so that calls can be chained
     *
     * @category ML
     *
     * @example
     * ```typescript
     * // use a DictType stream
     * const training_stream = Stream(
     *     "My Stream",
     *     DictType(
     *         StringType,
     *         StructType({
     *             x: FloatType,
     *             y: FloatType,
     *         })
     *     )
     * );
     *
     * // create a gradient boosted tree
     * const ml_model = new MLModelBuilder("ML Training")
     *     .feature("x", FloatType)
     *     .output(FloatType)
     *     .model({ type: "boosted_trees", noise: "none" })
     *     .trainFromStream({
     *         output_name: "y",
     *         input: training_stream,
     *     });
     * ```
     */
    trainFromStream(config: {
        output_name: Output;
        input: Stream>>;
        config?: Partial>;
    }): FloatMLBuilder;
    /**
     * Create a training task based on some input data assembled by a pipeline.
     *
     * The options below are passed as fields of the single `config` argument.
     *
     * @param output_name the name of the output field in the input
     * @param pipeline a function that receives a {@link PipelineBuilder} and returns the pipeline that assembles the input data for training
     * @param config (optional) the training configuration {@link MLTrainingConfiguration}
     * @returns a {@link FloatMLBuilder}, so that calls can be chained
     *
     * @category ML
     *
     * @example
     * ```typescript
     * // use a DictType stream
     * const training_stream = Stream(
     *     "My Stream",
     *     DictType(
     *         StringType,
     *         StructType({
     *             x: FloatType,
     *             y: FloatType,
     *         })
     *     )
     * );
     *
     * // create a gradient boosted tree
     * const ml_model = new MLModelBuilder("ML Training")
     *     .feature("x", FloatType)
     *     .output(FloatType)
     *     .model({ type: "boosted_trees", noise: "none" })
     *     .trainFromPipeline({
     *         output_name: "y",
     *         pipeline: builder => builder
     *             .from(training_stream)
     *             .filter(fields => Greater(fields.x, 0)),
     *     });
     * ```
     */
    trainFromPipeline(config: {
        output_name: Output;
        pipeline: (builder: PipelineBuilder) => TabularPipelineBuilder>, Record>;
        config?: Partial>;
    }): FloatMLBuilder;
    /** @internal */
    private parametersType;
    /** @internal */
    private transformType;
    /**
     * Return the datastream containing the parameters of the trained ML model.
     *
     * @returns The parameter {@link Stream}
     *
     * @category ML
     */
    modelStream(): Stream;
    /**
     * Return a datastream containing the training data and associated predictions.
     *
     * @returns The training {@link Stream}
     *
     * @category ML
     */
    trainingStream(): Stream; output: FloatType; train: BooleanType; test: BooleanType; }>>>;
    /**
     * Return a datastream containing training statistics per training iteration.
     *
     * @returns The validation {@link Stream}
     *
     * @category ML
     */
    validationStream(): Stream; }>>>;
    /** @internal */
    private trainingTask;
    /**
     * Convert the built ML model into a {@link Template}, for inclusion in an EDK project.
     *
     * @returns The {@link Template} containing the ML Model
     *
     * @category ML
     */
    toTemplate(): Template;
}

/**
 * The builder for a model with a {@link StringType} output; it accepts a
 * {@link StringMLModel}. {@link MLModelBuilder.output} returns this class
 * when the output type is not {@link FloatType}.
 *
 * @category ML
 */
export declare class StringMLBuilder, Model extends StringMLModel> extends AbstractMLBuilder {
    private training?;
    /**
     * Construct a builder for a model that uses a {@link StringMLModel}.
     *
     * @param name the name of the ML model
     * @param module the {@link ModulePath} of the model
     * @param features the input features of the model
     * @param model_type the {@link StringMLModel} to apply
     * @param training (optional) the training source, either a `"stream"` or
     *   a `"pipeline"`, with the output field name and training configuration
     */
    constructor(name: Name, module: ModulePath, features: Features, model_type: Model, training?: {
        type: "stream";
        output_name: string;
        input: Stream>;
        config: MLTrainingConfiguration;
    } | {
        type: "pipeline";
        output_name: string;
        input: TabularPipelineBuilder, Record>;
        config: MLTrainingConfiguration;
    } | undefined);
    /**
     * Define which `MLModel` to use.
     *
     * @param model_type the {@link StringMLModel} to apply
     * @returns a {@link StringMLBuilder}, so that calls can be chained
     *
     * @category ML
     *
     * @example
     * ```typescript
     * // create a gradient boosted tree with a string output
     * const boosted_tree = new MLModelBuilder("ML Training")
     *     .feature("x", FloatType)
     *     .output(StringType)
     *     .model({ type: "boosted_trees_string", noise: "maximum_likelihood" })
     * ```
     */
    model(model_type: M): StringMLBuilder;
    /**
     * Create a training task based on some input data in an existing datastream.
     *
     * The options below are passed as fields of the single `config` argument.
     *
     * @param output_name the name of the output field in the input
     * @param input the input {@link Stream} containing training data
     * @param config (optional) the training configuration {@link MLTrainingConfiguration}
     * @returns a {@link StringMLBuilder}, so that calls can be chained
     *
     * @category ML
     *
     * @example
     * ```typescript
     * // use a DictType stream
     * const training_stream = Stream(
     *     "My Stream",
     *     DictType(
     *         StringType,
     *         StructType({
     *             x: FloatType,
     *             y: StringType,
     *         })
     *     )
     * );
     *
     * // create a gradient boosted tree with a string output
     * const ml_model = new MLModelBuilder("ML Training")
     *     .feature("x", FloatType)
     *     .output(StringType)
     *     .model({ type: "boosted_trees_string", noise: "maximum_likelihood" })
     *     .trainFromStream({
     *         output_name: "y",
     *         input: training_stream,
     *     });
     * ```
     */
    trainFromStream(config: {
        output_name: Output;
        input: Stream>>;
        config?: Partial>;
    }): StringMLBuilder;
    /**
     * Create a training task based on some input data assembled by a pipeline.
     *
     * The options below are passed as fields of the single `config` argument.
     *
     * @param output_name the name of the output field in the input
     * @param pipeline a function that receives a {@link PipelineBuilder} and returns the pipeline that assembles the input data for training
     * @param config (optional) the training configuration {@link MLTrainingConfiguration}
     * @returns a {@link StringMLBuilder}, so that calls can be chained
     *
     * @category ML
     *
     * @example
     * ```typescript
     * // use a DictType stream
     * const training_stream = Stream(
     *     "My Stream",
     *     DictType(
     *         StringType,
     *         StructType({
     *             x: FloatType,
     *             y: StringType,
     *         })
     *     )
     * );
     *
     * // create a gradient boosted tree with a string output
     * const ml_model = new MLModelBuilder("ML Training")
     *     .feature("x", FloatType)
     *     .output(StringType)
     *     .model({ type: "boosted_trees_string", noise: "maximum_likelihood" })
     *     .trainFromPipeline({
     *         output_name: "y",
     *         pipeline: builder => builder
     *             .from(training_stream)
     *             .filter(fields => Greater(fields.x, 0)),
     *     });
     * ```
     */
    trainFromPipeline(config: {
        output_name: Output;
        pipeline: (builder: PipelineBuilder) => TabularPipelineBuilder>, Record>;
        config?: Partial>;
    }): StringMLBuilder;
    /** @internal */
    private parametersType;
    /** @internal */
    private transformType;
    /**
     * Return the datastream containing the parameters of the trained ML model.
     *
     * @returns The parameter {@link Stream}
     *
     * @category ML
     */
    modelStream(): Stream;
    /**
     * Return a datastream containing the training data and associated predictions.
     *
     * @returns The training {@link Stream}
     *
     * @category ML
     */
    trainingStream(): Stream; output: StringType; train: BooleanType; test: BooleanType; }>>>;
    /**
     * Return a datastream containing training statistics per training iteration.
     *
     * @returns The validation {@link Stream}
     *
     * @category ML
     */
    validationStream(): Stream; }>>>;
    /** @internal */
    private trainingTask;
    /**
     * Convert the built ML model into a {@link Template}, for inclusion in an EDK project.
     *
     * @returns The {@link Template} containing the ML Model
     *
     * @category ML
     */
    toTemplate(): Template;
}