import * as pulumi from "@pulumi/pulumi";

// NOTE(review): the generic type arguments in this declaration were reconstructed — scalar kinds
// from each field's documentation, and `| undefined` on optional outputs by analogy with the
// `labels` / `parameters` / `transformNameMapping` outputs. Confirm against the published
// @pulumi/gcp typings before release.

/**
 * Creates a job on Dataflow, which is an implementation of Apache Beam running on Google Compute Engine. For more information see
 * the official documentation for
 * [Beam](https://beam.apache.org) and [Dataflow](https://cloud.google.com/dataflow/).
 *
 * ## Example Usage
 *
 * ```typescript
 * import * as pulumi from "@pulumi/pulumi";
 * import * as gcp from "@pulumi/gcp";
 *
 * const bigDataJob = new gcp.dataflow.Job("big_data_job", {
 *     name: "dataflow-job",
 *     templateGcsPath: "gs://my-bucket/templates/template_file",
 *     tempGcsLocation: "gs://my-bucket/tmp_dir",
 *     parameters: {
 *         foo: "bar",
 *         baz: "qux",
 *     },
 * });
 * ```
 *
 * ### Streaming Job
 *
 * ```typescript
 * import * as pulumi from "@pulumi/pulumi";
 * import * as gcp from "@pulumi/gcp";
 *
 * const topic = new gcp.pubsub.Topic("topic", {name: "dataflow-job1"});
 * const bucket1 = new gcp.storage.Bucket("bucket1", {
 *     name: "tf-test-bucket1",
 *     location: "US",
 *     forceDestroy: true,
 * });
 * const bucket2 = new gcp.storage.Bucket("bucket2", {
 *     name: "tf-test-bucket2",
 *     location: "US",
 *     forceDestroy: true,
 * });
 * const pubsubStream = new gcp.dataflow.Job("pubsub_stream", {
 *     name: "tf-test-dataflow-job1",
 *     templateGcsPath: "gs://my-bucket/templates/template_file",
 *     tempGcsLocation: "gs://my-bucket/tmp_dir",
 *     enableStreamingEngine: true,
 *     parameters: {
 *         inputFilePattern: pulumi.interpolate`${bucket1.url}/*.json`,
 *         outputTopic: topic.id,
 *     },
 *     transformNameMapping: {
 *         name: "test_job",
 *         env: "test",
 *     },
 *     onDelete: "cancel",
 * });
 * ```
 *
 * ## Note on "destroy" / "apply"
 *
 * There are many types of Dataflow jobs.  Some Dataflow jobs run constantly, getting new data from (e.g.) a GCS bucket, and outputting data continuously.  Some jobs process a set amount of data then terminate.  All jobs can fail while running due to programming errors or other issues.  In this way, Dataflow jobs are different from most other Google resources.
 *
 * The Dataflow resource is considered 'existing' while it is in a nonterminal state.  If it reaches a terminal state (e.g. 'FAILED', 'COMPLETE', 'CANCELLED'), it will be recreated on the next 'apply'.  This is as expected for jobs which run continuously, but may surprise users who use this resource for other kinds of Dataflow jobs.
 *
 * A Dataflow job which is 'destroyed' may be "cancelled" or "drained".  If "cancelled", the job terminates - any data written remains where it is, but no new data will be processed.  If "drained", no new data will enter the pipeline, but any data currently in the pipeline will finish being processed.  The default is "drain". When `onDelete` is set to `"drain"` in the configuration, you may experience a long wait for your `pulumi destroy` to complete.
 *
 * You can potentially short-circuit the wait by setting `skipWaitOnJobTermination` to `true`, but beware that unless you take active steps to ensure that the job `name` parameter changes between instances, the name will conflict and the launch of the new job will fail. One way to do this is with a randomId resource, for example:
 *
 * ```typescript
 * import * as pulumi from "@pulumi/pulumi";
 * import * as gcp from "@pulumi/gcp";
 * import * as random from "@pulumi/random";
 *
 * const config = new pulumi.Config();
 * const bigDataJobSubscriptionId = config.get("bigDataJobSubscriptionId") || "projects/myproject/subscriptions/messages";
 * const bigDataJobNameSuffix = new random.index.Id("big_data_job_name_suffix", {
 *     byteLength: 4,
 *     keepers: {
 *         region: region,
 *         subscriptionId: bigDataJobSubscriptionId,
 *     },
 * });
 * const bigDataJob = new gcp.dataflow.FlexTemplateJob("big_data_job", {
 *     name: `dataflow-flextemplates-job-${bigDataJobNameSuffix.dec}`,
 *     region: region,
 *     containerSpecGcsPath: "gs://my-bucket/templates/template.json",
 *     skipWaitOnJobTermination: true,
 *     parameters: {
 *         inputSubscription: bigDataJobSubscriptionId,
 *     },
 * });
 * ```
 *
 * ## Import
 *
 * Dataflow jobs can be imported using the job `id` e.g.
 *
 * * `{{id}}`
 *
 * When using the `pulumi import` command, dataflow jobs can be imported using one of the formats above. For example:
 *
 * ```sh
 * $ pulumi import gcp:dataflow/job:Job default {{id}}
 * ```
 */
export declare class Job extends pulumi.CustomResource {
    /**
     * Get an existing Job resource's state with the given name, ID, and optional extra
     * properties used to qualify the lookup.
     *
     * @param name The _unique_ name of the resulting resource.
     * @param id The _unique_ provider ID of the resource to lookup.
     * @param state Any extra arguments used during the lookup.
     * @param opts Optional settings to control the behavior of the CustomResource.
     */
    static get(name: string, id: pulumi.Input<pulumi.ID>, state?: JobState, opts?: pulumi.CustomResourceOptions): Job;
    /**
     * Returns true if the given object is an instance of Job.  This is designed to work even
     * when multiple copies of the Pulumi SDK have been loaded into the same process.
     */
    static isInstance(obj: any): obj is Job;
    /**
     * List of experiments that should be used by the job. An example value is `["enableStackdriverAgentMetrics"]`.
     */
    readonly additionalExperiments: pulumi.Output<string[]>;
    /**
     * All labels (key/value pairs) present on the resource in GCP, including the labels configured through Pulumi, other clients and services.
     */
    readonly effectiveLabels: pulumi.Output<{
        [key: string]: string;
    }>;
    /**
     * Enable/disable the use of [Streaming Engine](https://cloud.google.com/dataflow/docs/guides/deploying-a-pipeline#streaming-engine) for the job. Note that Streaming Engine is enabled by default for pipelines developed against the Beam SDK for Python v2.21.0 or later when using Python 3.
     */
    readonly enableStreamingEngine: pulumi.Output<boolean | undefined>;
    /**
     * The configuration for VM IPs.  Options are `"WORKER_IP_PUBLIC"` or `"WORKER_IP_PRIVATE"`.
     */
    readonly ipConfiguration: pulumi.Output<string | undefined>;
    /**
     * The unique ID of this job.
     */
    readonly jobId: pulumi.Output<string>;
    /**
     * The name for the Cloud KMS key for the job. Key format is: `projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY`
     */
    readonly kmsKeyName: pulumi.Output<string | undefined>;
    /**
     * User labels to be specified for the job. Keys and values should follow the restrictions
     * specified in the [labeling restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions) page.
     * **Note**: This field is non-authoritative, and will only manage the labels present in your configuration. Please refer to the field `effectiveLabels` for all of the labels present on the resource.
     */
    readonly labels: pulumi.Output<{
        [key: string]: string;
    } | undefined>;
    /**
     * The machine type to use for the job.
     */
    readonly machineType: pulumi.Output<string | undefined>;
    /**
     * The number of workers permitted to work on the job.  More workers may improve processing speed at additional cost.
     */
    readonly maxWorkers: pulumi.Output<number | undefined>;
    /**
     * A unique name for the resource, required by Dataflow.
     */
    readonly name: pulumi.Output<string>;
    /**
     * The network to which VMs will be assigned. If it is not provided, "default" will be used.
     */
    readonly network: pulumi.Output<string | undefined>;
    /**
     * One of "drain" or "cancel".  Specifies behavior of deletion during `pulumi destroy`.  See above note.
     */
    readonly onDelete: pulumi.Output<string | undefined>;
    /**
     * **Template specific** Key/Value pairs to be forwarded to the pipeline's options; keys are
     * case-sensitive based on the language on which the pipeline is coded, mostly Java.
     * **Note**: do not configure Dataflow options here in parameters.
     */
    readonly parameters: pulumi.Output<{
        [key: string]: string;
    } | undefined>;
    /**
     * The project in which the resource belongs. If it is not provided, the provider project is used.
     */
    readonly project: pulumi.Output<string>;
    /**
     * The combination of labels configured directly on the resource and default labels configured on the provider.
     */
    readonly pulumiLabels: pulumi.Output<{
        [key: string]: string;
    }>;
    /**
     * The region in which the created job should run.
     */
    readonly region: pulumi.Output<string | undefined>;
    /**
     * The Service Account email used to create the job. This should be just an email e.g. `myserviceaccount@myproject.iam.gserviceaccount.com`. Do not include any `serviceAccount:` or other prefix.
     */
    readonly serviceAccountEmail: pulumi.Output<string | undefined>;
    /**
     * If set to `true`, Pulumi will treat `DRAINING` and `CANCELLING` as terminal states when deleting the resource, and will remove the resource from Pulumi state and move on.  See above note.
     */
    readonly skipWaitOnJobTermination: pulumi.Output<boolean | undefined>;
    /**
     * The current state of the resource, selected from the [JobState enum](https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs#Job.JobState)
     */
    readonly state: pulumi.Output<string>;
    /**
     * The subnetwork to which VMs will be assigned. Should be of the form "regions/REGION/subnetworks/SUBNETWORK". If the [subnetwork is located in a Shared VPC network](https://cloud.google.com/dataflow/docs/guides/specifying-networks#shared), you must use the complete URL. For example `"googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/SUBNET_NAME"`
     */
    readonly subnetwork: pulumi.Output<string | undefined>;
    /**
     * A writeable location on GCS for the Dataflow job to dump its temporary data.
     *
     * - - -
     */
    readonly tempGcsLocation: pulumi.Output<string>;
    /**
     * The GCS path to the Dataflow job template.
     */
    readonly templateGcsPath: pulumi.Output<string>;
    /**
     * Only applicable when updating a pipeline. Map of transform name prefixes of the job to be replaced with the corresponding name prefixes of the new job. This field is not used outside of update.
     */
    readonly transformNameMapping: pulumi.Output<{
        [key: string]: string;
    } | undefined>;
    /**
     * The type of this job, selected from the [JobType enum](https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs#Job.JobType)
     */
    readonly type: pulumi.Output<string>;
    /**
     * The zone in which the created job should run. If it is not provided, the provider zone is used.
     */
    readonly zone: pulumi.Output<string | undefined>;
    /**
     * Create a Job resource with the given unique name, arguments, and options.
     *
     * @param name The _unique_ name of the resource.
     * @param args The arguments to use to populate this resource's properties.
     * @param opts A bag of options that control this resource's behavior.
     */
    constructor(name: string, args: JobArgs, opts?: pulumi.CustomResourceOptions);
}

/**
 * Input properties used for looking up and filtering Job resources.
 */
export interface JobState {
    /**
     * List of experiments that should be used by the job. An example value is `["enableStackdriverAgentMetrics"]`.
     */
    additionalExperiments?: pulumi.Input<pulumi.Input<string>[]>;
    /**
     * All labels (key/value pairs) present on the resource in GCP, including the labels configured through Pulumi, other clients and services.
     */
    effectiveLabels?: pulumi.Input<{
        [key: string]: pulumi.Input<string>;
    }>;
    /**
     * Enable/disable the use of [Streaming Engine](https://cloud.google.com/dataflow/docs/guides/deploying-a-pipeline#streaming-engine) for the job. Note that Streaming Engine is enabled by default for pipelines developed against the Beam SDK for Python v2.21.0 or later when using Python 3.
     */
    enableStreamingEngine?: pulumi.Input<boolean>;
    /**
     * The configuration for VM IPs.  Options are `"WORKER_IP_PUBLIC"` or `"WORKER_IP_PRIVATE"`.
     */
    ipConfiguration?: pulumi.Input<string>;
    /**
     * The unique ID of this job.
     */
    jobId?: pulumi.Input<string>;
    /**
     * The name for the Cloud KMS key for the job. Key format is: `projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY`
     */
    kmsKeyName?: pulumi.Input<string>;
    /**
     * User labels to be specified for the job. Keys and values should follow the restrictions
     * specified in the [labeling restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions) page.
     * **Note**: This field is non-authoritative, and will only manage the labels present in your configuration. Please refer to the field `effectiveLabels` for all of the labels present on the resource.
     */
    labels?: pulumi.Input<{
        [key: string]: pulumi.Input<string>;
    }>;
    /**
     * The machine type to use for the job.
     */
    machineType?: pulumi.Input<string>;
    /**
     * The number of workers permitted to work on the job.  More workers may improve processing speed at additional cost.
     */
    maxWorkers?: pulumi.Input<number>;
    /**
     * A unique name for the resource, required by Dataflow.
     */
    name?: pulumi.Input<string>;
    /**
     * The network to which VMs will be assigned. If it is not provided, "default" will be used.
     */
    network?: pulumi.Input<string>;
    /**
     * One of "drain" or "cancel".  Specifies behavior of deletion during `pulumi destroy`.  See above note.
     */
    onDelete?: pulumi.Input<string>;
    /**
     * **Template specific** Key/Value pairs to be forwarded to the pipeline's options; keys are
     * case-sensitive based on the language on which the pipeline is coded, mostly Java.
     * **Note**: do not configure Dataflow options here in parameters.
     */
    parameters?: pulumi.Input<{
        [key: string]: pulumi.Input<string>;
    }>;
    /**
     * The project in which the resource belongs. If it is not provided, the provider project is used.
     */
    project?: pulumi.Input<string>;
    /**
     * The combination of labels configured directly on the resource and default labels configured on the provider.
     */
    pulumiLabels?: pulumi.Input<{
        [key: string]: pulumi.Input<string>;
    }>;
    /**
     * The region in which the created job should run.
     */
    region?: pulumi.Input<string>;
    /**
     * The Service Account email used to create the job. This should be just an email e.g. `myserviceaccount@myproject.iam.gserviceaccount.com`. Do not include any `serviceAccount:` or other prefix.
     */
    serviceAccountEmail?: pulumi.Input<string>;
    /**
     * If set to `true`, Pulumi will treat `DRAINING` and `CANCELLING` as terminal states when deleting the resource, and will remove the resource from Pulumi state and move on.  See above note.
     */
    skipWaitOnJobTermination?: pulumi.Input<boolean>;
    /**
     * The current state of the resource, selected from the [JobState enum](https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs#Job.JobState)
     */
    state?: pulumi.Input<string>;
    /**
     * The subnetwork to which VMs will be assigned. Should be of the form "regions/REGION/subnetworks/SUBNETWORK". If the [subnetwork is located in a Shared VPC network](https://cloud.google.com/dataflow/docs/guides/specifying-networks#shared), you must use the complete URL. For example `"googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/SUBNET_NAME"`
     */
    subnetwork?: pulumi.Input<string>;
    /**
     * A writeable location on GCS for the Dataflow job to dump its temporary data.
     *
     * - - -
     */
    tempGcsLocation?: pulumi.Input<string>;
    /**
     * The GCS path to the Dataflow job template.
     */
    templateGcsPath?: pulumi.Input<string>;
    /**
     * Only applicable when updating a pipeline. Map of transform name prefixes of the job to be replaced with the corresponding name prefixes of the new job. This field is not used outside of update.
     */
    transformNameMapping?: pulumi.Input<{
        [key: string]: pulumi.Input<string>;
    }>;
    /**
     * The type of this job, selected from the [JobType enum](https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs#Job.JobType)
     */
    type?: pulumi.Input<string>;
    /**
     * The zone in which the created job should run. If it is not provided, the provider zone is used.
     */
    zone?: pulumi.Input<string>;
}

/**
 * The set of arguments for constructing a Job resource.
 */
export interface JobArgs {
    /**
     * List of experiments that should be used by the job. An example value is `["enableStackdriverAgentMetrics"]`.
     */
    additionalExperiments?: pulumi.Input<pulumi.Input<string>[]>;
    /**
     * Enable/disable the use of [Streaming Engine](https://cloud.google.com/dataflow/docs/guides/deploying-a-pipeline#streaming-engine) for the job. Note that Streaming Engine is enabled by default for pipelines developed against the Beam SDK for Python v2.21.0 or later when using Python 3.
     */
    enableStreamingEngine?: pulumi.Input<boolean>;
    /**
     * The configuration for VM IPs.  Options are `"WORKER_IP_PUBLIC"` or `"WORKER_IP_PRIVATE"`.
     */
    ipConfiguration?: pulumi.Input<string>;
    /**
     * The name for the Cloud KMS key for the job. Key format is: `projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY`
     */
    kmsKeyName?: pulumi.Input<string>;
    /**
     * User labels to be specified for the job. Keys and values should follow the restrictions
     * specified in the [labeling restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions) page.
     * **Note**: This field is non-authoritative, and will only manage the labels present in your configuration. Please refer to the field `effectiveLabels` for all of the labels present on the resource.
     */
    labels?: pulumi.Input<{
        [key: string]: pulumi.Input<string>;
    }>;
    /**
     * The machine type to use for the job.
     */
    machineType?: pulumi.Input<string>;
    /**
     * The number of workers permitted to work on the job.  More workers may improve processing speed at additional cost.
     */
    maxWorkers?: pulumi.Input<number>;
    /**
     * A unique name for the resource, required by Dataflow.
     */
    name?: pulumi.Input<string>;
    /**
     * The network to which VMs will be assigned. If it is not provided, "default" will be used.
     */
    network?: pulumi.Input<string>;
    /**
     * One of "drain" or "cancel".  Specifies behavior of deletion during `pulumi destroy`.  See above note.
     */
    onDelete?: pulumi.Input<string>;
    /**
     * **Template specific** Key/Value pairs to be forwarded to the pipeline's options; keys are
     * case-sensitive based on the language on which the pipeline is coded, mostly Java.
     * **Note**: do not configure Dataflow options here in parameters.
     */
    parameters?: pulumi.Input<{
        [key: string]: pulumi.Input<string>;
    }>;
    /**
     * The project in which the resource belongs. If it is not provided, the provider project is used.
     */
    project?: pulumi.Input<string>;
    /**
     * The region in which the created job should run.
     */
    region?: pulumi.Input<string>;
    /**
     * The Service Account email used to create the job. This should be just an email e.g. `myserviceaccount@myproject.iam.gserviceaccount.com`. Do not include any `serviceAccount:` or other prefix.
     */
    serviceAccountEmail?: pulumi.Input<string>;
    /**
     * If set to `true`, Pulumi will treat `DRAINING` and `CANCELLING` as terminal states when deleting the resource, and will remove the resource from Pulumi state and move on.  See above note.
     */
    skipWaitOnJobTermination?: pulumi.Input<boolean>;
    /**
     * The subnetwork to which VMs will be assigned. Should be of the form "regions/REGION/subnetworks/SUBNETWORK". If the [subnetwork is located in a Shared VPC network](https://cloud.google.com/dataflow/docs/guides/specifying-networks#shared), you must use the complete URL. For example `"googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/SUBNET_NAME"`
     */
    subnetwork?: pulumi.Input<string>;
    /**
     * A writeable location on GCS for the Dataflow job to dump its temporary data.
     *
     * - - -
     */
    tempGcsLocation: pulumi.Input<string>;
    /**
     * The GCS path to the Dataflow job template.
     */
    templateGcsPath: pulumi.Input<string>;
    /**
     * Only applicable when updating a pipeline. Map of transform name prefixes of the job to be replaced with the corresponding name prefixes of the new job. This field is not used outside of update.
     */
    transformNameMapping?: pulumi.Input<{
        [key: string]: pulumi.Input<string>;
    }>;
    /**
     * The zone in which the created job should run. If it is not provided, the provider zone is used.
     */
    zone?: pulumi.Input<string>;
}