specification/ml/put_data_frame_analytics/MlPutDataFrameAnalyticsRequest.ts

/*
 * Licensed to Elasticsearch B.V. under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch B.V. licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import {
  DataframeAnalysisAnalyzedFields,
  DataframeAnalysisContainer,
  DataframeAnalyticsDestination,
  DataframeAnalyticsSource
} from '@ml/_types/DataframeAnalytics'
import { RequestBase } from '@_types/Base'
import { HttpHeaders, Id, Metadata, VersionString } from '@_types/common'
import { integer } from '@_types/Numeric'

/**
 * Create a data frame analytics job.
 * This API creates a data frame analytics job that performs an analysis on the
 * source indices and stores the outcome in a destination index.
 * @rest_spec_name ml.put_data_frame_analytics
 * @availability stack since=7.3.0 stability=stable
 * @availability serverless stability=stable visibility=public
 * @cluster_privileges manage_ml
 * @index_privileges create_index, index, manage, read, view_index_metadata
 * @doc_id put-dfanalytics
 * @doc_tag ml data frame
 */
export interface Request extends RequestBase {
  // Parameters carried in the URL path.
  path_parts: {
    /**
     * Identifier for the data frame analytics job. This identifier can contain
     * lowercase alphanumeric characters (a-z and 0-9), hyphens, and
     * underscores. It must start and end with alphanumeric characters.
     */
    id: Id
  }
  // Properties of the JSON request body. `analysis`, `dest` and `source` are
  // the only required members; everything else is optional.
  body: {
    /**
     * Specifies whether this job can start when there is insufficient machine
     * learning node capacity for it to be immediately assigned to a node. If
     * set to `false` and a machine learning node with capacity to run the job
     * cannot be immediately found, the API returns an error. If set to `true`,
     * the API does not return an error; the job waits in the `starting` state
     * until sufficient machine learning node capacity is available. This
     * behavior is also affected by the cluster-wide
     * `xpack.ml.max_lazy_ml_nodes` setting.
     * @server_default false
     * @doc_id ml-settings
     */
    allow_lazy_start?: boolean
    /**
     * The analysis configuration, which contains the information necessary to
     * perform one of the following types of analysis: classification, outlier
     * detection, or regression.
     */
    analysis: DataframeAnalysisContainer
    /**
     * Specifies `includes` and/or `excludes` patterns to select which fields
     * will be included in the analysis. The patterns specified in `excludes`
     * are applied last, therefore `excludes` takes precedence. In other words,
     * if the same field is specified in both `includes` and `excludes`, then
     * the field will not be included in the analysis. If `analyzed_fields` is
     * not set, only the relevant fields will be included. For example, all the
     * numeric fields for outlier detection.
     * The supported fields vary for each type of analysis. Outlier detection
     * requires numeric or `boolean` data to analyze. The algorithms don’t
     * support missing values therefore fields that have data types other than
     * numeric or boolean are ignored. Documents where included fields contain
     * missing values, null values, or an array are also ignored. Therefore the
     * `dest` index may contain documents that don’t have an outlier score.
     * Regression supports fields that are numeric, `boolean`, `text`,
     * `keyword`, and `ip` data types. It is also tolerant of missing values.
     * Fields that are supported are included in the analysis, other fields are
     * ignored. Documents where included fields contain an array with two or
     * more values are also ignored. Documents in the `dest` index that don’t
     * contain a results field are not included in the regression analysis.
     * Classification supports fields that are numeric, `boolean`, `text`,
     * `keyword`, and `ip` data types. It is also tolerant of missing values.
     * Fields that are supported are included in the analysis, other fields are
     * ignored. Documents where included fields contain an array with two or
     * more values are also ignored. Documents in the `dest` index that don’t
     * contain a results field are not included in the classification analysis.
     * Classification analysis can be improved by mapping ordinal variable
     * values to a single number. For example, in case of age ranges, you can
     * model the values as `0-14 = 0`, `15-24 = 1`, `25-34 = 2`, and so on.
     */
    analyzed_fields?: DataframeAnalysisAnalyzedFields
    /**
     * A description of the job.
     */
    description?: string
    /**
     * The destination configuration.
     */
    dest: DataframeAnalyticsDestination
    /**
     * The maximum number of threads to be used by the analysis. Using more
     * threads may decrease the time necessary to complete the analysis at the
     * cost of using more CPU. Note that the process may use additional threads
     * for operational functionality other than the analysis itself.
     * @server_default 1
     */
    max_num_threads?: integer

    // NOTE(review): this property has no doc comment, unlike its siblings.
    // Presumably arbitrary user-defined metadata stored with the job
    // (inferred only from the `Metadata` type name) — confirm, then promote
    // this note to a proper description.
    _meta?: Metadata
    /**
     * The approximate maximum amount of memory resources that are permitted for
     * analytical processing. If your `elasticsearch.yml` file contains an
     * `xpack.ml.max_model_memory_limit` setting, an error occurs when you try
     * to create data frame analytics jobs that have `model_memory_limit` values
     * greater than that setting.
     * @server_default 1gb
     */
    model_memory_limit?: string
    /**
     * The configuration of how to source the analysis data.
     */
    source: DataframeAnalyticsSource
    // NOTE(review): only availability tags are present; there is no
    // description. The purpose of this property is not evident from this
    // file (the type name suggests a set of HTTP headers) — TODO confirm
    // what it carries and document it.
    /**
     * @availability stack since=8.0.0
     * @availability serverless
     */
    headers?: HttpHeaders
    // NOTE(review): only availability tags are present; there is no
    // description. The type name suggests a version string, but which
    // version it refers to is not evident from this file — TODO confirm
    // and document it.
    /**
     * @availability stack since=7.16.0
     * @availability serverless
     */
    version?: VersionString
  }
}