/**
 * (C) Copyright IBM Corp. 2025-2026.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
import type { DefaultParams, JsonObject, CacheConfig } from "../../../types/common.mjs";
import type { StreamOptions } from "../gateway.mjs";
import type { ChatsMessage, ChatsTextContentPart } from "./messages.mjs";
import type { ChatsToolChoice, ChatsRequestTool, ChatsFunctionCall } from "./tools.mjs";
/** Constants for the `createChatCompletions` operation. */
export declare namespace CreateChatCompletionsConstants {
    /**
     * Constrains effort on reasoning for reasoning models. For OpenAI, currently supported by `o1`
     * models only. Reducing reasoning effort can result in faster responses and fewer tokens used on
     * reasoning in a response.
     *
     * Members of this enum are accepted by `CreateChatCompletionsParams.reasoningEffort`.
     */
    enum ReasoningEffort {
        LOW = "low",
        MEDIUM = "medium",
        HIGH = "high"
    }
}
/**
 * Parameters for the chat completion operation.
 *
 * Only `messages` and `model` are required; every other property is optional.
 */
export interface CreateChatCompletionsParams extends DefaultParams {
    /**
     * A list of messages comprising the chat conversation so far. Depending on the model you use,
     * different message types (modalities) are supported, like `"text"`, `"images"`, and `"audio"`.
     */
    messages: ChatsMessage[];
    /** ID or alias of the model to forward the chat request to. */
    model: string;
    /**
     * Parameters for audio output. Only required when audio output is requested with modalities:
     * `["audio"]`.
     *
     * See: [OpenAI's Audio Guide](https://platform.openai.com/docs/guides/audio) for more
     * information.
     */
    audio?: JsonObject;
    /** Caching configuration for a request. Cache is only supported for non-streaming requests. */
    cache?: CacheConfig;
    /**
     * A number between `-2.0` and `2.0`. Positive values penalize new tokens based on their existing
     * frequency in the text so far, decreasing the model's likelihood to repeat the same line
     * verbatim.
     */
    frequencyPenalty?: number;
    /**
     * Controls which (if any) function is called by the model.
     *
     * - `"none"` means the model will not call a function and instead generates a message.
     * - `"auto"` means the model can pick between generating a message or calling a function.
     * - Specifying a particular function via `{"name": "my_function"}` forces the model to call that
     *   function.
     *
     * `"none"` is the default when no functions are present. `"auto"` is the default if functions are
     * present.
     *
     * Deprecated: `function_call` has been deprecated by OpenAI in favor of `tool_choice`.
     */
    functionCall?: string | ChatsFunctionCall;
    /**
     * A list of functions the model may generate JSON inputs for.
     *
     * Deprecated: `functions` has been deprecated by OpenAI in favor of `tools`.
     */
    functions?: JsonObject;
    /**
     * Modifies the likelihood of specified tokens appearing in the completion. Accepts a JSON object
     * that maps tokens (specified by their token ID in the tokenizer) to an associated bias value
     * from `-100` to `100`. Mathematically, the bias is added to the logits generated by the model
     * prior to sampling. The exact effect will vary per model, but values between `-1` and `1` should
     * decrease or increase likelihood of selection; values like `-100` or `100` should result in a
     * ban or exclusive selection of the relevant token.
     */
    logitBias?: JsonObject;
    /**
     * Indicates whether to return log probabilities of the output tokens or not. If `true`, returns
     * the log probabilities of each output token returned in the content of message.
     */
    logprobs?: boolean;
    /**
     * Specifies an upper bound for the number of tokens that can be generated for a completion,
     * including visible output tokens and [reasoning tokens][reasoning tokens].
     *
     * [reasoning tokens]: https://platform.openai.com/docs/guides/reasoning
     */
    maxCompletionTokens?: number;
    /**
     * Specifies a maximum number of tokens that can be generated in the chat completion. This value
     * can be used to control costs for text generated via API.
     *
     * Deprecated: `max_tokens` has been deprecated by OpenAI in favor of `max_completion_tokens`, and
     * is not compatible with `o1` series models.
     */
    maxTokens?: number;
    /** Contains developer-defined tags and values used for filtering completions. */
    metadata?: JsonObject;
    /**
     * Specifies the output types that you would like the model to generate for this request. Most
     * models are capable of generating text, which is the default (`["text"]`). Some models can
     * generate audio. For OpenAI, the `gpt-4o-audio-preview` model can be used to [generate
     * audio][generate audio]. To request that this model generate both text and audio responses, you
     * can use `["text", "audio"]`.
     *
     * [generate audio]: https://platform.openai.com/docs/guides/audio
     */
    modalities?: string[];
    /**
     * Specifies how many chat completion choices to generate for each input message.
     *
     * Note: you will be charged based on the number of generated tokens across all choices, keep
     * `"n"` set to `1` to minimize costs.
     */
    n?: number;
    /** Specifies whether to enable parallel function calling during tool use. */
    parallelToolCalls?: boolean;
    /**
     * The configuration for a [Predicted Output][Predicted Output], which can greatly improve
     * response times when large parts of the model response are known ahead of time. This is most
     * common when you are regenerating a file with only minor changes to most of the content.
     *
     * [Predicted Output]: https://platform.openai.com/docs/guides/predicted-outputs
     */
    prediction?: ChatsPrediction;
    /**
     * A number between `-2.0` and `2.0`. Positive values penalize new tokens based on whether they
     * appear in the text so far, increasing the model's likelihood to talk about new topics.
     */
    presencePenalty?: number;
    /**
     * Constrains effort on reasoning for reasoning models. For OpenAI, currently supported by `o1`
     * models only. Reducing reasoning effort can result in faster responses and fewer tokens used on
     * reasoning in a response.
     *
     * Accepts either a `CreateChatCompletionsConstants.ReasoningEffort` member or one of the
     * equivalent string literals. The enum is listed explicitly because string-enum members are not
     * assignable to string-literal types.
     */
    reasoningEffort?: CreateChatCompletionsConstants.ReasoningEffort | 'low' | 'medium' | 'high';
    /**
     * An object specifying the format that the model must output.
     *
     * - Setting to `{ "type": "json_schema", "json_schema": {...} }` enables [Structured
     *   Outputs][Structured Outputs] which ensures the model will match your supplied JSON schema.
     * - Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model
     *   generates is valid JSON.
     *
     * Important: when using JSON mode, you must also instruct the model to produce JSON yourself via
     * a system or user message. Without this, the model may generate an unending stream of whitespace
     * until the generation reaches the token limit, resulting in a long-running and seemingly "stuck"
     * request. Also note that the message content may be partially cut off if `"finish_reason"` is
     * set to `"length"`, which indicates the generation exceeded `max_tokens` or the conversation
     * exceeded the max context length.
     *
     * [Structured Outputs]: https://platform.openai.com/docs/guides/structured-outputs
     */
    responseFormat?: ChatsResponseFormat;
    /** Specifies model routing configurations for the request. */
    router?: JsonObject;
    /**
     * The seed for the model request. For OpenAI, this feature is in Beta. If specified, OpenAI's
     * system will make a best effort attempt to sample deterministically, such that repeated requests
     * with the same seed and parameters should return the same result. Determinism is not guaranteed,
     * and you should refer to the `system_fingerprint` response parameter to monitor changes in the
     * backend.
     */
    seed?: number;
    /** The service tier used for processing a request. */
    serviceTier?: string;
    /** Specifies up to 4 sequences where the API will stop generating further tokens. */
    stop?: string[];
    /**
     * Indicates whether to store the output of this chat completion request for use in OpenAI's
     * [model distillation][model distillation] or [evals][evals] products.
     *
     * [model distillation]: https://platform.openai.com/docs/guides/distillation
     *
     * [evals]: https://platform.openai.com/docs/guides/evals
     */
    store?: boolean;
    /**
     * Indicates whether to stream the model response to the user. If set, partial message deltas will
     * be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become
     * available, with the stream terminated by a data: `[DONE]` message.
     */
    stream?: boolean;
    /** Options for streaming response. Only set this when you set `stream` to `true`. */
    streamOptions?: StreamOptions;
    /**
     * Specifies what sampling temperature to use. Higher values like `0.8` will make the output more
     * random, while lower values like `0.2` will make it more focused and deterministic.
     *
     * Note: OpenAI generally recommends altering this or `top_p` but not both.
     */
    temperature?: number;
    /**
     * Controls which (if any) tool is called by the model.
     *
     * - `"none"` means the model will not call any tool and instead generates a message.
     * - `"auto"` means the model can pick between generating a message or calling one or more tools.
     * - `"required"` means the model must call one or more tools.
     * - Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}`
     *   forces the model to call that tool.
     *
     * `"none"` is the default when no tools are present. `"auto"` is the default if tools are
     * present.
     */
    toolChoice?: ChatsToolChoice;
    /**
     * A list of tools the model may call. Currently, only functions are supported as tools. Use this
     * to provide a list of functions the model may generate JSON inputs for. A max of 128 functions
     * are supported.
     */
    tools?: ChatsRequestTool[];
    /**
     * An integer between `0` and `20` specifying the number of most likely tokens to return at each
     * token position, each with an associated log probability. `logprobs` must be set to `true` if
     * this parameter is used.
     */
    topLogprobs?: number;
    /**
     * An alternative to sampling with `temperature`, called nucleus sampling, where the model
     * considers the results of the tokens with `top_p` probability mass. Example: `0.1` means only
     * the tokens comprising the top 10% probability mass are considered.
     *
     * Note: OpenAI generally recommends altering this or `temperature` but not both.
     */
    topP?: number;
    /**
     * A unique identifier representing your end-user, which can help OpenAI to monitor and detect
     * abuse.
     */
    user?: string;
}
/** Constants for the `ChatsPrediction` interface (merged with it by name). */
export declare namespace ChatsPrediction {
    namespace Constants {
        /** Type of predicted content you want to provide, should always be `"content"`. */
        enum Type {
            CONTENT = "content"
        }
    }
}
/**
 * The configuration for a [Predicted Output][Predicted Output], which can greatly improve response
 * times when large parts of the model response are known ahead of time. This is most common when
 * you are regenerating a file with only minor changes to most of the content.
 *
 * [Predicted Output]: https://platform.openai.com/docs/guides/predicted-outputs
 */
export interface ChatsPrediction {
    /**
     * Content that should be matched when generating a model response. If generated tokens would
     * match this content, the entire model response can be returned much more quickly.
     *
     * Either a single string or an array of text content parts.
     */
    content: string | ChatsTextContentPart[];
    /**
     * Type of predicted content you want to provide, should always be `"content"`.
     *
     * Accepts the `ChatsPrediction.Constants.Type` enum or a plain string.
     */
    type: ChatsPrediction.Constants.Type | string;
}
/**
 * An object specifying the format that the model must output. One of `ChatsResponseFormatJSON`,
 * `ChatsResponseFormatJSONSchema`, or `ChatsResponseFormatText`.
 *
 * - Setting to `{ "type": "json_schema", "json_schema": {...} }` enables [Structured
 *   Outputs][Structured Outputs] which ensures the model will match your supplied JSON schema.
 * - Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model
 *   generates is valid JSON.
 *
 * Important: when using JSON mode, you must also instruct the model to produce JSON yourself via a
 * system or user message. Without this, the model may generate an unending stream of whitespace
 * until the generation reaches the token limit, resulting in a long-running and seemingly "stuck"
 * request. Also note that the message content may be partially cut off if `"finish_reason"` is set
 * to `"length"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded
 * the max context length.
 *
 * [Structured Outputs]: https://platform.openai.com/docs/guides/structured-outputs
 */
export type ChatsResponseFormat = ChatsResponseFormatJSON | ChatsResponseFormatJSONSchema | ChatsResponseFormatText;
/** Represents JSON format (JSON mode) for a chat response. */
export interface ChatsResponseFormatJSON {
    /**
     * Type of response format being defined. JSON mode is selected with `"json_object"`; the value
     * `"json_schema"` belongs to `ChatsResponseFormatJSONSchema` instead.
     *
     * Because this property also accepts a plain string, `"json_object"` can be passed directly.
     */
    type: ChatsResponseFormatJSON.Constants.Type | string;
}
/** Constants for the `ChatsResponseFormatJSON` interface (merged with it by name). */
export declare namespace ChatsResponseFormatJSON {
    namespace Constants {
        /**
         * Type of response format being defined.
         *
         * NOTE(review): the only declared member is `JSON_SCHEMA = "json_schema"`, which does not
         * match the `"json_object"` value used for JSON mode. This looks copied from
         * `ChatsResponseFormatJSONSchema` — confirm against the API definition before relying on it.
         */
        enum Type {
            JSON_SCHEMA = "json_schema"
        }
    }
}
/** Custom schema used to generate structured JSON responses. */
export interface ChatsJSONSchema {
    /**
     * Description of what the response format is for. Used by the model to determine how to respond
     * in the format.
     */
    description?: string;
    /**
     * Name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a
     * maximum length of 64.
     */
    name: string;
    /**
     * Schema for the response format, described as a JSON Schema object.
     *
     * See [JSON Schema](https://json-schema.org/) for more information.
     */
    schema?: JsonObject;
    /**
     * Indicates whether to enable strict schema adherence when generating the output. If set to
     * `true`, the model will always follow the exact schema defined in the schema field. Only a
     * subset of JSON Schema is supported when `strict` is `true`.
     */
    strict?: boolean;
}
/** Represents custom JSON schema format for a chat response. */
export interface ChatsResponseFormatJSONSchema {
    /** Custom schema used to generate structured JSON responses. */
    json_schema: ChatsJSONSchema;
    /**
     * Type of response format being defined, in this case should always be `"json_schema"`.
     *
     * Accepts the `ChatsResponseFormatJSONSchema.Constants.Type` enum or a plain string.
     */
    type: ChatsResponseFormatJSONSchema.Constants.Type | string;
}
/** Constants for the `ChatsResponseFormatJSONSchema` interface (merged with it by name). */
export declare namespace ChatsResponseFormatJSONSchema {
    namespace Constants {
        /** Type of response format being defined, in this case should always be `"json_schema"`. */
        enum Type {
            JSON_SCHEMA = "json_schema"
        }
    }
}
/** Represents text format for a chat response. */
export interface ChatsResponseFormatText {
    /**
     * Type of response format being defined.
     *
     * NOTE(review): presumably `"text"` for this variant rather than `"json_schema"` — confirm
     * against the API definition. Because this property also accepts a plain string, the intended
     * value can be passed directly.
     */
    type: ChatsResponseFormatText.Constants.Type | string;
}
/** Constants for the `ChatsResponseFormatText` interface (merged with it by name). */
export declare namespace ChatsResponseFormatText {
    namespace Constants {
        /**
         * Type of response format being defined.
         *
         * NOTE(review): the only declared member is `JSON_SCHEMA = "json_schema"`, which looks copied
         * from `ChatsResponseFormatJSONSchema` and does not describe a text format — confirm against
         * the API definition before relying on it.
         */
        enum Type {
            JSON_SCHEMA = "json_schema"
        }
    }
}
//# sourceMappingURL=request.d.mts.map