/**
 * (C) Copyright IBM Corp. 2025-2026.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
import type { DefaultParams, JsonObject, CacheConfig } from "../../../types/common.js";
import type { StreamOptions } from "../gateway.js";
import type { ModelRouter } from "../models/response.js";
/**
 * Request parameters accepted by `completions.create`.
 *
 * Only `model` and `prompt` are required; every other field is optional. Members are grouped by
 * purpose: required inputs, output shape, sampling, token likelihood, streaming, and request
 * handling.
 */
export interface CreateCompletionsParams extends DefaultParams {
    // ----- Required inputs -----

    /** ID of the model that should generate the completion. */
    model: string;
    /**
     * Prompt to generate a completion for.
     *
     * `<|endoftext|>` is the document separator the model sees during training, so a model that
     * is given no prompt generates as if from the beginning of a new document.
     *
     * NOTE(review): the upstream description also mentions arrays of strings, arrays of tokens
     * and arrays of token arrays, but this declaration only admits a single `string` — confirm
     * which of the two is intended.
     */
    prompt: string;

    // ----- Output shape -----

    /**
     * Upper bound on the number of tokens generated in the completion. The prompt's token count
     * plus `maxTokens` cannot exceed the model's context length.
     */
    maxTokens?: number;
    /**
     * How many completions to generate for each prompt.
     *
     * Note: this parameter generates many completions and can therefore consume your token quota
     * quickly. Use it carefully and make sure `maxTokens` and `stop` have reasonable settings.
     */
    n?: number;
    /**
     * Generates `bestOf` completions server-side and returns the "best" one, i.e. the one with
     * the highest log probability per token. Results cannot be streamed.
     *
     * Combined with `n`, `bestOf` sets the number of candidate completions and `n` sets how many
     * of them are returned; `bestOf` must be greater than `n`.
     *
     * Note: this parameter generates many completions and can therefore consume your token quota
     * quickly. Use it carefully and make sure `maxTokens` and `stop` have reasonable settings.
     */
    bestOf?: number;
    /** Up to 4 sequences at which the API stops generating further tokens. */
    stop?: string[];
    /**
     * Text that follows a completion of inserted text. On OpenAI, this parameter is only
     * supported for `gpt-3.5-turbo-instruct`.
     */
    suffix?: string;
    /** When set, the prompt is echoed back in addition to the completion. */
    echo?: boolean;
    /**
     * Number of most likely output tokens whose log probabilities are included, in addition to
     * those of the chosen tokens. For example, with `logprobs` set to `5` the API returns a list
     * of the 5 most likely tokens. The `logprob` of the sampled token is always returned, so the
     * response may contain up to `logprobs+1` elements. The maximum value is `5`.
     */
    logprobs?: number;

    // ----- Sampling -----

    /**
     * Sampling temperature, between `0` and `2`. Higher values such as `0.8` make the output
     * more random; lower values such as `0.2` make it more focused and deterministic.
     *
     * Note: OpenAI generally recommends altering this or `topP`, but not both.
     */
    temperature?: number;
    /**
     * Nucleus sampling, an alternative to sampling with `temperature`: the model considers only
     * the tokens that make up the `topP` probability mass. `0.1` therefore means that only the
     * tokens comprising the top 10% probability mass are considered.
     *
     * Note: OpenAI generally recommends altering this or `temperature`, but not both.
     */
    topP?: number;
    /**
     * Seed for the model request. When specified, OpenAI's system makes a best effort to sample
     * deterministically, so that repeated requests with the same `seed` and parameters should
     * return the same result.
     *
     * Determinism is not guaranteed; refer to the `system_fingerprint` response parameter to
     * monitor changes in the backend.
     */
    seed?: number;

    // ----- Token likelihood -----

    /**
     * A number between `-2.0` and `2.0`. Positive values penalize new tokens according to how
     * often they already occur in the text so far, which makes the model less likely to repeat
     * the same line verbatim.
     */
    frequencyPenalty?: number;
    /**
     * A number between `-2.0` and `2.0`. Positive values penalize new tokens according to whether
     * they already occur in the text so far, which makes the model more likely to talk about new
     * topics.
     */
    presencePenalty?: number;
    /**
     * Adjusts the likelihood of specified tokens appearing in the completion. Takes a JSON object
     * that maps tokens (identified by their token ID in the GPT tokenizer) to a bias value from
     * `-100` to `100`. A tokenizer tool can be used to convert text to token IDs. Mathematically,
     * the bias is added to the logits generated by the model prior to sampling.
     *
     * The exact effect varies per model, but:
     *
     * - Values between `-1` and `1` should decrease or increase the likelihood of selection.
     * - Values like `-100` or `100` should result in a ban or exclusive selection of the relevant
     *   token.
     *
     * For example, passing `{"50256": -100}` prevents the `<|endoftext|>` token from being
     * generated.
     */
    logitBias?: JsonObject;

    // ----- Streaming -----

    /**
     * Whether partial progress is streamed back. If set, tokens are sent as data-only
     * {@link https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format | server-sent events}
     * as they become available, and the stream is terminated by a `data: [DONE]` message.
     */
    stream?: boolean;
    /** Options for the streaming response. Only set this when `stream` is `true`. */
    streamOptions?: StreamOptions;

    // ----- Request handling -----

    /** Caching configuration for the request. Caching is only supported for non-streaming requests. */
    cache?: CacheConfig;
    /** Model routing configuration for the request. */
    router?: ModelRouter;
    /** Developer-defined tags and values used for filtering completions. */
    metadata?: JsonObject;
    /**
     * Unique identifier representing your end-user, which can help the services monitor and
     * detect abuse.
     */
    user?: string;
}
/**
 * Parameters for the `completions.create` operation without stream.
 *
 * Identical to {@link CreateCompletionsParams} except that `stream` is narrowed from `boolean`
 * to the literal `false`; it stays optional, so omitting it also selects this variant.
 */
export interface CreateBasicCompletionsParams extends CreateCompletionsParams {
    /** Streaming is disabled for this variant: either leave it unset or pass `false`. */
    stream?: false;
}
/**
 * Streaming variant of the `completions.create` parameters.
 *
 * Extends {@link CreateCompletionsParams}, turning `stream` into a required field fixed to the
 * literal `true` and adding the stream-only `returnObject` option.
 */
export interface CreateStreamCompletionsParams extends CreateCompletionsParams {
    /**
     * Indicates the return type of the stream chunks.
     *
     * NOTE(review): which chunk representation `true` and `false` (or leaving it unset) select is
     * not visible in this declaration file — confirm against the `create` implementation.
     */
    returnObject?: boolean;
    /** Must be `true`; this is what distinguishes a streaming request from a basic one. */
    stream: true;
}
//# sourceMappingURL=request.d.ts.map