import {
  TogetherChatModel,
  TogetherCodeModel,
  TogetherEmbeddingModel,
  TogetherImageModel,
  TogetherLanguageModel,
  TogetherModerationModel,
} from './models.types';

/**
 * A single message in a chat conversation.
 */
export interface ChatMessage {
  /**
   * The message content.
   */
  content: string;
  /**
   * The message author.
   */
  role: 'user' | 'system' | 'assistant';
}

/**
 * Token accounting for a single request.
 */
export interface TokenUsage {
  /**
   * The number of tokens used for the prompt.
   */
  promptTokens: number;
  /**
   * The number of tokens used for the completion.
   */
  completionTokens: number;
  /**
   * The total number of tokens used.
   */
  totalTokens: number;
}

/**
 * Parameters for a chat completion request.
 */
export interface TogetherChatParams {
  /**
   * The name of the model to query.
   */
  model: TogetherChatModel;
  /**
   * A list of messages comprising the conversation so far.
   */
  messages: ChatMessage[];
  /**
   * The maximum number of tokens to generate.
   * @default 512
   */
  maxTokens?: number;
  /**
   * A list of string sequences that will truncate (stop) inference text output.
   * For example, "</s>" will stop generation as soon as the model generates the given token.
   * @default []
   */
  stop?: string[];
  /**
   * A decimal number that determines the degree of randomness in the response.
   * A value of 1 will always yield the same output.
   * A temperature less than 1 favors more correctness and is appropriate for question answering or summarization.
   * A value greater than 1 introduces more randomness in the output.
   *
   * NOTE(review): the "value of 1" wording is carried over unchanged; confirm it against the API
   * documentation, since deterministic output is usually associated with a temperature of 0.
   * @default 0.7
   */
  temperature?: number;
  /**
   * The nucleus parameter is used to dynamically adjust the number of choices for each predicted token
   * based on the cumulative probabilities.
   * It specifies a probability threshold, below which all less likely tokens are filtered out.
   * This technique helps to maintain diversity and generate more fluent and natural-sounding text.
   * @default 0.7
   */
  topP?: number;
  /**
   * Used to limit the number of choices for the next predicted word or token.
   * It specifies the maximum number of tokens to consider at each step, based on their probability of occurrence.
   * This technique helps to speed up the generation process
   * and can improve the quality of the generated text by focusing on the most likely options.
   * @default 50
   */
  topK?: number;
  /**
   * A number that controls the diversity of generated text by reducing the likelihood of repeated sequences.
   * Higher values decrease repetition.
   * @default 1
   */
  repetitionPenalty?: number;
  /**
   * How many completions to generate for each prompt.
   * @default 1
   */
  n?: number;
  /**
   * If undefined, the function will hold until the entire response is generated before returning.
   * If not undefined, the response will be streamed to this function as it is received.
   * The callback may be async so that it can be awaited for processing each event in order.
   * If you have no need for order, then you can simply not return a promise.
   * @param partialCompletion - a partial completion of the response, or 'done' when the response is complete.
   */
  streamCallback?: (partialCompletion: 'done' | TogetherChatStreamedResponse) => Promise<void> | void;
}

/**
 * The complete (non-streamed) response to a chat completion request.
 */
export interface TogetherChatResponse {
  /**
   * A unique identifier for the response.
   */
  id: string;
  /**
   * The list of completions for the prompt.
   * The number of completions is determined by the `n` parameter.
   */
  choices: Array<{
    /**
     * Why the response was finished.
     */
    finishReason: string;
    /**
     * The index of the choice (corresponding to the `n` parameter).
     */
    index: number;
    logprobs: null;
    /**
     * The message completed from the LLM.
     */
    message: ChatMessage;
  }>;
  /**
   * The number of tokens used for the prompt & completion.
   */
  usage: TokenUsage;
  /**
   * The timestamp of when the response was created.
   */
  created: number;
  /**
   * Which model was used to generate the response.
   */
  model: TogetherChatModel;
  /**
   * Which type of object this is.
   */
  object: 'chat.completion';
}

/**
 * A single event of a streamed chat completion.
 */
export interface TogetherChatStreamedResponse {
  /**
   * A unique identifier for the response.
   */
  id: string;
  /**
   * The timestamp of when the response was created.
   */
  created: number;
  /**
   * Which type of object this is.
   */
  object: 'chat.completion.chunk';
  /**
   * The partial completion of the response for each choice.
   */
  choices: Array<{
    /**
     * The index of the choice.
     */
    index: number;
    /**
     * The partial completion of the response.
     */
    delta: {
      content: string;
    };
  }>;
  /**
   * Information about which token was generated.
   */
  token: {
    /**
     * The unique identifier for the token.
     */
    id: number;
    /**
     * The text version of the token.
     */
    text: string;
    /**
     * The log probability of the token.
     */
    logprob: number;
    /**
     * If the token is special.
     */
    special: boolean;
  };
  /**
   * The reason for finishing the response.
   * Undefined if the response is not finished.
   */
  finishReason?: string;
  /**
   * The complete generated text.
   * Undefined if the response is not finished.
   */
  generatedText?: string;
  stats: null;
  /**
   * The usage information to be provided in the final event.
   * Undefined if the response is not finished.
   */
  usage?: TokenUsage;
}

/**
 * Parameters for a plain-text (prompt based) completion request.
 *
 * Reuses the sampling options of {@link TogetherChatParams}; the members that are
 * re-declared below (`model`, `streamCallback`) and the chat-only `messages` list
 * are omitted from the base type so the intersection does not conflict.
 */
export type TogetherLanguageParams = Omit<TogetherChatParams, 'model' | 'messages' | 'streamCallback'> & {
  /**
   * The name of the model to query.
   */
  model: TogetherLanguageModel;
  /**
   * A string providing context for the model to complete.
   */
  prompt: string;
  /**
   * If undefined, the function will hold until the entire response is generated before returning.
   * If not undefined, the response will be streamed to this function as it is received.
   * The callback may be async so that it can be awaited for processing each event in order.
   * If you have no need for order, then you can simply not return a promise.
   * @param partialCompletion - a partial completion of the response, or 'done' when the response is complete.
   */
  streamCallback?: (partialCompletion: 'done' | TogetherLanguageStreamedResponse) => Promise<void> | void;
};

/**
 * The complete (non-streamed) response to a language completion request.
 *
 * Same shape as {@link TogetherChatResponse} except for the re-declared
 * `model`, `object` and `choices` members.
 */
export type TogetherLanguageResponse = Omit<TogetherChatResponse, 'model' | 'object' | 'choices'> & {
  /**
   * The name of the model to query.
   */
  model: TogetherLanguageModel;
  /**
   * Which type of object this is.
   */
  object: 'language.completion';
  /**
   * The list of completions for the prompt.
   * The number of completions is determined by the `n` parameter.
   */
  choices: Array<{
    /**
     * The completion of the prompt.
     */
    text: string;
  }>;
};

/**
 * A single event of a streamed language completion.
 *
 * Same shape as {@link TogetherChatStreamedResponse} except that each choice
 * carries plain `text` instead of a chat `delta`.
 */
export type TogetherLanguageStreamedResponse = Omit<TogetherChatStreamedResponse, 'choices'> & {
  /**
   * The choices coming back from the model.
   */
  choices: Array<{
    text: string;
  }>;
};

/**
 * Parameters for a code completion request: identical to
 * {@link TogetherLanguageParams} but restricted to code models.
 */
export type TogetherCodeParams = Omit<TogetherLanguageParams, 'model'> & {
  /**
   * The name of the model to query.
   */
  model: TogetherCodeModel;
};

/**
 * The response to a code completion request.
 */
export type TogetherCodeResponse = TogetherLanguageResponse;

/**
 * Parameters for an embedding request.
 */
export interface TogetherEmbeddingParams {
  /**
   * The name of the model to query.
   */
  model: TogetherEmbeddingModel;
  /**
   * The string to embed.
   */
  input: string;
}

/**
 * The response to an embedding request.
 *
 * NOTE(review): the single `test` member looks like a placeholder — confirm the
 * real response shape before relying on this type.
 */
export interface TogetherEmbeddingResponse {
  test: string;
}

/**
 * Parameters for an image generation request.
 */
export interface TogetherImageParams {
  /**
   * The name of the model to query.
   */
  model: TogetherImageModel;
  /**
   * The prompt for the image generator.
   */
  prompt: string;
  /**
   * The prompt for what not to include in the image.
   * @default nothing
   */
  negativePrompt?: string;
  /**
   * Width of the image to be generated.
   * @default 1024
   */
  width?: number;
  /**
   * Height of the image to be generated.
   * @default 1024
   */
  height?: number;
  /**
   * Number of steps to perform iterations on.
   * @default 20
   */
  steps?: number;
  /**
   * A seed to produce deterministic results.
   * @default randomly generated
   */
  seed?: number;
  /**
   * Number of images to generate.
   * @default 1
   */
  n?: number;
}

/**
 * The response to an image generation request.
 *
 * NOTE(review): the picked key set is assumed to be `'output'` (the only member of
 * {@link TogetherInferenceResponse} that carries image data) — confirm against callers.
 */
export type TogetherImageResponse = Pick<TogetherInferenceResponse, 'output'>;

/**
 * Loosely-typed request shape for the generic inference call. Its optional members
 * mirror those of the chat, language and image parameter types declared in this file.
 */
export interface TogetherInferenceParams {
  model:
    | TogetherChatModel
    | TogetherLanguageModel
    | TogetherCodeModel
    | TogetherImageModel
    | TogetherModerationModel
    | TogetherEmbeddingModel;
  prompt?: string;
  negativePrompt?: string;
  n?: number;
  requestType: string;
  messages?: ChatMessage[];
  maxTokens?: number;
  promptFormatString?: string;
  repetitionPenalty?: number;
  stop?: string[];
  temperature?: number;
  topP?: number;
  topK?: number;
  seed?: number;
  width?: number;
  height?: number;
  steps?: number;
}

/**
 * Response shape of the generic inference call.
 */
export interface TogetherInferenceResponse {
  /**
   * The request parameters, typed as {@link TogetherInferenceParams}.
   */
  args: TogetherInferenceParams;
  model: TogetherInferenceParams['model'];
  modelOwner: string;
  numReturns: number;
  output: {
    requestId: string;
    resultType: string;
    choices: Array<{
      imageBase64: string;
    }>;
  };
  prompt: string[];
  status: string;
  subjobs: unknown[];
  // NOTE(review): `{}` accepts any non-nullish value; kept as-is for backward
  // compatibility because the real shape of `tags` is not visible here.
  tags: {};
}