{
  "01-ai/yi-1.5-34b-chat": {
    "description": "Zero One Everything, the latest open-source fine-tuned model with 34 billion parameters, supports various dialogue scenarios with high-quality training data aligned with human preferences."
  },
  "01-ai/yi-1.5-9b-chat": {
    "description": "Zero One Everything, the latest open-source fine-tuned model with 9 billion parameters, supports various dialogue scenarios with high-quality training data aligned with human preferences."
  },
  "360/deepseek-r1": {
    "description": "[360 Deployment Version] DeepSeek-R1 extensively utilizes reinforcement learning techniques in the post-training phase, significantly enhancing model inference capabilities with minimal labeled data. It performs comparably to OpenAI's o1 official version in tasks such as mathematics, coding, and natural language reasoning."
  },
  "360gpt-pro": {
    "description": "360GPT Pro, as an important member of the 360 AI model series, meets diverse natural language application scenarios with efficient text processing capabilities, supporting long text understanding and multi-turn dialogue."
  },
  "360gpt-pro-trans": {
    "description": "A translation-specific model, finely tuned for optimal translation results."
  },
  "360gpt-turbo": {
    "description": "360GPT Turbo offers powerful computation and dialogue capabilities, with excellent semantic understanding and generation efficiency, making it an ideal intelligent assistant solution for enterprises and developers."
  },
  "360gpt-turbo-responsibility-8k": {
    "description": "360GPT Turbo Responsibility 8K emphasizes semantic safety and responsibility, designed specifically for applications with high content safety requirements, ensuring accuracy and robustness in user experience."
  },
  "360gpt2-o1": {
    "description": "360gpt2-o1 builds a chain of thought using tree search and incorporates a reflection mechanism, trained with reinforcement learning, enabling the model to self-reflect and correct errors."
  },
  "360gpt2-pro": {
    "description": "360GPT2 Pro is an advanced natural language processing model launched by 360, featuring exceptional text generation and understanding capabilities, particularly excelling in generation and creative tasks, capable of handling complex language transformations and role-playing tasks."
  },
  "360zhinao2-o1": {
    "description": "360zhinao2-o1 uses tree search to build a chain of thought and introduces a reflection mechanism, utilizing reinforcement learning for training, enabling the model to possess self-reflection and error-correction capabilities."
  },
  "4.0Ultra": {
    "description": "Spark4.0 Ultra is the most powerful version in the Spark large model series, enhancing text content understanding and summarization capabilities while upgrading online search links. It is a comprehensive solution for improving office productivity and accurately responding to demands, leading the industry as an intelligent product."
  },
  "Baichuan2-Turbo": {
    "description": "Utilizes search enhancement technology to achieve comprehensive links between large models and domain knowledge, as well as knowledge from the entire web. Supports uploads of various documents such as PDF and Word, and URL input, providing timely and comprehensive information retrieval with accurate and professional output."
  },
  "Baichuan3-Turbo": {
    "description": "Optimized for high-frequency enterprise scenarios, significantly improving performance and cost-effectiveness. Compared to the Baichuan2 model, content creation improves by 20%, knowledge Q&A by 17%, and role-playing ability by 40%. Overall performance is superior to GPT-3.5."
  },
  "Baichuan3-Turbo-128k": {
    "description": "Features a 128K ultra-long context window, optimized for high-frequency enterprise scenarios, significantly improving performance and cost-effectiveness. Compared to the Baichuan2 model, content creation improves by 20%, knowledge Q&A by 17%, and role-playing ability by 40%. Overall performance is superior to GPT-3.5."
  },
  "Baichuan4": {
    "description": "The model is the best in the country, surpassing mainstream foreign models in Chinese tasks such as knowledge encyclopedias, long texts, and creative generation. It also boasts industry-leading multimodal capabilities, excelling in multiple authoritative evaluation benchmarks."
  },
  "Baichuan4-Air": {
    "description": "The leading model in the country, surpassing mainstream foreign models in Chinese tasks such as knowledge encyclopedias, long texts, and creative generation. It also possesses industry-leading multimodal capabilities, excelling in multiple authoritative evaluation benchmarks."
  },
  "Baichuan4-Turbo": {
    "description": "The leading model in the country, surpassing mainstream foreign models in Chinese tasks such as knowledge encyclopedias, long texts, and creative generation. It also possesses industry-leading multimodal capabilities, excelling in multiple authoritative evaluation benchmarks."
  },
  "DeepSeek-R1": {
    "description": "A state-of-the-art efficient LLM, skilled in reasoning, mathematics, and programming."
  },
  "DeepSeek-R1-Distill-Llama-70B": {
    "description": "DeepSeek R1— the larger and smarter model in the DeepSeek suite— distilled into the Llama 70B architecture. Based on benchmark testing and human evaluation, this model is smarter than the original Llama 70B, particularly excelling in tasks requiring mathematical and factual accuracy."
  },
  "DeepSeek-R1-Distill-Qwen-1.5B": {
    "description": "The DeepSeek-R1 distillation model based on Qwen2.5-Math-1.5B optimizes inference performance through reinforcement learning and cold-start data, refreshing the benchmark for open-source models across multiple tasks."
  },
  "DeepSeek-R1-Distill-Qwen-14B": {
    "description": "The DeepSeek-R1 distillation model based on Qwen2.5-14B optimizes inference performance through reinforcement learning and cold-start data, refreshing the benchmark for open-source models across multiple tasks."
  },
  "DeepSeek-R1-Distill-Qwen-32B": {
    "description": "The DeepSeek-R1 series optimizes inference performance through reinforcement learning and cold-start data, refreshing the benchmark for open-source models across multiple tasks, surpassing the level of OpenAI-o1-mini."
  },
  "DeepSeek-R1-Distill-Qwen-7B": {
    "description": "The DeepSeek-R1 distillation model based on Qwen2.5-Math-7B optimizes inference performance through reinforcement learning and cold-start data, refreshing the benchmark for open-source models across multiple tasks."
  },
  "DeepSeek-V3": {
    "description": "DeepSeek-V3 is a MoE model developed in-house by Deep Seek Company. Its performance surpasses that of other open-source models such as Qwen2.5-72B and Llama-3.1-405B in multiple assessments, and it stands on par with the world's top proprietary models like GPT-4o and Claude-3.5-Sonnet."
  },
  "Doubao-1.5-thinking-pro-m": {
    "description": "Doubao-1.5 is a new deep thinking model (the m version comes with native multimodal deep reasoning capabilities) that excels in specialized fields such as mathematics, programming, scientific reasoning, and general tasks like creative writing, achieving or nearing top-tier performance in authoritative benchmarks such as AIME 2024, Codeforces, and GPQA. It supports a 128k context window and 16k output."
  },
  "Doubao-1.5-thinking-vision-pro": {
    "description": "A brand new visual deep thinking model, equipped with stronger general multimodal understanding and reasoning capabilities, achieving SOTA performance in 37 out of 59 public evaluation benchmarks."
  },
  "Doubao-1.5-vision-pro": {
    "description": "Doubao-1.5-vision-pro is a newly upgraded multimodal large model that supports image recognition at any resolution and extreme aspect ratios, enhancing visual reasoning, document recognition, detail comprehension, and instruction following capabilities."
  },
  "Doubao-1.5-vision-pro-32k": {
    "description": "Doubao-1.5-vision-pro is a newly upgraded multimodal large model that supports image recognition at any resolution and extreme aspect ratios, enhancing visual reasoning, document recognition, detail understanding, and instruction-following capabilities."
  },
  "Doubao-lite-128k": {
    "description": "Doubao-lite provides extreme response speed and better cost-effectiveness, offering flexible options for various customer scenarios. It supports inference and fine-tuning with a 128k context window."
  },
  "Doubao-lite-32k": {
    "description": "Doubao-lite offers extreme response speed and better cost-effectiveness, providing flexible options for various customer scenarios. It supports inference and fine-tuning with a 32k context window."
  },
  "Doubao-lite-4k": {
    "description": "Doubao-lite boasts extreme response speed and better cost-effectiveness, providing flexible options for various customer scenarios. It supports inference and fine-tuning with a 4k context window."
  },
  "Doubao-pro-128k": {
    "description": "The best-performing primary model designed to handle complex tasks, achieving strong performance in scenarios such as reference Q&A, summarization, creative writing, text classification, and role-playing. It supports inference and fine-tuning with a 128k context window."
  },
  "Doubao-pro-256k": {
    "description": "The best-performing flagship model, suitable for handling complex tasks, with excellent results in reference Q&A, summarization, creative writing, text classification, role-playing, and more. It supports reasoning and fine-tuning with a 256k context window."
  },
  "Doubao-pro-32k": {
    "description": "The best-performing primary model suited for complex tasks, showing great results in reference Q&A, summarization, creative writing, text classification, and role-playing. It supports inference and fine-tuning with a 32k context window."
  },
  "Doubao-pro-4k": {
    "description": "The best-performing primary model suitable for handling complex tasks, demonstrating excellent performance in scenarios such as reference Q&A, summarization, creative writing, text classification, and role-playing. It supports inference and fine-tuning with a 4k context window."
  },
  "Doubao-vision-lite-32k": {
    "description": "The Doubao-vision model is a multimodal large model launched by Doubao, featuring powerful image understanding and reasoning capabilities, as well as precise instruction comprehension. The model has demonstrated strong performance in image-text information extraction and image-based reasoning tasks, making it applicable to more complex and broader visual question-answering tasks."
  },
  "Doubao-vision-pro-32k": {
    "description": "The Doubao-vision model is a multimodal large model launched by Doubao, featuring powerful image understanding and reasoning capabilities, as well as precise instruction comprehension. The model has demonstrated strong performance in image-text information extraction and image-based reasoning tasks, making it applicable to more complex and broader visual question-answering tasks."
  },
  "ERNIE-3.5-128K": {
    "description": "Baidu's self-developed flagship large-scale language model, covering a vast amount of Chinese and English corpus. It possesses strong general capabilities, meeting the requirements for most dialogue Q&A, creative generation, and plugin application scenarios; it supports automatic integration with Baidu's search plugin to ensure the timeliness of Q&A information."
  },
  "ERNIE-3.5-8K": {
    "description": "Baidu's self-developed flagship large-scale language model, covering a vast amount of Chinese and English corpus. It possesses strong general capabilities, meeting the requirements for most dialogue Q&A, creative generation, and plugin application scenarios; it supports automatic integration with Baidu's search plugin to ensure the timeliness of Q&A information."
  },
  "ERNIE-3.5-8K-Preview": {
    "description": "Baidu's self-developed flagship large-scale language model, covering a vast amount of Chinese and English corpus. It possesses strong general capabilities, meeting the requirements for most dialogue Q&A, creative generation, and plugin application scenarios; it supports automatic integration with Baidu's search plugin to ensure the timeliness of Q&A information."
  },
  "ERNIE-4.0-8K-Latest": {
    "description": "Baidu's self-developed flagship ultra-large-scale language model, which has achieved a comprehensive upgrade in model capabilities compared to ERNIE 3.5, widely applicable to complex task scenarios across various fields; supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information."
  },
  "ERNIE-4.0-8K-Preview": {
    "description": "Baidu's self-developed flagship ultra-large-scale language model, which has achieved a comprehensive upgrade in model capabilities compared to ERNIE 3.5, widely applicable to complex task scenarios across various fields; supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information."
  },
  "ERNIE-4.0-Turbo-8K-Latest": {
    "description": "Baidu's self-developed flagship ultra-large-scale language model, demonstrating excellent overall performance, suitable for complex task scenarios across various fields; supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information. It offers better performance compared to ERNIE 4.0."
  },
  "ERNIE-4.0-Turbo-8K-Preview": {
    "description": "Baidu's self-developed flagship ultra-large-scale language model, demonstrating excellent overall performance, widely applicable to complex task scenarios across various fields; supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information. It outperforms ERNIE 4.0 in performance."
  },
  "ERNIE-Character-8K": {
    "description": "Baidu's self-developed vertical scene large language model, suitable for applications such as game NPCs, customer service dialogues, and role-playing conversations, featuring more distinct and consistent character styles, stronger adherence to instructions, and superior inference performance."
  },
  "ERNIE-Lite-Pro-128K": {
    "description": "Baidu's self-developed lightweight large language model, balancing excellent model performance with inference efficiency, offering better results than ERNIE Lite, suitable for inference on low-power AI acceleration cards."
  },
  "ERNIE-Speed-128K": {
    "description": "Baidu's latest self-developed high-performance large language model released in 2024, with outstanding general capabilities, suitable as a base model for fine-tuning, effectively addressing specific scenario issues while also exhibiting excellent inference performance."
  },
  "ERNIE-Speed-Pro-128K": {
    "description": "Baidu's latest self-developed high-performance large language model released in 2024, with outstanding general capabilities, providing better results than ERNIE Speed, suitable as a base model for fine-tuning, effectively addressing specific scenario issues while also exhibiting excellent inference performance."
  },
  "Gryphe/MythoMax-L2-13b": {
    "description": "MythoMax-L2 (13B) is an innovative model suitable for multi-domain applications and complex tasks."
  },
  "InternVL2-8B": {
    "description": "InternVL2-8B is a powerful visual language model that supports multimodal processing of images and text, capable of accurately recognizing image content and generating relevant descriptions or answers."
  },
  "InternVL2.5-26B": {
    "description": "InternVL2.5-26B is a powerful visual language model that supports multimodal processing of images and text, capable of accurately recognizing image content and generating relevant descriptions or answers."
  },
  "Llama-3.2-11B-Vision-Instruct": {
    "description": "Exhibits outstanding image reasoning capabilities on high-resolution images, suitable for visual understanding applications."
  },
  "Llama-3.2-90B-Vision-Instruct\t": {
    "description": "Advanced image reasoning capabilities suitable for visual understanding agent applications."
  },
  "Meta-Llama-3.1-405B-Instruct": {
    "description": "Llama 3.1 instruction-tuned text model optimized for multilingual dialogue use cases, performing excellently on common industry benchmarks among many available open-source and closed chat models."
  },
  "Meta-Llama-3.1-70B-Instruct": {
    "description": "Llama 3.1 instruction-tuned text model optimized for multilingual dialogue use cases, performing excellently on common industry benchmarks among many available open-source and closed chat models."
  },
  "Meta-Llama-3.1-8B-Instruct": {
    "description": "Llama 3.1 instruction-tuned text model optimized for multilingual dialogue use cases, performing excellently on common industry benchmarks among many available open-source and closed chat models."
  },
  "Meta-Llama-3.2-1B-Instruct": {
    "description": "An advanced cutting-edge small language model with language understanding, excellent reasoning capabilities, and text generation abilities."
  },
  "Meta-Llama-3.2-3B-Instruct": {
    "description": "An advanced cutting-edge small language model with language understanding, excellent reasoning capabilities, and text generation abilities."
  },
  "Meta-Llama-3.3-70B-Instruct": {
    "description": "Llama 3.3 is the most advanced multilingual open-source large language model in the Llama series, offering performance comparable to a 405B model at a very low cost. Based on the Transformer architecture, it enhances usability and safety through supervised fine-tuning (SFT) and reinforcement learning from human feedback (RLHF). Its instruction-tuned version is optimized for multilingual dialogue and outperforms many open-source and closed chat models on various industry benchmarks. Knowledge cutoff date is December 2023."
  },
  "MiniMax-Text-01": {
    "description": "In the MiniMax-01 series of models, we have made bold innovations: for the first time, we have implemented a linear attention mechanism on a large scale, making the traditional Transformer architecture no longer the only option. This model has a parameter count of up to 456 billion, with a single activation of 45.9 billion. Its overall performance rivals that of top overseas models while efficiently handling the world's longest context of 4 million tokens, which is 32 times that of GPT-4o and 20 times that of Claude-3.5-Sonnet."
  },
  "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": {
    "description": "Nous Hermes 2 - Mixtral 8x7B-DPO (46.7B) is a high-precision instruction model suitable for complex computations."
  },
  "Phi-3-medium-128k-instruct": {
    "description": "The same Phi-3-medium model, but with a larger context size for RAG or few-shot prompting."
  },
  "Phi-3-medium-4k-instruct": {
    "description": "A 14B parameter model that provides better quality than Phi-3-mini, focusing on high-quality, reasoning-dense data."
  },
  "Phi-3-mini-128k-instruct": {
    "description": "The same Phi-3-mini model, but with a larger context size for RAG or few-shot prompting."
  },
  "Phi-3-mini-4k-instruct": {
    "description": "The smallest member of the Phi-3 family, optimized for both quality and low latency."
  },
  "Phi-3-small-128k-instruct": {
    "description": "The same Phi-3-small model, but with a larger context size for RAG or few-shot prompting."
  },
  "Phi-3-small-8k-instruct": {
    "description": "A 7B parameter model that provides better quality than Phi-3-mini, focusing on high-quality, reasoning-dense data."
  },
  "Phi-3.5-mini-instruct": {
    "description": "An updated version of the Phi-3-mini model."
  },
  "Phi-3.5-vision-instrust": {
    "description": "An updated version of the Phi-3-vision model."
  },
  "Pro/Qwen/Qwen2-7B-Instruct": {
    "description": "Qwen2-7B-Instruct is an instruction-tuned large language model in the Qwen2 series, with a parameter size of 7B. This model is based on the Transformer architecture and employs techniques such as the SwiGLU activation function, attention QKV bias, and group query attention. It can handle large-scale inputs. The model excels in language understanding, generation, multilingual capabilities, coding, mathematics, and reasoning across multiple benchmark tests, surpassing most open-source models and demonstrating competitive performance comparable to proprietary models in certain tasks. Qwen2-7B-Instruct outperforms Qwen1.5-7B-Chat in multiple evaluations, showing significant performance improvements."
  },
  "Pro/Qwen/Qwen2.5-7B-Instruct": {
    "description": "Qwen2.5-7B-Instruct is one of the latest large language models released by Alibaba Cloud. This 7B model shows significant improvements in coding and mathematics. It also provides multilingual support, covering over 29 languages, including Chinese and English. The model has made notable advancements in instruction following, understanding structured data, and generating structured outputs, especially JSON."
  },
  "Pro/Qwen/Qwen2.5-Coder-7B-Instruct": {
    "description": "Qwen2.5-Coder-7B-Instruct is the latest version in Alibaba Cloud's series of code-specific large language models. This model significantly enhances code generation, reasoning, and repair capabilities based on Qwen2.5, trained on 55 trillion tokens. It not only improves coding abilities but also maintains advantages in mathematics and general capabilities, providing a more comprehensive foundation for practical applications such as code agents."
  },
  "Pro/Qwen/Qwen2.5-VL-7B-Instruct": {
    "description": "Qwen2.5-VL is the newest addition to the Qwen series, featuring enhanced visual comprehension capabilities. It can analyze text, charts, and layouts within images, comprehend long videos while capturing events. The model supports reasoning, tool manipulation, multi-format object localization, and structured output generation. It incorporates optimized dynamic resolution and frame rate training for video understanding, along with improved efficiency in its visual encoder."
  },
  "Pro/THUDM/glm-4-9b-chat": {
    "description": "GLM-4-9B-Chat is the open-source version of the GLM-4 series pre-trained models launched by Zhipu AI. This model excels in semantics, mathematics, reasoning, code, and knowledge. In addition to supporting multi-turn dialogues, GLM-4-9B-Chat also features advanced capabilities such as web browsing, code execution, custom tool invocation (Function Call), and long-text reasoning. The model supports 26 languages, including Chinese, English, Japanese, Korean, and German. In multiple benchmark tests, GLM-4-9B-Chat has demonstrated excellent performance, such as in AlignBench-v2, MT-Bench, MMLU, and C-Eval. The model supports a maximum context length of 128K, making it suitable for academic research and commercial applications."
  },
  "Pro/deepseek-ai/DeepSeek-R1": {
    "description": "DeepSeek-R1 is a reinforcement learning (RL) driven inference model that addresses issues of repetitiveness and readability in models. Prior to RL, DeepSeek-R1 introduced cold start data to further optimize inference performance. It performs comparably to OpenAI-o1 in mathematical, coding, and reasoning tasks, and enhances overall effectiveness through carefully designed training methods."
  },
  "Pro/deepseek-ai/DeepSeek-R1-0120": {
    "description": "DeepSeek-R1 is a reinforcement learning (RL) driven reasoning model that addresses issues of repetition and readability. Before RL, it introduced cold-start data to further optimize reasoning performance. It performs comparably to OpenAI-o1 in mathematics, coding, and reasoning tasks and improves overall effectiveness through carefully designed training methods."
  },
  "Pro/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": {
    "description": "DeepSeek-R1-Distill-Qwen-7B is a model derived from Qwen2.5-Math-7B through knowledge distillation. It was fine-tuned using 800,000 carefully selected samples generated by DeepSeek-R1, demonstrating exceptional reasoning capabilities. The model achieves outstanding performance across multiple benchmarks, including 92.8% accuracy on MATH-500, a 55.5% pass rate on AIME 2024, and a score of 1189 on CodeForces, showcasing strong mathematical and programming abilities for a 7B-scale model."
  },
  "Pro/deepseek-ai/DeepSeek-V3": {
    "description": "DeepSeek-V3 is a mixed expert (MoE) language model with 671 billion parameters, utilizing multi-head latent attention (MLA) and the DeepSeekMoE architecture, combined with a load balancing strategy without auxiliary loss to optimize inference and training efficiency. Pre-trained on 14.8 trillion high-quality tokens and fine-tuned with supervision and reinforcement learning, DeepSeek-V3 outperforms other open-source models and approaches leading closed-source models."
  },
  "Pro/deepseek-ai/DeepSeek-V3-1226": {
    "description": "DeepSeek-V3 is a mixture of experts (MoE) language model with 671 billion parameters, utilizing multi-head latent attention (MLA) and the DeepSeekMoE architecture, combined with a load balancing strategy without auxiliary loss to optimize inference and training efficiency. Pre-trained on 14.8 trillion high-quality tokens and fine-tuned with supervised learning and reinforcement learning, DeepSeek-V3 outperforms other open-source models and approaches leading closed-source models in performance."
  },
  "QwQ-32B-Preview": {
    "description": "QwQ-32B-Preview is an innovative natural language processing model capable of efficiently handling complex dialogue generation and context understanding tasks."
  },
  "Qwen/QVQ-72B-Preview": {
    "description": "QVQ-72B-Preview is a research-oriented model developed by the Qwen team, focusing on visual reasoning capabilities, with unique advantages in understanding complex scenes and solving visually related mathematical problems."
  },
  "Qwen/QwQ-32B": {
    "description": "QwQ is the inference model of the Qwen series. Compared to traditional instruction-tuned models, QwQ possesses reasoning and cognitive abilities, achieving significantly enhanced performance in downstream tasks, especially in solving difficult problems. QwQ-32B is a medium-sized inference model that competes effectively against state-of-the-art inference models (such as DeepSeek-R1 and o1-mini). This model employs technologies such as RoPE, SwiGLU, RMSNorm, and Attention QKV bias, featuring a 64-layer network structure and 40 Q attention heads (with 8 KV heads in the GQA architecture)."
  },
  "Qwen/QwQ-32B-Preview": {
    "description": "QwQ-32B-Preview is Qwen's latest experimental research model, focusing on enhancing AI reasoning capabilities. By exploring complex mechanisms such as language mixing and recursive reasoning, its main advantages include strong analytical reasoning, mathematical, and programming abilities. However, it also faces challenges such as language switching issues, reasoning loops, safety considerations, and differences in other capabilities."
  },
  "Qwen/Qwen2-72B-Instruct": {
    "description": "Qwen2 is an advanced general-purpose language model that supports various types of instructions."
  },
  "Qwen/Qwen2-7B-Instruct": {
    "description": "Qwen2-72B-Instruct is an instruction-tuned large language model in the Qwen2 series, with a parameter size of 72B. This model is based on the Transformer architecture and employs techniques such as the SwiGLU activation function, attention QKV bias, and group query attention. It can handle large-scale inputs. The model excels in language understanding, generation, multilingual capabilities, coding, mathematics, and reasoning across multiple benchmark tests, surpassing most open-source models and demonstrating competitive performance comparable to proprietary models in certain tasks."
  },
  "Qwen/Qwen2-VL-72B-Instruct": {
    "description": "Qwen2-VL is the latest iteration of the Qwen-VL model, achieving state-of-the-art performance in visual understanding benchmarks."
  },
  "Qwen/Qwen2.5-14B-Instruct": {
    "description": "Qwen2.5 is a brand new series of large language models designed to optimize the handling of instruction-based tasks."
  },
  "Qwen/Qwen2.5-32B-Instruct": {
    "description": "Qwen2.5 is a brand new series of large language models designed to optimize the handling of instruction-based tasks."
  },
  "Qwen/Qwen2.5-72B-Instruct": {
    "description": "A large language model developed by the Alibaba Cloud Tongyi Qianwen team"
  },
  "Qwen/Qwen2.5-72B-Instruct-128K": {
    "description": "Qwen2.5 is a new large language model series with enhanced understanding and generation capabilities."
  },
  "Qwen/Qwen2.5-72B-Instruct-Turbo": {
    "description": "Qwen2.5 is a new large language model series designed to optimize instruction-based task processing."
  },
  "Qwen/Qwen2.5-7B-Instruct": {
    "description": "Qwen2.5 is a brand new series of large language models designed to optimize the handling of instruction-based tasks."
  },
  "Qwen/Qwen2.5-7B-Instruct-Turbo": {
    "description": "Qwen2.5 is a new large language model series designed to optimize instruction-based task processing."
  },
  "Qwen/Qwen2.5-Coder-32B-Instruct": {
    "description": "Qwen2.5-Coder focuses on code writing."
  },
  "Qwen/Qwen2.5-Coder-7B-Instruct": {
    "description": "Qwen2.5-Coder-7B-Instruct is the latest version in Alibaba Cloud's series of code-specific large language models. This model significantly enhances code generation, reasoning, and repair capabilities based on Qwen2.5, trained on 55 trillion tokens. It not only improves coding abilities but also maintains advantages in mathematics and general capabilities, providing a more comprehensive foundation for practical applications such as code agents."
  },
  "Qwen/Qwen2.5-VL-32B-Instruct": {
    "description": "Qwen2.5-VL-32B-Instruct is a multimodal large language model developed by the Tongyi Qianwen team, representing part of the Qwen2.5-VL series. This model excels not only in recognizing common objects but also in analyzing text, charts, icons, graphics, and layouts within images. It functions as a visual agent capable of reasoning and dynamically manipulating tools, with the ability to operate computers and mobile devices. Additionally, the model can precisely locate objects in images and generate structured outputs for documents like invoices and tables. Compared to its predecessor Qwen2-VL, this version demonstrates enhanced mathematical and problem-solving capabilities through reinforcement learning, while also exhibiting more human-preferred response styles."
  },
  "Qwen/Qwen2.5-VL-72B-Instruct": {
    "description": "Qwen2.5-VL is the vision-language model in the Qwen2.5 series. This model demonstrates significant improvements across multiple dimensions: enhanced visual comprehension capable of recognizing common objects, analyzing text, charts, and layouts; serving as a visual agent that can reason and dynamically guide tool usage; supporting understanding of long videos exceeding 1 hour while capturing key events; able to precisely locate objects in images by generating bounding boxes or points; and capable of producing structured outputs particularly suitable for scanned data like invoices and forms."
  },
  "Qwen/Qwen3-14B": {
    "description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functions, and multilingual support, with a switchable thinking mode."
  },
  "Qwen/Qwen3-235B-A22B": {
    "description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functions, and multilingual support, with a switchable thinking mode."
  },
  "Qwen/Qwen3-30B-A3B": {
    "description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functions, and multilingual support, with a switchable thinking mode."
  },
  "Qwen/Qwen3-32B": {
    "description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functions, and multilingual support, with a switchable thinking mode."
  },
  "Qwen/Qwen3-8B": {
    "description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functions, and multilingual support, with a switchable thinking mode."
  },
  "Qwen2-72B-Instruct": {
    "description": "Qwen2 is the latest series of the Qwen model, supporting 128k context. Compared to the current best open-source models, Qwen2-72B significantly surpasses leading models in natural language understanding, knowledge, coding, mathematics, and multilingual capabilities."
  },
  "Qwen2-7B-Instruct": {
    "description": "Qwen2 is the latest series of the Qwen model, capable of outperforming optimal open-source models of similar size and even larger models. Qwen2 7B has achieved significant advantages in multiple evaluations, especially in coding and Chinese comprehension."
  },
  "Qwen2-VL-72B": {
    "description": "Qwen2-VL-72B is a powerful visual language model that supports multimodal processing of images and text, capable of accurately recognizing image content and generating relevant descriptions or answers."
  },
  "Qwen2.5-14B-Instruct": {
    "description": "Qwen2.5-14B-Instruct is a large language model with 14 billion parameters, delivering excellent performance, optimized for Chinese and multilingual scenarios, and supporting applications such as intelligent Q&A and content generation."
  },
  "Qwen2.5-32B-Instruct": {
    "description": "Qwen2.5-32B-Instruct is a large language model with 32 billion parameters, offering balanced performance, optimized for Chinese and multilingual scenarios, and supporting applications such as intelligent Q&A and content generation."
  },
  "Qwen2.5-72B-Instruct": {
    "description": "Qwen2.5-72B-Instruct supports 16k context and generates long texts exceeding 8K. It enables seamless interaction with external systems through function calls, greatly enhancing flexibility and scalability. The model's knowledge has significantly increased, and its coding and mathematical abilities have been greatly improved, with multilingual support for over 29 languages."
  },
  "Qwen2.5-7B-Instruct": {
    "description": "Qwen2.5-7B-Instruct is a large language model with 7 billion parameters, supporting function calls and seamless interaction with external systems, greatly enhancing flexibility and scalability. It is optimized for Chinese and multilingual scenarios, supporting applications such as intelligent Q&A and content generation."
  },
  "Qwen2.5-Coder-14B-Instruct": {
    "description": "Qwen2.5-Coder-14B-Instruct is a large-scale pre-trained programming instruction model with strong code understanding and generation capabilities, efficiently handling various programming tasks, particularly suited for intelligent code writing, automated script generation, and programming problem-solving."
  },
  "Qwen2.5-Coder-32B-Instruct": {
    "description": "Qwen2.5-Coder-32B-Instruct is a large language model specifically designed for code generation, code understanding, and efficient development scenarios, featuring an industry-leading 32 billion parameters to meet diverse programming needs."
  },
  "SenseChat": {
    "description": "Basic version model (V4) with a context length of 4K, featuring strong general capabilities."
  },
  "SenseChat-128K": {
    "description": "Basic version model (V4) with a context length of 128K, excelling in long text comprehension and generation tasks."
  },
  "SenseChat-32K": {
    "description": "Basic version model (V4) with a context length of 32K, flexibly applicable to various scenarios."
  },
  "SenseChat-5": {
    "description": "The latest version model (V5.5) with a context length of 128K shows significant improvements in mathematical reasoning, English conversation, instruction following, and long text comprehension, comparable to GPT-4o."
  },
  "SenseChat-5-1202": {
    "description": "Based on version V5.5, this latest release shows significant improvements over the previous version in foundational Chinese and English capabilities, chat, science knowledge, humanities knowledge, writing, mathematical logic, and word count control."
  },
  "SenseChat-5-Cantonese": {
    "description": "With a context length of 32K, it surpasses GPT-4 in Cantonese conversation comprehension and is competitive with GPT-4 Turbo in knowledge, reasoning, mathematics, and code writing across multiple domains."
  },
  "SenseChat-5-beta": {
    "description": "Partially outperforms SenseCat-5-1202"
  },
  "SenseChat-Character": {
    "description": "Standard version model with an 8K context length and high response speed."
  },
  "SenseChat-Character-Pro": {
    "description": "Advanced version model with a context length of 32K, offering comprehensive capability enhancements and supporting both Chinese and English conversations."
  },
  "SenseChat-Turbo": {
    "description": "Suitable for fast question answering and model fine-tuning scenarios."
  },
  "SenseChat-Turbo-1202": {
    "description": "This is the latest lightweight version model, achieving over 90% of the full model's capabilities while significantly reducing inference costs."
  },
  "SenseChat-Vision": {
    "description": "The latest version model (V5.5) supports multi-image input and fully optimizes the model's basic capabilities, achieving significant improvements in object attribute recognition, spatial relationships, action event recognition, scene understanding, emotion recognition, logical reasoning, and text understanding and generation."
  },
  "SenseNova-V6-Pro": {
    "description": "Achieves a native unification of image, text, and video capabilities, breaking through the limitations of traditional discrete multimodality, winning dual championships in the OpenCompass and SuperCLUE evaluations."
  },
  "SenseNova-V6-Reasoner": {
    "description": "Balances visual and linguistic deep reasoning, enabling slow thinking and profound inference, presenting a complete chain of thought process."
  },
  "SenseNova-V6-Turbo": {
    "description": "Achieves a native unification of image, text, and video capabilities, breaking through the limitations of traditional discrete multimodality, leading comprehensively in core dimensions such as multimodal foundational abilities and linguistic foundational abilities, excelling in both literature and science, and consistently ranking among the top tier in various assessments both domestically and internationally."
  },
  "Skylark2-lite-8k": {
    "description": "Skylark 2nd generation model, Skylark2-lite model is characterized by high response speed, suitable for high real-time requirements, cost-sensitive scenarios, and situations where model accuracy is less critical, with a context window length of 8k."
  },
  "Skylark2-pro-32k": {
    "description": "Skylark 2nd generation model, Skylark2-pro version has high model accuracy, suitable for more complex text generation scenarios such as professional field copy generation, novel writing, and high-quality translation, with a context window length of 32k."
  },
  "Skylark2-pro-4k": {
    "description": "Skylark 2nd generation model, Skylark2-pro model has high model accuracy, suitable for more complex text generation scenarios such as professional field copy generation, novel writing, and high-quality translation, with a context window length of 4k."
  },
  "Skylark2-pro-character-4k": {
    "description": "Skylark 2nd generation model, Skylark2-pro-character has excellent role-playing and chat capabilities, adept at engaging in conversations with users based on their prompt requests, showcasing distinct character styles and flowing dialogue, making it well-suited for building chatbots, virtual assistants, and online customer service, with high response speed."
  },
  "Skylark2-pro-turbo-8k": {
    "description": "Skylark 2nd generation model, Skylark2-pro-turbo-8k provides faster inference at a lower cost, with a context window length of 8k."
  },
  "THUDM/GLM-4-32B-0414": {
    "description": "GLM-4-32B-0414 is the next-generation open-source model in the GLM series, boasting 32 billion parameters. Its performance is comparable to OpenAI's GPT series and DeepSeek's V3/R1 series."
  },
  "THUDM/GLM-4-9B-0414": {
    "description": "GLM-4-9B-0414 is a small model in the GLM series, with 9 billion parameters. This model inherits the technical characteristics of the GLM-4-32B series while providing a more lightweight deployment option. Despite its smaller size, GLM-4-9B-0414 still demonstrates excellent capabilities in tasks such as code generation, web design, SVG graphics generation, and search-based writing."
  },
  "THUDM/GLM-Z1-32B-0414": {
    "description": "GLM-Z1-32B-0414 is a reasoning model with deep thinking capabilities. This model is developed based on GLM-4-32B-0414 through cold start and extended reinforcement learning, with further training in mathematics, coding, and logic tasks. Compared to the base model, GLM-Z1-32B-0414 significantly enhances mathematical abilities and the capacity to solve complex tasks."
  },
  "THUDM/GLM-Z1-9B-0414": {
    "description": "GLM-Z1-9B-0414 is a small model in the GLM series, with only 9 billion parameters, yet it demonstrates remarkable capabilities while maintaining the open-source tradition. Despite its smaller size, this model excels in mathematical reasoning and general tasks, leading the performance among similarly sized open-source models."
  },
  "THUDM/GLM-Z1-Rumination-32B-0414": {
    "description": "GLM-Z1-Rumination-32B-0414 is a deep reasoning model with reflective capabilities (comparable to OpenAI's Deep Research). Unlike typical deep thinking models, reflective models engage in longer periods of deep thought to tackle more open and complex problems."
  },
  "THUDM/glm-4-9b-chat": {
    "description": "GLM-4 9B is an open-source version that provides an optimized conversational experience for chat applications."
  },
  "Tongyi-Zhiwen/QwenLong-L1-32B": {
    "description": "QwenLong-L1-32B is the first large reasoning model (LRM) trained with reinforcement learning for long-context tasks, optimized specifically for long-text reasoning. It achieves stable transfer from short to long contexts through a progressive context expansion reinforcement learning framework. In seven long-context document QA benchmarks, QwenLong-L1-32B outperforms flagship models like OpenAI-o3-mini and Qwen3-235B-A22B, with performance comparable to Claude-3.7-Sonnet-Thinking. The model excels in complex tasks such as mathematical reasoning, logical reasoning, and multi-hop reasoning."
  },
  "Yi-34B-Chat": {
    "description": "Yi-1.5-34B significantly enhances mathematical logic and coding abilities by incrementally training on 500 billion high-quality tokens while maintaining the excellent general language capabilities of the original series."
  },
  "abab5.5-chat": {
    "description": "Targeted at productivity scenarios, supporting complex task processing and efficient text generation, suitable for professional applications."
  },
  "abab5.5s-chat": {
    "description": "Designed for Chinese persona dialogue scenarios, providing high-quality Chinese dialogue generation capabilities, suitable for various application contexts."
  },
  "abab6.5g-chat": {
    "description": "Designed for multilingual persona dialogue, supporting high-quality dialogue generation in English and other languages."
  },
  "abab6.5s-chat": {
    "description": "Suitable for a wide range of natural language processing tasks, including text generation and dialogue systems."
  },
  "abab6.5t-chat": {
    "description": "Optimized for Chinese persona dialogue scenarios, providing smooth dialogue generation that aligns with Chinese expression habits."
  },
  "accounts/fireworks/models/deepseek-r1": {
    "description": "DeepSeek-R1 is a state-of-the-art large language model optimized through reinforcement learning and cold-start data, excelling in reasoning, mathematics, and programming performance."
  },
  "accounts/fireworks/models/deepseek-v3": {
    "description": "A powerful Mixture-of-Experts (MoE) language model provided by Deepseek, with a total parameter count of 671B, activating 37B parameters per token."
  },
  "accounts/fireworks/models/llama-v3-70b-instruct": {
    "description": "Llama 3 70B instruction model, optimized for multilingual dialogues and natural language understanding, outperforming most competitive models."
  },
  "accounts/fireworks/models/llama-v3-8b-instruct": {
    "description": "Llama 3 8B instruction model, optimized for dialogues and multilingual tasks, delivering outstanding and efficient performance."
  },
  "accounts/fireworks/models/llama-v3-8b-instruct-hf": {
    "description": "Llama 3 8B instruction model (HF version), consistent with official implementation results, featuring high consistency and cross-platform compatibility."
  },
  "accounts/fireworks/models/llama-v3p1-405b-instruct": {
    "description": "Llama 3.1 405B instruction model, equipped with massive parameters, suitable for complex tasks and instruction following in high-load scenarios."
  },
  "accounts/fireworks/models/llama-v3p1-70b-instruct": {
    "description": "Llama 3.1 70B instruction model provides exceptional natural language understanding and generation capabilities, making it an ideal choice for dialogue and analysis tasks."
  },
  "accounts/fireworks/models/llama-v3p1-8b-instruct": {
    "description": "Llama 3.1 8B instruction model, optimized for multilingual dialogues, capable of surpassing most open-source and closed-source models on common industry benchmarks."
  },
  "accounts/fireworks/models/llama-v3p2-11b-vision-instruct": {
    "description": "Meta's 11B parameter instruction-tuned image reasoning model. This model is optimized for visual recognition, image reasoning, image description, and answering general questions about images. It understands visual data like charts and graphs, generating text descriptions of image details to bridge the gap between vision and language."
  },
  "accounts/fireworks/models/llama-v3p2-3b-instruct": {
    "description": "The Llama 3.2 3B instruction model is a lightweight multilingual model introduced by Meta. This model aims to enhance efficiency, providing significant improvements in latency and cost compared to larger models. Sample use cases include querying, prompt rewriting, and writing assistance."
  },
  "accounts/fireworks/models/llama-v3p2-90b-vision-instruct": {
    "description": "Meta's 90B parameter instruction-tuned image reasoning model. This model is optimized for visual recognition, image reasoning, image description, and answering general questions about images. It understands visual data like charts and graphs, generating text descriptions of image details to bridge the gap between vision and language."
  },
  "accounts/fireworks/models/llama-v3p3-70b-instruct": {
    "description": "Llama 3.3 70B Instruct is the December update of Llama 3.1 70B. This model builds upon Llama 3.1 70B (released in July 2024) with enhancements in tool invocation, multilingual text support, mathematics, and programming capabilities. It achieves industry-leading performance in reasoning, mathematics, and instruction following, providing similar performance to 3.1 405B while offering significant advantages in speed and cost."
  },
  "accounts/fireworks/models/mistral-small-24b-instruct-2501": {
    "description": "A 24B parameter model that possesses state-of-the-art capabilities comparable to larger models."
  },
  "accounts/fireworks/models/mixtral-8x22b-instruct": {
    "description": "Mixtral MoE 8x22B instruction model, featuring large-scale parameters and a multi-expert architecture, fully supporting efficient processing of complex tasks."
  },
  "accounts/fireworks/models/mixtral-8x7b-instruct": {
    "description": "Mixtral MoE 8x7B instruction model, with a multi-expert architecture providing efficient instruction following and execution."
  },
  "accounts/fireworks/models/mythomax-l2-13b": {
    "description": "MythoMax L2 13B model, combining novel merging techniques, excels in narrative and role-playing."
  },
  "accounts/fireworks/models/phi-3-vision-128k-instruct": {
    "description": "Phi 3 Vision instruction model, a lightweight multimodal model capable of handling complex visual and textual information, with strong reasoning abilities."
  },
  "accounts/fireworks/models/qwen-qwq-32b-preview": {
    "description": "The QwQ model is an experimental research model developed by the Qwen team, focusing on enhancing AI reasoning capabilities."
  },
  "accounts/fireworks/models/qwen2-vl-72b-instruct": {
    "description": "The 72B version of the Qwen-VL model is the latest iteration from Alibaba, representing nearly a year of innovation."
  },
  "accounts/fireworks/models/qwen2p5-72b-instruct": {
    "description": "Qwen2.5 is a series of decoder-only language models developed by the Alibaba Cloud Qwen team. These models come in different sizes including 0.5B, 1.5B, 3B, 7B, 14B, 32B, and 72B, available in both base and instruct variants."
  },
  "accounts/fireworks/models/qwen2p5-coder-32b-instruct": {
    "description": "Qwen2.5 Coder 32B Instruct is the latest version in Alibaba Cloud's series of code-specific large language models. This model significantly enhances code generation, reasoning, and repair capabilities based on Qwen2.5, trained on 55 trillion tokens. It not only improves coding abilities but also maintains advantages in mathematics and general capabilities, providing a more comprehensive foundation for practical applications such as code agents."
  },
  "accounts/yi-01-ai/models/yi-large": {
    "description": "Yi-Large model, featuring exceptional multilingual processing capabilities, suitable for various language generation and understanding tasks."
  },
  "ai21-jamba-1.5-large": {
    "description": "A 398B parameter (94B active) multilingual model, offering a 256K long context window, function calling, structured output, and grounded generation."
  },
  "ai21-jamba-1.5-mini": {
    "description": "A 52B parameter (12B active) multilingual model, offering a 256K long context window, function calling, structured output, and grounded generation."
  },
  "ai21-labs/AI21-Jamba-1.5-Large": {
    "description": "A 398B parameter (94B active) multilingual model providing a 256K long context window, function calling, structured output, and fact-based generation."
  },
  "ai21-labs/AI21-Jamba-1.5-Mini": {
    "description": "A 52B parameter (12B active) multilingual model offering a 256K long context window, function calling, structured output, and fact-based generation."
  },
  "anthropic.claude-3-5-sonnet-20240620-v1:0": {
    "description": "Claude 3.5 Sonnet raises the industry standard, outperforming competitor models and Claude 3 Opus, excelling in a wide range of evaluations while maintaining the speed and cost of our mid-tier models."
  },
  "anthropic.claude-3-5-sonnet-20241022-v2:0": {
    "description": "Claude 3.5 Sonnet raises the industry standard, outperforming competing models and Claude 3 Opus, excelling in extensive evaluations while maintaining the speed and cost of our mid-tier models."
  },
  "anthropic.claude-3-haiku-20240307-v1:0": {
    "description": "Claude 3 Haiku is Anthropic's fastest and most compact model, providing near-instantaneous response times. It can quickly answer simple queries and requests. Customers will be able to build seamless AI experiences that mimic human interaction. Claude 3 Haiku can process images and return text output, with a context window of 200K."
  },
  "anthropic.claude-3-opus-20240229-v1:0": {
    "description": "Claude 3 Opus is Anthropic's most powerful AI model, featuring state-of-the-art performance on highly complex tasks. It can handle open-ended prompts and unseen scenarios, demonstrating exceptional fluency and human-like understanding. Claude 3 Opus showcases the forefront of generative AI possibilities. Claude 3 Opus can process images and return text output, with a context window of 200K."
  },
  "anthropic.claude-3-sonnet-20240229-v1:0": {
    "description": "Anthropic's Claude 3 Sonnet strikes an ideal balance between intelligence and speed—especially suited for enterprise workloads. It offers maximum utility at a price lower than competitors and is designed to be a reliable, durable workhorse for scalable AI deployments. Claude 3 Sonnet can process images and return text output, with a context window of 200K."
  },
  "anthropic.claude-instant-v1": {
    "description": "A fast, economical, yet still highly capable model that can handle a range of tasks, including everyday conversations, text analysis, summarization, and document Q&A."
  },
  "anthropic.claude-v2": {
    "description": "Anthropic's model demonstrates high capability across a wide range of tasks, from complex conversations and creative content generation to detailed instruction following."
  },
  "anthropic.claude-v2:1": {
    "description": "An updated version of Claude 2, featuring double the context window and improvements in reliability, hallucination rates, and evidence-based accuracy in long documents and RAG contexts."
  },
  "anthropic/claude-3-haiku": {
    "description": "Claude 3 Haiku is Anthropic's fastest and most compact model, designed for near-instantaneous responses. It features quick and accurate directional performance."
  },
  "anthropic/claude-3-opus": {
    "description": "Claude 3 Opus is Anthropic's most powerful model for handling highly complex tasks. It excels in performance, intelligence, fluency, and comprehension."
  },
  "anthropic/claude-3.5-haiku": {
    "description": "Claude 3.5 Haiku is Anthropic's fastest next-generation model. Compared to Claude 3 Haiku, Claude 3.5 Haiku shows improvements across various skills and surpasses the previous generation's largest model, Claude 3 Opus, in many intelligence benchmarks."
  },
  "anthropic/claude-3.5-sonnet": {
    "description": "Claude 3.5 Sonnet offers capabilities that surpass Opus and faster speeds than Sonnet, while maintaining the same pricing as Sonnet. Sonnet excels particularly in programming, data science, visual processing, and agent tasks."
  },
  "anthropic/claude-3.7-sonnet": {
    "description": "Claude 3.7 Sonnet is Anthropic's most advanced model to date and the first hybrid reasoning model on the market. Claude 3.7 Sonnet can generate near-instant responses or extended step-by-step reasoning, allowing users to clearly observe these processes. Sonnet excels particularly in programming, data science, visual processing, and agent tasks."
  },
  "anthropic/claude-opus-4": {
    "description": "Claude Opus 4 is Anthropic's most powerful model designed for handling highly complex tasks. It excels in performance, intelligence, fluency, and comprehension."
  },
  "anthropic/claude-sonnet-4": {
    "description": "Claude Sonnet 4 can generate near-instant responses or extended step-by-step reasoning, allowing users to clearly observe these processes. API users also have fine-grained control over the model's thinking time."
  },
  "aya": {
    "description": "Aya 23 is a multilingual model launched by Cohere, supporting 23 languages, facilitating diverse language applications."
  },
  "aya:35b": {
    "description": "Aya 23 is a multilingual model launched by Cohere, supporting 23 languages, facilitating diverse language applications."
  },
  "baichuan/baichuan2-13b-chat": {
    "description": "Baichuan-13B is an open-source, commercially usable large language model developed by Baichuan Intelligence, containing 13 billion parameters, achieving the best results in its size on authoritative Chinese and English benchmarks."
  },
  "c4ai-aya-expanse-32b": {
    "description": "Aya Expanse is a high-performance 32B multilingual model designed to challenge the performance of single-language models through innovations in instruction tuning, data arbitrage, preference training, and model merging. It supports 23 languages."
  },
  "c4ai-aya-expanse-8b": {
    "description": "Aya Expanse is a high-performance 8B multilingual model designed to challenge the performance of single-language models through innovations in instruction tuning, data arbitrage, preference training, and model merging. It supports 23 languages."
  },
  "c4ai-aya-vision-32b": {
    "description": "Aya Vision is a state-of-the-art multimodal model that excels in multiple key benchmarks for language, text, and image capabilities. This 32 billion parameter version focuses on cutting-edge multilingual performance and supports 23 languages."
  },
  "c4ai-aya-vision-8b": {
    "description": "Aya Vision is a state-of-the-art multimodal model that excels in multiple key benchmarks for language, text, and image capabilities. This 8 billion parameter version focuses on low latency and optimal performance."
  },
  "charglm-3": {
    "description": "CharGLM-3 is designed for role-playing and emotional companionship, supporting ultra-long multi-turn memory and personalized dialogue, with wide applications."
  },
  "charglm-4": {
    "description": "CharGLM-4 is designed for role-playing and emotional companionship, supporting ultra-long multi-turn memory and personalized dialogue, with wide-ranging applications."
  },
  "chatglm3": {
    "description": "ChatGLM3 is a closed-source model released by Zhipu AI and Tsinghua KEG Lab. It has been pre-trained on a massive amount of Chinese and English identifiers and fine-tuned with human preference alignment. Compared to the first-generation model, it has achieved improvements of 16%, 36%, and 280% in MMLU, C-Eval, and GSM8K, respectively, and topped the Chinese task leaderboard C-Eval. It is suitable for scenarios that require a high level of knowledge, reasoning, and creativity, such as advertising copywriting, novel writing, knowledge-based writing, and code generation."
  },
  "chatglm3-6b-base": {
    "description": "ChatGLM3-6b-base is the latest generation of the ChatGLM series, a 6 billion parameter open-source base model developed by Zhipu."
  },
  "chatgpt-4o-latest": {
    "description": "ChatGPT-4o is a dynamic model that updates in real-time to stay current with the latest version. It combines powerful language understanding and generation capabilities, making it suitable for large-scale applications, including customer service, education, and technical support."
  },
  "claude-2.0": {
    "description": "Claude 2 provides advancements in key capabilities for enterprises, including industry-leading 200K token context, significantly reducing the occurrence of model hallucinations, system prompts, and a new testing feature: tool invocation."
  },
  "claude-2.1": {
    "description": "Claude 2 provides advancements in key capabilities for enterprises, including industry-leading 200K token context, significantly reducing the occurrence of model hallucinations, system prompts, and a new testing feature: tool invocation."
  },
  "claude-3-5-haiku-20241022": {
    "description": "Claude 3.5 Haiku is Anthropic's fastest next-generation model. Compared to Claude 3 Haiku, Claude 3.5 Haiku has improved in various skills and has surpassed the previous generation's largest model, Claude 3 Opus, in many intelligence benchmark tests."
  },
  "claude-3-5-sonnet-20240620": {
    "description": "Claude 3.5 Sonnet offers capabilities that surpass Opus and faster speeds than Sonnet, while maintaining the same price as Sonnet. Sonnet excels particularly in programming, data science, visual processing, and agent tasks."
  },
  "claude-3-5-sonnet-20241022": {
    "description": "Claude 3.5 Sonnet offers capabilities that surpass Opus and faster speeds than Sonnet, while maintaining the same pricing as Sonnet. Sonnet excels particularly in programming, data science, visual processing, and agent tasks."
  },
  "claude-3-7-sonnet-20250219": {
    "description": "Claude 3.7 Sonnet is Anthropic's latest model, offering a balance of speed and performance. It excels in a wide range of tasks, including programming, data science, visual processing, and agent tasks."
  },
  "claude-3-haiku-20240307": {
    "description": "Claude 3 Haiku is Anthropic's fastest and most compact model, designed for near-instantaneous responses. It features rapid and accurate directional performance."
  },
  "claude-3-opus-20240229": {
    "description": "Claude 3 Opus is Anthropic's most powerful model for handling highly complex tasks. It excels in performance, intelligence, fluency, and comprehension."
  },
  "claude-3-sonnet-20240229": {
    "description": "Claude 3 Sonnet provides an ideal balance of intelligence and speed for enterprise workloads. It offers maximum utility at a lower price, reliable and suitable for large-scale deployment."
  },
  "claude-opus-4-20250514": {
    "description": "Claude Opus 4 is Anthropic's most powerful model for handling highly complex tasks. It excels in performance, intelligence, fluency, and comprehension."
  },
  "claude-sonnet-4-20250514": {
    "description": "Claude 4 Sonnet can generate near-instant responses or extended, step-by-step reasoning, allowing users to clearly observe these processes. API users can also have fine control over the time the model takes to think."
  },
  "codegeex-4": {
    "description": "CodeGeeX-4 is a powerful AI programming assistant that supports intelligent Q&A and code completion in various programming languages, enhancing development efficiency."
  },
  "codegeex4-all-9b": {
    "description": "CodeGeeX4-ALL-9B is a multilingual code generation model that supports comprehensive functions including code completion and generation, code interpretation, web search, function calls, and repository-level code Q&A, covering various scenarios in software development. It is a top-tier code generation model with fewer than 10B parameters."
  },
  "codegemma": {
    "description": "CodeGemma is a lightweight language model dedicated to various programming tasks, supporting rapid iteration and integration."
  },
  "codegemma:2b": {
    "description": "CodeGemma is a lightweight language model dedicated to various programming tasks, supporting rapid iteration and integration."
  },
  "codellama": {
    "description": "Code Llama is an LLM focused on code generation and discussion, combining extensive programming language support, suitable for developer environments."
  },
  "codellama/CodeLlama-34b-Instruct-hf": {
    "description": "Code Llama is an LLM focused on code generation and discussion, with extensive support for various programming languages, suitable for developer environments."
  },
  "codellama:13b": {
    "description": "Code Llama is an LLM focused on code generation and discussion, combining extensive programming language support, suitable for developer environments."
  },
  "codellama:34b": {
    "description": "Code Llama is an LLM focused on code generation and discussion, combining extensive programming language support, suitable for developer environments."
  },
  "codellama:70b": {
    "description": "Code Llama is an LLM focused on code generation and discussion, combining extensive programming language support, suitable for developer environments."
  },
  "codeqwen": {
    "description": "CodeQwen1.5 is a large language model trained on extensive code data, specifically designed to solve complex programming tasks."
  },
  "codestral": {
    "description": "Codestral is Mistral AI's first code model, providing excellent support for code generation tasks."
  },
  "codestral-latest": {
    "description": "Codestral is a cutting-edge generative model focused on code generation, optimized for intermediate filling and code completion tasks."
  },
  "codex-mini-latest": {
    "description": "codex-mini-latest is a fine-tuned version of o4-mini, specifically designed for Codex CLI. For direct API usage, we recommend starting with gpt-4.1."
  },
  "cognitivecomputations/dolphin-mixtral-8x22b": {
    "description": "Dolphin Mixtral 8x22B is a model designed for instruction following, dialogue, and programming."
  },
  "cohere-command-r": {
    "description": "Command R is a scalable generative model targeting RAG and Tool Use to enable production-scale AI for enterprises."
  },
  "cohere-command-r-plus": {
    "description": "Command R+ is a state-of-the-art RAG-optimized model designed to tackle enterprise-grade workloads."
  },
  "cohere/Cohere-command-r": {
    "description": "Command R is a scalable generative model designed for RAG and tool usage, enabling enterprises to achieve production-grade AI."
  },
  "cohere/Cohere-command-r-plus": {
    "description": "Command R+ is a state-of-the-art RAG-optimized model designed to handle enterprise-level workloads."
  },
  "command": {
    "description": "An instruction-following dialogue model that delivers high quality and reliability in language tasks, with a longer context length compared to our base generation models."
  },
  "command-a-03-2025": {
    "description": "Command A is our most powerful model to date, excelling in tool usage, agent tasks, retrieval-augmented generation (RAG), and multilingual applications. Command A features a context length of 256K and can run on just two GPUs, achieving a 150% increase in throughput compared to Command R+ 08-2024."
  },
  "command-light": {
    "description": "A smaller, faster version of Command that is nearly as powerful but operates at a higher speed."
  },
  "command-light-nightly": {
    "description": "To shorten the time interval between major version releases, we have launched nightly versions of the Command model. For the command-light series, this version is called command-light-nightly. Please note that command-light-nightly is the latest, most experimental, and (potentially) unstable version. Nightly versions are updated regularly without prior notice, so they are not recommended for production use."
  },
  "command-nightly": {
    "description": "To shorten the time interval between major version releases, we have launched nightly versions of the Command model. For the Command series, this version is called command-cightly. Please note that command-nightly is the latest, most experimental, and (potentially) unstable version. Nightly versions are updated regularly without prior notice, so they are not recommended for production use."
  },
  "command-r": {
    "description": "Command R is an LLM optimized for dialogue and long context tasks, particularly suitable for dynamic interactions and knowledge management."
  },
  "command-r-03-2024": {
    "description": "Command R is an instruction-following dialogue model that provides higher quality and reliability in language tasks, with a longer context length than previous models. It can be used for complex workflows such as code generation, retrieval-augmented generation (RAG), tool usage, and agent tasks."
  },
  "command-r-08-2024": {
    "description": "command-r-08-2024 is an updated version of the Command R model, released in August 2024."
  },
  "command-r-plus": {
    "description": "Command R+ is a high-performance large language model designed for real enterprise scenarios and complex applications."
  },
  "command-r-plus-04-2024": {
    "description": "Command R+ is an instruction-following dialogue model that delivers higher quality and reliability in language tasks, with a longer context length than previous models. It is best suited for complex RAG workflows and multi-step tool usage."
  },
  "command-r-plus-08-2024": {
    "description": "Command R+ is an instruction-following conversational model that delivers higher quality and reliability in language tasks, with a longer context length compared to previous models. It is best suited for complex RAG workflows and multi-step tool usage."
  },
  "command-r7b-12-2024": {
    "description": "command-r7b-12-2024 is a compact and efficient updated version, released in December 2024. It excels in tasks requiring complex reasoning and multi-step processing, such as RAG, tool usage, and agent tasks."
  },
  "compound-beta": {
    "description": "Compound-beta is a composite AI system supported by multiple publicly available models in GroqCloud, intelligently and selectively using tools to answer user queries."
  },
  "compound-beta-mini": {
    "description": "Compound-beta-mini is a composite AI system supported by publicly available models in GroqCloud, intelligently and selectively using tools to answer user queries."
  },
  "computer-use-preview": {
    "description": "The computer-use-preview model is a dedicated model designed for \"computer usage tools,\" trained to understand and execute computer-related tasks."
  },
  "dall-e-2": {
    "description": "The second generation DALL·E model, supporting more realistic and accurate image generation, with a resolution four times that of the first generation."
  },
  "dall-e-3": {
    "description": "The latest DALL·E model, released in November 2023. It supports more realistic and accurate image generation with enhanced detail representation."
  },
  "databricks/dbrx-instruct": {
    "description": "DBRX Instruct provides highly reliable instruction processing capabilities, supporting applications across multiple industries."
  },
  "deepseek-ai/DeepSeek-R1": {
    "description": "DeepSeek-R1 is a reinforcement learning (RL) driven inference model that addresses issues of repetitiveness and readability within the model. Prior to RL, DeepSeek-R1 introduced cold start data to further optimize inference performance. It performs comparably to OpenAI-o1 in mathematical, coding, and reasoning tasks, and enhances overall effectiveness through meticulously designed training methods."
  },
  "deepseek-ai/DeepSeek-R1-0528": {
    "description": "DeepSeek R1 significantly enhances its reasoning and inference depth by leveraging increased computational resources and introducing algorithmic optimizations during post-training. The model performs excellently across various benchmarks, including mathematics, programming, and general logic. Its overall performance now approaches leading models such as O3 and Gemini 2.5 Pro."
  },
  "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B": {
    "description": "DeepSeek-R1-0528-Qwen3-8B is a model distilled from DeepSeek-R1-0528's chain of thought into Qwen3 8B Base. It achieves state-of-the-art (SOTA) performance among open-source models, surpassing Qwen3 8B by 10% in the AIME 2024 test and reaching the performance level of Qwen3-235B-thinking. The model excels in mathematics reasoning, programming, and general logic benchmarks. It shares the same architecture as Qwen3-8B but uses the tokenizer configuration from DeepSeek-R1-0528."
  },
  "deepseek-ai/DeepSeek-R1-Distill-Llama-70B": {
    "description": "The DeepSeek-R1 distillation model optimizes inference performance through reinforcement learning and cold-start data, refreshing the benchmark for open-source models across multiple tasks."
  },
  "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": {
    "description": "The DeepSeek-R1 distillation model optimizes inference performance through reinforcement learning and cold-start data, refreshing the benchmark for open-source models across multiple tasks."
  },
  "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": {
    "description": "The DeepSeek-R1 distillation model optimizes inference performance through reinforcement learning and cold-start data, refreshing the benchmark for open-source models across multiple tasks."
  },
  "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": {
    "description": "DeepSeek-R1-Distill-Qwen-32B is a model obtained through knowledge distillation based on Qwen2.5-32B. This model is fine-tuned using 800,000 selected samples generated by DeepSeek-R1, demonstrating exceptional performance in mathematics, programming, and reasoning across multiple domains. It has achieved excellent results in various benchmark tests, including a 94.3% accuracy rate on MATH-500, showcasing strong mathematical reasoning capabilities."
  },
  "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": {
    "description": "DeepSeek-R1-Distill-Qwen-7B is a model obtained through knowledge distillation based on Qwen2.5-Math-7B. This model is fine-tuned using 800,000 selected samples generated by DeepSeek-R1, demonstrating excellent reasoning capabilities. It has performed outstandingly in multiple benchmark tests, achieving a 92.8% accuracy rate on MATH-500, a 55.5% pass rate on AIME 2024, and a score of 1189 on CodeForces, showcasing strong mathematical and programming abilities as a 7B scale model."
  },
  "deepseek-ai/DeepSeek-V2.5": {
    "description": "DeepSeek V2.5 combines the excellent features of previous versions, enhancing general and coding capabilities."
  },
  "deepseek-ai/DeepSeek-V3": {
    "description": "DeepSeek-V3 is a mixture of experts (MoE) language model with 671 billion parameters, utilizing multi-head latent attention (MLA) and the DeepSeekMoE architecture, combined with a load balancing strategy that does not rely on auxiliary loss, optimizing inference and training efficiency. Pre-trained on 14.8 trillion high-quality tokens and fine-tuned with supervision and reinforcement learning, DeepSeek-V3 outperforms other open-source models and approaches leading closed-source models in performance."
  },
  "deepseek-ai/deepseek-llm-67b-chat": {
    "description": "DeepSeek 67B is an advanced model trained for highly complex conversations."
  },
  "deepseek-ai/deepseek-r1": {
    "description": "A state-of-the-art efficient LLM skilled in reasoning, mathematics, and programming."
  },
  "deepseek-ai/deepseek-vl2": {
    "description": "DeepSeek-VL2 is a mixture of experts (MoE) visual language model developed based on DeepSeekMoE-27B, employing a sparsely activated MoE architecture that achieves outstanding performance while activating only 4.5 billion parameters. This model excels in various tasks, including visual question answering, optical character recognition, document/table/chart understanding, and visual localization."
  },
  "deepseek-chat": {
    "description": "A new open-source model that integrates general and coding capabilities, retaining the general conversational abilities of the original Chat model and the powerful code handling capabilities of the Coder model, while better aligning with human preferences. Additionally, DeepSeek-V2.5 has achieved significant improvements in writing tasks, instruction following, and more."
  },
  "deepseek-coder-33B-instruct": {
    "description": "DeepSeek Coder 33B is a code language model trained on 20 trillion data points, of which 87% are code and 13% are in Chinese and English. The model introduces a 16K window size and fill-in-the-blank tasks, providing project-level code completion and snippet filling capabilities."
  },
  "deepseek-coder-v2": {
    "description": "DeepSeek Coder V2 is an open-source hybrid expert code model that performs excellently in coding tasks, comparable to GPT4-Turbo."
  },
  "deepseek-coder-v2:236b": {
    "description": "DeepSeek Coder V2 is an open-source hybrid expert code model that performs excellently in coding tasks, comparable to GPT4-Turbo."
  },
  "deepseek-r1": {
    "description": "DeepSeek-R1 is a reinforcement learning (RL) driven inference model that addresses issues of repetitiveness and readability within the model. Prior to RL, DeepSeek-R1 introduced cold start data to further optimize inference performance. It performs comparably to OpenAI-o1 in mathematical, coding, and reasoning tasks, and enhances overall effectiveness through meticulously designed training methods."
  },
  "deepseek-r1-0528": {
    "description": "The full-capacity 685B model released on May 28, 2025. DeepSeek-R1 extensively employs reinforcement learning during post-training, significantly enhancing reasoning capabilities with minimal labeled data. It demonstrates strong performance in mathematics, coding, and natural language reasoning tasks."
  },
  "deepseek-r1-70b-fast-online": {
    "description": "DeepSeek R1 70B fast version, supporting real-time online search, providing faster response times while maintaining model performance."
  },
  "deepseek-r1-70b-online": {
    "description": "DeepSeek R1 70B standard version, supporting real-time online search, suitable for dialogue and text processing tasks that require the latest information."
  },
  "deepseek-r1-distill-llama": {
    "description": "deepseek-r1-distill-llama is a model distilled from DeepSeek-R1 based on Llama."
  },
  "deepseek-r1-distill-llama-70b": {
    "description": "DeepSeek R1—the larger and smarter model in the DeepSeek suite—has been distilled into the Llama 70B architecture. Based on benchmark tests and human evaluations, this model is smarter than the original Llama 70B, especially excelling in tasks requiring mathematical and factual accuracy."
  },
  "deepseek-r1-distill-llama-8b": {
    "description": "The DeepSeek-R1-Distill series models are fine-tuned versions of samples generated by DeepSeek-R1, using knowledge distillation techniques on open-source models like Qwen and Llama."
  },
  "deepseek-r1-distill-qianfan-llama-70b": {
    "description": "First released on February 14, 2025, distilled by the Qianfan model development team using Llama3_70B as the base model (Built with Meta Llama), with Qianfan's corpus also added to the distilled data."
  },
  "deepseek-r1-distill-qianfan-llama-8b": {
    "description": "First released on February 14, 2025, distilled by the Qianfan model development team using Llama3_8B as the base model (Built with Meta Llama), with Qianfan's corpus also added to the distilled data."
  },
  "deepseek-r1-distill-qwen": {
    "description": "deepseek-r1-distill-qwen is a model distilled from DeepSeek-R1 based on Qwen."
  },
  "deepseek-r1-distill-qwen-1.5b": {
    "description": "The DeepSeek-R1-Distill series models are fine-tuned versions of samples generated by DeepSeek-R1, using knowledge distillation techniques on open-source models like Qwen and Llama."
  },
  "deepseek-r1-distill-qwen-14b": {
    "description": "The DeepSeek-R1-Distill series models are fine-tuned versions of samples generated by DeepSeek-R1, using knowledge distillation techniques on open-source models like Qwen and Llama."
  },
  "deepseek-r1-distill-qwen-32b": {
    "description": "The DeepSeek-R1-Distill series models are fine-tuned versions of samples generated by DeepSeek-R1, using knowledge distillation techniques on open-source models like Qwen and Llama."
  },
  "deepseek-r1-distill-qwen-7b": {
    "description": "The DeepSeek-R1-Distill series models are fine-tuned versions of samples generated by DeepSeek-R1, using knowledge distillation techniques on open-source models like Qwen and Llama."
  },
  "deepseek-r1-fast-online": {
    "description": "DeepSeek R1 full fast version, supporting real-time online search, combining the powerful capabilities of 671B parameters with faster response times."
  },
  "deepseek-r1-online": {
    "description": "DeepSeek R1 full version, with 671B parameters, supporting real-time online search, offering enhanced understanding and generation capabilities."
  },
  "deepseek-reasoner": {
    "description": "The reasoning model launched by DeepSeek. Before outputting the final answer, the model first provides a chain of thought to enhance the accuracy of the final response."
  },
  "deepseek-v2": {
    "description": "DeepSeek V2 is an efficient Mixture-of-Experts language model, suitable for cost-effective processing needs."
  },
  "deepseek-v2:236b": {
    "description": "DeepSeek V2 236B is the design code model of DeepSeek, providing powerful code generation capabilities."
  },
  "deepseek-v3": {
    "description": "DeepSeek-V3 is a MoE model developed by Hangzhou DeepSeek Artificial Intelligence Technology Research Co., Ltd., achieving outstanding results in multiple evaluations and ranking first among open-source models on mainstream leaderboards. Compared to the V2.5 model, V3 has achieved a threefold increase in generation speed, providing users with a faster and smoother experience."
  },
  "deepseek-v3-0324": {
    "description": "DeepSeek-V3-0324 is a 671B parameter MoE model, excelling in programming and technical capabilities, contextual understanding, and long text processing."
  },
  "deepseek/deepseek-chat-v3-0324": {
    "description": "DeepSeek V3 is a 685B parameter expert mixture model, the latest iteration in the DeepSeek team's flagship chat model series.\n\nIt inherits from the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs excellently across various tasks."
  },
  "deepseek/deepseek-chat-v3-0324:free": {
    "description": "DeepSeek V3 is a 685B parameter expert mixture model, the latest iteration in the DeepSeek team's flagship chat model series.\n\nIt inherits from the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs excellently across various tasks."
  },
  "deepseek/deepseek-r1": {
    "description": "DeepSeek-R1 significantly enhances model reasoning capabilities with minimal labeled data. Before outputting the final answer, the model first provides a chain of thought to improve the accuracy of the final response."
  },
  "deepseek/deepseek-r1-0528": {
    "description": "DeepSeek-R1 greatly improves model reasoning capabilities with minimal labeled data. Before outputting the final answer, the model first generates a chain of thought to enhance answer accuracy."
  },
  "deepseek/deepseek-r1-0528:free": {
    "description": "DeepSeek-R1 greatly improves model reasoning capabilities with minimal labeled data. Before outputting the final answer, the model first generates a chain of thought to enhance answer accuracy."
  },
  "deepseek/deepseek-r1-distill-llama-70b": {
    "description": "DeepSeek R1 Distill Llama 70B is a large language model based on Llama3.3 70B, which achieves competitive performance comparable to large cutting-edge models by utilizing fine-tuning from DeepSeek R1 outputs."
  },
  "deepseek/deepseek-r1-distill-llama-8b": {
    "description": "DeepSeek R1 Distill Llama 8B is a distilled large language model based on Llama-3.1-8B-Instruct, trained using outputs from DeepSeek R1."
  },
  "deepseek/deepseek-r1-distill-qwen-14b": {
    "description": "DeepSeek R1 Distill Qwen 14B is a distilled large language model based on Qwen 2.5 14B, trained using outputs from DeepSeek R1. This model has surpassed OpenAI's o1-mini in several benchmark tests, achieving state-of-the-art results for dense models. Here are some benchmark results:\nAIME 2024 pass@1: 69.7\nMATH-500 pass@1: 93.9\nCodeForces Rating: 1481\nThis model demonstrates competitive performance comparable to larger cutting-edge models through fine-tuning from DeepSeek R1 outputs."
  },
  "deepseek/deepseek-r1-distill-qwen-32b": {
    "description": "DeepSeek R1 Distill Qwen 32B is a distilled large language model based on Qwen 2.5 32B, trained using outputs from DeepSeek R1. This model has surpassed OpenAI's o1-mini in several benchmark tests, achieving state-of-the-art results for dense models. Here are some benchmark results:\nAIME 2024 pass@1: 72.6\nMATH-500 pass@1: 94.3\nCodeForces Rating: 1691\nThis model demonstrates competitive performance comparable to larger cutting-edge models through fine-tuning from DeepSeek R1 outputs."
  },
  "deepseek/deepseek-r1/community": {
    "description": "DeepSeek R1 is the latest open-source model released by the DeepSeek team, featuring impressive inference performance, particularly in mathematics, programming, and reasoning tasks, reaching levels comparable to OpenAI's o1 model."
  },
  "deepseek/deepseek-r1:free": {
    "description": "DeepSeek-R1 significantly enhances model reasoning capabilities with minimal labeled data. Before outputting the final answer, the model first provides a chain of thought to improve the accuracy of the final response."
  },
  "deepseek/deepseek-v3": {
    "description": "DeepSeek-V3 has achieved a significant breakthrough in inference speed compared to previous models. It ranks first among open-source models and can compete with the world's most advanced closed-source models. DeepSeek-V3 employs Multi-Head Latent Attention (MLA) and DeepSeekMoE architectures, which have been thoroughly validated in DeepSeek-V2. Additionally, DeepSeek-V3 introduces an auxiliary lossless strategy for load balancing and sets multi-label prediction training objectives for enhanced performance."
  },
  "deepseek/deepseek-v3/community": {
    "description": "DeepSeek-V3 has achieved a significant breakthrough in inference speed compared to previous models. It ranks first among open-source models and can compete with the world's most advanced closed-source models. DeepSeek-V3 employs Multi-Head Latent Attention (MLA) and DeepSeekMoE architectures, which have been thoroughly validated in DeepSeek-V2. Additionally, DeepSeek-V3 introduces an auxiliary lossless strategy for load balancing and sets multi-label prediction training objectives for enhanced performance."
  },
  "deepseek_r1": {
    "description": "DeepSeek-R1 is a reinforcement learning (RL) driven reasoning model that addresses issues of repetition and readability within the model. Prior to RL, DeepSeek-R1 introduced cold start data to further optimize inference performance. It performs comparably to OpenAI-o1 in mathematics, coding, and reasoning tasks, and enhances overall effectiveness through carefully designed training methods."
  },
  "deepseek_r1_distill_llama_70b": {
    "description": "DeepSeek-R1-Distill-Llama-70B is a model obtained through distillation training based on Llama-3.3-70B-Instruct. This model is part of the DeepSeek-R1 series and showcases excellent performance in mathematics, programming, and reasoning through fine-tuning with samples generated by DeepSeek-R1."
  },
  "deepseek_r1_distill_qwen_14b": {
    "description": "DeepSeek-R1-Distill-Qwen-14B is a model derived from Qwen2.5-14B through knowledge distillation. This model is fine-tuned using 800,000 curated samples generated by DeepSeek-R1, showcasing excellent reasoning capabilities."
  },
  "deepseek_r1_distill_qwen_32b": {
    "description": "DeepSeek-R1-Distill-Qwen-32B is a model derived from Qwen2.5-32B through knowledge distillation. This model is fine-tuned using 800,000 curated samples generated by DeepSeek-R1, demonstrating outstanding performance across multiple domains such as mathematics, programming, and reasoning."
  },
  "doubao-1.5-lite-32k": {
    "description": "Doubao-1.5-lite is a new generation lightweight model, offering extreme response speed with performance and latency at a world-class level."
  },
  "doubao-1.5-pro-256k": {
    "description": "Doubao-1.5-pro-256k is an upgraded version of Doubao-1.5-Pro, significantly enhancing overall performance by 10%. It supports reasoning with a 256k context window and an output length of up to 12k tokens. With higher performance, a larger window, and exceptional cost-effectiveness, it is suitable for a wider range of applications."
  },
  "doubao-1.5-pro-32k": {
    "description": "Doubao-1.5-pro is a new generation flagship model with comprehensive performance upgrades, excelling in knowledge, coding, reasoning, and more."
  },
  "doubao-1.5-thinking-pro": {
    "description": "Doubao-1.5 is a brand new deep thinking model that excels in specialized fields such as mathematics, programming, and scientific reasoning, as well as in general tasks like creative writing. It has achieved or is close to the top tier of industry standards in several authoritative benchmarks, including AIME 2024, Codeforces, and GPQA. It supports a 128k context window and 16k output."
  },
  "doubao-1.5-vision-lite": {
    "description": "Doubao-1.5-vision-lite is a newly upgraded multimodal large model that supports image recognition at any resolution and extreme aspect ratios, enhancing visual reasoning, document recognition, detail comprehension, and instruction following capabilities. It supports a context window of 128k and an output length of up to 16k tokens."
  },
  "doubao-seed-1.6": {
    "description": "Doubao-Seed-1.6 is a brand-new multimodal deep thinking model supporting auto, thinking, and non-thinking modes. In non-thinking mode, its performance significantly surpasses Doubao-1.5-pro/250115. It supports a 256k context window and output lengths up to 16k tokens."
  },
  "doubao-seed-1.6-flash": {
    "description": "Doubao-Seed-1.6-flash is an ultra-fast multimodal deep thinking model with inference speed as low as 10ms on TPOT; it supports both text and visual understanding. Its text comprehension exceeds the previous lite generation, and its visual understanding rivals competitor pro series models. It supports a 256k context window and output lengths up to 16k tokens."
  },
  "doubao-seed-1.6-thinking": {
    "description": "Doubao-Seed-1.6-thinking significantly enhances thinking capabilities compared to Doubao-1.5-thinking-pro, with further improvements in coding, math, and logical reasoning skills. It supports visual understanding and a 256k context window, with output lengths up to 16k tokens."
  },
  "emohaa": {
    "description": "Emohaa is a psychological model with professional counseling capabilities, helping users understand emotional issues."
  },
  "ernie-3.5-128k": {
    "description": "Baidu's flagship large-scale language model, covering a vast amount of Chinese and English corpus, possesses strong general capabilities to meet the requirements of most dialogue Q&A, creative generation, and plugin application scenarios; it supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information."
  },
  "ernie-3.5-8k": {
    "description": "Baidu's flagship large-scale language model, covering a vast amount of Chinese and English corpus, possesses strong general capabilities to meet the requirements of most dialogue Q&A, creative generation, and plugin application scenarios; it supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information."
  },
  "ernie-3.5-8k-preview": {
    "description": "Baidu's flagship large-scale language model, covering a vast amount of Chinese and English corpus, possesses strong general capabilities to meet the requirements of most dialogue Q&A, creative generation, and plugin application scenarios; it supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information."
  },
  "ernie-4.0-8k-latest": {
    "description": "Baidu's flagship ultra-large-scale language model, which has achieved a comprehensive upgrade in model capabilities compared to ERNIE 3.5, widely applicable to complex task scenarios across various fields; it supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information."
  },
  "ernie-4.0-8k-preview": {
    "description": "Baidu's flagship ultra-large-scale language model, which has achieved a comprehensive upgrade in model capabilities compared to ERNIE 3.5, widely applicable to complex task scenarios across various fields; it supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information."
  },
  "ernie-4.0-turbo-128k": {
    "description": "Baidu's flagship ultra-large-scale language model, demonstrating outstanding overall performance, widely applicable to complex task scenarios across various fields; it supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information. It performs better than ERNIE 4.0 in terms of performance."
  },
  "ernie-4.0-turbo-8k-latest": {
    "description": "Baidu's flagship ultra-large-scale language model, demonstrating outstanding overall performance, widely applicable to complex task scenarios across various fields; it supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information. It performs better than ERNIE 4.0 in terms of performance."
  },
  "ernie-4.0-turbo-8k-preview": {
    "description": "Baidu's flagship ultra-large-scale language model, demonstrating outstanding overall performance, widely applicable to complex task scenarios across various fields; it supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information. It performs better than ERNIE 4.0 in terms of performance."
  },
  "ernie-4.5-8k-preview": {
    "description": "ERNIE 4.5 is Baidu's self-developed next-generation native multimodal foundational model, achieving collaborative optimization through joint modeling of multiple modalities, with excellent multimodal understanding capabilities; it features enhanced language abilities, with significant improvements in understanding, generation, logic, and memory, as well as reduced hallucinations and improved logical reasoning and coding capabilities."
  },
  "ernie-4.5-turbo-128k": {
    "description": "Wenxin 4.5 Turbo shows significant enhancements in reducing hallucinations, logical reasoning, and coding capabilities. Compared to Wenxin 4.5, it is faster and more cost-effective. The model's capabilities have been comprehensively improved to better meet the needs of multi-turn long history dialogue processing and long document understanding Q&A tasks."
  },
  "ernie-4.5-turbo-32k": {
    "description": "Wenxin 4.5 Turbo has also shown significant enhancements in reducing hallucinations, logical reasoning, and coding capabilities. Compared to Wenxin 4.5, it is faster and more cost-effective. There are notable improvements in text creation and knowledge Q&A capabilities. The output length and sentence delay have increased compared to ERNIE 4.5."
  },
  "ernie-4.5-turbo-vl-32k": {
    "description": "A brand new version of the Wenxin large model, with significant improvements in image understanding, creation, translation, and coding capabilities, now supports a context length of 32K for the first time, with a significant reduction in first token delay."
  },
  "ernie-char-8k": {
    "description": "Baidu's vertical scene large language model, suitable for applications such as game NPCs, customer service dialogues, and role-playing conversations, with a more distinct and consistent character style, stronger instruction-following capabilities, and superior inference performance."
  },
  "ernie-char-fiction-8k": {
    "description": "Baidu's vertical scene large language model, suitable for applications such as game NPCs, customer service dialogues, and role-playing conversations, with a more distinct and consistent character style, stronger instruction-following capabilities, and superior inference performance."
  },
  "ernie-lite-8k": {
    "description": "ERNIE Lite is Baidu's lightweight large language model, balancing excellent model performance with inference efficiency, suitable for low-power AI acceleration card inference."
  },
  "ernie-lite-pro-128k": {
    "description": "Baidu's lightweight large language model, balancing excellent model performance with inference efficiency, offering better performance than ERNIE Lite, suitable for low-power AI acceleration card inference."
  },
  "ernie-novel-8k": {
    "description": "Baidu's general-purpose large language model, which has a significant advantage in novel continuation capabilities and can also be used in short plays, movies, and other scenarios."
  },
  "ernie-speed-128k": {
    "description": "Baidu's latest self-developed high-performance large language model released in 2024, with excellent general capabilities, suitable as a base model for fine-tuning to better address specific scenario issues while also demonstrating excellent inference performance."
  },
  "ernie-speed-pro-128k": {
    "description": "Baidu's latest self-developed high-performance large language model released in 2024, with excellent general capabilities, offering better performance than ERNIE Speed, suitable as a base model for fine-tuning to better address specific scenario issues while also demonstrating excellent inference performance."
  },
  "ernie-tiny-8k": {
    "description": "ERNIE Tiny is Baidu's ultra-high-performance large language model, with the lowest deployment and fine-tuning costs among the Wenxin series models."
  },
  "ernie-x1-32k": {
    "description": "Possesses stronger abilities in understanding, planning, reflection, and evolution. As a more comprehensive deep thinking model, Wenxin X1 combines accuracy, creativity, and eloquence, excelling in areas such as Chinese knowledge Q&A, literary creation, document writing, daily conversation, logical reasoning, complex calculations, and tool invocation."
  },
  "ernie-x1-32k-preview": {
    "description": "The ERNIE X1 model possesses stronger understanding, planning, reflection, and evolution capabilities. As a more comprehensive deep thinking model, ERNIE X1 excels in accuracy, creativity, and eloquence, particularly in Chinese knowledge Q&A, literary creation, document writing, daily conversation, logical reasoning, complex calculations, and tool invocation."
  },
  "ernie-x1-turbo-32k": {
    "description": "The model performs better in terms of effectiveness and performance compared to ERNIE-X1-32K."
  },
  "gemini-1.0-pro-001": {
    "description": "Gemini 1.0 Pro 001 (Tuning) offers stable and tunable performance, making it an ideal choice for complex task solutions."
  },
  "gemini-1.0-pro-002": {
    "description": "Gemini 1.0 Pro 002 (Tuning) provides excellent multimodal support, focusing on effective solutions for complex tasks."
  },
  "gemini-1.0-pro-latest": {
    "description": "Gemini 1.0 Pro is Google's high-performance AI model, designed for extensive task scaling."
  },
  "gemini-1.5-flash-001": {
    "description": "Gemini 1.5 Flash 001 is an efficient multimodal model that supports extensive application scaling."
  },
  "gemini-1.5-flash-002": {
    "description": "Gemini 1.5 Flash 002 is an efficient multimodal model that supports a wide range of applications."
  },
  "gemini-1.5-flash-8b": {
    "description": "Gemini 1.5 Flash 8B is an efficient multimodal model that supports a wide range of applications."
  },
  "gemini-1.5-flash-8b-exp-0924": {
    "description": "Gemini 1.5 Flash 8B 0924 is the latest experimental model, showcasing significant performance improvements in both text and multimodal use cases."
  },
  "gemini-1.5-flash-8b-latest": {
    "description": "Gemini 1.5 Flash 8B is a highly efficient multimodal model designed for scalable applications."
  },
  "gemini-1.5-flash-exp-0827": {
    "description": "Gemini 1.5 Flash 0827 provides optimized multimodal processing capabilities, suitable for various complex task scenarios."
  },
  "gemini-1.5-flash-latest": {
    "description": "Gemini 1.5 Flash is Google's latest multimodal AI model, featuring fast processing capabilities and supporting text, image, and video inputs, making it suitable for efficient scaling across various tasks."
  },
  "gemini-1.5-pro-001": {
    "description": "Gemini 1.5 Pro 001 is a scalable multimodal AI solution that supports a wide range of complex tasks."
  },
  "gemini-1.5-pro-002": {
    "description": "Gemini 1.5 Pro 002 is the latest production-ready model, delivering higher quality outputs, with notable enhancements in mathematics, long-context, and visual tasks."
  },
  "gemini-1.5-pro-exp-0801": {
    "description": "Gemini 1.5 Pro 0801 offers excellent multimodal processing capabilities, providing greater flexibility for application development."
  },
  "gemini-1.5-pro-exp-0827": {
    "description": "Gemini 1.5 Pro 0827 combines the latest optimization technologies for more efficient multimodal data processing."
  },
  "gemini-1.5-pro-latest": {
    "description": "Gemini 1.5 Pro supports up to 2 million tokens, making it an ideal choice for medium-sized multimodal models, providing multifaceted support for complex tasks."
  },
  "gemini-2.0-flash": {
    "description": "Gemini 2.0 Flash offers next-generation features and improvements, including exceptional speed, native tool usage, multimodal generation, and a 1M token context window."
  },
  "gemini-2.0-flash-001": {
    "description": "Gemini 2.0 Flash offers next-generation features and improvements, including exceptional speed, native tool usage, multimodal generation, and a 1M token context window."
  },
  "gemini-2.0-flash-exp": {
    "description": "Gemini 2.0 Flash model variant optimized for cost-effectiveness and low latency."
  },
  "gemini-2.0-flash-exp-image-generation": {
    "description": "Gemini 2.0 Flash experimental model, supports image generation"
  },
  "gemini-2.0-flash-lite": {
    "description": "Gemini 2.0 Flash is a variant of the model optimized for cost-effectiveness and low latency."
  },
  "gemini-2.0-flash-lite-001": {
    "description": "Gemini 2.0 Flash is a variant of the model optimized for cost-effectiveness and low latency."
  },
  "gemini-2.0-flash-preview-image-generation": {
    "description": "Gemini 2.0 Flash preview model, supports image generation"
  },
  "gemini-2.5-flash-preview-04-17": {
    "description": "Gemini 2.5 Flash Preview is Google's most cost-effective model, offering a comprehensive set of features."
  },
  "gemini-2.5-flash-preview-04-17-thinking": {
    "description": "Gemini 2.5 Flash Preview is Google's most cost-effective model, offering comprehensive capabilities."
  },
  "gemini-2.5-flash-preview-05-20": {
    "description": "Gemini 2.5 Flash Preview is Google's most cost-effective model, offering comprehensive capabilities."
  },
  "gemini-2.5-pro-exp-03-25": {
    "description": "Gemini 2.5 Pro Experimental is Google's most advanced thinking model, capable of reasoning about complex problems in code, mathematics, and STEM fields, as well as analyzing large datasets, codebases, and documents using long context."
  },
  "gemini-2.5-pro-preview-03-25": {
    "description": "Gemini 2.5 Pro Preview is Google's most advanced thinking model, capable of reasoning about complex problems in code, mathematics, and STEM fields, as well as analyzing large datasets, codebases, and documents using long-context analysis."
  },
  "gemini-2.5-pro-preview-05-06": {
    "description": "Gemini 2.5 Pro Preview is Google's most advanced reasoning model, capable of reasoning about complex problems in code, mathematics, and STEM fields, as well as analyzing large datasets, codebases, and documents using long context."
  },
  "gemini-2.5-pro-preview-06-05": {
    "description": "Gemini 2.5 Pro Preview is Google's most advanced cognitive model, capable of reasoning through complex problems in code, mathematics, and STEM fields, as well as analyzing large datasets, codebases, and documents using long-context understanding."
  },
  "gemma-7b-it": {
    "description": "Gemma 7B is suitable for medium to small-scale task processing, offering cost-effectiveness."
  },
  "gemma2": {
    "description": "Gemma 2 is an efficient model launched by Google, covering a variety of application scenarios from small applications to complex data processing."
  },
  "gemma2-9b-it": {
    "description": "Gemma 2 9B is a model optimized for specific tasks and tool integration."
  },
  "gemma2:27b": {
    "description": "Gemma 2 is an efficient model launched by Google, covering a variety of application scenarios from small applications to complex data processing."
  },
  "gemma2:2b": {
    "description": "Gemma 2 is an efficient model launched by Google, covering a variety of application scenarios from small applications to complex data processing."
  },
  "generalv3": {
    "description": "Spark Pro is a high-performance large language model optimized for professional fields, focusing on mathematics, programming, healthcare, education, and more, supporting online search and built-in plugins for weather, dates, etc. Its optimized model demonstrates excellent performance and efficiency in complex knowledge Q&A, language understanding, and high-level text creation, making it an ideal choice for professional application scenarios."
  },
  "generalv3.5": {
    "description": "Spark3.5 Max is the most comprehensive version, supporting online search and numerous built-in plugins. Its fully optimized core capabilities, along with system role settings and function calling features, enable it to perform exceptionally well in various complex application scenarios."
  },
  "glm-4": {
    "description": "GLM-4 is the old flagship version released in January 2024, currently replaced by the more powerful GLM-4-0520."
  },
  "glm-4-0520": {
    "description": "GLM-4-0520 is the latest model version designed for highly complex and diverse tasks, demonstrating outstanding performance."
  },
  "glm-4-9b-chat": {
    "description": "GLM-4-9B-Chat demonstrates high performance across various aspects, including semantics, mathematics, reasoning, coding, and knowledge. It also features web browsing, code execution, custom tool invocation, and long text reasoning, supporting 26 languages including Japanese, Korean, and German."
  },
  "glm-4-air": {
    "description": "GLM-4-Air is a cost-effective version with performance close to GLM-4, offering fast speed at an affordable price."
  },
  "glm-4-air-250414": {
    "description": "GLM-4-Air is a cost-effective version, with performance close to GLM-4, offering fast speed at an affordable price."
  },
  "glm-4-airx": {
    "description": "GLM-4-AirX provides an efficient version of GLM-4-Air, with inference speeds up to 2.6 times faster."
  },
  "glm-4-alltools": {
    "description": "GLM-4-AllTools is a multifunctional intelligent agent model optimized to support complex instruction planning and tool invocation, such as web browsing, code interpretation, and text generation, suitable for multitasking."
  },
  "glm-4-flash": {
    "description": "GLM-4-Flash is the ideal choice for handling simple tasks, being the fastest and most cost-effective."
  },
  "glm-4-flash-250414": {
    "description": "GLM-4-Flash is the ideal choice for handling simple tasks, being the fastest and free."
  },
  "glm-4-flashx": {
    "description": "GLM-4-FlashX is an enhanced version of Flash, featuring ultra-fast inference speed."
  },
  "glm-4-long": {
    "description": "GLM-4-Long supports ultra-long text inputs, suitable for memory-based tasks and large-scale document processing."
  },
  "glm-4-plus": {
    "description": "GLM-4-Plus, as a high-intelligence flagship, possesses strong capabilities for processing long texts and complex tasks, with overall performance improvements."
  },
  "glm-4v": {
    "description": "GLM-4V provides strong image understanding and reasoning capabilities, supporting various visual tasks."
  },
  "glm-4v-flash": {
    "description": "GLM-4V-Flash focuses on efficient single image understanding, suitable for scenarios that require rapid image parsing, such as real-time image analysis or batch image processing."
  },
  "glm-4v-plus": {
    "description": "GLM-4V-Plus has the ability to understand video content and multiple images, suitable for multimodal tasks."
  },
  "glm-4v-plus-0111": {
    "description": "GLM-4V-Plus has the capability to understand video content and multiple images, making it suitable for multimodal tasks."
  },
  "glm-z1-air": {
    "description": "Reasoning model: possesses strong reasoning capabilities, suitable for tasks requiring deep reasoning."
  },
  "glm-z1-airx": {
    "description": "Ultra-fast reasoning: features extremely fast reasoning speed and powerful reasoning effects."
  },
  "glm-z1-flash": {
    "description": "The GLM-Z1 series possesses strong complex reasoning capabilities, excelling in logical reasoning, mathematics, programming, and more. The maximum context length is 32K."
  },
  "glm-zero-preview": {
    "description": "GLM-Zero-Preview possesses strong complex reasoning abilities, excelling in logical reasoning, mathematics, programming, and other fields."
  },
  "google/gemini-2.0-flash-001": {
    "description": "Gemini 2.0 Flash offers next-generation features and improvements, including exceptional speed, native tool usage, multimodal generation, and a 1M token context window."
  },
  "google/gemini-2.0-flash-exp:free": {
    "description": "Gemini 2.0 Flash Experimental is Google's latest experimental multimodal AI model, showing a quality improvement compared to historical versions, especially in world knowledge, code, and long context."
  },
  "google/gemini-2.5-flash-preview": {
    "description": "Gemini 2.5 Flash is Google's most advanced flagship model, designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in 'thinking' capabilities that allow it to provide responses with higher accuracy and detailed context handling.\n\nNote: This model has two variants: thinking and non-thinking. Output pricing varies significantly based on whether the thinking capability is activated. If you choose the standard variant (without the ':thinking' suffix), the model will explicitly avoid generating thinking tokens.\n\nTo leverage the thinking capability and receive thinking tokens, you must select the ':thinking' variant, which will incur higher thinking output pricing.\n\nAdditionally, Gemini 2.5 Flash can be configured via the 'maximum tokens for reasoning' parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning)."
  },
  "google/gemini-2.5-flash-preview:thinking": {
    "description": "Gemini 2.5 Flash is Google's most advanced flagship model, designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in 'thinking' capabilities that allow it to provide responses with higher accuracy and detailed context handling.\n\nNote: This model has two variants: thinking and non-thinking. Output pricing varies significantly based on whether the thinking capability is activated. If you choose the standard variant (without the ':thinking' suffix), the model will explicitly avoid generating thinking tokens.\n\nTo leverage the thinking capability and receive thinking tokens, you must select the ':thinking' variant, which will incur higher thinking output pricing.\n\nAdditionally, Gemini 2.5 Flash can be configured via the 'maximum tokens for reasoning' parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning)."
  },
  "google/gemini-2.5-pro-preview-03-25": {
    "description": "Gemini 2.5 Pro is Google's cutting-edge AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It features 'thinking' capabilities that enable it to reason responses with higher accuracy and detailed context handling. Gemini 2.5 Pro has achieved top performance in multiple benchmark tests, including ranking first on the LMArena leaderboard, reflecting exceptional human preference alignment and complex problem-solving abilities."
  },
  "google/gemini-flash-1.5": {
    "description": "Gemini 1.5 Flash offers optimized multimodal processing capabilities, suitable for various complex task scenarios."
  },
  "google/gemini-pro-1.5": {
    "description": "Gemini 1.5 Pro combines the latest optimization technologies to deliver more efficient multimodal data processing capabilities."
  },
  "google/gemma-2-27b": {
    "description": "Gemma 2 is an efficient model launched by Google, covering a variety of application scenarios from small applications to complex data processing."
  },
  "google/gemma-2-27b-it": {
    "description": "Gemma 2 continues the design philosophy of being lightweight and efficient."
  },
  "google/gemma-2-2b-it": {
    "description": "Google's lightweight instruction-tuning model."
  },
  "google/gemma-2-9b": {
    "description": "Gemma 2 is an efficient model launched by Google, covering a variety of application scenarios from small applications to complex data processing."
  },
  "google/gemma-2-9b-it": {
    "description": "Gemma 2 is Google's lightweight open-source text model series."
  },
  "google/gemma-2-9b-it:free": {
    "description": "Gemma 2 is Google's lightweight open-source text model series."
  },
  "google/gemma-2b-it": {
    "description": "Gemma Instruct (2B) provides basic instruction processing capabilities, suitable for lightweight applications."
  },
  "google/gemma-3-27b-it": {
    "description": "Gemma 3 27B is an open-source language model from Google that sets new standards in efficiency and performance."
  },
  "gpt-3.5-turbo": {
    "description": "GPT 3.5 Turbo is suitable for various text generation and understanding tasks. Currently points to gpt-3.5-turbo-0125."
  },
  "gpt-3.5-turbo-0125": {
    "description": "GPT 3.5 Turbo is suitable for various text generation and understanding tasks. Currently points to gpt-3.5-turbo-0125."
  },
  "gpt-3.5-turbo-1106": {
    "description": "GPT 3.5 Turbo is suitable for various text generation and understanding tasks. Currently points to gpt-3.5-turbo-0125."
  },
  "gpt-3.5-turbo-instruct": {
    "description": "GPT 3.5 Turbo is suitable for various text generation and understanding tasks. Currently points to gpt-3.5-turbo-0125."
  },
  "gpt-35-turbo": {
    "description": "GPT 3.5 Turbo is an efficient model provided by OpenAI, suitable for chat and text generation tasks, supporting parallel function calls."
  },
  "gpt-35-turbo-16k": {
    "description": "GPT 3.5 Turbo 16k is a high-capacity text generation model suitable for complex tasks."
  },
  "gpt-4": {
    "description": "GPT-4 offers a larger context window, capable of handling longer text inputs, making it suitable for scenarios that require extensive information integration and data analysis."
  },
  "gpt-4-0125-preview": {
    "description": "The latest GPT-4 Turbo model features visual capabilities. Now, visual requests can be made using JSON format and function calls. GPT-4 Turbo is an enhanced version that provides cost-effective support for multimodal tasks. It strikes a balance between accuracy and efficiency, making it suitable for applications requiring real-time interaction."
  },
  "gpt-4-0613": {
    "description": "GPT-4 offers a larger context window, capable of handling longer text inputs, making it suitable for scenarios that require extensive information integration and data analysis."
  },
  "gpt-4-1106-preview": {
    "description": "The latest GPT-4 Turbo model features visual capabilities. Now, visual requests can be made using JSON format and function calls. GPT-4 Turbo is an enhanced version that provides cost-effective support for multimodal tasks. It strikes a balance between accuracy and efficiency, making it suitable for applications requiring real-time interaction."
  },
  "gpt-4-32k": {
    "description": "GPT-4 offers a larger context window, capable of handling longer text inputs, making it suitable for scenarios that require extensive information integration and data analysis."
  },
  "gpt-4-32k-0613": {
    "description": "GPT-4 offers a larger context window, capable of handling longer text inputs, making it suitable for scenarios that require extensive information integration and data analysis."
  },
  "gpt-4-turbo": {
    "description": "The latest GPT-4 Turbo model features visual capabilities. Now, visual requests can be made using JSON format and function calls. GPT-4 Turbo is an enhanced version that provides cost-effective support for multimodal tasks. It strikes a balance between accuracy and efficiency, making it suitable for applications requiring real-time interaction."
  },
  "gpt-4-turbo-2024-04-09": {
    "description": "The latest GPT-4 Turbo model features visual capabilities. Now, visual requests can be made using JSON format and function calls. GPT-4 Turbo is an enhanced version that provides cost-effective support for multimodal tasks. It strikes a balance between accuracy and efficiency, making it suitable for applications requiring real-time interaction."
  },
  "gpt-4-turbo-preview": {
    "description": "The latest GPT-4 Turbo model features visual capabilities. Now, visual requests can be made using JSON format and function calls. GPT-4 Turbo is an enhanced version that provides cost-effective support for multimodal tasks. It strikes a balance between accuracy and efficiency, making it suitable for applications requiring real-time interaction."
  },
  "gpt-4-vision-preview": {
    "description": "The latest GPT-4 Turbo model features visual capabilities. Now, visual requests can be made using JSON format and function calls. GPT-4 Turbo is an enhanced version that provides cost-effective support for multimodal tasks. It strikes a balance between accuracy and efficiency, making it suitable for applications requiring real-time interaction."
  },
  "gpt-4.1": {
    "description": "GPT-4.1 is our flagship model for complex tasks. It excels at solving problems across various domains."
  },
  "gpt-4.1-mini": {
    "description": "GPT-4.1 mini offers a balance of intelligence, speed, and cost, making it an attractive model for many use cases."
  },
  "gpt-4.1-nano": {
    "description": "GPT-4.1 nano provides a balance of intelligence, speed, and cost, making it an appealing model for numerous applications."
  },
  "gpt-4.5-preview": {
    "description": "The research preview of GPT-4.5, our largest and most powerful GPT model to date. It possesses extensive world knowledge and better understands user intent, excelling in creative tasks and autonomous planning. GPT-4.5 accepts both text and image inputs and generates text outputs (including structured outputs). It supports key developer features such as function calling, batch API, and streaming output. GPT-4.5 particularly shines in tasks that require creativity, open-ended thinking, and dialogue, such as writing, learning, or exploring new ideas. Knowledge cutoff date is October 2023."
  },
  "gpt-4o": {
    "description": "ChatGPT-4o is a dynamic model that updates in real-time to stay current with the latest version. It combines powerful language understanding and generation capabilities, making it suitable for large-scale applications, including customer service, education, and technical support."
  },
  "gpt-4o-2024-05-13": {
    "description": "ChatGPT-4o is a dynamic model that updates in real-time to stay current with the latest version. It combines powerful language understanding and generation capabilities, making it suitable for large-scale applications, including customer service, education, and technical support."
  },
  "gpt-4o-2024-08-06": {
    "description": "ChatGPT-4o is a dynamic model that updates in real-time to stay current with the latest version. It combines powerful language understanding and generation capabilities, making it suitable for large-scale applications, including customer service, education, and technical support."
  },
  "gpt-4o-2024-11-20": {
    "description": "ChatGPT-4o is a dynamic model that updates in real-time to maintain the latest version. It combines powerful language understanding and generation capabilities, making it suitable for large-scale applications including customer service, education, and technical support."
  },
  "gpt-4o-audio-preview": {
    "description": "GPT-4o Audio model, supporting audio input and output."
  },
  "gpt-4o-mini": {
    "description": "GPT-4o mini is the latest model released by OpenAI after GPT-4 Omni, supporting both image and text input while outputting text. As their most advanced small model, it is significantly cheaper than other recent cutting-edge models, costing over 60% less than GPT-3.5 Turbo. It maintains state-of-the-art intelligence while offering remarkable cost-effectiveness. GPT-4o mini scored 82% on the MMLU test and currently ranks higher than GPT-4 in chat preferences."
  },
  "gpt-4o-mini-audio-preview": {
    "description": "GPT-4o mini Audio model supports audio input and output."
  },
  "gpt-4o-mini-realtime-preview": {
    "description": "GPT-4o-mini real-time version, supporting real-time audio and text input and output."
  },
  "gpt-4o-mini-search-preview": {
    "description": "GPT-4o mini Search Preview is a model specifically trained to understand and execute web search queries, using the Chat Completions API. In addition to token fees, web search queries incur charges per tool invocation."
  },
  "gpt-4o-mini-tts": {
    "description": "GPT-4o mini TTS is a text-to-speech model based on GPT-4o mini, providing high-quality speech generation at a lower cost."
  },
  "gpt-4o-realtime-preview": {
    "description": "GPT-4o real-time version, supporting real-time audio and text input and output."
  },
  "gpt-4o-realtime-preview-2024-10-01": {
    "description": "GPT-4o real-time version, supporting real-time audio and text input and output."
  },
  "gpt-4o-realtime-preview-2024-12-17": {
    "description": "GPT-4o real-time version, supporting real-time audio and text input and output."
  },
  "gpt-4o-search-preview": {
    "description": "GPT-4o Search Preview is a model specifically trained to understand and execute web search queries, using the Chat Completions API. In addition to token fees, web search queries incur charges per tool invocation."
  },
  "grok-2-1212": {
    "description": "This model has improved in accuracy, instruction adherence, and multilingual capabilities."
  },
  "grok-2-vision-1212": {
    "description": "This model has improved in accuracy, instruction adherence, and multilingual capabilities."
  },
  "grok-3": {
    "description": "A flagship model skilled in data extraction, programming, and text summarization for enterprise applications, with deep knowledge in finance, healthcare, law, and science."
  },
  "grok-3-fast": {
    "description": "A flagship model skilled in data extraction, programming, and text summarization for enterprise applications, with deep knowledge in finance, healthcare, law, and science."
  },
  "grok-3-mini": {
    "description": "A lightweight model that thinks before responding. It runs fast and intelligently, suitable for logical tasks that do not require deep domain knowledge, and can provide raw thought trajectories."
  },
  "grok-3-mini-fast": {
    "description": "A lightweight model that thinks before responding. It runs fast and intelligently, suitable for logical tasks that do not require deep domain knowledge, and can provide raw thought trajectories."
  },
  "gryphe/mythomax-l2-13b": {
    "description": "MythoMax l2 13B is a language model that combines creativity and intelligence by merging multiple top models."
  },
  "hunyuan-code": {
    "description": "The latest code generation model from Hunyuan, trained on a base model with 200B high-quality code data, iteratively trained for six months with high-quality SFT data, increasing the context window length to 8K. It ranks among the top in automatic evaluation metrics for code generation across five major programming languages, and performs in the first tier for comprehensive human quality assessments across ten aspects of coding tasks."
  },
  "hunyuan-functioncall": {
    "description": "The latest MOE architecture FunctionCall model from Hunyuan, trained on high-quality FunctionCall data, with a context window of 32K, leading in multiple dimensions of evaluation metrics."
  },
  "hunyuan-large": {
    "description": "The Hunyuan-large model has a total parameter count of approximately 389B, with about 52B active parameters, making it the largest and most effective open-source MoE model in the industry based on the Transformer architecture."
  },
  "hunyuan-large-longcontext": {
    "description": "Specializes in handling long text tasks such as document summarization and question answering, while also capable of general text generation tasks. It excels in analyzing and generating long texts, effectively addressing complex and detailed long-form content processing needs."
  },
  "hunyuan-large-vision": {
    "description": "This model is designed for image-text understanding scenarios. It is a vision-language large model based on Hunyuan Large training, supporting multi-image plus text input at any resolution to generate textual content. It focuses on image-text understanding tasks and shows significant improvements in multilingual image-text comprehension."
  },
  "hunyuan-lite": {
    "description": "Upgraded to a MOE structure with a context window of 256k, leading many open-source models in various NLP, coding, mathematics, and industry benchmarks."
  },
  "hunyuan-lite-vision": {
    "description": "The latest 7B multimodal model from Hunyuan, with a context window of 32K, supports multimodal dialogue in both Chinese and English scenarios, image object recognition, document table understanding, and multimodal mathematics, outperforming 7B competing models across multiple evaluation dimensions."
  },
  "hunyuan-pro": {
    "description": "A trillion-parameter scale MOE-32K long text model. Achieves absolute leading levels across various benchmarks, capable of handling complex instructions and reasoning, with advanced mathematical abilities, supporting function calls, and optimized for applications in multilingual translation, finance, law, and healthcare."
  },
  "hunyuan-role": {
    "description": "The latest role-playing model from Hunyuan, fine-tuned and trained by Hunyuan's official team, based on the Hunyuan model combined with role-playing scenario datasets for enhanced foundational performance in role-playing contexts."
  },
  "hunyuan-standard": {
    "description": "Utilizes a superior routing strategy while alleviating issues of load balancing and expert convergence. For long texts, the needle-in-a-haystack metric reaches 99.9%. MOE-32K offers a relatively higher cost-performance ratio, balancing effectiveness and price while enabling processing of long text inputs."
  },
  "hunyuan-standard-256K": {
    "description": "Utilizes a superior routing strategy while alleviating issues of load balancing and expert convergence. For long texts, the needle-in-a-haystack metric reaches 99.9%. MOE-256K further breaks through in length and effectiveness, greatly expanding the input length capacity."
  },
  "hunyuan-standard-vision": {
    "description": "The latest multimodal model from Hunyuan, supporting multilingual responses with balanced capabilities in both Chinese and English."
  },
  "hunyuan-t1-20250321": {
    "description": "Comprehensively builds model capabilities in both arts and sciences, with strong long-text information capture ability. Supports reasoning and answering various scientific questions, including mathematics, logic, science, and code, of varying difficulty."
  },
  "hunyuan-t1-20250403": {
    "description": "Enhance project-level code generation capabilities; improve the quality of text generation and writing; enhance multi-turn topic understanding, ToB instruction compliance, and word comprehension; optimize issues with mixed traditional and simplified Chinese as well as mixed Chinese and English output."
  },
  "hunyuan-t1-20250529": {
    "description": "Optimized for text creation and essay writing, with enhanced abilities in frontend coding, mathematics, logical reasoning, and improved instruction-following capabilities."
  },
  "hunyuan-t1-latest": {
    "description": "The industry's first ultra-large-scale Hybrid-Transformer-Mamba inference model, enhancing reasoning capabilities with exceptional decoding speed, further aligning with human preferences."
  },
  "hunyuan-t1-vision": {
    "description": "Hunyuan is a multimodal deep thinking model supporting native multimodal chain-of-thought reasoning, excelling in various image reasoning scenarios and significantly outperforming fast-thinking models on science problems."
  },
  "hunyuan-turbo": {
    "description": "The preview version of the next-generation Hunyuan large language model, featuring a brand-new mixed expert model (MoE) structure, which offers faster inference efficiency and stronger performance compared to Hunyuan Pro."
  },
  "hunyuan-turbo-20241223": {
    "description": "This version optimizes: data instruction scaling, significantly enhancing the model's generalization capabilities; greatly improving mathematical, coding, and logical reasoning abilities; optimizing text understanding and word comprehension capabilities; enhancing the quality of content generation in text creation."
  },
  "hunyuan-turbo-latest": {
    "description": "General experience optimization, including NLP understanding, text creation, casual conversation, knowledge Q&A, translation, and domain-specific tasks; enhanced personification and emotional intelligence of the model; improved the model's ability to clarify when intentions are ambiguous; enhanced handling of word parsing-related questions; improved the quality and interactivity of creative outputs; enhanced multi-turn experience."
  },
  "hunyuan-turbo-vision": {
    "description": "The next-generation flagship visual language model from Hunyuan, utilizing a new mixed expert model (MoE) structure, with comprehensive improvements in basic recognition, content creation, knowledge Q&A, and analytical reasoning capabilities compared to the previous generation model."
  },
  "hunyuan-turbos-20250313": {
    "description": "Standardize the style of mathematical problem-solving steps and strengthen multi-turn math Q&A. Optimize text creation by refining response style, removing AI-like tone, and adding literary flair."
  },
  "hunyuan-turbos-20250416": {
    "description": "Upgrade the pre-training foundation to strengthen instruction understanding and compliance; enhance STEM abilities in mathematics, coding, logic, and science during alignment; improve humanities capabilities such as creative writing quality, text comprehension, translation accuracy, and knowledge Q&A; boost agent capabilities across various domains, with a focus on multi-turn dialogue understanding."
  },
  "hunyuan-turbos-20250604": {
    "description": "Upgraded pretraining foundation with improved writing and reading comprehension skills, significantly enhanced coding and STEM abilities, and continuous improvements in following complex instructions."
  },
  "hunyuan-turbos-latest": {
    "description": "The latest version of hunyuan-TurboS, the flagship model of Hunyuan, features enhanced reasoning capabilities and improved user experience."
  },
  "hunyuan-turbos-longtext-128k-20250325": {
    "description": "Specializes in handling long text tasks such as document summarization and question answering, while also capable of general text generation tasks. It excels in analyzing and generating long texts, effectively addressing complex and detailed long-form content processing needs."
  },
  "hunyuan-turbos-role-plus": {
    "description": "The latest Hunyuan role-playing model, officially fine-tuned and trained by Hunyuan. It is further trained on role-playing scenario datasets based on the Hunyuan model, delivering better foundational performance in role-playing contexts."
  },
  "hunyuan-vision": {
    "description": "The latest multimodal model from Hunyuan, supporting image + text input to generate textual content."
  },
  "internlm/internlm2_5-7b-chat": {
    "description": "InternLM2.5 offers intelligent dialogue solutions across multiple scenarios."
  },
  "internlm2.5-latest": {
    "description": "Our latest model series, featuring exceptional reasoning performance, supporting a context length of 1M, and enhanced instruction following and tool invocation capabilities."
  },
  "internlm3-latest": {
    "description": "Our latest model series boasts exceptional inference performance, leading the pack among open-source models of similar scale. It defaults to our most recently released InternLM3 series models."
  },
  "internvl2.5-latest": {
    "description": "The InternVL2.5 version we continue to maintain, offering excellent and stable performance. It defaults to our latest released InternVL2.5 series model, currently pointing to internvl2.5-78b."
  },
  "internvl3-latest": {
    "description": "Our latest released multimodal large model, featuring enhanced image-text understanding capabilities and long-sequence image comprehension, performs on par with top proprietary models. It defaults to our latest released InternVL series model, currently pointing to internvl3-78b."
  },
  "jamba-large": {
    "description": "Our most powerful and advanced model, designed for handling complex enterprise-level tasks with exceptional performance."
  },
  "jamba-mini": {
    "description": "The most efficient model in its class, balancing speed and quality while maintaining a smaller size."
  },
  "jina-deepsearch-v1": {
    "description": "DeepSearch combines web search, reading, and reasoning for comprehensive investigations. You can think of it as an agent that takes on your research tasks—it conducts extensive searches and iterates multiple times before providing answers. This process involves ongoing research, reasoning, and problem-solving from various angles. This fundamentally differs from standard large models that generate answers directly from pre-trained data and traditional RAG systems that rely on one-time surface searches."
  },
  "kimi-latest": {
    "description": "The Kimi Smart Assistant product uses the latest Kimi large model, which may include features that are not yet stable. It supports image understanding and will automatically select the 8k/32k/128k model as the billing model based on the length of the request context."
  },
  "kimi-thinking-preview": {
    "description": "kimi-thinking-preview is a multimodal thinking model provided by Dark Side of the Moon, featuring multimodal and general reasoning abilities. It excels at deep reasoning to help solve more complex and challenging problems."
  },
  "learnlm-1.5-pro-experimental": {
    "description": "LearnLM is an experimental, task-specific language model trained to align with learning science principles, capable of following systematic instructions in teaching and learning scenarios, acting as an expert tutor, among other roles."
  },
  "learnlm-2.0-flash-experimental": {
    "description": "LearnLM is an experimental, task-specific language model trained to align with the principles of learning science, capable of following systematic instructions in teaching and learning scenarios, acting as an expert tutor, among other roles."
  },
  "lite": {
    "description": "Spark Lite is a lightweight large language model with extremely low latency and efficient processing capabilities, completely free and open, supporting real-time online search functionality. Its quick response feature makes it excel in inference applications and model fine-tuning on low-power devices, providing users with excellent cost-effectiveness and intelligent experiences, particularly in knowledge Q&A, content generation, and search scenarios."
  },
  "llama-2-7b-chat": {
    "description": "Llama2 is a series of large language models (LLMs) developed and open-sourced by Meta. This series includes generative text models of varying sizes, ranging from 7 billion to 70 billion parameters, which have been pre-trained and fine-tuned. Architecturally, Llama2 is an autoregressive language model that uses an optimized transformer architecture. The fine-tuned versions leverage supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for usefulness and safety. Llama2 outperforms the Llama series on multiple academic datasets and provides valuable insights for the design and development of other models."
  },
  "llama-3.1-70b-versatile": {
    "description": "Llama 3.1 70B provides enhanced AI reasoning capabilities, suitable for complex applications, supporting extensive computational processing while ensuring efficiency and accuracy."
  },
  "llama-3.1-8b-instant": {
    "description": "Llama 3.1 8B is a high-performance model that offers rapid text generation capabilities, making it ideal for applications requiring large-scale efficiency and cost-effectiveness."
  },
  "llama-3.1-instruct": {
    "description": "The Llama 3.1 instruction-tuned model is optimized for conversational scenarios, outperforming many existing open-source chat models on common industry benchmarks."
  },
  "llama-3.2-11b-vision-instruct": {
    "description": "Excellent image reasoning capabilities on high-resolution images, suitable for visual understanding applications."
  },
  "llama-3.2-11b-vision-preview": {
    "description": "Llama 3.2 is designed to handle tasks that combine visual and textual data. It excels in tasks such as image description and visual question answering, bridging the gap between language generation and visual reasoning."
  },
  "llama-3.2-90b-vision-instruct": {
    "description": "Advanced image reasoning capabilities suitable for visual understanding agent applications."
  },
  "llama-3.2-90b-vision-preview": {
    "description": "Llama 3.2 is designed to handle tasks that combine visual and textual data. It excels in tasks such as image description and visual question answering, bridging the gap between language generation and visual reasoning."
  },
  "llama-3.2-vision-instruct": {
    "description": "The Llama 3.2-Vision instruction-tuned model is optimized for visual recognition, image reasoning, image captioning, and answering general questions related to images."
  },
  "llama-3.3-70b-instruct": {
    "description": "Llama 3.3 is the most advanced multilingual open-source large language model in the Llama series, offering performance comparable to a 405B model at an extremely low cost. Based on the Transformer architecture, it enhances usability and safety through supervised fine-tuning (SFT) and reinforcement learning from human feedback (RLHF). Its instruction-tuned version is optimized for multilingual dialogue and outperforms many open-source and closed chat models on various industry benchmarks. Knowledge cutoff date is December 2023."
  },
  "llama-3.3-70b-versatile": {
    "description": "Meta Llama 3.3 is a multilingual large language model (LLM) with 70 billion parameters (text input/text output), featuring pre-training and instruction-tuning. The instruction-tuned pure text model of Llama 3.3 is optimized for multilingual conversational use cases and outperforms many available open-source and closed chat models on common industry benchmarks."
  },
  "llama-3.3-instruct": {
    "description": "The Llama 3.3 instruction-tuned model is optimized for conversational scenarios, outperforming many existing open-source chat models on common industry benchmarks."
  },
  "llama3-70b-8192": {
    "description": "Meta Llama 3 70B provides unparalleled complexity handling capabilities, tailored for high-demand projects."
  },
  "llama3-8b-8192": {
    "description": "Meta Llama 3 8B delivers high-quality reasoning performance, suitable for diverse application needs."
  },
  "llama3-groq-70b-8192-tool-use-preview": {
    "description": "Llama 3 Groq 70B Tool Use offers powerful tool invocation capabilities, supporting efficient processing of complex tasks."
  },
  "llama3-groq-8b-8192-tool-use-preview": {
    "description": "Llama 3 Groq 8B Tool Use is a model optimized for efficient tool usage, supporting fast parallel computation."
  },
  "llama3.1": {
    "description": "Llama 3.1 is a leading model launched by Meta, supporting up to 405B parameters, applicable in complex dialogues, multilingual translation, and data analysis."
  },
  "llama3.1:405b": {
    "description": "Llama 3.1 is a leading model launched by Meta, supporting up to 405B parameters, applicable in complex dialogues, multilingual translation, and data analysis."
  },
  "llama3.1:70b": {
    "description": "Llama 3.1 is a leading model launched by Meta, supporting up to 405B parameters, applicable in complex dialogues, multilingual translation, and data analysis."
  },
  "llava": {
    "description": "LLaVA is a multimodal model that combines a visual encoder with Vicuna for powerful visual and language understanding."
  },
  "llava-v1.5-7b-4096-preview": {
    "description": "LLaVA 1.5 7B offers integrated visual processing capabilities, generating complex outputs from visual information inputs."
  },
  "llava:13b": {
    "description": "LLaVA is a multimodal model that combines a visual encoder with Vicuna for powerful visual and language understanding."
  },
  "llava:34b": {
    "description": "LLaVA is a multimodal model that combines a visual encoder with Vicuna for powerful visual and language understanding."
  },
  "mathstral": {
    "description": "MathΣtral is designed for scientific research and mathematical reasoning, providing effective computational capabilities and result interpretation."
  },
  "max-32k": {
    "description": "Spark Max 32K is configured with large context processing capabilities, enhanced contextual understanding, and logical reasoning abilities, supporting text input of 32K tokens, suitable for long document reading, private knowledge Q&A, and other scenarios."
  },
  "megrez-3b-instruct": {
    "description": "Megrez-3B-Instruct is a large language model fully trained by Wuwen Xin Qiong. Megrez-3B-Instruct aims to create an ultra-fast, compact, and easy-to-use intelligent solution for edge devices through the concept of hardware-software co-design."
  },
  "meta-llama-3-70b-instruct": {
    "description": "A powerful 70-billion parameter model excelling in reasoning, coding, and broad language applications."
  },
  "meta-llama-3-8b-instruct": {
    "description": "A versatile 8-billion parameter model optimized for dialogue and text generation tasks."
  },
  "meta-llama-3.1-405b-instruct": {
    "description": "The Llama 3.1 instruction-tuned text-only models are optimized for multilingual dialogue use cases and outperform many of the available open-source and closed chat models on common industry benchmarks."
  },
  "meta-llama-3.1-70b-instruct": {
    "description": "The Llama 3.1 instruction-tuned text-only models are optimized for multilingual dialogue use cases and outperform many of the available open-source and closed chat models on common industry benchmarks."
  },
  "meta-llama-3.1-8b-instruct": {
    "description": "The Llama 3.1 instruction-tuned text-only models are optimized for multilingual dialogue use cases and outperform many of the available open-source and closed chat models on common industry benchmarks."
  },
  "meta-llama/Llama-2-13b-chat-hf": {
    "description": "LLaMA-2 Chat (13B) offers excellent language processing capabilities and outstanding interactive experiences."
  },
  "meta-llama/Llama-2-70b-hf": {
    "description": "LLaMA-2 provides excellent language processing capabilities and outstanding interactive experiences."
  },
  "meta-llama/Llama-3-70b-chat-hf": {
    "description": "LLaMA-3 Chat (70B) is a powerful chat model that supports complex conversational needs."
  },
  "meta-llama/Llama-3-8b-chat-hf": {
    "description": "LLaMA-3 Chat (8B) provides multilingual support, covering a rich array of domain knowledge."
  },
  "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo": {
    "description": "LLaMA 3.2 is designed for tasks involving both visual and textual data. It excels in tasks like image description and visual question answering, bridging the gap between language generation and visual reasoning."
  },
  "meta-llama/Llama-3.2-3B-Instruct-Turbo": {
    "description": "LLaMA 3.2 is designed for tasks involving both visual and textual data. It excels in tasks like image description and visual question answering, bridging the gap between language generation and visual reasoning."
  },
  "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo": {
    "description": "LLaMA 3.2 is designed for tasks involving both visual and textual data. It excels in tasks like image description and visual question answering, bridging the gap between language generation and visual reasoning."
  },
  "meta-llama/Llama-3.3-70B-Instruct-Turbo": {
    "description": "Meta Llama 3.3 is a multilingual large language model (LLM) that is a pre-trained and instruction-tuned generative model within the 70B (text input/text output) framework. The instruction-tuned pure text model is optimized for multilingual dialogue use cases and outperforms many available open-source and closed chat models on common industry benchmarks."
  },
  "meta-llama/Llama-Vision-Free": {
    "description": "LLaMA 3.2 is designed for tasks involving both visual and textual data. It excels in tasks like image description and visual question answering, bridging the gap between language generation and visual reasoning."
  },
  "meta-llama/Meta-Llama-3-70B-Instruct-Lite": {
    "description": "Llama 3 70B Instruct Lite is suitable for environments requiring high performance and low latency."
  },
  "meta-llama/Meta-Llama-3-70B-Instruct-Turbo": {
    "description": "Llama 3 70B Instruct Turbo offers exceptional language understanding and generation capabilities, suitable for the most demanding computational tasks."
  },
  "meta-llama/Meta-Llama-3-8B-Instruct-Lite": {
    "description": "Llama 3 8B Instruct Lite is designed for resource-constrained environments, providing excellent balanced performance."
  },
  "meta-llama/Meta-Llama-3-8B-Instruct-Turbo": {
    "description": "Llama 3 8B Instruct Turbo is a high-performance large language model, supporting a wide range of application scenarios."
  },
  "meta-llama/Meta-Llama-3.1-405B-Instruct": {
    "description": "LLaMA 3.1 405B is a powerful model for pre-training and instruction tuning."
  },
  "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": {
    "description": "The 405B Llama 3.1 Turbo model provides massive context support for big data processing, excelling in large-scale AI applications."
  },
  "meta-llama/Meta-Llama-3.1-70B": {
    "description": "Llama 3.1 is a leading model launched by Meta, supporting up to 405B parameters, applicable in complex conversations, multilingual translation, and data analysis."
  },
  "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": {
    "description": "Llama 3.1 70B model is finely tuned for high-load applications, quantized to FP8 for enhanced computational efficiency and accuracy, ensuring outstanding performance in complex scenarios."
  },
  "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": {
    "description": "Llama 3.1 8B model utilizes FP8 quantization, supporting up to 131,072 context tokens, making it a standout in open-source models, excelling in complex tasks and outperforming many industry benchmarks."
  },
  "meta-llama/llama-3-70b-instruct": {
    "description": "Llama 3 70B Instruct is optimized for high-quality conversational scenarios, demonstrating excellent performance in various human evaluations."
  },
  "meta-llama/llama-3-8b-instruct": {
    "description": "Llama 3 8B Instruct is optimized for high-quality conversational scenarios, performing better than many closed-source models."
  },
  "meta-llama/llama-3.1-70b-instruct": {
    "description": "Llama 3.1 70B Instruct is designed for high-quality conversations, excelling in human evaluations, particularly in highly interactive scenarios."
  },
  "meta-llama/llama-3.1-8b-instruct": {
    "description": "Llama 3.1 8B Instruct is the latest version released by Meta, optimized for high-quality conversational scenarios, outperforming many leading closed-source models."
  },
  "meta-llama/llama-3.1-8b-instruct:free": {
    "description": "LLaMA 3.1 offers multilingual support and is one of the industry's leading generative models."
  },
  "meta-llama/llama-3.2-11b-vision-instruct": {
    "description": "LLaMA 3.2 is designed to handle tasks that combine visual and textual data. It excels in tasks such as image description and visual question answering, bridging the gap between language generation and visual reasoning."
  },
  "meta-llama/llama-3.2-3b-instruct": {
    "description": "meta-llama/llama-3.2-3b-instruct"
  },
  "meta-llama/llama-3.2-90b-vision-instruct": {
    "description": "LLaMA 3.2 is designed to handle tasks that combine visual and textual data. It excels in tasks such as image description and visual question answering, bridging the gap between language generation and visual reasoning."
  },
  "meta-llama/llama-3.3-70b-instruct": {
    "description": "Llama 3.3 is the most advanced multilingual open-source large language model in the Llama series, offering performance comparable to a 405B model at an extremely low cost. Based on the Transformer architecture, it enhances usability and safety through supervised fine-tuning (SFT) and reinforcement learning from human feedback (RLHF). Its instruction-tuned version is optimized for multilingual dialogue and outperforms many open-source and closed chat models on various industry benchmarks. Knowledge cutoff date is December 2023."
  },
  "meta-llama/llama-3.3-70b-instruct:free": {
    "description": "Llama 3.3 is the most advanced multilingual open-source large language model in the Llama series, offering performance comparable to a 405B model at an extremely low cost. Based on the Transformer architecture, it enhances usability and safety through supervised fine-tuning (SFT) and reinforcement learning from human feedback (RLHF). Its instruction-tuned version is optimized for multilingual dialogue and outperforms many open-source and closed chat models on various industry benchmarks. Knowledge cutoff date is December 2023."
  },
  "meta.llama3-1-405b-instruct-v1:0": {
    "description": "Meta Llama 3.1 405B Instruct is the largest and most powerful model in the Llama 3.1 Instruct series. It is a highly advanced conversational reasoning and synthetic data generation model, which can also serve as a foundation for specialized continuous pre-training or fine-tuning in specific domains. The multilingual large language models (LLMs) provided by Llama 3.1 are a set of pre-trained, instruction-tuned generative models, including sizes of 8B, 70B, and 405B (text input/output). The instruction-tuned text models (8B, 70B, 405B) are optimized for multilingual conversational use cases and have outperformed many available open-source chat models in common industry benchmarks. Llama 3.1 is designed for commercial and research purposes across multiple languages. The instruction-tuned text models are suitable for assistant-like chat, while the pre-trained models can adapt to various natural language generation tasks. The Llama 3.1 models also support improving other models using their outputs, including synthetic data generation and refinement. Llama 3.1 is an autoregressive language model built using an optimized transformer architecture. The tuned versions utilize supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety."
  },
  "meta.llama3-1-70b-instruct-v1:0": {
    "description": "The updated version of Meta Llama 3.1 70B Instruct includes an extended 128K context length, multilingual capabilities, and improved reasoning abilities. The multilingual large language models (LLMs) provided by Llama 3.1 are a set of pre-trained, instruction-tuned generative models, including sizes of 8B, 70B, and 405B (text input/output). The instruction-tuned text models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and have surpassed many available open-source chat models in common industry benchmarks. Llama 3.1 is designed for commercial and research purposes in multiple languages. The instruction-tuned text models are suitable for assistant-like chat, while the pre-trained models can adapt to various natural language generation tasks. The Llama 3.1 model also supports using its outputs to improve other models, including synthetic data generation and refinement. Llama 3.1 is an autoregressive language model using optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety."
  },
  "meta.llama3-1-8b-instruct-v1:0": {
    "description": "The updated version of Meta Llama 3.1 8B Instruct includes an extended 128K context length, multilingual capabilities, and improved reasoning abilities. The multilingual large language models (LLMs) provided by Llama 3.1 are a set of pre-trained, instruction-tuned generative models, including sizes of 8B, 70B, and 405B (text input/output). The instruction-tuned text models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and have surpassed many available open-source chat models in common industry benchmarks. Llama 3.1 is designed for commercial and research purposes in multiple languages. The instruction-tuned text models are suitable for assistant-like chat, while the pre-trained models can adapt to various natural language generation tasks. The Llama 3.1 model also supports using its outputs to improve other models, including synthetic data generation and refinement. Llama 3.1 is an autoregressive language model using optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety."
  },
  "meta.llama3-70b-instruct-v1:0": {
    "description": "Meta Llama 3 is an open large language model (LLM) aimed at developers, researchers, and enterprises, designed to help them build, experiment, and responsibly scale their generative AI ideas. As part of a foundational system for global community innovation, it is particularly suitable for content creation, conversational AI, language understanding, R&D, and enterprise applications."
  },
  "meta.llama3-8b-instruct-v1:0": {
    "description": "Meta Llama 3 is an open large language model (LLM) aimed at developers, researchers, and enterprises, designed to help them build, experiment, and responsibly scale their generative AI ideas. As part of a foundational system for global community innovation, it is particularly suitable for those with limited computational power and resources, edge devices, and faster training times."
  },
  "meta/Llama-3.2-11B-Vision-Instruct": {
    "description": "Exhibits excellent image reasoning capabilities on high-resolution images, suitable for visual understanding applications."
  },
  "meta/Llama-3.2-90B-Vision-Instruct": {
    "description": "Advanced image reasoning capabilities designed for visual understanding agent applications."
  },
  "meta/Llama-3.3-70B-Instruct": {
    "description": "Llama 3.3 is the most advanced multilingual open-source large language model in the Llama series, offering performance comparable to a 405B model at a very low cost. Based on the Transformer architecture, it is enhanced through supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to improve usefulness and safety. Its instruction-tuned version is optimized for multilingual dialogue and outperforms many open-source and closed chat models on multiple industry benchmarks. Knowledge cutoff date: December 2023."
  },
  "meta/Meta-Llama-3-70B-Instruct": {
    "description": "A powerful 70 billion parameter model excelling in reasoning, coding, and a wide range of language applications."
  },
  "meta/Meta-Llama-3-8B-Instruct": {
    "description": "A versatile 8 billion parameter model optimized for dialogue and text generation tasks."
  },
  "meta/Meta-Llama-3.1-405B-Instruct": {
    "description": "Llama 3.1 instruction-tuned text model optimized for multilingual dialogue use cases, performing excellently on common industry benchmarks among many available open-source and closed chat models."
  },
  "meta/Meta-Llama-3.1-70B-Instruct": {
    "description": "Llama 3.1 instruction-tuned text model optimized for multilingual dialogue use cases, performing excellently on common industry benchmarks among many available open-source and closed chat models."
  },
  "meta/Meta-Llama-3.1-8B-Instruct": {
    "description": "Llama 3.1 instruction-tuned text model optimized for multilingual dialogue use cases, performing excellently on common industry benchmarks among many available open-source and closed chat models."
  },
  "meta/llama-3.1-405b-instruct": {
    "description": "An advanced LLM supporting synthetic data generation, knowledge distillation, and reasoning, suitable for chatbots, programming, and domain-specific tasks."
  },
  "meta/llama-3.1-70b-instruct": {
    "description": "Empowering complex conversations with exceptional context understanding, reasoning capabilities, and text generation abilities."
  },
  "meta/llama-3.1-8b-instruct": {
    "description": "An advanced cutting-edge model with language understanding, excellent reasoning capabilities, and text generation abilities."
  },
  "meta/llama-3.2-11b-vision-instruct": {
    "description": "A state-of-the-art vision-language model adept at high-quality reasoning from images."
  },
  "meta/llama-3.2-1b-instruct": {
    "description": "A cutting-edge small language model with language understanding, excellent reasoning capabilities, and text generation abilities."
  },
  "meta/llama-3.2-3b-instruct": {
    "description": "A cutting-edge small language model with language understanding, excellent reasoning capabilities, and text generation abilities."
  },
  "meta/llama-3.2-90b-vision-instruct": {
    "description": "A state-of-the-art vision-language model adept at high-quality reasoning from images."
  },
  "meta/llama-3.3-70b-instruct": {
    "description": "An advanced LLM skilled in reasoning, mathematics, common sense, and function calling."
  },
  "microsoft/Phi-3-medium-128k-instruct": {
    "description": "The same Phi-3-medium model but with a larger context size, suitable for RAG or few-shot prompting."
  },
  "microsoft/Phi-3-medium-4k-instruct": {
    "description": "A 14 billion parameter model with higher quality than Phi-3-mini, focusing on high-quality, reasoning-intensive data."
  },
  "microsoft/Phi-3-mini-128k-instruct": {
    "description": "The same Phi-3-mini model but with a larger context size, suitable for RAG or few-shot prompting."
  },
  "microsoft/Phi-3-mini-4k-instruct": {
    "description": "The smallest member of the Phi-3 family, optimized for quality and low latency."
  },
  "microsoft/Phi-3-small-128k-instruct": {
    "description": "The same Phi-3-small model but with a larger context size, suitable for RAG or few-shot prompting."
  },
  "microsoft/Phi-3-small-8k-instruct": {
    "description": "A 7 billion parameter model with higher quality than Phi-3-mini, focusing on high-quality, reasoning-intensive data."
  },
  "microsoft/Phi-3.5-mini-instruct": {
    "description": "An updated version of the Phi-3-mini model."
  },
  "microsoft/Phi-3.5-vision-instruct": {
    "description": "An updated version of the Phi-3-vision model."
  },
  "microsoft/WizardLM-2-8x22B": {
    "description": "WizardLM 2 is a language model provided by Microsoft AI, excelling in complex dialogues, multilingual capabilities, reasoning, and intelligent assistant tasks."
  },
  "microsoft/wizardlm-2-8x22b": {
    "description": "WizardLM-2 8x22B is Microsoft's state-of-the-art Wizard model, demonstrating extremely competitive performance."
  },
  "minicpm-v": {
    "description": "MiniCPM-V is a next-generation multimodal large model launched by OpenBMB, boasting exceptional OCR recognition and multimodal understanding capabilities, supporting a wide range of application scenarios."
  },
  "ministral-3b-latest": {
    "description": "Ministral 3B is Mistral's top-tier edge model."
  },
  "ministral-8b-latest": {
    "description": "Ministral 8B is Mistral's cost-effective edge model."
  },
  "mistral": {
    "description": "Mistral is a 7B model released by Mistral AI, suitable for diverse language processing needs."
  },
  "mistral-ai/Mistral-Large-2411": {
    "description": "Mistral's flagship model, ideal for large-scale reasoning or highly specialized complex tasks such as synthetic text generation, code generation, RAG, or agents."
  },
  "mistral-ai/Mistral-Nemo": {
    "description": "Mistral Nemo is a cutting-edge large language model (LLM) with state-of-the-art reasoning, world knowledge, and coding capabilities in its size category."
  },
  "mistral-ai/mistral-small-2503": {
    "description": "Mistral Small is suitable for any language-based task requiring high efficiency and low latency."
  },
  "mistral-large": {
    "description": "Mistral Large is Mistral's flagship model, combining capabilities in code generation, mathematics, and reasoning, supporting a 128k context window."
  },
  "mistral-large-instruct": {
    "description": "Mistral-Large-Instruct-2407 is an advanced dense large language model (LLM) with 123 billion parameters, featuring state-of-the-art reasoning, knowledge, and coding capabilities."
  },
  "mistral-large-latest": {
    "description": "Mistral Large is the flagship model, excelling in multilingual tasks, complex reasoning, and code generation, making it an ideal choice for high-end applications."
  },
  "mistral-medium-latest": {
    "description": "Mistral Medium 3 offers state-of-the-art performance at 8 times lower cost, fundamentally simplifying enterprise deployment."
  },
  "mistral-nemo": {
    "description": "Mistral Nemo, developed in collaboration with Mistral AI and NVIDIA, is a high-performance 12B model."
  },
  "mistral-nemo-instruct": {
    "description": "Mistral-Nemo-Instruct-2407 is the instruction-tuned version of the Mistral-Nemo-Base-2407 large language model (LLM)."
  },
  "mistral-small": {
    "description": "Mistral Small can be used for any language-based task that requires high efficiency and low latency."
  },
  "mistral-small-latest": {
    "description": "Mistral Small is a cost-effective, fast, and reliable option suitable for use cases such as translation, summarization, and sentiment analysis."
  },
  "mistralai/Mistral-7B-Instruct-v0.1": {
    "description": "Mistral (7B) Instruct is known for its high performance, suitable for various language tasks."
  },
  "mistralai/Mistral-7B-Instruct-v0.2": {
    "description": "Mistral 7B is a model fine-tuned on demand, providing optimized answers for tasks."
  },
  "mistralai/Mistral-7B-Instruct-v0.3": {
    "description": "Mistral (7B) Instruct v0.3 offers efficient computational power and natural language understanding, suitable for a wide range of applications."
  },
  "mistralai/Mistral-7B-v0.1": {
    "description": "Mistral 7B is a compact yet high-performance model, adept at handling batch processing and simple tasks like classification and text generation, featuring good reasoning capabilities."
  },
  "mistralai/Mixtral-8x22B-Instruct-v0.1": {
    "description": "Mixtral-8x22B Instruct (141B) is a super large language model that supports extremely high processing demands."
  },
  "mistralai/Mixtral-8x7B-Instruct-v0.1": {
    "description": "Mixtral 8x7B is a pre-trained sparse mixture of experts model for general text tasks."
  },
  "mistralai/Mixtral-8x7B-v0.1": {
    "description": "Mixtral 8x7B is a sparse expert model that utilizes multiple parameters to enhance reasoning speed, suitable for multilingual and code generation tasks."
  },
  "mistralai/mistral-7b-instruct": {
    "description": "Mistral 7B Instruct is a high-performance industry-standard model optimized for speed and long context support."
  },
  "mistralai/mistral-nemo": {
    "description": "Mistral Nemo is a multilingual model with 12 billion parameters, designed for high-performance programming."
  },
  "mixtral": {
    "description": "Mixtral is an expert model from Mistral AI, featuring open-source weights and providing support in code generation and language understanding."
  },
  "mixtral-8x7b-32768": {
    "description": "Mixtral 8x7B provides high fault-tolerant parallel computing capabilities, suitable for complex tasks."
  },
  "mixtral:8x22b": {
    "description": "Mixtral is an expert model from Mistral AI, featuring open-source weights and providing support in code generation and language understanding."
  },
  "moonshot-v1-128k": {
    "description": "Moonshot V1 128K is a model with ultra-long context processing capabilities, suitable for generating extremely long texts, meeting the demands of complex generation tasks, capable of handling up to 128,000 tokens, making it ideal for research, academia, and large document generation."
  },
  "moonshot-v1-128k-vision-preview": {
    "description": "The Kimi visual model (including moonshot-v1-8k-vision-preview, moonshot-v1-32k-vision-preview, moonshot-v1-128k-vision-preview, etc.) can understand image content, including text in images, colors, and shapes of objects."
  },
  "moonshot-v1-32k": {
    "description": "Moonshot V1 32K offers medium-length context processing capabilities, able to handle 32,768 tokens, particularly suitable for generating various long documents and complex dialogues, applicable in content creation, report generation, and dialogue systems."
  },
  "moonshot-v1-32k-vision-preview": {
    "description": "The Kimi visual model (including moonshot-v1-8k-vision-preview, moonshot-v1-32k-vision-preview, moonshot-v1-128k-vision-preview, etc.) can understand image content, including text in images, colors, and shapes of objects."
  },
  "moonshot-v1-8k": {
    "description": "Moonshot V1 8K is designed for generating short text tasks, featuring efficient processing performance, capable of handling 8,192 tokens, making it ideal for brief dialogues, note-taking, and rapid content generation."
  },
  "moonshot-v1-8k-vision-preview": {
    "description": "The Kimi visual model (including moonshot-v1-8k-vision-preview, moonshot-v1-32k-vision-preview, moonshot-v1-128k-vision-preview, etc.) can understand image content, including text in images, colors, and shapes of objects."
  },
  "moonshot-v1-auto": {
    "description": "Moonshot V1 Auto can select the appropriate model based on the number of tokens used in the current context."
  },
  "nousresearch/hermes-2-pro-llama-3-8b": {
    "description": "Hermes 2 Pro Llama 3 8B is an upgraded version of Nous Hermes 2, featuring the latest internally developed datasets."
  },
  "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": {
    "description": "Llama 3.1 Nemotron 70B is a large language model customized by NVIDIA, designed to enhance the helpfulness of LLM-generated responses to user queries. The model has excelled in benchmark tests such as Arena Hard, AlpacaEval 2 LC, and GPT-4-Turbo MT-Bench, ranking first in all three automatic alignment benchmarks as of October 1, 2024. The model is trained using RLHF (specifically REINFORCE), Llama-3.1-Nemotron-70B-Reward, and HelpSteer2-Preference prompts based on the Llama-3.1-70B-Instruct model."
  },
  "nvidia/llama-3.1-nemotron-51b-instruct": {
    "description": "A unique language model offering unparalleled accuracy and efficiency."
  },
  "nvidia/llama-3.1-nemotron-70b-instruct": {
    "description": "Llama-3.1-Nemotron-70B-Instruct is a custom large language model by NVIDIA designed to enhance the helpfulness of LLM-generated responses."
  },
  "o1": {
    "description": "Focused on advanced reasoning and solving complex problems, including mathematical and scientific tasks. It is particularly well-suited for applications that require deep contextual understanding and agentic workflows."
  },
  "o1-mini": {
    "description": "o1-mini is a fast and cost-effective reasoning model designed for programming, mathematics, and scientific applications. This model features a 128K context and has a knowledge cutoff date of October 2023."
  },
  "o1-preview": {
    "description": "o1 is OpenAI's new reasoning model, suitable for complex tasks that require extensive general knowledge. This model features a 128K context and has a knowledge cutoff date of October 2023."
  },
  "o1-pro": {
    "description": "The o1 series models are trained with reinforcement learning to think before answering and perform complex reasoning tasks. The o1-pro model uses more computational resources for deeper thinking, consistently delivering higher-quality responses."
  },
  "o3": {
    "description": "o3 is a versatile and powerful model that excels across multiple domains. It sets new benchmarks for tasks in mathematics, science, programming, and visual reasoning. It is also skilled in technical writing and instruction following, allowing users to analyze text, code, and images to solve complex multi-step problems."
  },
  "o3-mini": {
    "description": "o3-mini is our latest small reasoning model that delivers high intelligence while maintaining the same cost and latency targets as o1-mini."
  },
  "o3-pro": {
    "description": "The o3-pro model employs increased computation for deeper thinking and consistently better answers. It is only available for use under the Responses API."
  },
  "o4-mini": {
    "description": "o4-mini is our latest small model in the o series. It is optimized for fast and efficient inference, demonstrating high efficiency and performance in coding and visual tasks."
  },
  "open-codestral-mamba": {
    "description": "Codestral Mamba is a language model focused on code generation, providing strong support for advanced coding and reasoning tasks."
  },
  "open-mistral-7b": {
    "description": "Mistral 7B is a compact yet high-performance model, excelling in batch processing and simple tasks such as classification and text generation, with good reasoning capabilities."
  },
  "open-mistral-nemo": {
    "description": "Mistral Nemo is a 12B model developed in collaboration with Nvidia, offering outstanding reasoning and coding performance, easy to integrate and replace."
  },
  "open-mixtral-8x22b": {
    "description": "Mixtral 8x22B is a larger expert model focused on complex tasks, providing excellent reasoning capabilities and higher throughput."
  },
  "open-mixtral-8x7b": {
    "description": "Mixtral 8x7B is a sparse expert model that leverages multiple parameters to enhance reasoning speed, suitable for handling multilingual and code generation tasks."
  },
  "openai/gpt-4.1": {
    "description": "GPT-4.1 is our flagship model for complex tasks. It is particularly well-suited for cross-domain problem solving."
  },
  "openai/gpt-4.1-mini": {
    "description": "GPT-4.1 mini strikes a balance between intelligence, speed, and cost, making it an attractive model for many use cases."
  },
  "openai/gpt-4.1-nano": {
    "description": "GPT-4.1 nano is the fastest and most cost-effective version of the GPT-4.1 model."
  },
  "openai/gpt-4o": {
    "description": "ChatGPT-4o is a dynamic model that updates in real-time to maintain the latest version. It combines powerful language understanding and generation capabilities, suitable for large-scale application scenarios, including customer service, education, and technical support."
  },
  "openai/gpt-4o-mini": {
    "description": "GPT-4o mini is the latest model released by OpenAI following GPT-4 Omni, supporting both text and image input while outputting text. As their most advanced small model, it is significantly cheaper than other recent cutting-edge models and over 60% cheaper than GPT-3.5 Turbo. It maintains state-of-the-art intelligence while offering remarkable cost-effectiveness. GPT-4o mini scored 82% on the MMLU test and currently ranks higher than GPT-4 in chat preferences."
  },
  "openai/o1": {
    "description": "o1 is OpenAI's new reasoning model that supports multimodal input and outputs text, suitable for complex tasks requiring broad general knowledge. This model features a 200K context window and a knowledge cutoff date of October 2023."
  },
  "openai/o1-mini": {
    "description": "o1-mini is a fast and cost-effective reasoning model designed for programming, mathematics, and scientific applications. This model features a 128K context and has a knowledge cutoff date of October 2023."
  },
  "openai/o1-preview": {
    "description": "o1 is OpenAI's new reasoning model, suitable for complex tasks that require extensive general knowledge. This model features a 128K context and has a knowledge cutoff date of October 2023."
  },
  "openai/o3": {
    "description": "o3 is a versatile and powerful model that excels in multiple domains. It sets a new benchmark for tasks in mathematics, science, programming, and visual reasoning. It is also proficient in technical writing and following instructions. Users can leverage it to analyze text, code, and images, solving complex problems that require multiple steps."
  },
  "openai/o3-mini": {
    "description": "o3-mini delivers high intelligence at the same cost and latency targets as o1-mini."
  },
  "openai/o3-mini-high": {
    "description": "The high reasoning-effort version of o3-mini provides high intelligence at the same cost and latency targets as o1-mini."
  },
  "openai/o4-mini": {
    "description": "o4-mini is optimized for fast and efficient inference, demonstrating high efficiency and performance in coding and visual tasks."
  },
  "openai/o4-mini-high": {
    "description": "The high reasoning-effort version of o4-mini is optimized for fast and efficient inference, demonstrating high efficiency and performance in coding and visual tasks."
  },
  "openrouter/auto": {
    "description": "Based on context length, topic, and complexity, your request will be sent to Llama 3 70B Instruct, Claude 3.5 Sonnet (self-moderated), or GPT-4o."
  },
  "phi3": {
    "description": "Phi-3 is a lightweight open model launched by Microsoft, suitable for efficient integration and large-scale knowledge reasoning."
  },
  "phi3:14b": {
    "description": "Phi-3 is a lightweight open model launched by Microsoft, suitable for efficient integration and large-scale knowledge reasoning."
  },
  "pixtral-12b-2409": {
    "description": "The Pixtral model demonstrates strong capabilities in tasks such as chart and image understanding, document question answering, multimodal reasoning, and instruction following. It can ingest images at natural resolutions and aspect ratios and handle an arbitrary number of images within a long context window of up to 128K tokens."
  },
  "pixtral-large-latest": {
    "description": "Pixtral Large is an open-source multimodal model with 124 billion parameters, built on Mistral Large 2. This is the second model in our multimodal family, showcasing cutting-edge image understanding capabilities."
  },
  "pro-128k": {
    "description": "Spark Pro 128K is equipped with an extra-large context processing capability, able to handle up to 128K of contextual information, making it particularly suitable for long-form content that requires comprehensive analysis and long-term logical connections, providing smooth and consistent logic and diverse citation support in complex text communication."
  },
  "qvq-72b-preview": {
    "description": "The QVQ model is an experimental research model developed by the Qwen team, focusing on enhancing visual reasoning capabilities, particularly in the field of mathematical reasoning."
  },
  "qvq-max": {
    "description": "Tongyi Qianwen QVQ visual reasoning model supports visual input and chain-of-thought output, demonstrating stronger capabilities in mathematics, programming, visual analysis, creation, and general tasks."
  },
  "qvq-plus": {
    "description": "A visual reasoning model supporting visual inputs and chain-of-thought outputs. The plus version, succeeding the qvq-max model, offers faster reasoning speed and a more balanced trade-off between performance and cost."
  },
  "qwen-coder-plus": {
    "description": "Tongyi Qianwen coding model."
  },
  "qwen-coder-turbo": {
    "description": "Tongyi Qianwen coding model."
  },
  "qwen-coder-turbo-latest": {
    "description": "The Tongyi Qianwen Coder model."
  },
  "qwen-long": {
    "description": "Qwen is a large-scale language model that supports long text contexts and dialogue capabilities based on long documents and multiple documents."
  },
  "qwen-math-plus": {
    "description": "Tongyi Qianwen math model specialized for solving mathematical problems."
  },
  "qwen-math-plus-latest": {
    "description": "The Tongyi Qianwen Math model is specifically designed for solving mathematical problems."
  },
  "qwen-math-turbo": {
    "description": "Tongyi Qianwen math model specialized for solving mathematical problems."
  },
  "qwen-math-turbo-latest": {
    "description": "The Tongyi Qianwen Math model is specifically designed for solving mathematical problems."
  },
  "qwen-max": {
    "description": "Qwen Max is a hundred-billion-parameter-scale large language model that supports input in various languages including Chinese and English, and is the API model behind the current Qwen 2.5 product version."
  },
  "qwen-omni-turbo": {
    "description": "Qwen-Omni series models support multi-modal inputs including video, audio, images, and text, and output audio and text."
  },
  "qwen-plus": {
    "description": "Qwen Plus is an enhanced large-scale language model supporting input in various languages including Chinese and English."
  },
  "qwen-turbo": {
    "description": "Qwen Turbo is a large-scale language model supporting input in various languages including Chinese and English."
  },
  "qwen-vl-chat-v1": {
    "description": "Qwen VL supports flexible interaction methods, including multi-image, multi-turn Q&A, and creative capabilities."
  },
  "qwen-vl-max": {
    "description": "Tongyi Qianwen ultra-large-scale vision-language model. Compared to the enhanced version, it further improves visual reasoning and instruction compliance, providing higher levels of visual perception and cognition."
  },
  "qwen-vl-max-latest": {
    "description": "Tongyi Qianwen's ultra-large-scale visual language model. Compared to the enhanced version, it further improves visual reasoning and instruction-following abilities, providing a higher level of visual perception and cognition."
  },
  "qwen-vl-ocr": {
    "description": "Tongyi Qianwen OCR is a dedicated text extraction model focusing on documents, tables, exam questions, handwritten text, and other image types. It currently supports recognizing the following languages: Chinese, English, French, Japanese, Korean, German, Russian, Italian, Vietnamese, and Arabic."
  },
  "qwen-vl-plus": {
    "description": "Enhanced version of Tongyi Qianwen large-scale vision-language model. Greatly improves detail recognition and text recognition capabilities, supporting images with resolutions over one million pixels and arbitrary aspect ratios."
  },
  "qwen-vl-plus-latest": {
    "description": "Tongyi Qianwen's large-scale visual language model enhanced version. Significantly improves detail recognition and text recognition capabilities, supporting ultra-high pixel resolution and images of any aspect ratio."
  },
  "qwen-vl-v1": {
    "description": "Initialized with the Qwen-7B language model, this pre-trained model adds an image model with an input resolution of 448."
  },
  "qwen/qwen-2-7b-instruct": {
    "description": "Qwen2 is a brand new series of large language models. Qwen2 7B is a transformer-based model that excels in language understanding, multilingual capabilities, programming, mathematics, and reasoning."
  },
  "qwen/qwen-2-7b-instruct:free": {
    "description": "Qwen2 is a brand new series of large language models with enhanced understanding and generation capabilities."
  },
  "qwen/qwen-2-vl-72b-instruct": {
    "description": "Qwen2-VL is the latest iteration of the Qwen-VL model, achieving state-of-the-art performance in visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, and MTVQA. Qwen2-VL can understand videos over 20 minutes long for high-quality video-based Q&A, dialogue, and content creation. It also possesses complex reasoning and decision-making capabilities, allowing integration with mobile devices, robots, and more for automated operations based on visual environments and text instructions. In addition to English and Chinese, Qwen2-VL now supports understanding text in different languages within images, including most European languages, Japanese, Korean, Arabic, and Vietnamese."
  },
  "qwen/qwen-2.5-72b-instruct": {
    "description": "Qwen2.5-72B-Instruct is one of the latest large language model series released by Alibaba Cloud. This 72B model has significantly improved capabilities in coding and mathematics. The model also offers multilingual support, covering over 29 languages, including Chinese and English. It shows significant enhancements in instruction following, understanding structured data, and generating structured outputs (especially JSON)."
  },
  "qwen/qwen2.5-32b-instruct": {
    "description": "Qwen2.5-32B-Instruct is one of the latest large language model series released by Alibaba Cloud. This 32B model has significantly improved capabilities in coding and mathematics. The model provides multilingual support, covering over 29 languages, including Chinese and English. It shows significant enhancements in instruction following, understanding structured data, and generating structured outputs (especially JSON)."
  },
  "qwen/qwen2.5-7b-instruct": {
    "description": "An LLM focused on both Chinese and English, targeting language, programming, mathematics, reasoning, and more."
  },
  "qwen/qwen2.5-coder-32b-instruct": {
    "description": "An advanced LLM supporting code generation, reasoning, and debugging, covering mainstream programming languages."
  },
  "qwen/qwen2.5-coder-7b-instruct": {
    "description": "A powerful medium-sized code model supporting 32K context length, proficient in multilingual programming."
  },
  "qwen/qwen3-14b": {
    "description": "Qwen3-14B is a dense 14.8 billion parameter causal language model in the Qwen3 series, designed for complex reasoning and efficient dialogue. It supports seamless switching between a 'thinking' mode for tasks such as mathematics, programming, and logical reasoning, and a 'non-thinking' mode for general conversation. This model is fine-tuned for instruction following, agent tool usage, creative writing, and multilingual tasks across more than 100 languages and dialects. It natively handles a 32K token context and can be extended to 131K tokens using YaRN."
  },
  "qwen/qwen3-14b:free": {
    "description": "Qwen3-14B is a dense 14.8 billion parameter causal language model in the Qwen3 series, designed for complex reasoning and efficient dialogue. It supports seamless switching between a 'thinking' mode for tasks such as mathematics, programming, and logical reasoning, and a 'non-thinking' mode for general conversation. This model is fine-tuned for instruction following, agent tool usage, creative writing, and multilingual tasks across more than 100 languages and dialects. It natively handles a 32K token context and can be extended to 131K tokens using YaRN."
  },
  "qwen/qwen3-235b-a22b": {
    "description": "Qwen3-235B-A22B is a 235 billion parameter mixture of experts (MoE) model developed by Qwen, activating 22 billion parameters per forward pass. It supports seamless switching between a 'thinking' mode for complex reasoning, mathematics, and coding tasks, and a 'non-thinking' mode for general conversational efficiency. This model showcases strong reasoning capabilities, multilingual support (over 100 languages and dialects), advanced instruction following, and agent tool invocation capabilities. It natively handles a 32K token context window and can be extended to 131K tokens using YaRN."
  },
  "qwen/qwen3-235b-a22b:free": {
    "description": "Qwen3-235B-A22B is a 235 billion parameter mixture of experts (MoE) model developed by Qwen, activating 22 billion parameters per forward pass. It supports seamless switching between a 'thinking' mode for complex reasoning, mathematics, and coding tasks, and a 'non-thinking' mode for general conversational efficiency. This model showcases strong reasoning capabilities, multilingual support (over 100 languages and dialects), advanced instruction following, and agent tool invocation capabilities. It natively handles a 32K token context window and can be extended to 131K tokens using YaRN."
  },
  "qwen/qwen3-30b-a3b": {
    "description": "Qwen3 is the latest generation in the Qwen large language model series, featuring a dense and mixture of experts (MoE) architecture that excels in reasoning, multilingual support, and advanced agent tasks. Its unique ability to seamlessly switch between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile and high-quality performance.\n\nQwen3 significantly outperforms previous models such as QwQ and Qwen2.5, offering exceptional capabilities in mathematics, coding, common sense reasoning, creative writing, and interactive dialogue. The Qwen3-30B-A3B variant contains 30.5 billion parameters (3.3 billion active parameters), 48 layers, 128 experts (activating 8 for each task), and supports up to 131K token context (using YaRN), setting a new standard for open-source models."
  },
  "qwen/qwen3-30b-a3b:free": {
    "description": "Qwen3 is the latest generation in the Qwen large language model series, featuring a dense and mixture of experts (MoE) architecture that excels in reasoning, multilingual support, and advanced agent tasks. Its unique ability to seamlessly switch between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile and high-quality performance.\n\nQwen3 significantly outperforms previous models such as QwQ and Qwen2.5, offering exceptional capabilities in mathematics, coding, common sense reasoning, creative writing, and interactive dialogue. The Qwen3-30B-A3B variant contains 30.5 billion parameters (3.3 billion active parameters), 48 layers, 128 experts (activating 8 for each task), and supports up to 131K token context (using YaRN), setting a new standard for open-source models."
  },
  "qwen/qwen3-32b": {
    "description": "Qwen3-32B is a dense 32.8 billion parameter causal language model in the Qwen3 series, optimized for complex reasoning and efficient dialogue. It supports seamless switching between a 'thinking' mode for tasks such as mathematics, coding, and logical reasoning, and a 'non-thinking' mode for faster, general conversation. This model demonstrates strong performance in instruction following, agent tool usage, creative writing, and multilingual tasks across more than 100 languages and dialects. It natively handles a 32K token context and can be extended to 131K tokens using YaRN."
  },
  "qwen/qwen3-32b:free": {
    "description": "Qwen3-32B is a dense 32.8 billion parameter causal language model in the Qwen3 series, optimized for complex reasoning and efficient dialogue. It supports seamless switching between a 'thinking' mode for tasks such as mathematics, coding, and logical reasoning, and a 'non-thinking' mode for faster, general conversation. This model demonstrates strong performance in instruction following, agent tool usage, creative writing, and multilingual tasks across more than 100 languages and dialects. It natively handles a 32K token context and can be extended to 131K tokens using YaRN."
  },
  "qwen/qwen3-8b:free": {
    "description": "Qwen3-8B is a dense 8.2 billion parameter causal language model in the Qwen3 series, designed for reasoning-intensive tasks and efficient dialogue. It supports seamless switching between a 'thinking' mode for mathematics, coding, and logical reasoning, and a 'non-thinking' mode for general conversation. This model is fine-tuned for instruction following, agent integration, creative writing, and multilingual use across more than 100 languages and dialects. It natively supports a 32K token context window and can be extended to 131K tokens via YaRN."
  },
  "qwen2": {
    "description": "Qwen2 is Alibaba's next-generation large-scale language model, supporting diverse application needs with excellent performance."
  },
  "qwen2-72b-instruct": {
    "description": "Qwen2 is the new generation of large language model series introduced by the Qwen team. It is based on the Transformer architecture and incorporates technologies such as the SwiGLU activation function, attention QKV bias, grouped query attention, and a mixture of sliding window attention and full attention. Additionally, the Qwen team has improved the tokenizer to better adapt to multiple natural languages and code."
  },
  "qwen2-7b-instruct": {
    "description": "Qwen2 is the new generation of large language model series introduced by the Qwen team. It is based on the Transformer architecture and incorporates technologies such as the SwiGLU activation function, attention QKV bias, grouped query attention, and a mixture of sliding window attention and full attention. Additionally, the Qwen team has improved the tokenizer to better adapt to multiple natural languages and code."
  },
  "qwen2.5": {
    "description": "Qwen2.5 is Alibaba's next-generation large-scale language model, supporting diverse application needs with outstanding performance."
  },
  "qwen2.5-14b-instruct": {
    "description": "The 14B model of Tongyi Qianwen 2.5 is open-sourced."
  },
  "qwen2.5-14b-instruct-1m": {
    "description": "The Tongyi Qianwen 2.5 model is open-sourced at a scale of 14B."
  },
  "qwen2.5-32b-instruct": {
    "description": "The 32B model of Tongyi Qianwen 2.5 is open-sourced."
  },
  "qwen2.5-72b-instruct": {
    "description": "The 72B model of Tongyi Qianwen 2.5 is open-sourced."
  },
  "qwen2.5-7b-instruct": {
    "description": "The 7B model of Tongyi Qianwen 2.5 is open-sourced."
  },
  "qwen2.5-coder-1.5b-instruct": {
    "description": "Open-source version of the Qwen coding model."
  },
  "qwen2.5-coder-14b-instruct": {
    "description": "Open-source version of Tongyi Qianwen coding model."
  },
  "qwen2.5-coder-32b-instruct": {
    "description": "Open-source version of the Tongyi Qianwen code model."
  },
  "qwen2.5-coder-7b-instruct": {
    "description": "The open-source version of the Tongyi Qianwen Coder model."
  },
  "qwen2.5-coder-instruct": {
    "description": "Qwen2.5-Coder is the latest code-specific large language model in the Qwen series (formerly known as CodeQwen)."
  },
  "qwen2.5-instruct": {
    "description": "Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we have released multiple base language models and instruction-tuned language models with parameter sizes ranging from 0.5 billion to 72 billion."
  },
  "qwen2.5-math-1.5b-instruct": {
    "description": "Qwen-Math model has powerful mathematical problem-solving capabilities."
  },
  "qwen2.5-math-72b-instruct": {
    "description": "The Qwen-Math model possesses strong capabilities for solving mathematical problems."
  },
  "qwen2.5-math-7b-instruct": {
    "description": "The Qwen-Math model possesses strong capabilities for solving mathematical problems."
  },
  "qwen2.5-omni-7b": {
    "description": "The Qwen-Omni series models support input of various modalities, including video, audio, images, and text, and output audio and text."
  },
  "qwen2.5-vl-32b-instruct": {
    "description": "The Qwen2.5-VL model series enhances the model's intelligence level, practicality, and applicability, delivering superior performance in scenarios such as natural conversations, content creation, professional knowledge services, and code development. The 32B version employs reinforcement learning techniques to optimize the model, offering more human-preferred output styles, enhanced reasoning capabilities for complex mathematical problems, and fine-grained image understanding and reasoning compared to other models in the Qwen2.5-VL series."
  },
  "qwen2.5-vl-72b-instruct": {
    "description": "This version enhances instruction following, mathematics, problem-solving, and coding capabilities, improving the ability to recognize various formats and accurately locate visual elements. It supports understanding long video files (up to 10 minutes) and pinpointing events in seconds, comprehending the sequence and speed of time, and based on parsing and locating capabilities, it supports controlling OS or Mobile agents. It has strong key information extraction and JSON output capabilities, and this version is the most powerful in the series at 72B."
  },
  "qwen2.5-vl-7b-instruct": {
    "description": "This version enhances instruction following, mathematics, problem-solving, and coding capabilities, improving the ability to recognize various formats and accurately locate visual elements. It supports understanding long video files (up to 10 minutes) and pinpointing events in seconds, comprehending the sequence and speed of time, and based on parsing and locating capabilities, it supports controlling OS or Mobile agents. It has strong key information extraction and JSON output capabilities. This is the 7B version of the series."
  },
  "qwen2.5-vl-instruct": {
    "description": "Qwen2.5-VL is the latest version of the visual language model in the Qwen model family."
  },
  "qwen2.5:0.5b": {
    "description": "Qwen2.5 is Alibaba's next-generation large-scale language model, supporting diverse application needs with outstanding performance."
  },
  "qwen2.5:1.5b": {
    "description": "Qwen2.5 is Alibaba's next-generation large-scale language model, supporting diverse application needs with outstanding performance."
  },
  "qwen2.5:72b": {
    "description": "Qwen2.5 is Alibaba's next-generation large-scale language model, supporting diverse application needs with outstanding performance."
  },
  "qwen2:0.5b": {
    "description": "Qwen2 is Alibaba's next-generation large-scale language model, supporting diverse application needs with excellent performance."
  },
  "qwen2:1.5b": {
    "description": "Qwen2 is Alibaba's next-generation large-scale language model, supporting diverse application needs with excellent performance."
  },
  "qwen2:72b": {
    "description": "Qwen2 is Alibaba's next-generation large-scale language model, supporting diverse application needs with excellent performance."
  },
  "qwen3": {
    "description": "Qwen3 is Alibaba's next-generation large-scale language model, designed to support diverse application needs with outstanding performance."
  },
  "qwen3-0.6b": {
    "description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functionality, and multilingual support, while also supporting mode switching."
  },
  "qwen3-1.7b": {
    "description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functionality, and multilingual support, while also supporting mode switching."
  },
  "qwen3-14b": {
    "description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functionality, and multilingual support, while also supporting mode switching."
  },
  "qwen3-235b-a22b": {
    "description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functionality, and multilingual support, while also supporting mode switching."
  },
  "qwen3-30b-a3b": {
    "description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functionality, and multilingual support, while also supporting mode switching."
  },
  "qwen3-32b": {
    "description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functionality, and multilingual support, while also supporting mode switching."
  },
  "qwen3-4b": {
    "description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functionality, and multilingual support, while also supporting mode switching."
  },
  "qwen3-8b": {
    "description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functionality, and multilingual support, while also supporting mode switching."
  },
  "qwq": {
    "description": "QwQ is an experimental research model focused on improving AI reasoning capabilities."
  },
  "qwq-32b": {
    "description": "The QwQ reasoning model is trained based on the Qwen2.5-32B model, significantly enhancing its reasoning capabilities through reinforcement learning. The core metrics of the model, including mathematics and coding (AIME 24/25, LiveCodeBench) and some general metrics (IFEval, LiveBench, etc.), reach the level of the full version of DeepSeek-R1, with all metrics significantly surpassing those of DeepSeek-R1-Distill-Qwen-32B, which is also based on Qwen2.5-32B."
  },
  "qwq-32b-preview": {
    "description": "The QwQ model is an experimental research model developed by the Qwen team, focusing on enhancing AI reasoning capabilities."
  },
  "qwq-plus": {
    "description": "QwQ reasoning model trained based on Qwen2.5, significantly enhancing reasoning ability through reinforcement learning. Core metrics in mathematics and coding (AIME 24/25, LiveCodeBench) and some general benchmarks (IFEval, LiveBench, etc.) reach the full-power level of DeepSeek-R1."
  },
  "qwq_32b": {
    "description": "A medium-sized reasoning model in the Qwen series. Compared to traditional instruction-tuned models, QwQ, with its thinking and reasoning capabilities, significantly enhances performance in downstream tasks, especially in solving challenging problems."
  },
  "r1-1776": {
    "description": "R1-1776 is a version of the DeepSeek R1 model, fine-tuned to provide unfiltered, unbiased factual information."
  },
  "solar-mini": {
    "description": "Solar Mini is a compact LLM that outperforms GPT-3.5, featuring strong multilingual capabilities and supporting English and Korean, providing an efficient and compact solution."
  },
  "solar-mini-ja": {
    "description": "Solar Mini (Ja) extends the capabilities of Solar Mini, focusing on Japanese while maintaining efficiency and excellent performance in English and Korean usage."
  },
  "solar-pro": {
    "description": "Solar Pro is a highly intelligent LLM launched by Upstage, focusing on single-GPU instruction-following capabilities, with an IFEval score above 80. Currently supports English, with a formal version planned for release in November 2024, which will expand language support and context length."
  },
  "sonar": {
    "description": "A lightweight search product based on contextual search, faster and cheaper than Sonar Pro."
  },
  "sonar-deep-research": {
    "description": "Deep Research conducts comprehensive expert-level studies and synthesizes them into accessible, actionable reports."
  },
  "sonar-pro": {
    "description": "An advanced search product that supports contextual search, advanced queries, and follow-ups."
  },
  "sonar-reasoning": {
    "description": "A new API product powered by the DeepSeek reasoning model."
  },
  "sonar-reasoning-pro": {
    "description": "A new API product powered by the DeepSeek reasoning model."
  },
  "step-1-128k": {
    "description": "Balances performance and cost, suitable for general scenarios."
  },
  "step-1-256k": {
    "description": "Equipped with ultra-long context processing capabilities, especially suitable for long document analysis."
  },
  "step-1-32k": {
    "description": "Supports medium-length dialogues, applicable to various application scenarios."
  },
  "step-1-8k": {
    "description": "Small model, suitable for lightweight tasks."
  },
  "step-1-flash": {
    "description": "High-speed model, suitable for real-time dialogues."
  },
  "step-1.5v-mini": {
    "description": "This model has powerful video understanding capabilities."
  },
  "step-1o-turbo-vision": {
    "description": "This model has powerful image understanding capabilities, outperforming 1o in mathematical and coding fields. The model is smaller than 1o and has a faster output speed."
  },
  "step-1o-vision-32k": {
    "description": "This model possesses powerful image understanding capabilities. Compared to the step-1v series models, it offers enhanced visual performance."
  },
  "step-1v-32k": {
    "description": "Supports visual input, enhancing multimodal interaction experiences."
  },
  "step-1v-8k": {
    "description": "A small visual model suitable for basic text and image tasks."
  },
  "step-2-16k": {
    "description": "Supports large-scale context interactions, suitable for complex dialogue scenarios."
  },
  "step-2-16k-exp": {
    "description": "An experimental version of the step-2 model, featuring the latest capabilities and rolling updates. Not recommended for use in formal production environments."
  },
  "step-2-mini": {
    "description": "A high-speed large model based on the next-generation self-developed Attention architecture MFA, achieving results similar to step-1 at a very low cost, while maintaining higher throughput and faster response times. It is capable of handling general tasks and has specialized skills in coding."
  },
  "step-r1-v-mini": {
    "description": "This model is a powerful reasoning model with strong image understanding capabilities, able to process both image and text information, generating text content after deep reasoning. It excels in visual reasoning while also possessing first-tier capabilities in mathematics, coding, and text reasoning. The context length is 100k."
  },
  "taichu_llm": {
    "description": "The ZD Taichu language model possesses strong language understanding capabilities and excels in text creation, knowledge Q&A, code programming, mathematical calculations, logical reasoning, sentiment analysis, and text summarization. It innovatively combines large-scale pre-training with rich knowledge from multiple sources, continuously refining algorithmic techniques and absorbing new knowledge in vocabulary, structure, grammar, and semantics from vast text data, resulting in continuously improving model performance. It provides users with more convenient information and services, as well as a more intelligent experience."
  },
  "taichu_o1": {
    "description": "taichu_o1 is a next-generation reasoning model that achieves human-like thinking chains through multimodal interaction and reinforcement learning, supporting complex decision-making scenarios while maintaining high-precision outputs and demonstrating model reasoning pathways, suitable for strategy analysis and deep thinking."
  },
  "taichu_vl": {
    "description": "Integrates capabilities in image understanding, knowledge transfer, and logical attribution, excelling in the field of image-text question answering."
  },
  "text-embedding-3-large": {
    "description": "The most powerful vectorization model, suitable for both English and non-English tasks."
  },
  "text-embedding-3-small": {
    "description": "An efficient and cost-effective next-generation embedding model, suitable for knowledge retrieval, RAG applications, and more."
  },
  "thudm/glm-4-32b": {
    "description": "GLM-4-32B-0414 is a 32B bilingual (Chinese-English) open-weight language model optimized for code generation, function calls, and agent-based tasks. It has been pre-trained on 15T of high-quality and re-reasoning data and further refined using human preference alignment, rejection sampling, and reinforcement learning. The model excels in complex reasoning, artifact generation, and structured output tasks, achieving performance comparable to GPT-4o and DeepSeek-V3-0324 in multiple benchmark tests."
  },
  "thudm/glm-4-32b:free": {
    "description": "GLM-4-32B-0414 is a 32B bilingual (Chinese-English) open-weight language model optimized for code generation, function calls, and agent-based tasks. It has been pre-trained on 15T of high-quality and re-reasoning data and further refined using human preference alignment, rejection sampling, and reinforcement learning. The model excels in complex reasoning, artifact generation, and structured output tasks, achieving performance comparable to GPT-4o and DeepSeek-V3-0324 in multiple benchmark tests."
  },
  "thudm/glm-4-9b-chat": {
    "description": "The open-source version of the latest generation pre-trained model from the GLM-4 series released by Zhipu AI."
  },
  "thudm/glm-4-9b:free": {
    "description": "GLM-4-9B-0414 is a 9 billion parameter language model in the GLM-4 series developed by THUDM. GLM-4-9B-0414 is trained using the same reinforcement learning and alignment strategies as its larger 32B counterpart, achieving high performance relative to its scale, making it suitable for resource-constrained deployments that still require strong language understanding and generation capabilities."
  },
  "thudm/glm-z1-32b": {
    "description": "GLM-Z1-32B-0414 is an enhanced reasoning variant of GLM-4-32B, built for deep mathematics, logic, and code-oriented problem solving. It applies extended reinforcement learning (task-specific and based on general pairwise preferences) to improve performance on complex multi-step tasks. Compared to the base GLM-4-32B model, Z1 significantly enhances capabilities in structured reasoning and formal domains.\n\nThis model supports enforcing 'thinking' steps through prompt engineering and provides improved coherence for long-format outputs. It is optimized for agent workflows and supports long context (via YaRN), JSON tool calls, and fine-grained sampling configurations for stable reasoning. It is ideal for use cases requiring thoughtful, multi-step reasoning or formal derivation."
  },
  "thudm/glm-z1-32b:free": {
    "description": "GLM-Z1-32B-0414 is an enhanced reasoning variant of GLM-4-32B, built for deep mathematics, logic, and code-oriented problem solving. It applies extended reinforcement learning (task-specific and based on general pairwise preferences) to improve performance on complex multi-step tasks. Compared to the base GLM-4-32B model, Z1 significantly enhances capabilities in structured reasoning and formal domains.\n\nThis model supports enforcing 'thinking' steps through prompt engineering and provides improved coherence for long-format outputs. It is optimized for agent workflows and supports long context (via YaRN), JSON tool calls, and fine-grained sampling configurations for stable reasoning. It is ideal for use cases requiring thoughtful, multi-step reasoning or formal derivation."
  },
  "thudm/glm-z1-9b:free": {
    "description": "GLM-Z1-9B-0414 is a 9 billion parameter language model in the GLM-4 series developed by THUDM. It employs techniques initially applied to the larger GLM-Z1 model, including extended reinforcement learning, pairwise ranking alignment, and training for reasoning-intensive tasks such as mathematics, coding, and logic. Despite its smaller scale, it demonstrates strong performance on general reasoning tasks and outperforms many open-source models at its weight level."
  },
  "thudm/glm-z1-rumination-32b": {
    "description": "THUDM: GLM Z1 Rumination 32B is a deep reasoning model with 32 billion parameters in the GLM-4-Z1 series, optimized for complex, open-ended tasks that require prolonged thought. It builds upon glm-4-32b-0414, adding additional reinforcement learning stages and multi-stage alignment strategies, introducing a 'rumination' capability designed to simulate extended cognitive processing. This includes iterative reasoning, multi-hop analysis, and tool-enhanced workflows such as search, retrieval, and citation-aware synthesis.\n\nThe model excels in research-style writing, comparative analysis, and complex question answering. It supports function calls for search and navigation primitives (`search`, `click`, `open`, `finish`), allowing it to be used in agent-based pipelines. The rumination behavior is shaped by rule-based rewards and a delayed decision-making mechanism, controlled by multi-round cycles, benchmarked against deep research frameworks like OpenAI's internal alignment stack. This variant is suitable for scenarios requiring depth over speed."
  },
  "tngtech/deepseek-r1t-chimera:free": {
    "description": "DeepSeek-R1T-Chimera is created by merging DeepSeek-R1 and DeepSeek-V3 (0324), combining the reasoning capabilities of R1 with the token efficiency improvements of V3. It is based on the DeepSeek-MoE Transformer architecture and optimized for general text generation tasks.\n\nThis model merges the pre-trained weights of the two source models to balance performance in reasoning, efficiency, and instruction following tasks. It is released under the MIT license, intended for research and commercial use."
  },
  "togethercomputer/StripedHyena-Nous-7B": {
    "description": "StripedHyena Nous (7B) provides enhanced computational capabilities through efficient strategies and model architecture."
  },
  "tts-1": {
    "description": "The latest text-to-speech model, optimized for speed in real-time scenarios."
  },
  "tts-1-hd": {
    "description": "The latest text-to-speech model, optimized for quality."
  },
  "upstage/SOLAR-10.7B-Instruct-v1.0": {
    "description": "Upstage SOLAR Instruct v1 (11B) is suitable for refined instruction tasks, offering excellent language processing capabilities."
  },
  "us.anthropic.claude-3-5-sonnet-20241022-v2:0": {
    "description": "Claude 3.5 Sonnet raises the industry standard, outperforming competitor models and Claude 3 Opus, excelling in a wide range of evaluations while maintaining the speed and cost of our mid-tier models."
  },
  "us.anthropic.claude-3-7-sonnet-20250219-v1:0": {
    "description": "Claude 3.7 Sonnet is Anthropic's fastest next-generation model. Compared to Claude 3 Haiku, Claude 3.7 Sonnet shows improvements across various skills and surpasses the previous generation's largest model, Claude 3 Opus, in many intelligence benchmark tests."
  },
  "whisper-1": {
    "description": "A universal speech recognition model that supports multilingual speech recognition, speech translation, and language identification."
  },
  "wizardlm2": {
    "description": "WizardLM 2 is a language model provided by Microsoft AI, excelling in complex dialogues, multilingual capabilities, reasoning, and intelligent assistant applications."
  },
  "wizardlm2:8x22b": {
    "description": "WizardLM 2 is a language model provided by Microsoft AI, excelling in complex dialogues, multilingual capabilities, reasoning, and intelligent assistant applications."
  },
  "x1": {
    "description": "The Spark X1 model will undergo further upgrades, achieving results in reasoning, text generation, and language understanding tasks that match OpenAI o1 and DeepSeek R1, building on its leading position in domestic mathematical tasks."
  },
  "yi-1.5-34b-chat": {
    "description": "Yi-1.5 is an upgraded version of Yi. It continues pre-training on Yi using a high-quality corpus of 500B tokens and is fine-tuned on 3M diverse samples."
  },
  "yi-large": {
    "description": "A new model with hundreds of billions of parameters, providing super strong question-answering and text generation capabilities."
  },
  "yi-large-fc": {
    "description": "Based on the yi-large model, supports and enhances tool invocation capabilities, suitable for various business scenarios requiring agent or workflow construction."
  },
  "yi-large-preview": {
    "description": "An early version; using yi-large (the new version) is recommended instead."
  },
  "yi-large-rag": {
    "description": "High-level service based on the yi-large super strong model, combining retrieval and generation techniques to provide precise answers and real-time information retrieval services."
  },
  "yi-large-turbo": {
    "description": "Exceptional performance at a high cost-performance ratio. Conducts high-precision tuning based on performance, inference speed, and cost."
  },
  "yi-lightning": {
    "description": "The latest high-performance model, ensuring high-quality output while significantly improving inference speed."
  },
  "yi-lightning-lite": {
    "description": "A lightweight version; using yi-lightning is recommended instead."
  },
  "yi-medium": {
    "description": "An upgraded and fine-tuned medium-sized model with balanced capabilities and a high cost-performance ratio. Its instruction-following capabilities have been deeply optimized."
  },
  "yi-medium-200k": {
    "description": "200K ultra-long context window, providing deep understanding and generation capabilities for long texts."
  },
  "yi-spark": {
    "description": "Small yet powerful, lightweight and fast model. Provides enhanced mathematical computation and coding capabilities."
  },
  "yi-vision": {
    "description": "Model for complex visual tasks, providing high-performance image understanding and analysis capabilities."
  },
  "yi-vision-v2": {
    "description": "A complex visual task model that provides high-performance understanding and analysis capabilities based on multiple images."
  }
}
