{"version":3,"file":"llama_cpp.cjs","names":["LlamaChatSession"],"sources":["../../src/utils/llama_cpp.ts"],"sourcesContent":["import {\n  LlamaModel,\n  LlamaContext,\n  LlamaEmbeddingContext,\n  LlamaChatSession,\n  LlamaJsonSchemaGrammar,\n  LlamaGrammar,\n  type LlamaModelOptions,\n  LlamaContextOptions,\n  LlamaEmbeddingContextOptions,\n  GbnfJsonSchema,\n  Llama,\n} from \"node-llama-cpp\";\n\n/**\n * Note that the modelPath is the only required parameter. For testing you\n * can set this in the environment variable `LLAMA_PATH`.\n */\nexport interface LlamaBaseCppInputs {\n  /** Prompt processing batch size. */\n  batchSize?: number;\n  /** Text context size. */\n  contextSize?: number;\n  /** Embedding mode only. */\n  embedding?: boolean;\n  /** Use fp16 for KV cache. */\n  f16Kv?: boolean;\n  /** Number of layers to store in VRAM. */\n  gpuLayers?: number;\n  /** The llama_eval() call computes all logits, not just the last one. */\n  logitsAll?: boolean;\n  /** */\n  maxTokens?: number;\n  /** Path to the model on the filesystem. */\n  modelPath: string;\n  /** Add the begining of sentence token.  */\n  prependBos?: boolean;\n  /** If null, a random seed will be used. */\n  seed?: null | number;\n  /** The randomness of the responses, e.g. 0.1 deterministic, 1.5 creative, 0.8 balanced, 0 disables. */\n  temperature?: number;\n  /** Number of threads to use to evaluate tokens. */\n  threads?: number;\n  /** Trim whitespace from the end of the generated text Disabled by default. */\n  trimWhitespaceSuffix?: boolean;\n  /** Consider the n most likely tokens, where n is 1 to vocabulary size, 0 disables (uses full vocabulary). Note: only applies when `temperature` > 0. */\n  topK?: number;\n  /** Selects the smallest token set whose probability exceeds P, where P is between 0 - 1, 1 disables. Note: only applies when `temperature` > 0. */\n  topP?: number;\n  /** Force system to keep model in RAM. */\n  useMlock?: boolean;\n  /** Use mmap if possible. */\n  useMmap?: boolean;\n  /** Only load the vocabulary, no weights. */\n  vocabOnly?: boolean;\n  /** JSON schema to be used to format output. Also known as `grammar`. */\n  jsonSchema?: object;\n  /** GBNF string to be used to format output. Also known as `grammar`. */\n  gbnf?: string;\n}\n\nexport async function createLlamaModel(\n  inputs: LlamaBaseCppInputs,\n  llama: Llama\n): Promise<LlamaModel> {\n  const options: LlamaModelOptions = {\n    gpuLayers: inputs?.gpuLayers,\n    modelPath: inputs.modelPath,\n    useMlock: inputs?.useMlock,\n    useMmap: inputs?.useMmap,\n    vocabOnly: inputs?.vocabOnly,\n  };\n\n  return llama.loadModel(options);\n}\n\nexport async function createLlamaContext(\n  model: LlamaModel,\n  inputs: LlamaBaseCppInputs\n): Promise<LlamaContext> {\n  const options: LlamaContextOptions = {\n    batchSize: inputs?.batchSize,\n    contextSize: inputs?.contextSize,\n    threads: inputs?.threads,\n  };\n\n  return model.createContext(options);\n}\n\nexport async function createLlamaEmbeddingContext(\n  model: LlamaModel,\n  inputs: LlamaBaseCppInputs\n): Promise<LlamaEmbeddingContext> {\n  const options: LlamaEmbeddingContextOptions = {\n    batchSize: inputs?.batchSize,\n    contextSize: inputs?.contextSize,\n    threads: inputs?.threads,\n  };\n\n  return model.createEmbeddingContext(options);\n}\n\nexport function createLlamaSession(context: LlamaContext): LlamaChatSession {\n  return new LlamaChatSession({ contextSequence: context.getSequence() });\n}\n\nexport async function createLlamaJsonSchemaGrammar(\n  schemaString: object | undefined,\n  llama: Llama\n): Promise<LlamaJsonSchemaGrammar<GbnfJsonSchema> | undefined> {\n  if (schemaString === undefined) {\n    return undefined;\n  }\n\n  const schemaJSON = schemaString as GbnfJsonSchema;\n  return await llama.createGrammarForJsonSchema(schemaJSON);\n}\n\nexport async function createCustomGrammar(\n  filePath: string | undefined,\n  llama: Llama\n): Promise<LlamaGrammar | undefined> {\n  if (filePath === undefined) {\n    return undefined;\n  }\n\n  return llama.createGrammar({\n    grammar: filePath,\n  });\n}\n"],"mappings":";;;AA6DA,eAAsB,iBACpB,QACA,OACqB;CACrB,MAAM,UAA6B;EACjC,WAAW,QAAQ;EACnB,WAAW,OAAO;EAClB,UAAU,QAAQ;EAClB,SAAS,QAAQ;EACjB,WAAW,QAAQ;EACpB;AAED,QAAO,MAAM,UAAU,QAAQ;;AAGjC,eAAsB,mBACpB,OACA,QACuB;CACvB,MAAM,UAA+B;EACnC,WAAW,QAAQ;EACnB,aAAa,QAAQ;EACrB,SAAS,QAAQ;EAClB;AAED,QAAO,MAAM,cAAc,QAAQ;;AAGrC,eAAsB,4BACpB,OACA,QACgC;CAChC,MAAM,UAAwC;EAC5C,WAAW,QAAQ;EACnB,aAAa,QAAQ;EACrB,SAAS,QAAQ;EAClB;AAED,QAAO,MAAM,uBAAuB,QAAQ;;AAG9C,SAAgB,mBAAmB,SAAyC;AAC1E,QAAO,IAAIA,eAAAA,iBAAiB,EAAE,iBAAiB,QAAQ,aAAa,EAAE,CAAC;;AAGzE,eAAsB,6BACpB,cACA,OAC6D;AAC7D,KAAI,iBAAiB,KAAA,EACnB;CAGF,MAAM,aAAa;AACnB,QAAO,MAAM,MAAM,2BAA2B,WAAW;;AAG3D,eAAsB,oBACpB,UACA,OACmC;AACnC,KAAI,aAAa,KAAA,EACf;AAGF,QAAO,MAAM,cAAc,EACzB,SAAS,UACV,CAAC"}