{"version":3,"file":"obsidian.cjs","names":["BaseDocumentLoader","yaml","Document","DirectoryLoader","UnknownHandling"],"sources":["../../../src/document_loaders/fs/obsidian.ts"],"sourcesContent":["import type { basename as BasenameT } from \"node:path\";\nimport type { readFile as ReadFileT, stat as StatT } from \"node:fs/promises\";\nimport yaml from \"js-yaml\";\nimport { Document } from \"@langchain/core/documents\";\nimport { getEnv } from \"@langchain/core/utils/env\";\nimport { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\nimport {\n  DirectoryLoader,\n  UnknownHandling,\n} from \"@langchain/classic/document_loaders/fs/directory\";\n\nexport type FrontMatter = {\n  title?: string;\n  description?: string;\n  tags?: string[] | string;\n  [key: string]: unknown;\n};\n\nexport interface ObsidianFileLoaderOptions {\n  encoding?: BufferEncoding;\n  collectMetadata?: boolean;\n}\n\n/**\n * Represents a loader for Obsidian markdown files. This loader extends the BaseDocumentLoader\n * and provides functionality to parse and extract metadata, tags, and dataview fields from\n * Obsidian markdown files.\n */\nclass ObsidianFileLoader extends BaseDocumentLoader {\n  private filePath: string;\n\n  private encoding: BufferEncoding;\n\n  private collectMetadata: boolean;\n\n  /**\n   * Initializes a new instance of the ObsidianFileLoader class.\n   * @param filePath The path to the Obsidian markdown file.\n   * @param encoding The character encoding to use when reading the file. Defaults to 'utf-8'.\n   * @param collectMetadata Determines whether metadata should be collected from the file. Defaults to true.\n   */\n  constructor(\n    filePath: string,\n    {\n      encoding = \"utf-8\",\n      collectMetadata = true,\n    }: ObsidianFileLoaderOptions = {}\n  ) {\n    super();\n    this.filePath = filePath;\n    this.encoding = encoding;\n    this.collectMetadata = collectMetadata;\n  }\n\n  private static FRONT_MATTER_REGEX = /^---\\n(.*?)\\n---\\n/s;\n\n  /**\n   * Parses the YAML front matter from the given content string.\n   * @param content The string content of the markdown file.\n   * @returns An object representing the parsed front matter.\n   */\n  private parseFrontMatter(content: string): FrontMatter {\n    if (!this.collectMetadata) {\n      return {};\n    }\n\n    const match = content.match(ObsidianFileLoader.FRONT_MATTER_REGEX);\n    if (!match) {\n      return {};\n    }\n\n    try {\n      const frontMatter = yaml.load(match[1]) as FrontMatter;\n      if (frontMatter.tags && typeof frontMatter.tags === \"string\") {\n        frontMatter.tags = frontMatter.tags.split(\", \");\n      }\n\n      return frontMatter;\n    } catch {\n      console.warn(\"Encountered non-yaml frontmatter\");\n      return {};\n    }\n  }\n\n  /**\n   * Removes YAML front matter from the given content string.\n   * @param content The string content of the markdown file.\n   * @returns The content string with the front matter removed.\n   */\n  private removeFrontMatter(content: string): string {\n    if (!this.collectMetadata) {\n      return content;\n    }\n\n    return content.replace(ObsidianFileLoader.FRONT_MATTER_REGEX, \"\");\n  }\n\n  private static TAG_REGEX = /(?:\\s|^)#([a-zA-Z_][\\w/-]*)/g;\n\n  /**\n   * Parses Obsidian-style tags from the given content string.\n   * @param content The string content of the markdown file.\n   * @returns A set of parsed tags.\n   */\n  private parseObsidianTags(content: string): Set<string> {\n    if (!this.collectMetadata) {\n      return new Set();\n    }\n\n    const matches = content.matchAll(ObsidianFileLoader.TAG_REGEX);\n    const tags = new Set<string>();\n    for (const match of matches) {\n      tags.add(match[1]);\n    }\n\n    return tags;\n  }\n\n  private static DATAVIEW_LINE_REGEX = /^\\s*(\\w+)::\\s*(.*)$/gm;\n\n  private static DATAVIEW_INLINE_BRACKET_REGEX = /\\[(\\w+)::\\s*(.*)\\]/gm;\n\n  private static DATAVIEW_INLINE_PAREN_REGEX = /\\((\\w+)::\\s*(.*)\\)/gm;\n\n  /**\n   * Parses dataview fields from the given content string.\n   * @param content The string content of the markdown file.\n   * @returns A record object containing key-value pairs of dataview fields.\n   */\n  private parseObsidianDataviewFields(content: string): Record<string, string> {\n    if (!this.collectMetadata) {\n      return {};\n    }\n\n    const fields: Record<string, string> = {};\n    const lineMatches = content.matchAll(\n      ObsidianFileLoader.DATAVIEW_LINE_REGEX\n    );\n    for (const [, key, value] of lineMatches) {\n      fields[key] = value;\n    }\n\n    const bracketMatches = content.matchAll(\n      ObsidianFileLoader.DATAVIEW_INLINE_BRACKET_REGEX\n    );\n    for (const [, key, value] of bracketMatches) {\n      fields[key] = value;\n    }\n\n    const parenMatches = content.matchAll(\n      ObsidianFileLoader.DATAVIEW_INLINE_PAREN_REGEX\n    );\n    for (const [, key, value] of parenMatches) {\n      fields[key] = value;\n    }\n\n    return fields;\n  }\n\n  /**\n   * Converts metadata to a format compatible with Langchain.\n   * @param metadata The metadata object to convert.\n   * @returns A record object containing key-value pairs of Langchain-compatible metadata.\n   */\n  private toLangchainCompatibleMetadata(metadata: Record<string, unknown>) {\n    const result: Record<string, unknown> = {};\n    for (const [key, value] of Object.entries(metadata)) {\n      if (typeof value === \"string\" || typeof value === \"number\") {\n        result[key] = value;\n      } else {\n        result[key] = JSON.stringify(value);\n      }\n    }\n    return result;\n  }\n\n  /**\n   * It loads the Obsidian file, parses it, and returns a `Document` instance.\n   * @returns An array of `Document` instances to comply with the BaseDocumentLoader interface.\n   */\n  public async load(): Promise<Document[]> {\n    const documents: Document[] = [];\n\n    const { basename, readFile, stat } = await ObsidianFileLoader.imports();\n    const fileName = basename(this.filePath);\n    const stats = await stat(this.filePath);\n    let content = await readFile(this.filePath, this.encoding);\n\n    const frontMatter = this.parseFrontMatter(content);\n    const tags = this.parseObsidianTags(content);\n    const dataviewFields = this.parseObsidianDataviewFields(content);\n    content = this.removeFrontMatter(content);\n\n    const metadata: Document[\"metadata\"] = {\n      source: fileName,\n      path: this.filePath,\n      created: stats.birthtimeMs,\n      lastModified: stats.mtimeMs,\n      lastAccessed: stats.atimeMs,\n      ...this.toLangchainCompatibleMetadata(frontMatter),\n      ...dataviewFields,\n    };\n\n    if (tags.size || frontMatter.tags) {\n      metadata.tags = Array.from(\n        new Set([...tags, ...(frontMatter.tags ?? [])])\n      ).join(\",\");\n    }\n\n    documents.push(\n      new Document({\n        pageContent: content,\n        metadata,\n      })\n    );\n\n    return documents;\n  }\n\n  /**\n   * Imports the necessary functions from the `node:path` and\n   * `node:fs/promises` modules. It is used to dynamically import the\n   * functions when needed. If the import fails, it throws an error\n   * indicating that the modules failed to load.\n   * @returns A promise that resolves to an object containing the imported functions.\n   */\n  static async imports(): Promise<{\n    basename: typeof BasenameT;\n    readFile: typeof ReadFileT;\n    stat: typeof StatT;\n  }> {\n    try {\n      const { basename } = await import(\"node:path\");\n      const { readFile, stat } = await import(\"node:fs/promises\");\n      return { basename, readFile, stat };\n    } catch (e) {\n      console.error(e);\n      throw new Error(\n        `Failed to load fs/promises. ObsidianFileLoader available only on environment 'node'. It appears you are running environment '${getEnv()}'. See https://<link to docs> for alternatives.`\n      );\n    }\n  }\n}\n\n/**\n * Represents a loader for directories containing Obsidian markdown files. This loader extends\n * the DirectoryLoader and provides functionality to load and parse '.md' files with YAML frontmatter,\n * Obsidian tags, and Dataview fields.\n */\nexport class ObsidianLoader extends DirectoryLoader {\n  /**\n   * Initializes a new instance of the ObsidianLoader class.\n   * @param directoryPath The path to the directory containing Obsidian markdown files.\n   * @param encoding The character encoding to use when reading files. Defaults to 'utf-8'.\n   * @param collectMetadata Determines whether metadata should be collected from the files. Defaults to true.\n   */\n  constructor(directoryPath: string, options?: ObsidianFileLoaderOptions) {\n    super(\n      directoryPath,\n      {\n        \".md\": (filePath) => new ObsidianFileLoader(filePath, options),\n      },\n      true,\n      UnknownHandling.Ignore\n    );\n  }\n}\n"],"mappings":";;;;;;;;;;;;;;;AA4BA,IAAM,qBAAN,MAAM,2BAA2BA,sCAAAA,mBAAmB;CAClD;CAEA;CAEA;;;;;;;CAQA,YACE,UACA,EACE,WAAW,SACX,kBAAkB,SACW,EAAE,EACjC;AACA,SAAO;AACP,OAAK,WAAW;AAChB,OAAK,WAAW;AAChB,OAAK,kBAAkB;;CAGzB,OAAe,qBAAqB;;;;;;CAOpC,iBAAyB,SAA8B;AACrD,MAAI,CAAC,KAAK,gBACR,QAAO,EAAE;EAGX,MAAM,QAAQ,QAAQ,MAAM,mBAAmB,mBAAmB;AAClE,MAAI,CAAC,MACH,QAAO,EAAE;AAGX,MAAI;GACF,MAAM,cAAcC,QAAAA,QAAK,KAAK,MAAM,GAAG;AACvC,OAAI,YAAY,QAAQ,OAAO,YAAY,SAAS,SAClD,aAAY,OAAO,YAAY,KAAK,MAAM,KAAK;AAGjD,UAAO;UACD;AACN,WAAQ,KAAK,mCAAmC;AAChD,UAAO,EAAE;;;;;;;;CASb,kBAA0B,SAAyB;AACjD,MAAI,CAAC,KAAK,gBACR,QAAO;AAGT,SAAO,QAAQ,QAAQ,mBAAmB,oBAAoB,GAAG;;CAGnE,OAAe,YAAY;;;;;;CAO3B,kBAA0B,SAA8B;AACtD,MAAI,CAAC,KAAK,gBACR,wBAAO,IAAI,KAAK;EAGlB,MAAM,UAAU,QAAQ,SAAS,mBAAmB,UAAU;EAC9D,MAAM,uBAAO,IAAI,KAAa;AAC9B,OAAK,MAAM,SAAS,QAClB,MAAK,IAAI,MAAM,GAAG;AAGpB,SAAO;;CAGT,OAAe,sBAAsB;CAErC,OAAe,gCAAgC;CAE/C,OAAe,8BAA8B;;;;;;CAO7C,4BAAoC,SAAyC;AAC3E,MAAI,CAAC,KAAK,gBACR,QAAO,EAAE;EAGX,MAAM,SAAiC,EAAE;EACzC,MAAM,cAAc,QAAQ,SAC1B,mBAAmB,oBACpB;AACD,OAAK,MAAM,GAAG,KAAK,UAAU,YAC3B,QAAO,OAAO;EAGhB,MAAM,iBAAiB,QAAQ,SAC7B,mBAAmB,8BACpB;AACD,OAAK,MAAM,GAAG,KAAK,UAAU,eAC3B,QAAO,OAAO;EAGhB,MAAM,eAAe,QAAQ,SAC3B,mBAAmB,4BACpB;AACD,OAAK,MAAM,GAAG,KAAK,UAAU,aAC3B,QAAO,OAAO;AAGhB,SAAO;;;;;;;CAQT,8BAAsC,UAAmC;EACvE,MAAM,SAAkC,EAAE;AAC1C,OAAK,MAAM,CAAC,KAAK,UAAU,OAAO,QAAQ,SAAS,CACjD,KAAI,OAAO,UAAU,YAAY,OAAO,UAAU,SAChD,QAAO,OAAO;MAEd,QAAO,OAAO,KAAK,UAAU,MAAM;AAGvC,SAAO;;;;;;CAOT,MAAa,OAA4B;EACvC,MAAM,YAAwB,EAAE;EAEhC,MAAM,EAAE,UAAU,UAAU,SAAS,MAAM,mBAAmB,SAAS;EACvE,MAAM,WAAW,SAAS,KAAK,SAAS;EACxC,MAAM,QAAQ,MAAM,KAAK,KAAK,SAAS;EACvC,IAAI,UAAU,MAAM,SAAS,KAAK,UAAU,KAAK,SAAS;EAE1D,MAAM,cAAc,KAAK,iBAAiB,QAAQ;EAClD,MAAM,OAAO,KAAK,kBAAkB,QAAQ;EAC5C,MAAM,iBAAiB,KAAK,4BAA4B,QAAQ;AAChE,YAAU,KAAK,kBAAkB,QAAQ;EAEzC,MAAM,WAAiC;GACrC,QAAQ;GACR,MAAM,KAAK;GACX,SAAS,MAAM;GACf,cAAc,MAAM;GACpB,cAAc,MAAM;GACpB,GAAG,KAAK,8BAA8B,YAAY;GAClD,GAAG;GACJ;AAED,MAAI,KAAK,QAAQ,YAAY,KAC3B,UAAS,OAAO,MAAM,KACpB,IAAI,IAAI,CAAC,GAAG,MAAM,GAAI,YAAY,QAAQ,EAAE,CAAE,CAAC,CAChD,CAAC,KAAK,IAAI;AAGb,YAAU,KACR,IAAIC,0BAAAA,SAAS;GACX,aAAa;GACb;GACD,CAAC,CACH;AAED,SAAO;;;;;;;;;CAUT,aAAa,UAIV;AACD,MAAI;GACF,MAAM,EAAE,aAAa,MAAM,OAAO;GAClC,MAAM,EAAE,UAAU,SAAS,MAAM,OAAO;AACxC,UAAO;IAAE;IAAU;IAAU;IAAM;WAC5B,GAAG;AACV,WAAQ,MAAM,EAAE;AAChB,SAAM,IAAI,MACR,iIAAA,GAAA,0BAAA,SAAwI,CAAC,iDAC1I;;;;;;;;;AAUP,IAAa,iBAAb,cAAoCC,iDAAAA,gBAAgB;;;;;;;CAOlD,YAAY,eAAuB,SAAqC;AACtE,QACE,eACA,EACE,QAAQ,aAAa,IAAI,mBAAmB,UAAU,QAAQ,EAC/D,EACD,MACAC,iDAAAA,gBAAgB,OACjB"}