import { Document } from "../types";
import { BaseDocumentLoader } from "./base";
import * as mammoth from "mammoth";

/**
 * Document loader that extracts the plain text of a Word document using
 * mammoth and wraps it in a {@link Document}.
 *
 * Text clean-up, metadata construction and id generation are delegated to
 * `cleanText`, `createBaseMetadata` and `generateDocumentId` on `this`
 * (presumably inherited from `BaseDocumentLoader` — not visible in this file).
 */
export class DocxDocumentLoader extends BaseDocumentLoader {
  /**
   * File extensions handled by this loader.
   *
   * NOTE(review): mammoth only parses the zip-based `.docx` format. `"doc"` is
   * kept so existing routing does not change; content that is actually a
   * legacy binary `.doc` is rejected in {@link load} with an explicit error
   * instead of an opaque zip-parsing failure.
   */
  supportedExtensions = ["docx", "doc"];

  /**
   * Leading bytes of an OLE2 compound file, the container format used by
   * legacy binary `.doc` files.
   */
  private static readonly LEGACY_DOC_SIGNATURE: readonly number[] = [
    0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1,
  ];

  /**
   * Extracts the raw text of a Word document and returns it as a `Document`.
   *
   * @param filePath - Path of the file; used for the document id, the metadata
   *   and the `source` field.
   * @param content - Raw bytes of the file.
   * @returns The document with cleaned text content and base metadata.
   * @throws Error with the prefix `Failed to load DOCX document: ` when the
   *   content is a legacy binary `.doc` or when extraction fails. The
   *   underlying error is available on the thrown error's `cause` property.
   */
  async load(filePath: string, content: Buffer): Promise<Document> {
    try {
      // Fail fast with a clear reason: mammoth cannot read the OLE2 container.
      if (DocxDocumentLoader.isLegacyBinaryDoc(content)) {
        throw new Error(
          "legacy binary .doc format is not supported; convert the file to .docx",
        );
      }

      const result = await mammoth.extractRawText({ buffer: content });
      const cleanedText = this.cleanText(result.value);

      const metadata = this.createBaseMetadata(filePath, content.length);

      return {
        id: this.generateDocumentId(filePath),
        content: cleanedText,
        metadata,
        source: filePath,
      };
    } catch (error) {
      // Keep the original message format, but retain the underlying error
      // (and its stack) on `cause` so callers can still inspect it.
      throw Object.assign(
        new Error(`Failed to load DOCX document: ${String(error)}`),
        { cause: error },
      );
    }
  }

  /**
   * Reports whether `content` starts with the OLE2 compound-file signature.
   */
  private static isLegacyBinaryDoc(content: Buffer): boolean {
    const signature = DocxDocumentLoader.LEGACY_DOC_SIGNATURE;
    return (
      content.length >= signature.length &&
      signature.every((byte, index) => content[index] === byte)
    );
  }
}
