import { Document, DocumentMetadata } from "../types";

/**
 * Base class for document loaders.
 *
 * Subclasses declare which file extensions they handle and implement
 * {@link BaseDocumentLoader.load}; this class provides shared helpers for
 * ID generation, baseline metadata and text normalization.
 */
export abstract class BaseDocumentLoader {
  /**
   * Extensions this loader accepts. `canLoad` lower-cases its argument
   * before comparing, so entries are expected to be lower-case.
   */
  abstract supportedExtensions: string[];

  /**
   * Builds a document from a file's path and raw content.
   *
   * @param filePath - Path of the file being loaded.
   * @param content - Raw file content.
   * @returns The loaded document.
   */
  abstract load(filePath: string, content: Buffer): Promise<Document>;

  /**
   * Reports whether this loader handles the given extension.
   *
   * @param fileExtension - Extension to check; compared case-insensitively
   *   against `supportedExtensions` by lower-casing the argument.
   * @returns `true` when the lower-cased extension is listed.
   */
  canLoad(fileExtension: string): boolean {
    return this.supportedExtensions.includes(fileExtension.toLowerCase());
  }

  /**
   * Builds an identifier of the form `doc_<base64(filePath)>_<epoch ms>`.
   *
   * The trailing timestamp means repeated calls for the same path yield
   * different IDs unless they fall within the same millisecond.
   *
   * @param filePath - Path to encode into the identifier.
   * @returns The generated identifier.
   */
  protected generateDocumentId(filePath: string): string {
    // Standard base64 is kept deliberately: its alphabet contains no "_",
    // so the "_" separators in the ID remain unambiguous.
    const encodedPath = Buffer.from(filePath).toString("base64");
    return `doc_${encodedPath}_${Date.now()}`;
  }

  /**
   * Creates the baseline metadata shared by all loaders.
   *
   * The title is the last path segment (either `/` or `\` is accepted as a
   * separator), falling back to `"unknown"` when that segment is empty.
   * The file type is the text after the last `.` in the title, or an empty
   * string when the title contains no dot.
   *
   * @param filePath - Path of the source file.
   * @param fileSize - Size value stored as-is in the metadata.
   * @returns Metadata with empty `tags`/`categories` and both timestamps set
   *   to the same instant.
   */
  protected createBaseMetadata(
    filePath: string,
    fileSize: number
  ): DocumentMetadata {
    // Accept both POSIX and Windows separators so "C:\docs\a.txt" yields "a.txt".
    const fileName = filePath.split(/[\\/]/).pop() || "unknown";

    // Names without a dot (e.g. "README") have no extension; splitting on "."
    // and taking the last piece would wrongly return the whole name.
    const dotIndex = fileName.lastIndexOf(".");
    const fileExtension = dotIndex === -1 ? "" : fileName.slice(dotIndex + 1);

    // Capture one instant so createdAt and updatedAt cannot drift apart, but
    // keep them as separate Date objects so mutating one leaves the other intact.
    const now = Date.now();

    return {
      title: fileName,
      createdAt: new Date(now),
      updatedAt: new Date(now),
      fileType: fileExtension,
      fileSize,
      tags: [],
      categories: [],
    };
  }

  /**
   * Normalizes whitespace in extracted text.
   *
   * Steps, in order: convert `\r\n` and lone `\r` to `\n`; collapse any run
   * of newlines separated only by whitespace into a single blank line;
   * collapse runs of spaces/tabs into one space; trim both ends.
   *
   * @param text - Raw text to normalize.
   * @returns The normalized text.
   */
  protected cleanText(text: string): string {
    return text
      .replace(/\r\n/g, "\n")
      .replace(/\r/g, "\n")
      .replace(/\n\s*\n/g, "\n\n")
      .replace(/[ \t]+/g, " ")
      .trim();
  }
}
