import Parser from "rss-parser";
import { BaseDocumentLoader } from "./base";
import { Document, DocumentMetadata } from "../types";

/**
 * Options accepted by the `RSSLoader` constructor. Every field is optional.
 */
export interface RSSLoaderConfig {
  /**
   * Upper bound on the number of feed items converted to documents; the
   * loader keeps the first `maxItems` entries of the feed. A falsy value
   * (e.g. `0`) disables the limit. The loader's constructor uses 50 when the
   * option is omitted.
   */
  maxItems?: number;
  /**
   * When true the loader prefers the full item body (`content:encoded`,
   * `content`, `description`) and only then the snippet; when false it uses
   * the snippet/description only. The loader's constructor uses `true` when
   * the option is omitted.
   */
  includeContent?: boolean;
  /**
   * HTTP settings forwarded to the `rss-parser` `Parser` constructor.
   * When no `requestOptions` is supplied, the loader's constructor uses a
   * timeout of 10000 and a single `User-Agent` header.
   */
  requestOptions?: {
    /** Request timeout — presumably milliseconds, as in rss-parser; TODO confirm. */
    timeout?: number;
    /** Request headers passed to the parser as-is. */
    headers?: Record<string, string>;
  };
}

/**
 * Shape of a single feed entry.
 *
 * NOTE(review): nothing in this file references this type (the loader treats
 * parsed items as `any`); the field names match the properties the loader
 * reads from parsed items, so it is presumably meant to describe them.
 */
export interface RSSItem {
  /** Entry headline. */
  title?: string;
  /** URL of the entry. */
  link?: string;
  /** Entry body; may contain HTML markup. */
  content?: string;
  /** Short text form of the body — presumably plain text; TODO confirm. */
  contentSnippet?: string;
  /** Entry author. */
  author?: string;
  /** Publication date as a string taken from the feed. */
  pubDate?: string;
  /** Category labels attached to the entry. */
  categories?: string[];
  /** Feed-supplied identifier of the entry. */
  guid?: string;
}

/**
 * Shape of a parsed feed: channel-level fields plus its entries.
 *
 * NOTE(review): nothing in this file references this type (the loader treats
 * the parsed feed as `any`); presumably it documents the parser output.
 */
export interface RSSFeed {
  /** Feed (channel) title. */
  title?: string;
  /** Feed description. */
  description?: string;
  /** URL associated with the feed. */
  link?: string;
  /** Language declared by the feed. */
  language?: string;
  /** Last build date as a string taken from the feed. */
  lastBuildDate?: string;
  /** Entries of the feed. */
  items: RSSItem[];
}

/**
 * Document loader that parses RSS content with `rss-parser` and turns each
 * feed item that has textual content into a `Document`.
 *
 * Feeds can be loaded from a URL (`loadFromURL`), from an XML string
 * (`loadFromString`) or through the generic `load` entry point, which returns
 * only the first document of the feed.
 */
export class RSSLoader extends BaseDocumentLoader {
  supportedExtensions: string[] = ["rss", "xml"];

  /** Named character references decoded by `cleanHtmlContent`. */
  private static readonly NAMED_ENTITIES: ReadonlyMap<string, string> =
    new Map([
      ["nbsp", " "],
      ["amp", "&"],
      ["lt", "<"],
      ["gt", ">"],
      ["quot", '"'],
      ["apos", "'"],
    ]);

  private parser: Parser;
  private config: RSSLoaderConfig;

  /**
   * @param config Optional overrides. Each option falls back to its default
   *   independently: `maxItems` 50, `includeContent` true,
   *   `requestOptions.timeout` 10000 and a default `User-Agent` header.
   */
  constructor(config: RSSLoaderConfig = {}) {
    super();
    const requestOverrides = config.requestOptions ?? {};

    // Defaults are resolved per field. A plain `{ ...defaults, ...config }`
    // would replace the whole `requestOptions` object, silently dropping the
    // default headers when only `timeout` is given (and vice versa).
    this.config = {
      ...config,
      maxItems: config.maxItems ?? 50,
      includeContent: config.includeContent ?? true,
      requestOptions: {
        timeout: requestOverrides.timeout ?? 10000,
        // Caller-supplied headers replace the default set as a whole so a
        // differently-cased "user-agent" key cannot end up duplicated.
        headers: requestOverrides.headers ?? {
          "User-Agent": "Mozilla/5.0 (compatible; RAG-System/1.0)",
        },
      },
    };

    this.parser = new Parser({
      timeout: this.config.requestOptions?.timeout,
      headers: this.config.requestOptions?.headers,
      customFields: {
        item: [
          ["content:encoded", "contentEncoded"],
          ["description", "description"],
        ],
      },
    });
  }

  /**
   * Parses `content` as a UTF-8 RSS document and returns the first document
   * produced from the feed.
   *
   * @param filePath Used as the feed source and in error messages.
   * @param content Raw feed bytes.
   * @throws Error when parsing fails or no item yields a document.
   */
  async load(filePath: string, content: Buffer): Promise<Document> {
    const documents = await this.loadFromString(
      content.toString("utf-8"),
      filePath
    );
    if (documents.length === 0) {
      throw new Error(`No documents found in RSS feed: ${filePath}`);
    }
    return documents[0];
  }

  /**
   * Fetches and parses the feed at `url`.
   *
   * @returns One document per feed item that has content.
   * @throws Error wrapping any fetch, parse or conversion failure.
   */
  async loadFromURL(url: string): Promise<Document[]> {
    try {
      const feed = await this.parser.parseURL(url);
      return this.processFeed(feed, url);
    } catch (error) {
      throw new Error(`Failed to load RSS feed from ${url}: ${error}`);
    }
  }

  /**
   * Parses an RSS document that is already in memory.
   *
   * @param rssContent Feed XML.
   * @param sourceUrl Recorded as the feed URL in each document's metadata and
   *   used as the document source when an item has no link.
   * @throws Error wrapping any parse or conversion failure.
   */
  async loadFromString(
    rssContent: string,
    sourceUrl: string
  ): Promise<Document[]> {
    try {
      const feed = await this.parser.parseString(rssContent);
      return this.processFeed(feed, sourceUrl);
    } catch (error) {
      throw new Error(`Failed to parse RSS content: ${error}`);
    }
  }

  /**
   * Converts the (optionally truncated) item list of a parsed feed into
   * documents, dropping items that have no usable content.
   */
  private processFeed(feed: any, sourceUrl: string): Document[] {
    const allItems: any[] = Array.isArray(feed?.items) ? feed.items : [];
    const limit = this.config.maxItems;
    // A falsy limit (0) means "no limit".
    const items = limit ? allItems.slice(0, limit) : allItems;

    const documents: Document[] = [];
    for (const item of items) {
      const document = this.createDocumentFromItem(item, feed, sourceUrl);
      if (document) {
        documents.push(document);
      }
    }
    return documents;
  }

  /**
   * Builds a document from one feed item.
   *
   * @returns The document, or `null` when the item has no textual content.
   */
  private createDocumentFromItem(
    item: any,
    feed: any,
    sourceUrl: string
  ): Document | null {
    const title = item.title || "Untitled";
    const link = item.link || "";

    // Candidate fields in order of preference. Non-string or blank candidates
    // are skipped so one oddly-shaped field cannot abort the whole feed.
    const content = this.config.includeContent
      ? this.firstText(
          item.contentEncoded,
          item.content,
          item.description,
          item.contentSnippet
        )
      : this.firstText(item.contentSnippet, item.description);

    if (!content) {
      return null; // Skip items without content
    }

    const cleanContent = this.cleanHtmlContent(content);
    const fullContent = `${title}\n\n${cleanContent}`;

    // An unparseable pubDate would otherwise yield an `Invalid Date`.
    const published = item.pubDate ? new Date(item.pubDate) : null;
    const createdAt =
      published && !Number.isNaN(published.getTime()) ? published : new Date();

    const categories: string[] = Array.isArray(item.categories)
      ? item.categories
      : [];

    const metadata: DocumentMetadata = {
      title,
      author: item.author || feed.title || "Unknown",
      createdAt,
      updatedAt: new Date(),
      fileType: "rss",
      // NOTE(review): this is the UTF-16 length of the text, not a byte count.
      fileSize: fullContent.length,
      language: this.detectLanguage(fullContent),
      // Separate copies so mutating one list does not affect the other.
      tags: [...categories],
      categories: [...categories],
      description: item.contentSnippet || item.description || "",
    };

    // Add RSS-specific metadata
    (metadata as any).rssSource = {
      feedUrl: sourceUrl,
      feedTitle: feed.title,
      itemLink: link,
      pubDate: item.pubDate,
      guid: item.guid,
    };

    return {
      id: this.generateDocumentId(link || item.guid || title),
      content: fullContent,
      metadata,
      source: link || sourceUrl,
    };
  }

  /** Returns the first candidate that is a non-blank string, else `""`. */
  private firstText(...candidates: unknown[]): string {
    for (const candidate of candidates) {
      if (typeof candidate === "string" && candidate.trim()) {
        return candidate;
      }
    }
    return "";
  }

  /**
   * Reduces an HTML fragment to plain text: tags are removed, a small set of
   * named entities and all numeric character references are decoded, and
   * whitespace runs collapse to single spaces.
   */
  private cleanHtmlContent(html: string): string {
    return (
      html
        // Block-level tags become a space so adjacent paragraphs or list
        // items are not glued together ("<p>a</p><p>b</p>" -> "a b").
        .replace(
          /<\/?(?:address|article|aside|blockquote|br|dd|div|dl|dt|h[1-6]|hr|li|ol|p|pre|section|table|td|th|tr|ul)\b[^>]*>/gi,
          " "
        )
        .replace(/<[^>]*>/g, "") // Remaining (inline) tags
        // Decode in a single pass: chained replaces would double-decode
        // "&amp;lt;" into "<" instead of the literal text "&lt;".
        .replace(
          /&(#[xX][0-9a-fA-F]+|#\d+|[a-zA-Z]+);/g,
          (match: string, body: string) => this.decodeEntity(match, body)
        )
        .replace(/\s+/g, " ") // Normalize whitespace
        .trim()
    );
  }

  /**
   * Decodes one character reference.
   *
   * @param match The full reference, returned unchanged when unknown/invalid.
   * @param body The reference without the leading `&` and trailing `;`.
   */
  private decodeEntity(match: string, body: string): string {
    if (body.charAt(0) !== "#") {
      return RSSLoader.NAMED_ENTITIES.get(body) ?? match;
    }
    const isHex = body.charAt(1) === "x" || body.charAt(1) === "X";
    const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    const isValid =
      Number.isInteger(codePoint) &&
      codePoint > 0 &&
      codePoint <= 0x10ffff &&
      !(codePoint >= 0xd800 && codePoint <= 0xdfff); // lone surrogates
    return isValid ? String.fromCodePoint(codePoint) : match;
  }

  /** Returns "ko" when the text contains any Hangul character, else "en". */
  private detectLanguage(content: string): string {
    const koreanPattern = /[ㄱ-ㅎ가-힣]/;
    return koreanPattern.test(content) ? "ko" : "en";
  }

  /**
   * Builds an ID of the form `rss_<hash>_<timestamp>` from a 32-bit string
   * hash of `identifier` and the current time.
   *
   * NOTE(review): because of the timestamp the ID differs every time the same
   * item is loaded, so it is not stable across loads; confirm whether
   * downstream de-duplication relies on this before changing the format.
   */
  protected generateDocumentId(identifier: string): string {
    const timestamp = Date.now();
    const hash = identifier.split("").reduce((acc, ch) => {
      // (acc << 5) - acc is acc * 31; `| 0` keeps the value in 32 bits.
      return ((acc << 5) - acc + ch.charCodeAt(0)) | 0;
    }, 0);

    return `rss_${Math.abs(hash)}_${timestamp}`;
  }
}

/**
 * RSS loader bound to a single Naver blog, identified by its blog ID.
 */
export class NaverBlogRSSLoader extends RSSLoader {
  private blogId: string;

  /**
   * @param blogId Naver blog ID (the path segment of `blog.naver.com/<id>`).
   * @param config Loader options forwarded to `RSSLoader`.
   */
  constructor(blogId: string, config: RSSLoaderConfig = {}) {
    super(config);
    this.blogId = blogId;
  }

  /** Loads the blog's RSS feed and returns one document per usable item. */
  async loadBlog(): Promise<Document[]> {
    // Single source of truth for the feed URL.
    return this.loadFromURL(this.getRSSUrl());
  }

  /** Returns the blog ID this loader was created with. */
  getBlogId(): string {
    return this.blogId;
  }

  /** Returns the RSS feed URL for this blog. */
  getRSSUrl(): string {
    return `https://rss.blog.naver.com/${this.blogId}`;
  }

  /**
   * Creates a loader from a blog URL (or a bare blog ID).
   *
   * @throws Error when no blog ID can be extracted from `url`.
   */
  static createFromUrl(
    url: string,
    config: RSSLoaderConfig = {}
  ): NaverBlogRSSLoader {
    const blogId = this.extractBlogId(url);
    if (!blogId) {
      throw new Error(`Cannot extract blog ID from URL: ${url}`);
    }
    return new NaverBlogRSSLoader(blogId, config);
  }

  /**
   * Extracts the blog ID from a Naver blog URL.
   *
   * Recognised forms, checked in this order:
   * - `blog.naver.com/PostView.naver?blogId=<id>&...` (query parameter)
   * - `rss.blog.naver.com/<id>` and `rss.blog.naver.com/<id>.xml`
   * - `blog.naver.com/<id>[/...]` (including `m.blog.naver.com`)
   * - `<id>.blog.me`
   * - a bare ID containing neither `/` nor `.`
   *
   * @returns The blog ID, or `null` when none can be determined.
   */
  static extractBlogId(url: string): string | null {
    const input = (url ?? "").trim();
    if (!input) {
      return null;
    }

    const patterns = [
      // Query-parameter form; must win over the path form, which would
      // otherwise capture the script name ("PostView.naver").
      /blog\.naver\.com\/[^?#]*\?(?:[^#]*&)?blogId=([^&#]+)/,
      // RSS URL, with an optional ".xml" suffix that is not part of the ID.
      /rss\.blog\.naver\.com\/([^\/?#]+?)(?:\.xml)?(?=[\/?#]|$)/,
      // Blog URL
      /blog\.naver\.com\/([^\/?#]+)/,
      // blog.me URL: only the label directly before ".blog.me".
      /([^\/?#.]+)\.blog\.me/,
    ];

    for (const pattern of patterns) {
      const candidate = input.match(pattern)?.[1];
      // A captured segment containing a dot is a page name such as
      // "PostList.naver", not a blog ID.
      if (candidate && !candidate.includes(".")) {
        return candidate;
      }
    }

    // If no pattern matches, assume the input itself is the blog ID.
    const bare = input.replace(/^https?:\/\//, "").replace(/\/$/, "");
    if (bare && !bare.includes("/") && !bare.includes(".")) {
      return bare;
    }

    return null;
  }
}

/**
 * Registry of named RSS loaders that can be loaded together.
 *
 * A feed is loaded from the URL registered with it; a `NaverBlogRSSLoader`
 * registered without a URL is loaded through its own `loadBlog()`.
 */
export class RSSFeedManager {
  private feedSources: Map<string, RSSLoader> = new Map();
  /** Feed URL per feed name, for feeds registered with an explicit URL. */
  private feedUrls: Map<string, string> = new Map();

  /**
   * Registers (or replaces) a feed.
   *
   * @param name Unique feed name.
   * @param loader Loader used to fetch the feed.
   * @param url Feed URL. Required for generic loaders; optional for
   *   `NaverBlogRSSLoader`, which knows its own URL.
   */
  addFeed(name: string, loader: RSSLoader, url?: string): void {
    this.feedSources.set(name, loader);
    if (url) {
      this.feedUrls.set(name, url);
    } else {
      // Re-registering without a URL must not keep a stale one.
      this.feedUrls.delete(name);
    }
  }

  /**
   * Unregisters a feed.
   *
   * @returns `true` when a feed with that name existed.
   */
  removeFeed(name: string): boolean {
    this.feedUrls.delete(name);
    return this.feedSources.delete(name);
  }

  /**
   * Loads every registered feed concurrently.
   *
   * A feed that fails to load (or has no URL configured) is logged with
   * `console.error` and mapped to an empty array; it never rejects the
   * returned promise.
   *
   * @returns Documents per feed name, in registration order.
   */
  async loadAllFeeds(): Promise<Map<string, Document[]>> {
    const registered = Array.from(this.feedSources.entries());

    const loaded = await Promise.all(
      registered.map(
        async ([name, loader]): Promise<[string, Document[]]> => {
          try {
            return [name, await this.loadFeed(name, loader)];
          } catch (error) {
            console.error(`Failed to load feed ${name}:`, error);
            return [name, []];
          }
        }
      )
    );

    // Built after all loads settle so the key order is the registration
    // order rather than the order in which requests happened to finish.
    return new Map(loaded);
  }

  /** Returns the names of all registered feeds. */
  getFeedNames(): string[] {
    return Array.from(this.feedSources.keys());
  }

  /** Returns the loader registered under `name`, if any. */
  getFeed(name: string): RSSLoader | undefined {
    return this.feedSources.get(name);
  }

  /** Loads one feed, preferring the URL registered for it. */
  private async loadFeed(name: string, loader: RSSLoader): Promise<Document[]> {
    const url = this.feedUrls.get(name);
    if (url) {
      return loader.loadFromURL(url);
    }
    if (loader instanceof NaverBlogRSSLoader) {
      return loader.loadBlog();
    }
    // Fail fast instead of issuing a request for an empty URL.
    throw new Error(
      `No URL configured for feed "${name}"; pass one to addFeed()`
    );
  }
}
