import { promises as fs } from 'fs';
import path from 'path';
import type { 
  RAGDocument, 
  RAGChunk, 
  DocumentMetadata, 
  ChunkMetadata,
  DocumentProcessor 
} from './types.js';

/**
 * Turns Markdown text into a `RAGDocument` and splits it into `RAGChunk`s.
 *
 * Chunking is line-oriented: fenced code blocks and headers always become
 * their own chunks, blank lines end the current paragraph chunk, and
 * paragraphs that would exceed `chunkSize` characters are split (optionally
 * carrying a few trailing words over as overlap).
 *
 * Chunk offsets are UTF-16 code-unit offsets into `document.content`
 * (`endOffset` is exclusive and never exceeds the content length).
 */
export class MarkdownDocumentProcessor implements DocumentProcessor {
  /** Frontmatter keys whose values are always kept as text (so `version: 1.0` stays "1.0"). */
  private static readonly STRING_KEYS: ReadonlySet<string> = new Set(['title', 'author', 'version']);

  /** Frontmatter keys that map onto dedicated metadata fields rather than `custom`. */
  private static readonly KNOWN_FIELDS: ReadonlySet<string> = new Set([
    'title',
    'author',
    'tags',
    'version',
    'custom'
  ]);

  /** Monotonic counter used to make chunk ids unique within this processor instance. */
  private chunkIdCounter = 0;

  /**
   * Wraps raw Markdown into a document record with extracted metadata.
   *
   * @param content - Full Markdown text.
   * @param filePath - Path the content came from; stored verbatim and used for the default title.
   * @returns A document whose id combines the current timestamp with a random suffix.
   */
  async parse(content: string, filePath: string): Promise<RAGDocument> {
    const metadata = await this.extractMetadata(content, filePath);

    return {
      // `slice(2, 11)` takes the same nine characters the deprecated `substr(2, 9)` did.
      id: `doc-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,
      path: filePath,
      content,
      metadata
    };
  }

  /**
   * Splits a document into ordered chunks.
   *
   * @param document - Document whose `content` is chunked.
   * @param chunkSize - Maximum length (in characters, including the trailing newline)
   *   of accumulated paragraph text before a new chunk is started.
   * @param overlap - Approximate number of characters of the previous chunk to repeat
   *   at the start of the next one (converted to words at ~5 characters per word).
   *   `0` disables overlap.
   * @returns Chunks in document order with sequential `index` values.
   */
  async chunk(document: RAGDocument, chunkSize: number, overlap: number): Promise<RAGChunk[]> {
    const chunks: RAGChunk[] = [];
    const source = document.content;
    const lines = source.split('\n');
    // The last line has no real newline after it, so computed ends may overshoot by one.
    const clampToSource = (offset: number): number => Math.min(offset, source.length);

    let currentChunk = '';
    let currentIndex = 0;
    let startOffset = 0;
    let inCodeBlock = false;
    let codeBlockContent = '';
    let codeBlockLanguage = '';
    let codeBlockStart = 0;

    // Emits the accumulated paragraph text (if any) and resets the buffer.
    const flushCurrentChunk = (endOffset: number): void => {
      if (currentChunk.trim()) {
        chunks.push(this.createChunk(
          document.id,
          currentChunk.trim(),
          currentIndex++,
          startOffset,
          clampToSource(endOffset),
          this.detectChunkType(currentChunk)
        ));
      }
      currentChunk = '';
    };

    // Emits the accumulated fenced code block and resets the code-block state.
    const flushCodeBlock = (endOffset: number): void => {
      chunks.push(this.createChunk(
        document.id,
        codeBlockContent.trim(),
        currentIndex++,
        codeBlockStart,
        clampToSource(endOffset),
        {
          type: 'code',
          language: codeBlockLanguage || undefined
        }
      ));
      inCodeBlock = false;
      codeBlockContent = '';
      codeBlockLanguage = '';
    };

    // Offsets are derived from the absolute position of each line rather than
    // from running sums of chunk lengths, so they cannot drift.
    let nextLineOffset = 0;
    for (const line of lines) {
      const lineOffset = nextLineOffset;
      const lineWithNewline = line + '\n';
      nextLineOffset = lineOffset + lineWithNewline.length;
      const trimmedLine = line.trim();

      // Fence lines open or close a code block.
      if (trimmedLine.startsWith('```')) {
        if (!inCodeBlock) {
          flushCurrentChunk(lineOffset);
          inCodeBlock = true;
          codeBlockContent = lineWithNewline;
          codeBlockLanguage = trimmedLine.slice(3).trim();
          codeBlockStart = lineOffset;
        } else {
          codeBlockContent += lineWithNewline;
          flushCodeBlock(nextLineOffset);
        }
        continue;
      }

      if (inCodeBlock) {
        codeBlockContent += lineWithNewline;
        continue;
      }

      // Headers are emitted as standalone chunks.
      const headerMatch = /^(#+)\s/.exec(trimmedLine);
      if (headerMatch) {
        flushCurrentChunk(lineOffset);
        chunks.push(this.createChunk(
          document.id,
          trimmedLine,
          currentIndex++,
          lineOffset,
          lineOffset + line.length,
          { type: 'header', level: headerMatch[1].length }
        ));
        continue;
      }

      // Blank lines are paragraph breaks.
      if (trimmedLine === '') {
        flushCurrentChunk(lineOffset);
        continue;
      }

      if (currentChunk === '') {
        startOffset = lineOffset;
      }

      const potentialChunk = currentChunk + lineWithNewline;
      if (potentialChunk.length <= chunkSize) {
        currentChunk = potentialChunk;
        continue;
      }

      if (currentChunk.length > 0) {
        // The buffer is full: emit it and start a new chunk with this line.
        const previousChunk = currentChunk;
        const previousStart = startOffset;
        flushCurrentChunk(lineOffset);

        if (overlap > 0) {
          const words = previousChunk.trim().split(/\s+/);
          const overlapWords = Math.ceil(overlap / 5); // Approximate words for overlap
          const overlapText = words.slice(-overlapWords).join(' ');
          currentChunk = overlapText + ' ' + lineWithNewline;
          // Approximate: assumes the repeated words were separated by single
          // whitespace characters in the source. Never reaches before the
          // chunk the overlap was taken from.
          startOffset = Math.max(previousStart, lineOffset - overlapText.length - 1);
        } else {
          currentChunk = lineWithNewline;
          startOffset = lineOffset;
        }
        continue;
      }

      // A single line exceeds chunkSize on its own: split it on word boundaries.
      // A word longer than chunkSize is kept whole.
      let piece = '';
      let pieceStart = lineOffset;
      let pieceEnd = lineOffset;
      const wordPattern = /\S+/g;
      let wordMatch: RegExpExecArray | null;
      while ((wordMatch = wordPattern.exec(line)) !== null) {
        const word = wordMatch[0];
        const wordStart = lineOffset + wordMatch.index;
        const candidate = piece ? `${piece} ${word}` : word;

        if (candidate.length > chunkSize && piece) {
          chunks.push(this.createChunk(
            document.id,
            piece,
            currentIndex++,
            pieceStart,
            pieceEnd,
            this.detectChunkType(piece)
          ));
          piece = word;
          pieceStart = wordStart;
        } else {
          if (!piece) {
            pieceStart = wordStart;
          }
          piece = candidate;
        }
        pieceEnd = wordStart + word.length;
      }

      // The remaining words stay in the buffer so following lines can join them.
      currentChunk = piece + '\n';
      startOffset = pieceStart;
    }

    if (inCodeBlock) {
      // An unclosed fence runs to the end of the document; keep its content
      // instead of dropping it.
      flushCodeBlock(source.length);
    } else {
      flushCurrentChunk(source.length);
    }

    return chunks;
  }

  /**
   * Builds document metadata from the file name, YAML-style frontmatter and
   * the first Markdown header.
   *
   * Title precedence: frontmatter `title`, then the first header outside the
   * frontmatter and outside fenced code blocks, then the file name without
   * extension. `lastModified` is the time of extraction (not the file's
   * mtime) and `size` is `content.length`.
   *
   * @param content - Full Markdown text.
   * @param filePath - Path used to derive the default title.
   */
  async extractMetadata(content: string, filePath: string): Promise<DocumentMetadata> {
    const fileName = path.basename(filePath, path.extname(filePath));

    const metadata: DocumentMetadata = {
      title: fileName,
      lastModified: new Date().toISOString(),
      size: content.length
    };

    let hasFrontmatterTitle = false;
    let body = content;

    // Accept both LF and CRLF line endings around the frontmatter fences.
    const frontmatterMatch = /^---\r?\n([\s\S]*?)\r?\n---/.exec(content);
    if (frontmatterMatch) {
      body = content.slice(frontmatterMatch[0].length);
      try {
        const frontmatterData = this.parseFrontmatter(frontmatterMatch[1]);

        if (frontmatterData.title) {
          metadata.title = String(frontmatterData.title);
          hasFrontmatterTitle = true;
        }
        if (frontmatterData.author) metadata.author = String(frontmatterData.author);
        if (frontmatterData.tags) {
          const rawTags = Array.isArray(frontmatterData.tags) ? frontmatterData.tags : [frontmatterData.tags];
          metadata.tags = rawTags.map((tag) => String(tag));
        }
        if (frontmatterData.version) metadata.version = String(frontmatterData.version);

        // An object-valued `custom` key is the base; every unrecognised key is
        // layered on top of it. A non-object `custom` value is treated like
        // any other unrecognised key.
        const customFields: Record<string, unknown> = {};
        if (this.isPlainRecord(frontmatterData.custom)) {
          Object.assign(customFields, frontmatterData.custom);
        }
        for (const [key, value] of Object.entries(frontmatterData)) {
          const isObjectCustom = key === 'custom' && this.isPlainRecord(value);
          const isDedicatedField = key !== 'custom' && MarkdownDocumentProcessor.KNOWN_FIELDS.has(key);
          if (!isObjectCustom && !isDedicatedField) {
            customFields[key] = value;
          }
        }
        if (Object.keys(customFields).length > 0) {
          metadata.custom = customFields;
        }
      } catch {
        // Frontmatter is best-effort: on failure keep whatever was extracted so far.
      }
    }

    if (!hasFrontmatterTitle) {
      const heading = this.findFirstHeading(body);
      if (heading) {
        metadata.title = heading;
      }
    }

    return metadata;
  }

  /**
   * Assembles a chunk record; `metadata` overrides the default `paragraph` type.
   */
  private createChunk(
    documentId: string,
    content: string,
    index: number,
    startOffset: number,
    endOffset: number,
    metadata: Partial<ChunkMetadata>
  ): RAGChunk {
    return {
      id: `chunk-${documentId}-${this.chunkIdCounter++}`,
      documentId,
      content,
      index,
      metadata: {
        startOffset,
        endOffset,
        type: 'paragraph',
        ...metadata
      } as ChunkMetadata
    };
  }

  /**
   * Classifies non-code, non-header text by its first characters:
   * `>` means blockquote, a bullet or `N.` marker means list, anything else paragraph.
   */
  private detectChunkType(content: string): Partial<ChunkMetadata> {
    const trimmed = content.trim();

    if (trimmed.startsWith('>')) {
      return { type: 'blockquote' };
    }

    if (/^[-*+]\s/.test(trimmed) || /^\d+\.\s/.test(trimmed)) {
      return { type: 'list' };
    }

    return { type: 'paragraph' };
  }

  /**
   * Returns the text of the first column-0 `#` header that is not inside a
   * fenced code block, or `undefined` when there is none.
   */
  private findFirstHeading(markdown: string): string | undefined {
    let inFence = false;
    for (const line of markdown.split(/\r?\n/)) {
      if (line.trim().startsWith('```')) {
        inFence = !inFence;
        continue;
      }
      if (inFence) continue;

      const match = /^#+\s+(.+)$/.exec(line);
      const heading = match ? match[1].trim() : '';
      if (heading) return heading;
    }
    return undefined;
  }

  /**
   * Parses a small subset of YAML: `key: value` scalars, inline `[a, b]`
   * arrays, `- item` lists and one level of nested `key: value` objects.
   * Lines it does not understand (for example `# comments`) are skipped.
   */
  private parseFrontmatter(content: string): Record<string, unknown> {
    const result: Record<string, unknown> = {};
    const lines = content.split(/\r?\n/);
    let currentKey: string | null = null;

    for (let i = 0; i < lines.length; i++) {
      const line = lines[i];
      const trimmedLine = line.trim();

      if (!trimmedLine) continue;

      const match = /^\s*(\w[\w-]*):\s*(.*)$/.exec(line);
      if (!match) {
        // `- item` lines extend the list opened by the most recent key.
        const target = currentKey !== null ? result[currentKey] : undefined;
        if (trimmedLine.startsWith('-') && Array.isArray(target)) {
          target.push(this.stripQuotes(trimmedLine.slice(1).trim()));
        }
        continue;
      }

      const key = match[1];
      const value = match[2].trim();
      currentKey = key;

      if (value === '') {
        const nextLine = i + 1 < lines.length ? lines[i + 1] : '';
        if (nextLine.trim().startsWith('-')) {
          // Block list: items are collected by the `- item` branch above.
          result[key] = [];
        } else if (/^\s+\w[\w-]*:/.test(nextLine)) {
          // Nested object: consume every following indented line.
          const nested: Record<string, unknown> = {};
          let j = i + 1;
          while (j < lines.length && /^\s+/.test(lines[j])) {
            const nestedMatch = /^\s+(\w[\w-]*):\s*(.*)$/.exec(lines[j]);
            if (nestedMatch) {
              nested[nestedMatch[1]] = this.parseScalar(nestedMatch[2].trim());
            }
            j++;
          }
          result[key] = nested;
          i = j - 1; // Skip processed lines
        }
        // Otherwise the key has no value and is left unset.
        continue;
      }

      if (MarkdownDocumentProcessor.STRING_KEYS.has(key)) {
        // Kept as text so numeric-looking values are not reformatted.
        result[key] = this.stripQuotes(value);
      } else if (value.startsWith('[') && value.endsWith(']')) {
        result[key] = this.parseInlineArray(key, value);
      } else {
        result[key] = this.parseScalar(value);
      }
    }

    return result;
  }

  /**
   * Parses an inline `[...]` array. Unquoted `tags` are split on commas and
   * kept as strings; other arrays are parsed as JSON, falling back to a comma
   * split when they are not valid JSON (for example `[a, b]`).
   */
  private parseInlineArray(key: string, value: string): unknown {
    const inner = value.slice(1, -1).trim();
    if (inner === '') return [];

    const splitItems = (): string[] => inner.split(',').map((item) => this.stripQuotes(item.trim()));

    if (key === 'tags' && !value.includes('"') && !value.includes("'")) {
      return splitItems();
    }
    try {
      return JSON.parse(value) as unknown;
    } catch {
      return splitItems();
    }
  }

  /** Converts a scalar to a number or boolean when it looks like one, else an unquoted string. */
  private parseScalar(value: string): string | number | boolean {
    if (/^-?\d+(\.\d+)?$/.test(value)) {
      return parseFloat(value);
    }
    if (value === 'true' || value === 'false') {
      return value === 'true';
    }
    return this.stripQuotes(value);
  }

  /** Removes one pair of matching single or double quotes surrounding `value`. */
  private stripQuotes(value: string): string {
    if (value.length >= 2) {
      const first = value[0];
      const last = value[value.length - 1];
      if ((first === '"' || first === "'") && first === last) {
        return value.slice(1, -1);
      }
    }
    return value;
  }

  /** True for non-null, non-array objects. */
  private isPlainRecord(value: unknown): value is Record<string, unknown> {
    return typeof value === 'object' && value !== null && !Array.isArray(value);
  }
}