import { promises as fs } from 'fs';
import path from 'path';
import crypto from 'crypto';
import type { EmbeddingModel, EmbeddingVector } from './types.js';

/**
 * Optional settings for {@link LocalEmbeddingModel}.
 *
 * Every field may be omitted; the model's constructor substitutes a default
 * for any field that is missing or falsy.
 */
export interface LocalEmbeddingConfig {
  /** Model identifier; also mixed into the cache key. Defaults to `'all-MiniLM-L6-v2'`. */
  modelName?: string;
  /** Number of components in each embedding vector. Defaults to `384`. */
  dimension?: number;
  /** Directory where cached embeddings are stored. Defaults to `'.atlas/rag/embeddings-cache'`. */
  cachePath?: string;
}

/**
 * Embedding model that runs locally and memoizes its results on disk.
 *
 * Vectors are currently produced by a deterministic SHA-256-based placeholder
 * (see `generateEmbedding`), not by a real neural model. Each vector is cached
 * as a JSON array of numbers inside `cachePath`, in a file named after the
 * SHA-256 of `"<modelName>:<text>"`.
 */
export class LocalEmbeddingModel implements EmbeddingModel {
  readonly modelName: string;
  readonly dimension: number;
  private readonly cachePath: string;
  private initialized = false;

  /**
   * @param config Optional overrides. Missing or falsy fields (including `0`
   *   and `''`) fall back to the built-in defaults.
   */
  constructor(config: LocalEmbeddingConfig = {}) {
    // `||` is intentional: an empty model name or a zero dimension is not usable.
    this.modelName = config.modelName || 'all-MiniLM-L6-v2';
    this.dimension = config.dimension || 384;
    this.cachePath = config.cachePath || '.atlas/rag/embeddings-cache';
  }

  /**
   * Makes sure the cache directory exists. Safe to call repeatedly; once it has
   * succeeded, later calls return immediately.
   *
   * @throws If the directory is missing and cannot be created.
   */
  async initialize(): Promise<void> {
    if (this.initialized) return;

    // Ensure cache directory exists
    try {
      await fs.access(this.cachePath);
    } catch {
      await fs.mkdir(this.cachePath, { recursive: true });
    }

    this.initialized = true;
  }

  /**
   * Embeds a batch of texts.
   *
   * @param texts Texts to embed.
   * @returns One vector per input text, in input order; `[]` for an empty or
   *   missing input.
   */
  async embed(texts: string[]): Promise<EmbeddingVector[]> {
    if (!texts || texts.length === 0) {
      return [];
    }

    const embeddings: EmbeddingVector[] = [];

    // Processed one at a time so that a text repeated later in the batch can be
    // served from the cache entry written for its first occurrence.
    for (const text of texts) {
      embeddings.push(await this.embedSingle(text));
    }

    return embeddings;
  }

  /**
   * Embeds one text, using the on-disk cache when a valid entry exists.
   *
   * A cache entry is only trusted when it is a JSON array of exactly
   * `dimension` finite numbers. Anything else (unreadable file, malformed
   * JSON, wrong shape, entry written with a different dimension) is treated
   * as a miss: the embedding is regenerated and the entry overwritten.
   *
   * @param text Text to embed.
   * @returns A vector with `dimension` components.
   */
  async embedSingle(text: string): Promise<EmbeddingVector> {
    await this.initialize();

    const cacheFile = path.join(this.cachePath, `${this.getCacheKey(text)}.json`);

    const cached = await this.readCachedEmbedding(cacheFile);
    if (cached !== undefined) {
      return cached;
    }

    const embedding = await this.generateEmbedding(text);

    try {
      await fs.writeFile(cacheFile, JSON.stringify(Array.from(embedding)));
    } catch {
      // Caching is best-effort: a failed write must not fail the embedding call.
    }

    return embedding;
  }

  /**
   * Computes the cosine similarity of two vectors.
   *
   * @returns A value in `[-1, 1]` (up to floating-point rounding), or `0` when
   *   either vector has zero magnitude.
   * @throws Error if the vectors differ in length.
   */
  cosineSimilarity(a: EmbeddingVector, b: EmbeddingVector): number {
    if (a.length !== b.length) {
      throw new Error('Vectors must have the same dimension');
    }

    let dotProduct = 0;
    let normA = 0;
    let normB = 0;

    for (let i = 0; i < a.length; i++) {
      dotProduct += a[i] * b[i];
      normA += a[i] * a[i];
      normB += b[i] * b[i];
    }

    normA = Math.sqrt(normA);
    normB = Math.sqrt(normB);

    // Avoid dividing by zero for all-zero vectors.
    if (normA === 0 || normB === 0) {
      return 0;
    }

    return dotProduct / (normA * normB);
  }

  /**
   * Loads and validates a cached embedding.
   *
   * @param cacheFile Full path of the cache entry.
   * @returns The cached vector, or `undefined` when the entry is absent,
   *   unreadable, or does not hold exactly `dimension` finite numbers.
   */
  private async readCachedEmbedding(cacheFile: string): Promise<EmbeddingVector | undefined> {
    let parsed: unknown;
    try {
      parsed = JSON.parse(await fs.readFile(cacheFile, 'utf-8'));
    } catch {
      // Missing file, I/O error or malformed JSON: all count as a cache miss.
      return undefined;
    }

    // Without this check a payload such as `3` would be handed to the
    // Float32Array constructor as a length, yielding a bogus zero vector.
    if (!Array.isArray(parsed) || parsed.length !== this.dimension) {
      return undefined;
    }
    if (!parsed.every((value: unknown) => typeof value === 'number' && Number.isFinite(value))) {
      return undefined;
    }

    return new Float32Array(parsed);
  }

  /**
   * Produces a placeholder embedding derived from the SHA-256 of `text`.
   *
   * The same text always yields the same vector. The result is scaled to unit
   * length unless its norm is zero.
   */
  private async generateEmbedding(text: string): Promise<EmbeddingVector> {
    // For now, we'll create a deterministic mock embedding based on the text
    // In a real implementation, this would use a local embedding model
    const embedding = new Float32Array(this.dimension);

    const hash = crypto.createHash('sha256').update(text).digest();

    for (let i = 0; i < this.dimension; i++) {
      // Digest bytes are reused cyclically; each byte 0..255 maps to [-1, 1].
      const byte = hash[i % hash.length];
      embedding[i] = (byte / 127.5) - 1;
    }

    // Normalize the embedding
    let norm = 0;
    for (let i = 0; i < embedding.length; i++) {
      norm += embedding[i] * embedding[i];
    }
    norm = Math.sqrt(norm);

    if (norm > 0) {
      for (let i = 0; i < embedding.length; i++) {
        embedding[i] /= norm;
      }
    }

    return embedding;
  }

  /**
   * Builds the cache file stem for `text`: the hex SHA-256 of
   * `"<modelName>:<text>"`.
   */
  private getCacheKey(text: string): string {
    return crypto
      .createHash('sha256')
      .update(`${this.modelName}:${text}`)
      .digest('hex');
  }
}