// src/extractors/fileExtractor.ts
import * as fs from 'fs'; // Use sync fs for createReadStream
import * as readline from 'readline';
import { IExtractor, PipelineContext, DataSource, FileSourceConfig } from '../core/interfaces';
import { ComponentError } from '../core/errors';

/**
 * Extractor that reads a file line by line and exposes the lines as an
 * async stream of records.
 *
 * The shape of each yielded record depends on `config.format`:
 * - `'json'`: every non-blank line is parsed with `JSON.parse` (JSON Lines);
 *   lines that fail to parse are logged and skipped.
 * - `'csv'`: every line is split on `,` and yielded as an array of strings
 *   (no quoting, escaping or header handling).
 * - any other value: the raw line is yielded unchanged.
 */
export class FileExtractor implements IExtractor<string | object> { // Output depends on format
    private readonly config: FileSourceConfig;

    /**
     * @param config Source settings; `path` and `format` must both be truthy.
     * @throws ComponentError when `path` or `format` is missing.
     */
    constructor(config: FileSourceConfig) {
        if (!config.path || !config.format) {
            throw new ComponentError('FileExtractor requires "path" and "format" in config.');
        }
        this.config = config;
    }

    /**
     * Logs the start of the extraction and returns a lazy, line-by-line stream
     * over the configured file. The file is only opened once the returned
     * stream is first iterated.
     *
     * @param context Pipeline context; only `context.logger` is used here.
     * @returns An async iterable of records (see the class description).
     */
    async extract(context: PipelineContext): Promise<DataSource<string | object>> {
        context.logger.info(`Extracting data from file: ${this.config.path} (format: ${this.config.format})`);

        // Use AsyncIterable for large files
        return this.extractAsStream(context);
    }

    /**
     * Async generator that performs the actual read.
     *
     * @param context Pipeline context; only `context.logger` is used here.
     * @throws ComponentError when the underlying stream fails (for example the
     *         file cannot be opened) or when iteration fails for another reason.
     */
    private async *extractAsStream(context: PipelineContext): AsyncIterable<string | object> {
        const { path: filePath, format } = this.config;
        const fileStream = fs.createReadStream(filePath, { encoding: this.config.encoding || 'utf-8' });
        const rl = readline.createInterface({
            input: fileStream,
            crlfDelay: Infinity // Handle different line endings
        });

        // Depending on the Node.js version, readline may not forward 'error'
        // events of its input to the async iterator. Record the first failure
        // ourselves and close the interface so the loop below always terminates
        // and the error is reported instead of becoming an unhandled 'error' event.
        const failure: { error?: Error } = {};
        fileStream.on('error', (err: Error) => {
            if (failure.error === undefined) {
                failure.error = err;
            }
            rl.close();
        });

        let lineNum = 0;
        try {
            for await (const line of rl) {
                lineNum++;

                let record: string | object;
                if (format === 'json') {
                    // Assume JSON Lines format (one JSON object per line)
                    if (!line.trim()) {
                        continue; // Avoid parsing empty lines
                    }
                    try {
                        record = JSON.parse(line);
                    } catch (error: unknown) {
                        context.logger.error({ err: error, line, lineNum, file: filePath }, `Skipping invalid JSON line`);
                        continue;
                    }
                } else if (format === 'csv') {
                    // Basic CSV parsing (split by comma), consider a proper CSV library for robustness
                    // TODO: Add header handling / object creation based on header
                    record = line.split(','); // VERY basic, use 'csv-parse' for real cases
                } else { // text format
                    record = line;
                }

                // Yield outside the JSON try/catch so that an error injected by
                // the consumer is never mistaken for an invalid JSON line.
                yield record;
            }

            // The loop may have ended only because the error listener closed rl.
            if (failure.error !== undefined) {
                throw failure.error;
            }
            context.logger.info(`Finished reading file ${filePath}, ${lineNum} lines processed.`);
        } catch (error: unknown) {
            const cause = error instanceof Error ? error : new Error(String(error));
            context.logger.error({ err: cause, file: filePath }, `Error reading file stream`);
            throw new ComponentError(`Error reading file ${filePath}`, 'FileExtractor', cause);
        } finally {
            // Runs on completion, on error and when the consumer stops early:
            // release both the readline interface and the file descriptor.
            rl.close();
            if (!fileStream.destroyed) {
                fileStream.destroy();
            }
        }
    }
}