// tests/extractors/fileExtractor.test.ts
import * as path from 'path';
import * as fs from 'fs/promises';
import { createLogger } from '../utils/logger';
import { FileExtractor } from '../extractors/fileExtractor';
import { PipelineContext, FileSourceConfig } from '../core/interfaces';

// Scratch directory for fixture files, located next to this test file.
// It is created in beforeAll and removed again in afterAll.
const TEST_DIR = path.join(__dirname, 'test-files');
// Fixture read with format 'json' (one JSON document per line).
const JSONL_FILE = path.join(TEST_DIR, 'data.jsonl');
// Fixture read with format 'text' (plain lines).
const TEXT_FILE = path.join(TEST_DIR, 'data.txt');

// MARK: - beforeAll
// Writes the on-disk fixtures once, before the tests run:
//   - a JSON Lines file mixing valid records with a blank and a malformed line
//   - a plain text file that contains an empty line
beforeAll(async () => {
    await fs.mkdir(TEST_DIR, { recursive: true });

    // JSON Lines fixture
    const leadingRecords = [
        { id: 1, name: 'Alice' },
        { id: 2, name: 'Bob' },
    ].map((record) => JSON.stringify(record));
    const jsonlLines = [
        ...leadingRecords,
        '{"id": 3, "name": "Charlie"}', // hand-written, spaced differently from JSON.stringify output
        '', // blank line
        'invalid json', // not parseable as JSON
        JSON.stringify({ id: 4, name: 'David' }),
    ];
    await fs.writeFile(JSONL_FILE, jsonlLines.join('\n'));

    // Plain text fixture
    const textLines = ['Line 1', 'Line 2', '', 'Line 4'];
    await fs.writeFile(TEXT_FILE, textLines.join('\n'));
});

// Removes the fixture directory, and everything inside it, once the tests are done.
// MARK: - afterAll
afterAll(async () => {
    // `force` keeps the cleanup from failing if the directory no longer exists.
    const removeOptions = { recursive: true, force: true };
    await fs.rm(TEST_DIR, removeOptions);
});

// Pipeline context handed to every extract() call in this file.
// The logger is created at level 'silent' — presumably to keep test output
// quiet; confirm against createLogger.
const mockContext: PipelineContext = {
    logger: createLogger({ level: 'silent' }),
    runId: 'test-run-file',
};

describe('FileExtractor', () => {
    // JSON mode: every parseable line of the fixture should come back as an
    // object, while the blank line and the malformed line must not show up.
    it('should extract objects from a JSON Lines file', async () => {
        const jsonlConfig: FileSourceConfig = {
            type: 'file',
            path: JSONL_FILE,
            format: 'json',
        };
        const expectedRecords = [
            { id: 1, name: 'Alice' },
            { id: 2, name: 'Bob' },
            { id: 3, name: 'Charlie' },
            { id: 4, name: 'David' },
        ];

        const source = await new FileExtractor(jsonlConfig).extract(mockContext);

        // The extractor's element type is wider than `object`, so the stream is
        // narrowed with a cast before being drained.
        const parsed: object[] = [];
        for await (const record of source as AsyncIterable<object>) {
            parsed.push(record);
        }

        // Six lines were written; only the four valid records are expected.
        expect(parsed).toHaveLength(4);
        expect(parsed).toEqual(expectedRecords);
    });

    // Text mode: lines are expected back verbatim and in order, including the
    // empty third line of the fixture.
    it('should extract lines from a text file', async () => {
        const textConfig: FileSourceConfig = {
            type: 'file',
            path: TEXT_FILE,
            format: 'text',
        };
        const reader = new FileExtractor(textConfig);

        // The extractor's element type is wider than `string`, so the stream is
        // narrowed with a cast before being drained.
        const lineStream = (await reader.extract(mockContext)) as AsyncIterable<string>;
        const lines: string[] = [];
        for await (const line of lineStream) {
            lines.push(line);
        }

        expect(lines).toHaveLength(4);
        expect(lines).toEqual(['Line 1', 'Line 2', '', 'Line 4']);
    });

    // A missing input file must surface as a rejection, either from extract()
    // itself or from the first read of the returned stream.
    it('should throw an error for a non-existent file', async () => {
        const config: FileSourceConfig = {
            type: 'file',
            path: path.join(TEST_DIR, 'nonexistent.txt'),
            format: 'text',
        };
        const extractor = new FileExtractor(config);

        // Runs extraction and drains the stream, so the failure is captured in a
        // single promise no matter which of the two stages raises it.
        const extractAndDrain = async (): Promise<void> => {
            const dataSource = await extractor.extract(mockContext);
            for await (const _item of dataSource) {
                // Nothing to do: iterating is only needed to trigger the file read.
            }
        };

        // `.rejects` is documented to take a promise, so the helper is invoked
        // here rather than passed as a function.
        await expect(extractAndDrain()).rejects.toThrow(/ENOENT|Error reading file stream/);
    });
});