// tests/core/pipeline.test.ts
import { DataPipeline } from '../core/pipeline';
import { createLogger } from '../utils/logger';
import { IExtractor, ILoader, ITransformer, ICleaner, PipelineContext, DataSource } from '../core/interfaces';
import { jest } from '@jest/globals'; // Use Jest's ESM-compatible mocking utilities

// --- Mock Components ---
/**
 * Extractor test double.
 *
 * Resolves with exactly the data source handed to the constructor. `extract`
 * is a Jest mock function, so tests can assert how often it was invoked.
 */
class MockExtractor implements IExtractor<number> {
    constructor(private data: DataSource<number>) {}

    extract = jest.fn(async (_context: PipelineContext): Promise<DataSource<number>> => {
        // The pipeline context is accepted but not used by this double.
        return this.data;
    });
}

/**
 * Cleaner test double that keeps even numbers and rejects everything else.
 *
 * `clean` is a Jest mock function resolving to the input value when it is
 * even, or to `null` when it is not.
 */
class MockCleaner implements ICleaner<number> {
    clean = jest.fn(async (value: number, _context: PipelineContext): Promise<number | null> => {
        const isEven = value % 2 === 0;
        if (!isEven) {
            return null; // Odd numbers are discarded
        }
        return value;
    });
}

/**
 * Transformer test double.
 *
 * `transform` is a Jest mock function that multiplies the incoming number by
 * ten and resolves with it formatted as `Item-<result>` (e.g. 2 -> "Item-20").
 */
class MockTransformer implements ITransformer<number, string> {
    transform = jest.fn(async (value: number, _context: PipelineContext): Promise<string> => {
        const scaled = value * 10;
        return `Item-${scaled}`;
    });
}

/**
 * Loader test double that remembers every batch it is given.
 *
 * `loadBatch` is a Jest mock function; each call appends a shallow copy of the
 * received batch to `loadedData`, so later changes to the caller's array do
 * not alter what was recorded.
 */
class MockLoader implements ILoader<string> {
    // One entry per loadBatch call, in call order.
    loadedData: string[][] = [];

    loadBatch = jest.fn(async (items: string[], _context: PipelineContext): Promise<void> => {
        const snapshot = Array.from(items);
        this.loadedData.push(snapshot);
    });
}
// --- End Mock Components ---

/**
 * Exercises DataPipeline wired up with mock extract, clean, transform and
 * load stages.
 *
 * Each test starts from a freshly built pipeline configured with a batch size
 * of 2 and an extractor that yields the numbers 1..8. The mock cleaner keeps
 * only even numbers and the mock transformer maps `n` to `Item-${n * 10}`, so
 * a complete run is expected to hand the loader two batches of two items.
 */
describe('DataPipeline', () => {
    // Fixtures shared by the tests below.
    const inputData = [1, 2, 3, 4, 5, 6, 7, 8];
    // The items that survive the mock cleaner, in input order.
    const keptItems = [2, 4, 6, 8];
    // What the mock loader should have recorded after a complete run.
    const expectedBatches = [
        ['Item-20', 'Item-40'],
        ['Item-60', 'Item-80'],
    ];

    let pipeline: DataPipeline<number, string>;
    let extractor: MockExtractor;
    let cleaner: MockCleaner;
    let transformer: MockTransformer;
    let loader: MockLoader;

    // Asserts that the recorded batches match the expected ones position by position.
    const expectRecordedBatches = (): void => {
        expectedBatches.forEach((batch, position) => {
            expect(loader.loadedData[position]).toEqual(batch);
        });
    };

    beforeEach(() => {
        // Fresh stage doubles for every test, so call counts never leak between tests.
        extractor = new MockExtractor(inputData);
        cleaner = new MockCleaner();
        transformer = new MockTransformer();
        loader = new MockLoader();

        const logger = createLogger({ level: 'silent' });
        pipeline = new DataPipeline<number, string>('test-pipeline', { logger });

        // Wire the stages together; batchSize controls how many items reach the loader per call.
        pipeline
            .configure({ batchSize: 2 })
            .extract(extractor)
            .clean(cleaner)
            .transform(transformer)
            .load(loader);
    });

    it('should run the full ETL process correctly', async () => {
        await pipeline.run();

        // Extraction happens exactly once.
        expect(extractor.extract).toHaveBeenCalledTimes(1);

        // The cleaner sees every extracted item; spot-check the first and the last.
        expect(cleaner.clean).toHaveBeenCalledTimes(inputData.length);
        for (const boundaryItem of [1, 8]) {
            expect(cleaner.clean).toHaveBeenCalledWith(boundaryItem, expect.anything());
        }

        // Only the items the cleaner kept (2, 4, 6, 8) reach the transformer.
        expect(transformer.transform).toHaveBeenCalledTimes(4);
        for (const kept of keptItems) {
            expect(transformer.transform).toHaveBeenCalledWith(kept, expect.anything());
        }
        expect(transformer.transform).not.toHaveBeenCalledWith(1, expect.anything());

        // 4 transformed items with batchSize 2 means 2 loader calls.
        expect(loader.loadBatch).toHaveBeenCalledTimes(2);
        expect(loader.loadedData).toHaveLength(2);
        expectRecordedBatches();
    });

    it('should handle empty input data gracefully', async () => {
        // Swap in an extractor that produces nothing.
        const emptyExtractor = new MockExtractor([]);
        pipeline.extract(emptyExtractor);

        await pipeline.run();

        expect(emptyExtractor.extract).toHaveBeenCalledTimes(1);
        // With no items, none of the downstream stages should be touched.
        expect(cleaner.clean).not.toHaveBeenCalled();
        expect(transformer.transform).not.toHaveBeenCalled();
        expect(loader.loadBatch).not.toHaveBeenCalled();
    });

    it('should handle async iterable input', async () => {
        // Short real-timer pause standing in for an asynchronous fetch.
        const pause = (): Promise<void> => new Promise(resolve => setTimeout(resolve, 1));

        const streamInput = async function* () {
            for (const value of inputData) {
                await pause();
                yield value;
            }
        };

        const iterableExtractor = new MockExtractor(streamInput());
        pipeline.extract(iterableExtractor);

        await pipeline.run();

        // Counts and loaded batches must match the array-input case.
        expect(iterableExtractor.extract).toHaveBeenCalledTimes(1);
        expect(cleaner.clean).toHaveBeenCalledTimes(inputData.length);
        expect(transformer.transform).toHaveBeenCalledTimes(4);
        expect(loader.loadBatch).toHaveBeenCalledTimes(2);
        expectRecordedBatches();
    });

    // TODO: Add tests for error handling (e.g., transformer throws error)
    // TODO: Add tests for retry logic (mock component to fail then succeed)
    // TODO: Add tests for parallel processing once implemented
});