// src/extractors/mongoExtractor.ts
import { Collection as MongoCollection, Filter as MongoFilter, Document } from 'mongodb';
import { IExtractor, PipelineContext, DataSource, DatabaseSourceConfig, MongoConnection } from '../core/interfaces';
import { ComponentError } from '../core/errors';

/**
 * Extractor that reads documents from a single MongoDB collection and hands
 * them to the caller as an async iterable backed by a driver cursor.
 *
 * The constructor validates the supplied configuration and resolves the target
 * collection once; `extract` then issues a `find` with the configured filter.
 *
 * The caller's config object is never modified: the extractor keeps a private
 * shallow copy with `batchSize` resolved to a usable value.
 */
export class MongoExtractor<TOutput extends Document> implements IExtractor<TOutput> {
    /** Cursor batch size used when the config does not provide a truthy one. */
    private static readonly DEFAULT_BATCH_SIZE = 1000;

    /** Private shallow copy of the caller's config, with `batchSize` resolved. */
    private readonly config: DatabaseSourceConfig;
    /** Collection handle resolved from `connection.db` / `connection.collection`. */
    private readonly collection: MongoCollection<TOutput>;

    /**
     * @param config Source configuration. `type` must be `"mongodb"` and
     *   `connection` must carry a truthy `db` and a truthy `collection` name.
     *   `batchSize` is optional; a missing or falsy value (including `0`)
     *   falls back to the default of 1000.
     * @throws ComponentError when `type` is not `"mongodb"` or the connection
     *   object lacks `db` or `collection`.
     */
    constructor(config: DatabaseSourceConfig) {
        const mongoConn = config.connection as MongoConnection;
        if (config.type !== 'mongodb' || !mongoConn?.db || !mongoConn.collection) {
            throw new ComponentError('MongoExtractor requires config type "mongodb" and "connection" object with "db" (MongoDB Db instance) and "collection" (string name).');
        }

        // Copy instead of writing the default back onto the caller's object, so a
        // config that is shared between components (or frozen) is left untouched.
        // `||` is deliberate: it keeps the existing rule that 0 means "use default".
        this.config = {
            ...config,
            batchSize: config.batchSize || MongoExtractor.DEFAULT_BATCH_SIZE,
        };
        this.collection = mongoConn.db.collection<TOutput>(mongoConn.collection);
    }

    /**
     * Builds a `find` cursor over the configured collection and returns it.
     *
     * The filter is `config.query`, or `{}` (match everything) when no query is
     * configured. The method itself does not iterate the cursor; it is returned
     * as-is for the consumer to iterate.
     * NOTE(review): presumably the pipeline runner iterates the result; confirm
     * against the caller, and confirm who closes the cursor on early exit.
     *
     * @param context Pipeline context; only `context.logger.info` is used here.
     * @returns The MongoDB cursor, typed as an `AsyncIterable<TOutput>`.
     */
    async extract(context: PipelineContext): Promise<DataSource<TOutput>> {
        const filter = (this.config.query || {}) as MongoFilter<TOutput>;
        const collectionName = this.collection.collectionName;
        context.logger.info({ filter, collection: collectionName }, `Extracting data from MongoDB collection: ${collectionName}`);

        // `batchSize` was resolved in the constructor; the fallback only exists
        // to satisfy the optional field type.
        const batchSize = this.config.batchSize ?? MongoExtractor.DEFAULT_BATCH_SIZE;
        const cursor = this.collection.find(filter).batchSize(batchSize);

        return cursor as AsyncIterable<TOutput>;
    }
}