// src/core/pipeline.ts
import { randomUUID } from 'node:crypto';

import pLimit from 'p-limit'; // Import p-limit
import { Logger } from 'pino';

import {
  IExtractor,
  ICleaner,
  ITransformer,
  ILoader,
  DataSource,
  PipelineContext,
  IFlowLabNode,
  IFlowLabRegistry,
  BaseSourceConfig,
  BaseTargetConfig,
  SourceConfig,
  TargetConfig,
  FailedItemInfo,
  ProcessingErrorStrategy,
  FileTargetConfig,
} from './interfaces';
import { createLogger } from '../utils/logger';
import { performRetry } from '../utils/retry';
// import { v4 as uuidv4 } from 'uuid'; // Use uuid for run IDs
import { loadPipelineFromConfig as loadConfig } from '../config/loader';
import { FileDlqLoader } from '../loaders/fileDlqLoader'; 
import { createLoader as createComponentLoader } from './registry';


/**
 * A single intermediate pipeline stage: either a cleaner (keeps the item type)
 * or a transformer (maps `TInput` to `TOutput`).
 */
type PipelineStep<TInput, TOutput> =
  | ICleaner<TInput>
  | ITransformer<TInput, TOutput>;

// Type guards
/**
 * Type guard: reports whether `step` exposes a callable `clean` member.
 *
 * Returns `false` (instead of throwing) for `null`/`undefined`, so it is safe
 * to call on values of unknown origin.
 *
 * @param step - Candidate pipeline step.
 * @returns `true` when `step.clean` is a function.
 */
function isCleaner<T>(step: unknown): step is ICleaner<T> {
  // `== null` matches both null and undefined; property access on either would throw.
  if (step == null) {
    return false;
  }
  return typeof (step as { clean?: unknown }).clean === 'function';
}

/**
 * Type guard: reports whether `step` exposes a callable `transform` member.
 *
 * Returns `false` (instead of throwing) for `null`/`undefined`, so it is safe
 * to call on values of unknown origin.
 *
 * @param step - Candidate pipeline step.
 * @returns `true` when `step.transform` is a function.
 */
function isTransformer<TInput, TOutput>(step: unknown): step is ITransformer<TInput, TOutput> {
  // `== null` matches both null and undefined; property access on either would throw.
  if (step == null) {
    return false;
  }
  return typeof (step as { transform?: unknown }).transform === 'function';
}

/**
 * Type guard: a data source counts as async-iterable when it carries a
 * callable `Symbol.asyncIterator` member.
 *
 * @param dataSource - Value returned by an extractor.
 * @returns `true` when `dataSource[Symbol.asyncIterator]` is a function.
 */
function isAsyncIterable<T>(dataSource: DataSource<T>): dataSource is AsyncIterable<T> {
  const asyncIteratorMember = (dataSource as any)[Symbol.asyncIterator];
  return typeof asyncIteratorMember === 'function';
}


// MARK: - DataPipeline
/**
 * Fluent builder and runner for an extract -> clean/transform -> load pipeline.
 *
 * Usage: chain `extract()`, any number of `clean()` / `transform()` steps and
 * `load()`, optionally `configure()`, then call `run()`.
 *
 * Notes:
 * - Items are processed one at a time, in source order. The `concurrency`
 *   option is stored by `configure()` but is not used by `run()`.
 * - A step that resolves to `null`/`undefined` drops the item; remaining steps
 *   are skipped and the item is not loaded.
 * - A step that throws is handled according to `itemProcessingErrorStrategy`.
 */
export class DataPipeline<TStart, TEnd> implements IFlowLabNode {
  /** Caller-supplied identifier, or `pipeline-<uuid>` when none was given. */
  readonly id: string;
  /** FlowLab node type tag. */
  readonly type = '@flowlab/data:pipeline';

  private extractorInstance: IExtractor<TStart> | null = null;
  private steps: PipelineStep<any, any>[] = [];
  private loaderInstance: ILoader<TEnd> | null = null;
  private config: {
    /** Number of processed items accumulated before the loader is invoked. */
    batchSize: number;
    /** Retry count handed to `performRetry` for extractor and loader calls. */
    retries: number;
    /** Retry delay handed to `performRetry` for extractor and loader calls. */
    retryDelay: number;
    logger: Logger;
    /** Stored only; `run()` currently processes items sequentially. */
    concurrency: number;
    /** What to do when a step throws for an item. */
    itemProcessingErrorStrategy: ProcessingErrorStrategy;
    /** Loader that receives failed items when the strategy is `'dlq'`. */
    dlqLoaderInstance?: ILoader<FailedItemInfo>;
  };

  // Placeholder for tracking the current item type through the builder chain.
  // The builder uses `any` between steps; this marker is only ever reset to null.
  private currentOutputType: any;

  /**
   * @param id - Optional pipeline id; a random `pipeline-<uuid>` is generated when falsy.
   * @param options - Optional settings; `logger` overrides the default from `createLogger()`.
   */
  constructor(id?: string, options?: { logger?: Logger }) {
    this.id = id || `pipeline-${randomUUID()}`;
    this.config = {
      batchSize: 100,
      retries: 3,
      retryDelay: 1000,
      logger: options?.logger || createLogger(),
      concurrency: 1,
      itemProcessingErrorStrategy: 'fail',
      dlqLoaderInstance: undefined,
    };
    this.currentOutputType = null;
  }

  /**
   * MARK: - Configure
   * Merge `options` into the pipeline configuration and optionally set up a
   * dead-letter-queue (DLQ) loader from `dlqTarget`.
   *
   * DLQ rules:
   * - Only `dlqTarget.type === 'file-dlq'` is supported; other types are logged
   *   and ignored.
   * - When a DLQ loader is created and this call does not specify
   *   `itemProcessingErrorStrategy`, the strategy becomes `'dlq'`.
   * - The `'dlq'` strategy is never left active without a usable DLQ loader;
   *   it is downgraded to `'fail'` with a warning.
   *
   * @param options - Partial configuration plus an optional `dlqTarget`.
   * @returns This pipeline, for chaining.
   */
  configure(options: Partial<typeof this.config> & { dlqTarget?: TargetConfig }): this {
    const { dlqTarget, ...pipelineOptions } = options;
    this.config = { ...this.config, ...pipelineOptions };

    const logger = this.config.logger;
    const requestedStrategy = pipelineOptions.itemProcessingErrorStrategy;

    if (dlqTarget) {
      let dlqLoaderCreated = false;
      try {
        // Only a simple file-based DLQ loader is supported for now; other
        // types could later be created dynamically through the registry.
        if (dlqTarget.type === 'file-dlq') {
          this.config.dlqLoaderInstance = new FileDlqLoader(dlqTarget as FileTargetConfig);
          dlqLoaderCreated = true;
          logger.info(`Configured File DLQ Loader target: ${dlqTarget.path}`);
        } else {
          logger.warn(`Unsupported dlqTarget type: ${dlqTarget.type}. Only 'file-dlq' supported currently.`);
        }
      } catch (error: unknown) {
        logger.error({ err: error }, "Failed to instantiate DLQ loader. DLQ strategy disabled.");
      }

      if (dlqLoaderCreated) {
        if (!requestedStrategy) {
          // A DLQ target without an explicit strategy implies the caller wants to use it.
          this.config.itemProcessingErrorStrategy = 'dlq';
        } else if (requestedStrategy !== 'dlq') {
          logger.warn(`DLQ target configured, but itemProcessingErrorStrategy is set to '${requestedStrategy}'. DLQ will not be used.`);
        }
      } else if (this.config.itemProcessingErrorStrategy === 'dlq') {
        // The requested target could not be set up, so 'dlq' cannot be honoured.
        this.config.itemProcessingErrorStrategy = 'fail';
        logger.warn("DLQ configured but loader couldn't be created. Defaulting item error strategy to 'fail'.");
      }
    } else if (this.config.itemProcessingErrorStrategy === 'dlq' && !this.config.dlqLoaderInstance) {
      // No target in this call and no loader from an earlier call (or passed directly).
      this.config.itemProcessingErrorStrategy = 'fail';
      logger.warn("itemProcessingErrorStrategy set to 'dlq' but no dlqTarget provided. Defaulting to 'fail'.");
    }

    return this;
  }

  /**
   * MARK: - handleProcessingError
   * Apply the configured `itemProcessingErrorStrategy` to one failed item.
   *
   * @param error - The error raised by the step.
   * @param item - The item as originally extracted (logged and, for `'dlq'`, stored).
   * @param stepName - Name of the failing step, used in logs and the DLQ record.
   * @param context - Context of the current run.
   * @returns `true` if the run should continue with the next item, `false` if it should fail.
   */
  private async handleProcessingError(
    error: Error,
    item: any,
    stepName: string,
    context: PipelineContext
  ): Promise<boolean> {
    context.logger.error({ err: error, item, step: stepName }, `Error during pipeline step: ${stepName}`);

    const strategy = this.config.itemProcessingErrorStrategy;
    const dlqLoader = this.config.dlqLoaderInstance;

    if (strategy === 'dlq' && dlqLoader) {
      const failedInfo: FailedItemInfo = {
        pipelineId: this.id,
        runId: context.runId,
        timestamp: new Date().toISOString(),
        step: stepName,
        error: {
          message: error.message,
          stack: error.stack,
          name: error.name,
        },
        originalItem: item, // Stored as-is; large items make large DLQ records.
      };
      try {
        // The loader API is batch-based, so wrap the single record in an array.
        await dlqLoader.loadBatch([failedInfo], context);
        context.logger.warn(`Item sent to DLQ due to processing error in step ${stepName}.`);
        return true;
      } catch (dlqError: unknown) {
        context.logger.error({ err: dlqError }, `Failed to send item to DLQ! Falling back to 'fail' strategy for this run.`);
        return false;
      }
    }

    if (strategy === 'log') {
      context.logger.warn(`Skipping item due to processing error in step ${stepName} (strategy: log).`);
      return true;
    }

    if (strategy === 'skip') {
      context.logger.info(`Skipping item due to processing error in step ${stepName} (strategy: skip).`);
      return true;
    }

    // 'fail', or 'dlq' without a loader instance.
    context.logger.error(`Pipeline failed due to processing error in step ${stepName} (strategy: fail).`);
    return false;
  }

  /**
   * MARK: - Extractor
   * Set the data extractor. Replaces any previously set extractor (with a warning).
   *
   * @param extractor - Source of items for the pipeline.
   * @returns This pipeline, re-typed so both start and end types are `TOutput`.
   */
  extract<TOutput>(extractor: IExtractor<TOutput>): DataPipeline<TOutput, TOutput> {
    if (this.extractorInstance) {
      this.config.logger.warn({ pipelineId: this.id }, 'Extractor is already set. Overwriting.');
    }
    // The builder re-types `this`, so the stored instance needs a cast.
    this.extractorInstance = extractor as unknown as IExtractor<TStart>;
    this.currentOutputType = null;
    return this as unknown as DataPipeline<TOutput, TOutput>;
  }

  /**
   * MARK: - Cleaning
   * Append a cleaning step. The pipeline's type parameters are unchanged.
   *
   * @param cleaner - Step whose `clean` method is applied to each item.
   * @returns This pipeline, for chaining.
   */
  clean<TCurrent>(cleaner: ICleaner<TCurrent>): DataPipeline<TStart, TEnd> {
    // No compile-time compatibility check against the previous step is performed here.
    this.steps.push(cleaner);
    return this;
  }

  /**
   * MARK: - Transformation
   * Append a transformation step. The end type becomes `any` until `load()`
   * fixes it, because the builder does not chain generics between steps.
   *
   * @param transformer - Step whose `transform` method is applied to each item.
   * @returns This pipeline, typed as `DataPipeline<TStart, any>`.
   */
  transform<TCurrent, TNext>(transformer: ITransformer<TCurrent, TNext>): DataPipeline<TStart, any> {
    this.steps.push(transformer);
    this.currentOutputType = null;
    return this as unknown as DataPipeline<TStart, any>;
  }

  /**
   * MARK: - Loading
   * Set the data loader. Replaces any previously set loader (with a warning)
   * and fixes the pipeline's end type to `TCurrent`.
   *
   * @param loader - Destination that receives batches of processed items.
   * @returns This pipeline, re-typed with `TCurrent` as the end type.
   */
  load<TCurrent>(loader: ILoader<TCurrent>): DataPipeline<TStart, TCurrent> {
    if (this.loaderInstance) {
      this.config.logger.warn({ pipelineId: this.id }, 'Loader is already set. Overwriting.');
    }
    // The builder re-types `this`, so the stored instance needs a cast.
    this.loaderInstance = loader as unknown as ILoader<TEnd>;
    return this as unknown as DataPipeline<TStart, TCurrent>;
  }

  /**
   * MARK: - Execution
   * Execute the pipeline: extract, run every item through the steps in order,
   * and load the results in batches of `batchSize`.
   *
   * Error handling:
   * - A step that throws is routed through `itemProcessingErrorStrategy`.
   *   If the strategy says to continue, the item is dropped and counted as an
   *   error; otherwise the original error is rethrown and the run fails.
   * - Extractor and loader calls go through `performRetry`; if that rejects,
   *   the run fails. Loader failures are never treated as item errors.
   *
   * @throws If no extractor or loader is set, or on any fatal error described above.
   */
  async run(): Promise<void> {
    const runId = randomUUID();
    const context: PipelineContext = {
      logger: this.config.logger.child({ pipelineId: this.id, runId }),
      runId,
    };

    context.logger.info('Pipeline run started.');

    const extractor = this.extractorInstance;
    const loader = this.loaderInstance;
    if (!extractor) throw new Error('Extractor is not defined for pipeline.');
    if (!loader) throw new Error('Loader is not defined for pipeline.');

    let processedCount = 0;
    let loadedCount = 0;
    let errorCount = 0;
    let currentBatch: TEnd[] = [];

    // Runs one item through every step. Resolves to null when the item was
    // filtered out or failed under a strategy that lets the run continue.
    const processItem = async (item: TStart): Promise<TEnd | null | undefined> => {
      let currentData: any = item;

      for (const step of this.steps) {
        if (currentData === null || currentData === undefined) {
          return null; // Filtered out by an earlier step.
        }

        try {
          if (isCleaner(step)) {
            currentData = await step.clean(currentData, context);
          } else if (isTransformer(step)) {
            currentData = await step.transform(currentData, context);
          }
        } catch (caught: unknown) {
          errorCount++;
          const stepError = caught instanceof Error ? caught : new Error(String(caught));
          const stepName = step.constructor?.name || 'UnknownStep';
          const shouldContinue = await this.handleProcessingError(stepError, item, stepName, context);
          if (!shouldContinue) {
            throw caught; // Fatal: surfaces through the outer catch below.
          }
          return null;
        }
      }

      return currentData as TEnd;
    };

    // Sends the pending batch to the loader (with retries) and resets it.
    const flushBatch = async (): Promise<void> => {
      if (currentBatch.length === 0) {
        return;
      }
      const batch = currentBatch;
      context.logger.debug(`Loading batch of ${batch.length} items.`);
      await performRetry(
        () => loader.loadBatch(batch, context),
        this.config.retries,
        this.config.retryDelay,
        context.logger,
        "Loader"
      );
      loadedCount += batch.length;
      currentBatch = [];
    };

    // Shared per-item path for both array and async-iterable sources.
    const handleItem = async (item: TStart): Promise<void> => {
      processedCount++;
      const result = await processItem(item);
      if (result !== null && result !== undefined) {
        currentBatch.push(result);
        if (currentBatch.length >= this.config.batchSize) {
          await flushBatch();
        }
      }
      if (processedCount % 1000 === 0) {
        context.logger.info(`Processed ${processedCount} items...`);
      }
    };

    try {
      const dataSource = await performRetry(
        () => extractor.extract(context),
        this.config.retries,
        this.config.retryDelay,
        context.logger,
        "Extractor"
      );

      if (isAsyncIterable(dataSource)) {
        context.logger.info('Processing data as AsyncIterable (stream/batched extraction).');
        for await (const item of dataSource) {
          await handleItem(item);
        }
      } else {
        context.logger.info(`Processing data as an Array (${dataSource.length} items).`);
        for (const item of dataSource) {
          await handleItem(item);
        }
      }

      // Load whatever is left in the final, partially filled batch.
      await flushBatch();

      context.logger.info(
        `Pipeline run finished. Processed: ${processedCount}, Loaded: ${loadedCount}, Errors: ${errorCount}`
      );
    } catch (error: unknown) {
      context.logger.error({ err: error, runId }, 'Pipeline run failed.');
      throw error;
    }
  }

  // --- IFlowLabNode Implementation ---

  /**
   * MARK: - execute
   * FlowLab entry point. Logs `payload` and `context`, then runs the pipeline
   * as configured; neither argument currently influences the run.
   */
  async execute(payload?: any, context?: any): Promise<void> {
    this.config.logger.info({ payload, context }, `Executing pipeline node ${this.id} via FlowLab.`);
    await this.run();
  }

  /**
   * MARK: - fromConfig
   * Placeholder for building a pipeline from a configuration object.
   * Not implemented: always throws.
   *
   * @throws Always, with "fromConfig not yet implemented. Requires component registry."
   */
  static fromConfig(config: /* PipelineConfig from interfaces.ts */ any, componentRegistry: any): DataPipeline<any, any> {
    // TODO: Implement logic to:
    // 1. Parse config
    // 2. Instantiate Extractor based on config.source and registry
    // 3. Instantiate Steps based on config.steps and registry
    // 4. Instantiate Loader based on config.target and registry
    // 5. Configure pipeline options
    throw new Error("fromConfig not yet implemented. Requires component registry.");
  }

  /**
   * MARK: - register
   * Register this pipeline as a node in the given FlowLab registry.
   */
  register(registry: IFlowLabRegistry): void {
    registry.register(this);
    this.config.logger.info(`Registered pipeline node ${this.id} with FlowLab.`);
  }
}