/**
 * @file OCR Worker
 * @description OCR处理的Worker线程实现
 * @module modules/id-card/ocr-worker
 */

import { IDCardInfo, IDCardType } from './types';
import { IDCardTextParser } from './id-card-text-parser';
import { LoggerMessage } from 'tesseract.js';

/**
 * Input parameters for an OCR processing call.
 */
export interface OCRProcessInput {
  /** Image data encoded as Base64; passed as-is to the Tesseract worker's `recognize` call. */
  imageBase64: string;
  /** Optional settings for the Tesseract worker. */
  tessWorkerOptions?: {
    /** Language option for the Tesseract worker. */
    language?: string;
    /** Callback that receives log messages emitted by the Tesseract worker. */
    logger?: (message: LoggerMessage) => void;
  };
}

/**
 * Runs OCR on an ID-card image with Tesseract.js and parses the recognized text.
 *
 * A Tesseract worker is created for this call and is always terminated
 * afterwards, including when loading, recognition, or parsing fails. Failures
 * are logged and reported as an empty result instead of a rejected promise.
 *
 * @param input Image data plus optional Tesseract worker options.
 *   `tessWorkerOptions.language` selects the OCR language and falls back to
 *   `chi_sim` when omitted or empty; `tessWorkerOptions.logger` is forwarded to
 *   the worker.
 * @returns The parsed ID-card info together with the elapsed time of the whole
 *   call (worker setup, recognition, parsing and teardown) as measured with
 *   `performance.now()`. On failure `idCardInfo` is an empty object.
 */
export async function processOCRInWorker(
  input: OCRProcessInput
): Promise<{ idCardInfo: IDCardInfo; processingTime: number }> {
  const startTime = performance.now();

  try {
    // Load Tesseract.js lazily so it is only fetched when OCR is requested.
    const { createWorker } = await import('tesseract.js');

    // Honour the caller-supplied language; an empty string is not a usable
    // language code, so `||` (rather than `??`) falls back for it as well.
    const language = input.tessWorkerOptions?.language || 'chi_sim';

    // `await` works whether createWorker returns the worker directly or a
    // Promise that resolves to it.
    const worker = await createWorker({
      logger: input.tessWorkerOptions?.logger
    });

    let idCardInfo: IDCardInfo;
    try {
      // Initialize the worker for the requested language.
      await worker.load();
      await worker.loadLanguage(language);
      await worker.initialize(language);

      // Configure recognition parameters.
      await worker.setParameters({
        tessedit_char_whitelist: '0123456789X年月日壹贰叁肆伍陆柒捌玖拾民族汉满回维吾尔藏苗彝壮朝鲜侗瑶白土家哈尼哈萨克傣黎傈僳佤高山拉祜水东乡纳西景颇柯尔克孜达斡尔仫佬羌布朗撒拉毛南仡佬锡伯阿昌普米塔吉克怒乌孜别克俄罗斯鄂温克德昂保安裕固京塔塔尔独龙鄂伦春赫哲门巴珞巴基诺男女住址出生公民身份号码签发机关有效期省市区县乡镇街道号楼单元室ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz',
        tessedit_pageseg_mode: 7, // PSM_SINGLE_LINE
        preserve_interword_spaces: '1'
      });

      // Recognize the image and parse the ID-card fields out of the raw text.
      const { data } = await worker.recognize(input.imageBase64);
      idCardInfo = IDCardTextParser.parse(data.text);
    } finally {
      // Release the worker on every path so a failed run does not leak it.
      // If termination itself throws, the outer catch reports it.
      await worker.terminate();
    }

    const processingTime = performance.now() - startTime;

    return { idCardInfo, processingTime };
  } catch (error) {
    console.error('OCR处理错误:', error);
    return {
      idCardInfo: {} as IDCardInfo,
      processingTime: performance.now() - startTime
    };
  }
}


