/**
 * Parsed PDF output. This is the value resolved by the parsing entry points
 * declared in this file (`PdfParse`, `stream`, `aggressive`, `workers`,
 * `processes`, `markdown`, `SmartPDFParser.parse`).
 */
export interface Result {
	/** NOTE(review): presumably the total number of pages in the document — confirm against the implementation. */
	numpages: number;
	/** NOTE(review): presumably the number of pages actually rendered — confirm against the implementation. */
	numrender: number;
	/** Untyped document info. NOTE(review): presumably the PDF.js info dictionary — confirm. */
	info: any;
	/** Untyped document metadata. NOTE(review): presumably the PDF.js metadata object — confirm. */
	metadata: any;
	/** Extracted text (Markdown when produced by `markdown()`). */
	text: string;
	/** Optional version string. NOTE(review): which version this reports is not visible here — confirm. */
	version?: string;
	/**
	 * Optional parsing diagnostics. `SmartPDFParser.parse` documents that its
	 * result includes `_meta` with method and performance info; because the
	 * property is optional, read it with `result._meta?.…`.
	 */
	_meta?: {
		/** Name of the parsing method that was used. */
		method?: string;
		/** Parse duration. NOTE(review): unit is presumably milliseconds — confirm. */
		duration?: number;
		/** Untyped analysis payload; shape is not visible in this declaration. */
		analysis?: any;
		/** NOTE(review): presumably set when the tiny-PDF fast path was taken — confirm. */
		fastPath?: boolean;
		/** NOTE(review): presumably set when the method decision came from the cache — confirm. */
		cached?: boolean;
		/** NOTE(review): presumably set when a common-scenario match decided the method — confirm. */
		commonScenario?: boolean;
	};
}

/**
 * Base parsing options. Extended by `StreamOptions`, `WorkersOptions`,
 * `ProcessesOptions` and `MarkdownOptions`.
 *
 * Every property is optional and additionally accepts an explicit `undefined`.
 */
export interface Options {
	/**
	 * Custom page render function (for single-thread parsers).
	 * May return the text synchronously or as a Promise.
	 * @param pageData - PDF.js page object
	 * @returns Extracted text from the page
	 */
	pagerender?: ((pageData: any) => string | Promise<string>) | undefined;

	/**
	 * Path to an external module exporting a custom render function.
	 * Used by workers/processes to load custom render logic without eval().
	 * Module must export: module.exports = function(pageData) { ... }
	 * @example './my-custom-render.js'
	 */
	pagerenderModule?: string | undefined;

	/** NOTE(review): presumably the maximum number of pages to parse — confirm against the implementation. */
	max?: number | undefined;
	/**
	 * Logging verbosity; only `0`, `1` or `5` are accepted.
	 * NOTE(review): these look like the PDF.js `VerbosityLevel` values
	 * (errors / warnings / infos) — confirm.
	 */
	verbosityLevel?: 0 | 1 | 5 | undefined;
	/** NOTE(review): presumably toggles parallel page processing — confirm against the implementation. */
	parallelizePages?: boolean | undefined;
	/** NOTE(review): presumably the number of pages handled per batch — confirm; the default is not visible here. */
	batchSize?: number | undefined;

	/**
	 * Password for encrypted PDFs. Forwarded as-is to PDF.js `getDocument({ password })`.
	 * Ignored when the document is not encrypted.
	 */
	password?: string | undefined;
}

/**
 * Constructor options for `SmartPDFParser`. All properties are optional;
 * the defaults quoted below come from the original declaration comments.
 */
export interface SmartParserOptions {
	/**
	 * Force a specific parsing method instead of auto-selection.
	 * `null` is also accepted. NOTE(review): presumably `null` means
	 * "auto-select", same as omitting the property — confirm.
	 */
	forceMethod?: 'sequential' | 'batch' | 'stream' | 'aggressive' | 'processes' | 'workers' | null;

	/** Maximum memory usage in bytes (default: 70% of total RAM) */
	maxMemoryUsage?: number;

	/** Number of available CPUs (auto-detected by default) */
	availableCPUs?: number;

	/** Enable fast-path optimization for tiny PDFs (default: true) */
	enableFastPath?: boolean;

	/** Enable decision caching (default: true) */
	enableCache?: boolean;

	/** Oversaturation factor for worker/process count (default: 1.5) */
	oversaturationFactor?: number;

	/** Hard limit on maximum workers/processes (default: null = auto) */
	maxWorkerLimit?: number | null;
}

/**
 * Statistics object returned by `SmartPDFParser.getStats()`
 * (documented there as in-memory for the current session).
 */
export interface SmartParserStats {
	/** Count of parses. NOTE(review): whether failed parses are included is not visible here — confirm. */
	totalParses: number;
	/** Per-method usage counters; the keys mirror the `forceMethod` choices of `SmartParserOptions`. */
	methodUsage: {
		sequential: number;
		batch: number;
		stream: number;
		aggressive: number;
		processes: number;
		workers: number;
	};
	/** Average times keyed by string. NOTE(review): keys are presumably method names and values milliseconds — confirm. */
	averageTimes: Record<string, number>;
	/** Count of parses that failed. */
	failedParses: number;
	/** NOTE(review): presumably how often the fast path was taken — confirm. */
	fastPathHits: number;
	/** NOTE(review): presumably how often a cached decision was reused — confirm. */
	cacheHits: number;
	/** NOTE(review): presumably how often the full decision tree was walked — confirm. */
	treeNavigations: number;
	/** Pre-formatted string; the exact format is not visible in this declaration. */
	optimizationRate: string;
	/** Pre-formatted string; the exact format is not visible in this declaration. */
	averageOverhead: string;
}

/**
 * Options accepted by `stream()` and `aggressive()`: the base `Options`
 * plus chunking controls.
 */
export interface StreamOptions extends Options {
	/** NOTE(review): presumably the number of pages per chunk — confirm; the default is not visible here. */
	chunkSize?: number | undefined;
	/**
	 * Progress callback receiving a `ChunkProgress` snapshot.
	 * NOTE(review): presumably invoked once per finished chunk — confirm.
	 */
	onChunkComplete?: ((progress: ChunkProgress) => void) | undefined;
}

/**
 * Options accepted by `workers()`: the base `Options` plus worker-thread controls.
 */
export interface WorkersOptions extends Options {
	/** NOTE(review): presumably the number of pages assigned to each worker chunk — confirm. */
	chunkSize?: number | undefined;
	/** NOTE(review): presumably an upper bound on concurrently running worker threads — confirm. */
	maxWorkers?: number | undefined;
	/** Batch size for processing pages within each worker (default: 10) */
	batchSize?: number | undefined;
	/** Progress callback receiving a `WorkerProgress` snapshot. */
	onProgress?: ((progress: WorkerProgress) => void) | undefined;
}

/**
 * Options accepted by `processes()`: the base `Options` plus child-process controls.
 */
export interface ProcessesOptions extends Options {
	/** NOTE(review): presumably the number of pages assigned to each process chunk — confirm. */
	chunkSize?: number | undefined;
	/** NOTE(review): presumably an upper bound on concurrently running child processes — confirm. */
	maxProcesses?: number | undefined;
	/** Timeout for each child process in milliseconds (default: 300000) */
	processTimeout?: number | undefined;
	/** Batch size for processing pages within each process (default: 10) */
	batchSize?: number | undefined;
	/** Progress callback receiving a `ProcessProgress` snapshot. */
	onProgress?: ((progress: ProcessProgress) => void) | undefined;
}

/**
 * Progress snapshot passed to `StreamOptions.onChunkComplete`.
 */
export interface ChunkProgress {
	/** Pages processed so far. */
	processedPages: number;
	/** Total pages to process. */
	totalPages: number;
	/** Pre-formatted progress string. NOTE(review): presumably a percentage — confirm the exact format. */
	progress: string;
	/** Current chunk number. NOTE(review): whether this is 0- or 1-based is not visible here — confirm. */
	currentChunk: number;
	/** Total number of chunks. */
	totalChunks: number;
}

/**
 * Progress snapshot passed to `WorkersOptions.onProgress`.
 */
export interface WorkerProgress {
	/** Chunks completed so far. */
	completedChunks: number;
	/** Total number of chunks. */
	totalChunks: number;
	/** Pre-formatted progress string. NOTE(review): presumably a percentage — confirm the exact format. */
	progress: string;
}

/**
 * Progress snapshot passed to `ProcessesOptions.onProgress`.
 */
export interface ProcessProgress {
	/** Chunks completed so far. */
	completedChunks: number;
	/** Total number of chunks. */
	totalChunks: number;
	/** Pre-formatted progress string. NOTE(review): presumably a percentage — confirm the exact format. */
	progress: string;
}

/**
 * Default `Options` object exported by the library.
 * The concrete default values are not visible in this declaration.
 */
export const DEFAULT_OPTIONS: Options;

/**
 * Parse a PDF with a streaming/chunking approach for large files.
 * Reduces memory pressure by processing the document in chunks.
 * Best for 500-1000 page PDFs.
 * @param dataBuffer - PDF file buffer
 * @param options - Streaming options (`chunkSize`, `onChunkComplete`, plus the base `Options`)
 * @returns Promise resolving to the parsed PDF data
 */
export function stream(dataBuffer: Buffer, options?: StreamOptions): Promise<Result>;

/**
 * Parse a PDF with aggressive parallelization for maximum speed.
 * Best for very large PDFs (1000+ pages).
 * All batches within a chunk run in parallel (single-thread).
 * @param dataBuffer - PDF file buffer
 * @param options - Aggressive parsing options; uses the same `StreamOptions` shape as `stream()`
 * @returns Promise resolving to the parsed PDF data
 */
export function aggressive(dataBuffer: Buffer, options?: StreamOptions): Promise<Result>;

/**
 * Parse a PDF using worker threads for true multi-core parallelism.
 * May have compatibility issues with PDF.js in some environments.
 * Best for very large PDFs (1000+ pages) on multi-core systems.
 * @param dataBuffer - PDF file buffer
 * @param options - Worker-thread options (`chunkSize`, `maxWorkers`, `batchSize`, `onProgress`, plus the base `Options`)
 * @returns Promise resolving to the parsed PDF data
 */
export function workers(dataBuffer: Buffer, options?: WorkersOptions): Promise<Result>;

/**
 * Parse a PDF using child processes for true multi-core parallelism.
 * Most reliable multi-threading option, works in all environments.
 * Best for very large PDFs (1000+ pages) on multi-core systems.
 * @param dataBuffer - PDF file buffer
 * @param options - Child-process options (`chunkSize`, `maxProcesses`, `processTimeout`, `batchSize`, `onProgress`, plus the base `Options`)
 * @returns Promise resolving to the parsed PDF data
 */
export function processes(dataBuffer: Buffer, options?: ProcessesOptions): Promise<Result>;

/**
 * Smart PDF Parser - Automatically selects the optimal parsing method
 * based on PDF characteristics and system resources.
 *
 * Features:
 * - CPU-aware decision tree (adapts to available cores)
 * - Fast-path optimization (0.5ms overhead for tiny PDFs)
 * - LRU cache for repeated similar PDFs
 * - Common scenario matching (90%+ hit rate)
 * - Oversaturation for maximum CPU utilization
 *
 * `Result._meta` is declared optional, so the example reads it with
 * optional chaining to stay valid under `strictNullChecks`.
 *
 * @example
 * ```typescript
 * import PdfParse, { SmartPDFParser } from 'pdf-parse-new';
 * const parser = new SmartPDFParser();
 * const result = await parser.parse(pdfBuffer);
 * console.log(`Parsed ${result.numpages} pages using ${result._meta?.method}`);
 * ```
 */
export class SmartPDFParser {
	/**
	 * Create a parser.
	 * @param options - Optional tuning knobs (forced method, memory/CPU limits, fast path, cache, oversaturation)
	 */
	constructor(options?: SmartParserOptions);

	/**
	 * Parse a PDF with automatic method selection.
	 * @param dataBuffer - PDF file buffer
	 * @param userOptions - Optional parsing options to override defaults
	 * @returns Promise with parsed PDF data including _meta with method and performance info
	 */
	parse(dataBuffer: Buffer, userOptions?: Options): Promise<Result>;

	/**
	 * Get parser statistics (in-memory for the current session).
	 * @returns Statistics object with parse counts, method usage, and optimization metrics
	 */
	getStats(): SmartParserStats;
}

/**
 * Font-size statistics used to drive Markdown heading detection.
 * Produced by `collectFontStats()` and consumed by `createMarkdownRenderer()`.
 */
export interface FontStats {
	/** Most common font size across the sampled pages (body text). */
	bodySize: number;
	/** Threshold (>=) above which a line is treated as `# h1`. */
	h1Size: number;
	/** Threshold (>=) above which a line is treated as `## h2`. */
	h2Size: number;
	/** Threshold (>=) above which a line is treated as `### h3`. */
	h3Size: number;
	/** Median vertical distance between consecutive lines of body text. */
	lineHeight: number;
}

/**
 * Options accepted by `markdown()` and `createMarkdownRenderer()`:
 * the base `Options` plus Markdown detection toggles.
 */
export interface MarkdownOptions extends Options {
	/** Number of pages to sample for font statistics (default: 5). */
	sampleSize?: number;
	/** Wrap items in `**...**` / `*...*` based on font name (default: true). */
	detectEmphasis?: boolean;
	/** Convert leading bullets and numbered prefixes to Markdown lists (default: true). */
	detectLists?: boolean;
	/** Wrap monospace runs in fenced code blocks (default: true). */
	detectCodeBlocks?: boolean;
}

/**
 * Parse a PDF and emit Markdown instead of plain text.
 *
 * Performs a two-pass analysis: it first samples a few pages to build a
 * font-size histogram (used to infer headings), then parses the document with
 * a renderer that emits headings, lists, inline emphasis and (optionally)
 * fenced code blocks. Heuristic-based: works well on text-heavy PDFs, but
 * struggles with tables and complex multi-column layouts (use a vision model
 * for those).
 *
 * @param dataBuffer - PDF file buffer
 * @param options - Markdown rendering options (`sampleSize`, `detectEmphasis`, `detectLists`, `detectCodeBlocks`, plus the base `Options`)
 * @returns Promise with `result.text` containing Markdown
 */
export function markdown(dataBuffer: Buffer, options?: MarkdownOptions): Promise<Result>;

/**
 * Drop-in `pagerender` that emits Markdown using only per-page statistics.
 * Lower quality than `markdown(buffer)` (no document-wide font stats) but
 * works in single-call contexts and as a `pagerenderModule` for workers.
 *
 * @param pageData - Page object handed to `pagerender` (untyped; a PDF.js page object per `Options.pagerender`)
 * @returns Promise resolving to the Markdown text of the page
 */
export function markdownRender(pageData: any): Promise<string>;

/**
 * Build a Markdown `pagerender` bound to pre-computed font statistics.
 * Useful when calling `pdf(buffer, { pagerender: createMarkdownRenderer(stats) })`
 * in a custom flow.
 *
 * @param stats - Font statistics, e.g. as resolved by `collectFontStats()`
 * @param options - Optional Markdown rendering options
 * @returns An async page renderer usable as `Options.pagerender`
 */
export function createMarkdownRenderer(stats: FontStats, options?: MarkdownOptions): (pageData: any) => Promise<string>;

/**
 * Sample the PDF and compute font-size statistics used to drive Markdown
 * heading detection. Cheap: defaults to 5 evenly-distributed pages.
 *
 * NOTE(review): `verbosityLevel` is typed as a plain `number` here, while
 * `Options.verbosityLevel` only allows `0 | 1 | 5` — confirm whether the two
 * are meant to agree.
 *
 * @param dataBuffer - PDF file buffer
 * @param options - Optional `sampleSize`, `verbosityLevel` and `password`
 * @returns Promise resolving to the computed `FontStats`
 */
export function collectFontStats(dataBuffer: Buffer, options?: { sampleSize?: number; verbosityLevel?: number; password?: string }): Promise<FontStats>;

/**
 * Absolute path to the standalone Markdown renderer module, suitable for
 * passing as the `pagerenderModule` option to `workers()` / `processes()`.
 */
export const markdownRenderModule: string;

/**
 * Main parsing function (backward compatible). This is the module's default export.
 *
 * @param dataBuffer - PDF file buffer
 * @param options - Optional base parsing options
 * @returns Promise resolving to the parsed PDF data
 */
declare function PdfParse(dataBuffer: Buffer, options?: Options): Promise<Result>;

export default PdfParse;

