'use strict';

/** Cached PDF.js module; populated by `resolvePDFJSImports`. */
let resolvedModule;

const isNode = globalThis.process?.release?.name === "node";
const isBrowser = typeof window !== "undefined";

/**
 * Loads a PDF through the resolved PDF.js build and returns the document proxy.
 *
 * @param {*} data - Value passed to PDF.js `getDocument` as `data`.
 * @param {object} [options] - Extra `getDocument` options; they override the defaults set here.
 * @returns {Promise<object>} The value of the loading task's `promise`.
 */
async function getDocumentProxy(data, options = {}) {
  const { getDocument } = await getResolvedPDFJS();
  const pdf = await getDocument({
    data,
    isEvalSupported: false,
    // See: https://github.com/mozilla/pdf.js/issues/4244#issuecomment-1479534301
    useSystemFonts: true,
    ...options
  }).promise;
  return pdf;
}

/**
 * Returns the cached PDF.js module, resolving the default build first if none is cached.
 *
 * @returns {Promise<object>} The resolved PDF.js module.
 */
async function getResolvedPDFJS() {
  if (!resolvedModule) {
    await resolvePDFJSImports();
  }
  return resolvedModule;
}

/**
 * Resolves the PDF.js module and stores it in the module-level cache.
 *
 * @param {Function} [pdfjsResolver] - Called to obtain a PDF.js module (or a promise of one).
 *   When omitted, `unpdf/pdfjs` is imported instead.
 * @param {object} [options]
 * @param {boolean} [options.force=false] - Resolve again even if a module is already cached.
 * @returns {Promise<void>}
 * @throws {Error} When the resolver or the fallback import fails; the original error is
 *   logged and attached as `cause`.
 */
async function resolvePDFJSImports(pdfjsResolver, { force = false } = {}) {
  if (resolvedModule && !force) {
    return;
  }

  if (pdfjsResolver) {
    try {
      resolvedModule = await interopDefault(pdfjsResolver());
      // A resolved module that itself exposes `resolvePDFJS` is unwrapped once more.
      if (resolvedModule && "resolvePDFJS" in resolvedModule) {
        resolvedModule = await resolvedModule.resolvePDFJS();
      }
      return;
    } catch (error) {
      console.error(error);
      throw new Error("Resolving failed. Please check the provided configuration.", { cause: error });
    }
  }

  try {
    const { resolvePDFJS } = await import('unpdf/pdfjs');
    resolvedModule = await resolvePDFJS();
  } catch (error) {
    console.error(error);
    throw new Error("PDF.js is not available. Please add the package as a dependency.", { cause: error });
  }
}

/**
 * Tells an already-loaded document apart from raw input by the presence of `_pdfInfo`.
 *
 * @param {*} data
 * @returns {boolean} `true` when `data` is a non-null object with a `_pdfInfo` property.
 */
function isPDFDocumentProxy(data) {
  return typeof data === "object" && data !== null && "_pdfInfo" in data;
}

/**
 * Awaits a module (or promise of one) and returns its `default` export when truthy,
 * otherwise the module itself.
 *
 * @param {*} m
 * @returns {Promise<*>}
 */
async function interopDefault(m) {
  const resolved = await m;
  return resolved.default || resolved;
}

/**
 * Ensures `pageNumber` is an integer in `1..pdf.numPages`.
 *
 * @param {{ numPages: number }} pdf
 * @param {number} pageNumber
 * @throws {Error} When the page number is not an integer or is out of range.
 */
function assertValidPageNumber(pdf, pageNumber) {
  if (!Number.isInteger(pageNumber) || pageNumber < 1 || pageNumber > pdf.numPages) {
    throw new Error(`Invalid page number. Must be between 1 and ${pdf.numPages}.`);
  }
}

/**
 * Collects the `data` of every image painted via `OPS.paintImageXObject` on a page.
 *
 * @param {*} data - A document proxy, or input accepted by `getDocumentProxy`.
 * @param {number} pageNumber - 1-based page number.
 * @returns {Promise<Array>} One entry per matching operator, in operator order.
 * @throws {Error} When `pageNumber` is invalid.
 */
async function extractImages$1(data, pageNumber) {
  const pdf = isPDFDocumentProxy(data) ? data : await getDocumentProxy(data);
  // Validate before asking for the page so callers get this module's error message.
  assertValidPageNumber(pdf, pageNumber);

  const page = await pdf.getPage(pageNumber);
  const operatorList = await page.getOperatorList();
  const { OPS } = await getResolvedPDFJS();

  const images = [];
  for (let i = 0; i < operatorList.fnArray.length; i++) {
    if (operatorList.fnArray[i] !== OPS.paintImageXObject) {
      continue;
    }
    // The first operator argument is used as the key into the page's object store.
    const imageKey = operatorList.argsArray[i][0];
    const image = await page.objs.get(imageKey);
    images.push(image.data);
  }
  return images;
}

/**
 * Renders one page onto a canvas and returns the canvas contents as an `ArrayBuffer`.
 *
 * @param {*} data - A document proxy, or input accepted by `getDocumentProxy`.
 * @param {number} pageNumber - 1-based page number.
 * @param {object} [options]
 * @param {Function} [options.canvas] - Resolver for a canvas implementation, forwarded to
 *   `createIsomorphicCanvasFactory`.
 * @param {number} [options.scale] - Viewport scale; falsy values fall back to 1.
 * @param {number} [options.width] - Target width; takes precedence over `height`.
 * @param {number} [options.height] - Target height.
 * @returns {Promise<ArrayBuffer>} Bytes fetched from the canvas data URL.
 * @throws {Error} When `pageNumber` is invalid.
 */
async function renderPageAsImage$1(data, pageNumber, options = {}) {
  const canvasFactory = await createIsomorphicCanvasFactory(options.canvas);
  const pdf = isPDFDocumentProxy(data) ? data : await getDocumentProxy(data, { canvasFactory });
  // Validate before asking for the page so callers get this module's error message.
  assertValidPageNumber(pdf, pageNumber);

  const page = await pdf.getPage(pageNumber);

  let outputScale = options.scale || 1;
  let viewport = page.getViewport({ scale: outputScale });
  // NOTE(review): the ratio below is taken against the viewport already scaled by
  // `options.scale`, so passing both `scale` and `width`/`height` compounds them - confirm intended.
  if (options.width) {
    outputScale = options.width / viewport.width;
  } else if (options.height) {
    outputScale = options.height / viewport.height;
  }
  if (outputScale !== 1 && outputScale > 0) {
    viewport = page.getViewport({ scale: outputScale });
  }

  const ctx = canvasFactory.create(viewport.width, viewport.height);
  await page.render({ canvasContext: ctx.context, viewport }).promise;

  // Both environments expose `toDataURL` on the canvas; the data URL is fetched to get raw bytes.
  const dataUrl = ctx.canvas.toDataURL();
  const response = await fetch(dataUrl);
  return await response.arrayBuffer();
}

/**
 * Builds a canvas factory that works in the browser (DOM canvas) and in Node
 * (a resolved module exposing `createCanvas`).
 *
 * @param {Function} [canvas] - Resolver returning a canvas module (or a promise of one).
 * @returns {Promise<object>} Factory with `_createCanvas`, `create`, `reset` and `destroy`.
 */
async function createIsomorphicCanvasFactory(canvas) {
  const _canvas = canvas ? await interopDefault(canvas()) : void 0;

  return {
    /** Creates a bare canvas for the current environment; throws if none is available. */
    _createCanvas(width, height) {
      if (isBrowser) {
        const canvas2 = document.createElement("canvas");
        canvas2.width = width;
        canvas2.height = height;
        return canvas2;
      }
      if (isNode) {
        if (!_canvas) {
          throw new Error('Failed to resolve "canvas" package.');
        }
        return _canvas.createCanvas(width, height);
      }
      throw new Error("Unsupported environment for canvas creation.");
    },

    /** Returns `{ canvas, context }` for a new canvas of the given size. */
    create(width, height) {
      const _canvas2 = this._createCanvas(width, height);
      const context = _canvas2.getContext("2d");
      return { canvas: _canvas2, context };
    },

    /** Resizes the canvas held by `ctx`, if any. */
    reset(ctx, width, height) {
      if (ctx.canvas) {
        ctx.canvas.width = width;
        ctx.canvas.height = height;
      }
    },

    /** Zeroes the canvas size and drops the references held by `ctx`. */
    destroy(ctx) {
      if (ctx.canvas) {
        ctx.canvas.width = 0;
        ctx.canvas.height = 0;
      }
      ctx.canvas = void 0;
      ctx.context = void 0;
    }
  };
}

/**
 * Reads document metadata.
 *
 * @param {*} data - A document proxy, or input accepted by `getDocumentProxy`.
 * @returns {Promise<{ info: object, metadata: object }>} Empty objects stand in for missing parts.
 */
async function getMeta$1(data) {
  const pdf = isPDFDocumentProxy(data) ? data : await getDocumentProxy(data);
  const meta = await pdf.getMetadata();
  return {
    info: meta?.info ?? {},
    metadata: meta?.metadata?.getAll() ?? {}
  };
}

/**
 * Extracts the text of every page.
 *
 * @param {*} data - A document proxy, or input accepted by `getDocumentProxy`.
 * @param {object} [options]
 * @param {boolean} [options.mergePages=false] - Join all pages into one string and collapse
 *   every whitespace run to a single space.
 * @returns {Promise<{ totalPages: number, text: string | string[] }>}
 */
async function extractText$1(data, options = {}) {
  const { mergePages = false } = { ...options };
  const pdf = isPDFDocumentProxy(data) ? data : await getDocumentProxy(data);
  const texts = await Promise.all(
    Array.from({ length: pdf.numPages }, (_, i) => getPageText(pdf, i + 1))
  );
  return {
    totalPages: pdf.numPages,
    text: mergePages ? texts.join("\n").replace(/\s+/g, " ") : texts
  };
}

/**
 * Concatenates the `str` of each text item on a page, appending a newline after
 * items flagged `hasEOL`. Items without a `str` are skipped.
 *
 * @param {object} document - Document proxy.
 * @param {number} pageNumber - 1-based page number.
 * @returns {Promise<string>}
 */
async function getPageText(document, pageNumber) {
  const page = await document.getPage(pageNumber);
  const content = await page.getTextContent();
  return content.items
    .filter((item) => item.str != null)
    .map((item) => item.str + (item.hasEOL ? "\n" : ""))
    .join("");
}

/**
 * Applies library configuration. When `options.pdfjs` is given it replaces any
 * previously resolved PDF.js module.
 *
 * @param {{ pdfjs?: Function }} [options]
 * @returns {Promise<void>}
 */
async function configureUnPDF(options) {
  const { pdfjs } = { ...options };
  if (pdfjs) {
    await resolvePDFJSImports(pdfjs, { force: true });
  }
}

// Public entry points: each makes sure PDF.js is resolved before delegating.

/** Public `getMeta`; see `getMeta$1`. */
const getMeta = async (data) => {
  await resolvePDFJSImports();
  return await getMeta$1(data);
};

/** Public `extractText`; see `extractText$1`. */
const extractText = async (data, options) => {
  await resolvePDFJSImports();
  return await extractText$1(data, options);
};

/** Public `extractImages`; see `extractImages$1`. */
const extractImages = async (data, pageNumber) => {
  await resolvePDFJSImports();
  return await extractImages$1(data, pageNumber);
};

/** Public `renderPageAsImage`; see `renderPageAsImage$1`. */
const renderPageAsImage = async (data, pageNumber, options) => {
  await resolvePDFJSImports();
  return await renderPageAsImage$1(data, pageNumber, options);
};

exports.configureUnPDF = configureUnPDF;
exports.createIsomorphicCanvasFactory = createIsomorphicCanvasFactory;
exports.extractImages = extractImages;
exports.extractText = extractText;
exports.getDocumentProxy = getDocumentProxy;
exports.getMeta = getMeta;
exports.getResolvedPDFJS = getResolvedPDFJS;
exports.renderPageAsImage = renderPageAsImage;
exports.resolvePDFJSImports = resolvePDFJSImports;