/**
 * Version identifier exported as `ANYPARSER_VERSION`; its type is the string
 * literal "1.0.1".
 *
 * The initializer form (rather than a `: "1.0.1"` annotation) is how a
 * declaration file spells a widening literal type, so it is kept as is.
 */
declare const ANYPARSER_VERSION = "1.0.1";
/**
 * Second exported version constant; it carries the same literal value as
 * `ANYPARSER_VERSION`.
 */
declare const version = "1.0.1";

/**
 * Read-only table of OCR preset identifiers.
 *
 * The property values make up `OcrPresetType`, the type accepted by
 * `AnyparserOption.ocrPreset`.
 */
declare const OCR_PRESETS: Readonly<{
    readonly DOCUMENT: "document";
    readonly HANDWRITING: "handwriting";
    readonly SCAN: "scan";
    readonly RECEIPT: "receipt";
    readonly MAGAZINE: "magazine";
    readonly INVOICE: "invoice";
    readonly BUSINESS_CARD: "business-card";
    readonly PASSPORT: "passport";
    readonly DRIVER_LICENSE: "driver-license";
}>;
/**
 * Read-only table mapping a language constant name to an OCR language code.
 *
 * The property values make up `OcrLanguageType`, the element type of
 * `AnyparserOption.ocrLanguage`. The codes resemble Tesseract traineddata
 * names (`chi_sim_vert`, `deu_frak`, `osd`) — presumably that is the engine
 * behind the "ocr" model; confirm.
 *
 * The NOTE(review) comments below flag keys whose name does not appear to
 * match the usual meaning of their code. They are comments only: this
 * declaration describes an exported object, so keys must not be renamed here
 * without changing that object as well.
 */
declare const OCR_LANGUAGES: Readonly<{
    readonly AFRIKAANS: "afr";
    readonly AMHARIC: "amh";
    readonly ARABIC: "ara";
    readonly ASSAMESE: "asm";
    readonly AZERBAIJANI: "aze";
    readonly AZERBAIJANI_CYRILLIC: "aze_cyrl";
    readonly BELARUSIAN: "bel";
    readonly BENGALI: "ben";
    readonly TIBETAN: "bod";
    readonly BOSNIAN: "bos";
    readonly BRETON: "bre";
    readonly BULGARIAN: "bul";
    readonly CATALAN: "cat";
    readonly CEBUANO: "ceb";
    readonly CZECH: "ces";
    readonly SIMPLIFIED_CHINESE: "chi_sim";
    readonly SIMPLIFIED_CHINESE_VERTICAL: "chi_sim_vert";
    readonly TRADITIONAL_CHINESE: "chi_tra";
    readonly TRADITIONAL_CHINESE_VERTICAL: "chi_tra_vert";
    readonly CHEROKEE: "chr";
    readonly CORSICAN: "cos";
    readonly WELSH: "cym";
    readonly DANISH: "dan";
    readonly DANISH_FRAKTUR: "dan_frak";
    readonly GERMAN: "deu";
    readonly GERMAN_FRAKTUR: "deu_frak";
    readonly GERMAN_LATIN: "deu_latf";
    // NOTE(review): ISO 639 "div" is Dhivehi (Divehi); the key spelling looks like a typo — verify.
    readonly DIVESH: "div";
    readonly DZONGKHA: "dzo";
    readonly GREEK: "ell";
    readonly ENGLISH: "eng";
    readonly MIDDLE_ENGLISH: "enm";
    readonly ESPERANTO: "epo";
    // NOTE(review): in Tesseract, "equ" is the math/equation detection data, not a language — verify.
    readonly EQUATORIAL_GUINEAN: "equ";
    readonly ESTONIAN: "est";
    readonly BASQUE: "eus";
    readonly FAROESE: "fao";
    readonly PERSIAN: "fas";
    readonly FILIPINO: "fil";
    readonly FINNISH: "fin";
    readonly FRENCH: "fra";
    // NOTE(review): ISO 639-2 "frm" is Middle French ("fro" is Old French) — verify.
    readonly OLD_FRENCH: "frm";
    readonly FRISIAN: "fry";
    readonly SCOTTISH_GAELIC: "gla";
    readonly IRISH: "gle";
    readonly GALICIAN: "glg";
    readonly ANCIENT_GREEK: "grc";
    readonly GUJARATI: "guj";
    readonly HAITIAN_CREOLE: "hat";
    readonly HEBREW: "heb";
    readonly HINDI: "hin";
    readonly CROATIAN: "hrv";
    readonly HUNGARIAN: "hun";
    readonly ARMENIAN: "hye";
    // NOTE(review): ISO 639-2 "iku" is Inuktitut; Igbo is "ibo" — key or code is likely wrong, verify.
    readonly IGBO: "iku";
    readonly INDONESIAN: "ind";
    readonly ICELANDIC: "isl";
    readonly ITALIAN: "ita";
    readonly OLD_ITALIAN: "ita_old";
    readonly JAVANESE: "jav";
    readonly JAPANESE: "jpn";
    readonly JAPANESE_VERTICAL: "jpn_vert";
    readonly KANNADA: "kan";
    readonly GEORGIAN: "kat";
    readonly OLD_GEORGIAN: "kat_old";
    readonly KAZAKH: "kaz";
    readonly KHMER: "khm";
    readonly KIRGHIZ: "kir";
    readonly KURDISH: "kmr";
    readonly KOREAN: "kor";
    readonly KOREAN_VERTICAL: "kor_vert";
    readonly LAO: "lao";
    readonly LATIN: "lat";
    readonly LATVIAN: "lav";
    readonly LITHUANIAN: "lit";
    readonly LUXEMBOURGISH: "ltz";
    readonly MALAYALAM: "mal";
    readonly MARATHI: "mar";
    readonly MACEDONIAN: "mkd";
    readonly MALTESE: "mlt";
    readonly MONGOLIAN: "mon";
    readonly MAORI: "mri";
    readonly MALAY: "msa";
    readonly MYANMAR: "mya";
    readonly NEPALI: "nep";
    readonly DUTCH: "nld";
    readonly NORWEGIAN: "nor";
    readonly OCCITAN: "oci";
    // NOTE(review): "ori" is the Oriya/Odia language code; Odisha is the region — verify.
    readonly ODISHA: "ori";
    // NOTE(review): in Tesseract, "osd" is orientation-and-script detection, not a language — verify.
    readonly OSD: "osd";
    readonly PUNJABI: "pan";
    readonly POLISH: "pol";
    readonly PORTUGUESE: "por";
    readonly PASHTO: "pus";
    readonly QUECHUA: "que";
    readonly ROMANIAN: "ron";
    readonly RUSSIAN: "rus";
    readonly SANSKRIT: "san";
    readonly SINHALA: "sin";
    readonly SLOVAK: "slk";
    readonly SLOVAK_FRAKTUR: "slk_frak";
    readonly SLOVENIAN: "slv";
    readonly SINDHI: "snd";
    readonly SPANISH: "spa";
    readonly OLD_SPANISH: "spa_old";
    readonly ALBANIAN: "sqi";
    readonly SERBIAN: "srp";
    readonly SERBIAN_LATIN: "srp_latn";
    // NOTE(review): "sun" is Sundanese; the key spelling looks like a typo — verify.
    readonly SUNDIANESE: "sun";
    readonly SWAHILI: "swa";
    readonly SWEDISH: "swe";
    readonly SYRIAC: "syr";
    readonly TAMIL: "tam";
    readonly TATAR: "tat";
    readonly TELUGU: "tel";
    readonly TAJIK: "tgk";
    readonly TAGALOG: "tgl";
    readonly THAI: "tha";
    readonly TIGRINYA: "tir";
    readonly TONGAN: "ton";
    readonly TURKISH: "tur";
    readonly UIGHUR: "uig";
    readonly UKRAINIAN: "ukr";
    readonly URDU: "urd";
    readonly UZBEK: "uzb";
    readonly UZBEK_CYRILLIC: "uzb_cyrl";
    readonly VIETNAMESE: "vie";
    readonly YIDDISH: "yid";
    readonly YORUBA: "yor";
}>;
/** Union of every string value in `OCR_PRESETS` (for example "document" or "receipt"). */
type OcrPresetType = (typeof OCR_PRESETS)[keyof typeof OCR_PRESETS];
/** Union of every language code value in `OCR_LANGUAGES` (for example "eng" or "chi_sim"). */
type OcrLanguageType = (typeof OCR_LANGUAGES)[keyof typeof OCR_LANGUAGES];

/**
 * Main class for parsing items using the Anyparser API.
 *
 * Only the public surface is declared here: an optional `options` property,
 * a constructor that takes the same optional options type, and the
 * asynchronous `parse` method.
 */
declare class Anyparser {
    /**
     * Parser configuration; optional, like the constructor parameter.
     * Presumably holds whatever was passed to the constructor — confirm in the
     * implementation.
     */
    options?: AnyparserOption;
    /**
     * Initialize the parser with optional configuration.
     * @param options - Configuration options for the parser
     */
    constructor(options?: AnyparserOption);
    /**
     * Parse files using the Anyparser API.
     * @param filePathsOrUrl - A single file path or list of file paths to parse, or a start URL for crawling
     * @returns Promise of a `Result`: a list of parsed results if format is JSON, or the raw content
     *   as a string for the non-JSON formats. NOTE(review): `AnyparserFormatType` offers
     *   'markdown' and 'html' but no 'text' format — confirm that 'html' also yields a string.
     * @throws Error if the API request fails
     */
    parse(filePathsOrUrl: string | string[]): Promise<Result>;
}

/** Output formats selectable through `AnyparserOption.format`. */
type AnyparserFormatType =
  | 'json'
  | 'markdown'
  | 'html'

/** Processing models selectable through `AnyparserOption.model`. */
type AnyparserModelType =
  | 'text'
  | 'ocr'
  | 'vlm'
  | 'lam'
  | 'crawler'

/** Text encodings selectable through `AnyparserOption.encoding`. */
type AnyparserEncodingType =
  | 'utf-8'
  | 'latin1'

/**
 * Configuration object accepted by the `Anyparser` constructor.
 * Every field is optional.
 */
interface AnyparserOption {
  /** API endpoint, given as a `URL` object rather than a string. */
  apiUrl?: URL
  /** API key string. */
  apiKey?: string
  /** Output format; see `AnyparserFormatType`. */
  format?: AnyparserFormatType
  /** Processing model; see `AnyparserModelType`. */
  model?: AnyparserModelType
  /** Text encoding; see `AnyparserEncodingType`. */
  encoding?: AnyparserEncodingType
  /** Image-related switch — presumably toggles image extraction; confirm. */
  image?: boolean
  /** Table-related switch — presumably toggles table extraction; confirm. */
  table?: boolean
  /**
   * One string or a list of strings — presumably file paths.
   * NOTE(review): overlaps with the argument of `Anyparser.parse`; confirm
   * how the two interact.
   */
  files?: string | string[]
  /** OCR language codes, drawn from the values of `OCR_LANGUAGES`. */
  ocrLanguage?: OcrLanguageType[]
  /** OCR preset, one of the values of `OCR_PRESETS`. */
  ocrPreset?: OcrPresetType
  /** A URL given as a plain string — presumably the crawl start URL; confirm. */
  url?: string
  // The remaining fields look specific to the 'crawler' model — confirm.
  /** Numeric depth limit. */
  maxDepth?: number
  /** Numeric execution limit. */
  maxExecutions?: number
  /** Ordering strategy: 'LIFO' or 'FIFO'. */
  strategy?: 'LIFO' | 'FIFO'
  /** Traversal scope: 'subtree' or 'domain'. */
  traversalScope?: 'subtree' | 'domain'
}

// ---- Parser

/**
 * An image attached to a result entry; used by the `images` lists of
 * `AnyparserUrl` and `AnyparserPdfPage`.
 */
interface AnyparserImageReference {
  /** Image content as a base64 string (per the field name). */
  base64Data: string
  /** Display name of the image. */
  displayName: string
  /** Optional page number the image belongs to. */
  page?: number
  /** Index of the image — TODO confirm whether it is zero- or one-based. */
  imageIndex: number
}

/**
 * Fields shared by file-based results. Extended by `AnyparserPdfResult` and
 * also a member of the `AnyparserResult` union on its own.
 */
interface AnyparserResultBase {
  /** Result identifier string — presumably a request/result id; confirm. */
  rid: string
  /** Name of the original file. */
  originalFilename: string
  /** Checksum string — algorithm not visible here. */
  checksum: string
  /** Optional character count. */
  totalCharacters?: number
  /** Optional Markdown content. */
  markdown?: string
}

/**
 * A single crawl directive. `AnyparserCrawlDirective` extends this shape and
 * lists instances of it in its `underlying` array.
 */
interface AnyparserCrawlDirectiveBase {
  /** Kind of directive: 'HTTP Header', 'HTML Meta', or 'Combined'. */
  type: 'HTTP Header' | 'HTML Meta' | 'Combined'
  /** Numeric priority — ordering semantics are not visible here. */
  priority: number
  /** Optional directive name. */
  name?: string
  /** Optional `noindex` flag. */
  noindex?: boolean
  /** Optional `nofollow` flag. */
  nofollow?: boolean
  /** Optional crawl delay — unit not visible here; TODO confirm. */
  crawlDelay?: number
  /**
   * Optional cut-off date.
   * NOTE(review): typed as `Date`; if results are taken straight from JSON
   * this would arrive as a string — confirm the client converts it.
   */
  unavailableAfter?: Date
}

/**
 * The 'Combined' variant of a crawl directive: `type` is narrowed to
 * 'Combined' and the directives it was built from are listed in `underlying`.
 * Used as the type of `AnyparserUrl.directive`.
 */
interface AnyparserCrawlDirective extends AnyparserCrawlDirectiveBase {
  /** Always 'Combined'. */
  type: 'Combined'
  /**
   * Always `undefined`.
   * NOTE(review): declared as a required property of type `undefined`, so a
   * hand-built object literal must spell out `name: undefined`; confirm
   * whether an optional property was intended.
   */
  name: undefined
  /** The individual directives behind this combined one. */
  underlying: AnyparserCrawlDirectiveBase[]
}

/**
 * One URL entry of a crawl; instances appear in `AnyparserCrawlResult.items`.
 */
interface AnyparserUrl {
  /**
   * Address of the entry.
   * NOTE(review): typed as `URL`; if results are taken straight from JSON
   * this would arrive as a string — confirm the client converts it.
   */
  url: URL
  /** Optional title. */
  title?: string
  /** Optional crawl timestamp, as a string — format not visible here. */
  crawledAt?: string
  /** Numeric status code — presumably the HTTP status; confirm. */
  statusCode: number
  /** Status message accompanying `statusCode`. */
  statusMessage: string
  /** Combined crawl directive for this URL. */
  directive: AnyparserCrawlDirective
  /** Optional character count. */
  totalCharacters?: number
  /** Optional Markdown content. */
  markdown?: string
  /** Optional list of images. */
  images?: AnyparserImageReference[]
  /** Optional plain-text content. */
  text?: string
  /** Politeness delay — unit not visible here; TODO confirm. */
  politenessDelay: number
}

/**
 * Robots.txt rules for one user agent; used as the type of
 * `AnyparserCrawlResult.robotsDirective`.
 *
 * NOTE(review): `disallow` and `allow` are typed as `Set<string>`; if results
 * are taken straight from JSON they would arrive as arrays — confirm the
 * client converts them.
 */
interface AnyparserRobotsTxtDirective {
  /** User-agent string the rules apply to. */
  userAgent: string
  /** Set of disallowed entries. */
  disallow: Set<string>
  /** Set of allowed entries. */
  allow: Set<string>
  /** Optional crawl delay — unit not visible here; TODO confirm. */
  crawlDelay?: number
}

/**
 * One page of a PDF result; instances appear in `AnyparserPdfResult.items`.
 */
interface AnyparserPdfPage {
  /** Page number — TODO confirm whether it is zero- or one-based. */
  pageNumber: number
  /** Optional Markdown content of the page. */
  markdown?: string
  /** Optional plain-text content of the page. */
  text?: string
  /** Optional list of images on the page. */
  images?: AnyparserImageReference[]
}

/**
 * Result shape with per-page detail: everything in `AnyparserResultBase`
 * plus an optional list of pages and its count.
 */
interface AnyparserPdfResult extends AnyparserResultBase {
  /** Optional item count — presumably the length of `items`; confirm. */
  totalItems?: number
  /** Optional list of pages. */
  items?: AnyparserPdfPage[]
}

/**
 * Result shape for a crawl. Unlike the file-based results it does not extend
 * `AnyparserResultBase`, and `totalCharacters`, `totalItems` and `markdown`
 * are required here.
 */
interface AnyparserCrawlResult {
  /** Result identifier string — presumably a request/result id; confirm. */
  rid: string
  /**
   * URL the crawl started from.
   * NOTE(review): typed as `URL`; if results are taken straight from JSON
   * this would arrive as a string — confirm the client converts it.
   */
  startUrl: URL
  /** Character count. */
  totalCharacters: number
  /** Item count — presumably the length of `items`; confirm. */
  totalItems: number
  /** Markdown content. */
  markdown: string
  /** Optional list of per-URL entries. */
  items?: AnyparserUrl[]
  /** Robots.txt rules associated with the crawl. */
  robotsDirective: AnyparserRobotsTxtDirective
}

/**
 * Any single structured result. The union has no shared discriminant field,
 * so callers must tell the variants apart by their properties (for example
 * `startUrl` exists only on `AnyparserCrawlResult`).
 */
type AnyparserResult = AnyparserCrawlResult | AnyparserPdfResult | AnyparserResultBase
/**
 * What `Anyparser.parse` resolves to: a list of structured results or a
 * plain string.
 */
type Result = AnyparserResult[] | string

// Runtime values: the version constants, the OCR lookup tables and the client class.
export { ANYPARSER_VERSION, Anyparser, OCR_LANGUAGES, OCR_PRESETS, version };

// Type-only exports.
export type {
    AnyparserCrawlDirective,
    AnyparserCrawlDirectiveBase,
    AnyparserCrawlResult,
    AnyparserImageReference,
    AnyparserOption,
    AnyparserPdfPage,
    AnyparserPdfResult,
    AnyparserResult,
    AnyparserResultBase,
    AnyparserRobotsTxtDirective,
    AnyparserUrl,
    OcrLanguageType,
    OcrPresetType,
    Result,
};
