import { d as NormalizeTitleSpanOptions } from "./types-CeDA67OZ.js";

//#region src/content.d.ts
/**
 * A single line of parsed page content, as returned by {@link parseContentRobust}.
 */
type Line = {
  /** Optional identifier for the line; absent when the line has no ID. */
  id?: string;
  /** Text content of the line. */
  text: string;
};
/**
 * Normalizes all line endings to Unix style (`\n`).
 *
 * Windows (`\r\n`) and old Mac (`\r`) line endings are converted to `\n`
 * so that pattern matching behaves consistently across platforms.
 *
 * @param content - Raw content that may contain mixed line endings
 * @returns The content with every line ending normalized to `\n`
 */
declare const normalizeLineEndings: (content: string) => string;
/**
 * Parses Shamela HTML content into structured lines while preserving headings.
 *
 * @param content - The raw HTML markup representing a page
 * @returns An array of {@link Line} objects, each carrying the line's text and,
 *          when available, its ID
 */
declare const parseContentRobust: (content: string) => Line[];
/**
 * Sanitises page content by applying regex replacement rules.
 *
 * NOTE(review): `rules` is presumably a map from regex pattern source to
 * replacement string — confirm against the implementation.
 *
 * @param text - The text to clean
 * @param rules - Optional custom replacements; defaults to {@link DEFAULT_MAPPING_RULES}
 *                (declared outside this file)
 * @returns The sanitised content
 */
declare const mapPageCharacterContent: (text: string, rules?: Record<string, string>) => string;
/**
 * Splits a page body from its trailing footnotes using a marker string.
 *
 * NOTE(review): the default marker and the result when the marker does not
 * occur in `content` are not visible in this declaration — confirm against
 * the implementation.
 *
 * @param content - Combined body and footnote text
 * @param footnoteMarker - Optional marker indicating the start of footnotes
 * @returns A readonly tuple `[body, footnotes]`: the page body followed by
 *          the footnote section
 */
declare const splitPageBodyFromFooter: (content: string, footnoteMarker?: string) => readonly [string, string];
/**
 * Removes Arabic numeral page markers enclosed in tortoise shell brackets (⦗ ⦘).
 *
 * Each marker, together with up to two preceding whitespace characters
 * (space or carriage return) and up to one following whitespace character,
 * is replaced with a single space.
 *
 * NOTE(review): whether "Arabic numeral" covers Arabic-Indic digits, ASCII
 * digits, or both is not visible in this declaration — confirm against the
 * implementation.
 *
 * @param text - Text potentially containing page markers
 * @returns The text with each numeric marker replaced by a single space
 */
declare const removeArabicNumericPageMarkers: (text: string) => string;
/**
 * Removes anchor and hadeeth tags from the content while preserving spans.
 *
 * NOTE(review): the summary names only anchor and hadeeth tags, while the
 * return description implies every non-span tag is removed — confirm which
 * one the implementation does.
 *
 * @param content - HTML string containing various tags
 * @returns The content with only span tags retained
 */
declare const removeTagsExceptSpan: (content: string) => string;
/**
 * Normalizes Shamela HTML for CSS styling:
 * - Converts `<hadeeth-N>` to `<span class="hadeeth">`
 * - Converts `</hadeeth>` or a standalone `<hadeeth>` to `</span>`
 *
 * @param html - Shamela HTML that may contain hadeeth tags
 * @returns The HTML with hadeeth tags rewritten as span tags
 */
declare const normalizeHtml: (html: string) => string;
/**
 * Strips all HTML tags from the content, keeping only the text.
 *
 * @param html - HTML content
 * @returns Plain text content with the tags removed
 */
declare const stripHtmlTags: (html: string) => string;
/**
 * Moves content that appears after a line break but before a title span into the span.
 *
 * This handles cases where text at the start of a line (such as chapter numbers like "١ -")
 * should logically be part of the following title but was placed outside the span in the HTML.
 *
 * In the example below the line break is `\r`, and the moved text is joined to the
 * existing title text with a single space.
 *
 * @example
 * ```typescript
 * // Input:  "\rباب الأول<span data-type="title">العنوان</span>"
 * // Output: "\r<span data-type="title">باب الأول العنوان</span>"
 * ```
 *
 * @param html - HTML content with potential pre-title text
 * @returns HTML with pre-title text moved inside title spans
 */
declare const moveContentAfterLineBreakIntoSpan: (html: string) => string;
/**
 * Converts Shamela HTML to Markdown format for easier pattern matching.
 *
 * Transformations:
 * - `<span data-type="title">text</span>` → `## text`
 * - `<a href="inr://...">text</a>` → `text` (narrator links are stripped)
 * - All other HTML tags → stripped
 *
 * Note: Content typically already has proper line breaks before title spans,
 * so no extra newlines are added around the `##` header.
 * Line ending normalization is not done here; it is handled by `segmentPages`
 * (defined outside this file).
 *
 * @param html - HTML content from Shamela
 * @returns Markdown-formatted content
 */
declare const htmlToMarkdown: (html: string) => string;
/**
 * Normalizes consecutive Shamela-style title spans.
 *
 * Shamela exports sometimes contain adjacent title spans like:
 * `<span data-type="title">باب الميم</span><span data-type="title">من اسمه محمد</span>`
 *
 * If you naively convert each title span into a markdown heading, you can end up with:
 * `## باب الميم ## من اسمه محمد` (two headings on one line).
 *
 * This helper rewrites the HTML so downstream HTML→Markdown conversion can stay simple and consistent.
 *
 * @param html - HTML content that may contain adjacent title spans
 * @param options - Normalization settings (required in this signature); the available
 *                  fields are defined by `NormalizeTitleSpanOptions`, declared outside this file
 * @returns The rewritten HTML
 */
declare const normalizeTitleSpans: (html: string, options: NormalizeTitleSpanOptions) => string;
/**
 * Converts Shamela HTML content to Markdown format using a standardized pipeline.
 *
 * This is a convenience function that applies the recommended sequence of transformations:
 * 1. Normalizes consecutive title spans (default: splitLines strategy)
 * 2. Moves pre-title text into spans
 * 3. Converts to Markdown format
 *
 * @example
 * ```typescript
 * const html = '<span data-type="title">Chapter</span><span data-type="title">One</span>';
 * const markdown = convertContentToMarkdown(html);
 * // => "## Chapter\n## One"
 * ```
 *
 * @param content - Raw HTML content from Shamela
 * @param options - Optional configuration for the title span normalization step;
 *                  may be omitted
 * @returns Markdown-formatted content
 */
declare const convertContentToMarkdown: (content: string, options?: NormalizeTitleSpanOptions) => string;
//#endregion
export { Line, convertContentToMarkdown, htmlToMarkdown, mapPageCharacterContent, moveContentAfterLineBreakIntoSpan, normalizeHtml, normalizeLineEndings, normalizeTitleSpans, parseContentRobust, removeArabicNumericPageMarkers, removeTagsExceptSpan, splitPageBodyFromFooter, stripHtmlTags };
//# sourceMappingURL=content.d.ts.map