/**
 * Constant used to mark segment breaks during processing.
 * Its literal type is one of the members of the `MarkedToken` union.
 */
declare const SEGMENT_BREAK = "SEGMENT_BREAK";
/**
 * Constant used to mark that a break should always be started when this marker is encountered.
 * Its literal type is one of the members of the `MarkedToken` union.
 */
declare const ALWAYS_BREAK = "ALWAYS_BREAK";

/**
 * Represents a segment that was updated with the ground truth values.
 * Has the same shape as `Segment`, except that `tokens` holds `GroundedToken`s,
 * each of which may carry an `isUnknown` flag.
 */
type GroundedSegment = Omit<Segment, 'tokens'> & {
    /**
     * Tokens of the segment; each one may be flagged as unmatched via `isUnknown`
     */
    tokens: GroundedToken[];
};
/**
 * Represents a token that was either matched or left unmatched during sync with the ground truth value.
 * Extends `Token` with an optional `isUnknown` flag.
 */
type GroundedToken = Token & {
    /** When `true`, this token was not matched during the ground truth syncing. */
    isUnknown?: boolean;
};
/**
 * Map of hint phrases as produced by `createHints()`.
 * Each key maps to an array of word arrays (one `string[]` per hint phrase).
 * Per the `createHints` documentation, the keys are the first word of each hint.
 */
type Hints = Record<string, string[][]>;
/**
 * Represents a segment during the marking and processing stage.
 * Contains an array of tokens that may include break markers
 * (see `MarkedToken`).
 */
type MarkedSegment = {
    /**
     * End time of the segment in seconds.
     */
    end: number;
    /**
     * Start time of the segment in seconds.
     */
    start: number;
    /**
     * Array of tokens and break markers that make up this segment.
     */
    tokens: MarkedToken[];
};
/**
 * Represents either a token or one of the two break markers
 * (`ALWAYS_BREAK` or `SEGMENT_BREAK`).
 * Used during the processing of text to identify natural break points.
 */
type MarkedToken = Token | typeof ALWAYS_BREAK | typeof SEGMENT_BREAK;
/**
 * Configuration options accepted by `markTokensWithDividers` (and, extended,
 * by `markAndCombineSegments`).
 */
type MarkTokensWithDividersOptions = {
    /**
     * Optional array of filler words to mark as segment breaks
     */
    fillers?: string[];
    /**
     * Minimum time gap (in seconds) to consider a segment break
     */
    gapThreshold: number;
    /**
     * Optional hints created with the `createHints()` function to indicate
     * when to insert a new segment break
     */
    hints?: Hints;
};
/**
 * Represents a segment of text with timing information and word-level tokens.
 * A segment is a higher-level structure that contains a sequence of related tokens.
 * It has all the fields of `Token` (`start`, `end`, `text`) plus a required `tokens` array.
 */
type Segment = Token & {
    /**
     * Word-by-word breakdown of the transcription with individual timings
     */
    tokens: Token[];
};
/**
 * Represents a single token (word or phrase) with timing information.
 * This is the basic unit of transcribed text.
 */
type Token = {
    /**
     * End time in seconds.
     */
    end: number;
    /**
     * Start time in seconds.
     */
    start: number;
    /**
     * The transcribed text.
     */
    text: string;
};

/**
 * Estimates a segment with word-level tokens from a single token with multi-word text.
 * Splits the text by whitespace and calculates approximate timing for each word.
 *
 * @param {Token} param0 - The source token containing text with multiple words
 * @param {number} param0.end - End time of the token in seconds
 * @param {number} param0.start - Start time of the token in seconds
 * @param {string} param0.text - The multi-word text content
 * @returns {Segment} A segment with the original text and estimated (not measured) word-level tokens
 */
declare const estimateSegmentFromToken: ({ end, start, text }: Token) => Segment;
/**
 * Marks tokens with segment dividers based on various criteria including:
 * - Filler words (uh, umm, etc.)
 * - Explicit multi-word hints
 * - Significant time gaps between tokens
 * - Punctuation at the end of tokens
 *
 * @param {Token[]} tokens - Array of tokens to process
 * @param {MarkTokensWithDividersOptions} options - Configuration options
 * @param {string[]} [options.fillers] - Optional array of filler words to mark as segment breaks
 * @param {number} options.gapThreshold - Minimum time gap (in seconds) to consider a segment break
 * @param {Hints} [options.hints] - Optional hints created with the createHints() function to indicate when to insert a new segment break
 * @returns {MarkedToken[]} Tokens with segment break markers inserted
 */
declare const markTokensWithDividers: (tokens: Token[], { fillers, gapThreshold, hints }: MarkTokensWithDividersOptions) => MarkedToken[];
/**
 * Groups marked tokens into segments based on maximum segment duration.
 * Creates segments from tokens, splitting when the duration exceeds the specified maximum.
 *
 * @param {MarkedToken[]} markedTokens - Array of tokens with segment break markers
 *   (for example, the output of `markTokensWithDividers`)
 * @param {number} maxSecondsPerSegment - Maximum duration (in seconds) for a segment
 * @returns {MarkedSegment[]} Array of marked segments
 */
declare const groupMarkedTokensIntoSegments: (markedTokens: MarkedToken[], maxSecondsPerSegment: number) => MarkedSegment[];
/**
 * Merges segments with fewer than the specified minimum words into the previous segment.
 * This helps avoid very short segments that might break the flow of text.
 *
 * @param {MarkedSegment[]} segments - Array of marked segments to process
 * @param {number} minWordsPerSegment - Minimum number of words required for a segment to stand alone
 * @returns {MarkedSegment[]} Array of segments after short ones have been merged
 */
declare const mergeShortSegmentsWithPrevious: (segments: MarkedSegment[], minWordsPerSegment: number) => MarkedSegment[];
/**
 * Formats segments into a timestamped transcript with timestamps at the beginning of each line.
 * Lines are split based on segment breaks and maximum line duration.
 *
 * @param {MarkedSegment[]} segments - Array of marked segments to format
 * @param {number} maxSecondsPerLine - Maximum duration (in seconds) for a single line
 * @param {Function} [formatTokens] - Optional callback that receives a `Token` (named `buffer`
 *   in the signature) and returns a string.
 *   NOTE(review): presumably controls how each output line is rendered; the default used
 *   when it is omitted is not visible in this declaration - confirm against the implementation.
 * @returns {string} Formatted transcript with timestamps
 */
declare const formatSegmentsToTimestampedTranscript: (segments: MarkedSegment[], maxSecondsPerLine: number, formatTokens?: (buffer: Token) => string) => string;
/**
 * Maps marked segments into formatted segments with clean text representation.
 * Combines the tokens into properly formatted text, respecting segment breaks
 * and an optional maximum line duration.
 *
 * @param {MarkedSegment[]} segments - Array of marked segments to format
 * @param {number} [maxSecondsPerLine] - Optional maximum duration (in seconds) for a single line
 * @returns {Segment[]} Array of formatted segments with clean text
 */
declare const mapSegmentsIntoFormattedSegments: (segments: MarkedSegment[], maxSecondsPerLine?: number) => Segment[];
/**
 * Convenience function that processes segments through all steps:
 * marking tokens with dividers, grouping into segments, and merging short segments.
 *
 * @param {Segment[]} segments - Array of input segments to process
 * @param {Object} options - Configuration options (`MarkTokensWithDividersOptions` plus the two fields below)
 * @param {string[]} [options.fillers] - Optional array of filler words to mark as segment breaks
 * @param {number} options.gapThreshold - Minimum time gap (in seconds) to consider a segment break
 * @param {Hints} [options.hints] - Optional hints created with the createHints() function to indicate when to insert a new segment break
 * @param {number} options.maxSecondsPerSegment - Maximum duration (in seconds) for a segment
 * @param {number} options.minWordsPerSegment - Minimum number of words required for a segment to stand alone
 * @returns {MarkedSegment[]} Array of processed and marked segments
 */
declare const markAndCombineSegments: (segments: Segment[], options: MarkTokensWithDividersOptions & {
    maxSecondsPerSegment: number;
    minWordsPerSegment: number;
}) => MarkedSegment[];
/**
 * Cleans up marked tokens by removing unnecessary segment breaks that would
 * cause individual tokens to appear on their own lines.
 *
 * @param {MarkedToken[]} markedTokens - The array of marked tokens to clean up
 * @returns {MarkedToken[]} A new array with the unnecessary breaks removed
 */
declare const cleanupIsolatedTokens: (markedTokens: MarkedToken[]) => MarkedToken[];
/**
 * Aligns AI-generated tokens to a ground truth human-edited segment text.
 *
 * Uses Longest Common Subsequence (LCS) to identify anchor matches between
 * tokenized output and ground truth. Where no matches exist, it interpolates
 * timestamped tokens for unmatched words.
 *
 * @param segment - A `Segment` object with ground truth `text` and AI-generated `tokens`
 * @param groundTruth - The ground truth text to apply to the segment's text and its tokens.
 * @returns A new `GroundedSegment` with the `tokens` adjusted to match the ground truth `text`
 * along with any unmatched tokens flagged (via the `isUnknown` property of `GroundedToken`).
 */
declare const updateSegmentWithGroundTruth: (segment: Segment, groundTruth: string) => GroundedSegment;
/**
 * Produces a segment with the ground truth replacing the text and its respective tokens.
 * Unlike `updateSegmentWithGroundTruth`, the declared return type is a plain `Segment`
 * rather than a `GroundedSegment`.
 *
 * @param segment The segment whose text and tokens are to be replaced with the ground truth.
 * @param groundTruth The human verified transcription of the segment.
 * @returns A segment with the ground truth applied to the segment text and its tokens.
 */
declare const applyGroundTruthToSegment: (segment: Segment, groundTruth: string) => Segment;
/**
 * Merges multiple segments into a single segment.
 *
 * @param segments - Array of segments to merge into one
 * @param delimiter - Optional string used to join the segment texts (defaults to a space)
 * @returns A single merged segment containing all tokens
 */
declare const mergeSegments: (segments: Segment[], delimiter?: string) => Segment;
/**
 * Splits a segment at a specific time point into exactly two segments.
 *
 * This function does the opposite of `mergeSegments`, taking a single segment
 * and dividing it into two segments at the specified split time.
 *
 * @param segment - The segment to split
 * @param splitTime - The time (in seconds) at which to split the segment
 * @returns An array containing exactly two segments
 */
declare const splitSegment: (segment: Segment, splitTime: number) => Segment[];
/**
 * Searches through an array of tokens and returns the first one at which a token
 * sequence matching the given query string begins.
 *
 * This function will split the `query` into one or more hint phrases (via `createHints`),
 * then scan `tokens` in order, returning the first token at which any hint sequence
 * fully matches the subsequent tokens.
 *
 * @param tokens
 *   An ordered array of `Token` objects to search.
 * @param query
 *   A string containing one or more words to match. If you pass multiple words
 *   (e.g. `"hello world"`), it will only match if `"hello"` at position `i` is
 *   immediately followed by `"world"` at position `i+1`.
 * @returns
 *   The first `Token` in the array where the hint sequence matches (i.e. the token
 *   that starts the match), or `null` if no matching sequence is found.
 *
 * @example
 * ```ts
 * const tokens: Token[] = [
 *   { start: 0, end: 1, text: 'the' },
 *   { start: 1, end: 2, text: 'quick' },
 *   { start: 2, end: 3, text: 'brown' },
 *   { start: 3, end: 4, text: 'fox' },
 * ];
 *
 * getFirstMatchingToken(tokens, 'quick brown');
 * // → { start: 1, end: 2, text: 'quick' }
 *
 * getFirstMatchingToken(tokens, 'lazy dog');
 * // → null
 * ```
 */
declare const getFirstMatchingToken: (tokens: Token[], query: string) => null | Token;
/**
 * Finds and returns the first token in a segment whose character range fully contains
 * the given [selectionStart, selectionEnd) range.
 *
 * This is useful when you have a selection in the raw `segment.text` (for example, from
 * an <input>'s `selectionStart` and `selectionEnd`) and you want to map that back to the
 * corresponding timed `Token`.
 *
 * @param segment
 *   The `Segment` object containing the full `text` and an ordered list of `tokens`.
 * @param selectionStart
 *   The zero-based index into `segment.text` where the selection begins (inclusive).
 * @param selectionEnd
 *   The zero-based index into `segment.text` where the selection ends (exclusive).
 * @returns
 *   The first `Token` whose span in `segment.text` covers the entire selection range,
 *   or `null` if no such token is found.
 *
 * @example
 * ```ts
 * const segment: Segment = {
 *   text: 'the fox and the rabbit',
 *   start: 0,
 *   end: 6,
 *   tokens: [
 *     { start: 0, end: 1, text: 'the' },
 *     { start: 2, end: 3, text: 'fox' },
 *     { start: 3, end: 4, text: 'and' },
 *     { start: 4, end: 5, text: 'the' },
 *     { start: 5, end: 6, text: 'rabbit' },
 *   ],
 * };
 *
 * // Suppose the user selected the second "the" in an <input>,
 * // which corresponds to characters 12 to 15 (exclusive end):
 * const tok = getFirstTokenForSelection(segment, 12, 15);
 * // tok === { start: 4, end: 5, text: 'the' }
 * ```
 */
declare const getFirstTokenForSelection: (segment: Segment, selectionStart: number, selectionEnd: number) => null | Token;

/**
 * Checks if a text string ends with a punctuation mark (period, question mark, exclamation mark).
 * Supports both Latin and Arabic punctuation.
 *
 * @param {string} text - The text to check for ending punctuation
 * @returns {boolean} `true` if the text ends with punctuation, `false` otherwise
 */
declare const isEndingWithPunctuation: (text: string) => boolean;
/**
 * Formats seconds into a human-readable timestamp.
 * - For durations less than an hour: m:ss (e.g., "1:05")
 * - For durations an hour or longer: h:mm:ss (e.g., "1:02:05")
 *
 * @param {number} seconds - The time duration in seconds
 * @returns {string} Formatted timestamp string
 */
declare const formatSecondsToTimestamp: (seconds: number) => string;
/**
 * Normalizes a word by removing Arabic diacritics and stripping leading/trailing
 * punctuation or symbols.
 *
 * This function:
 * 1. Decomposes Unicode characters (NFD normalization)
 * 2. Removes Arabic diacritics
 * 3. Strips leading and trailing punctuation or symbols
 * 4. Recomposes Unicode characters (NFC normalization)
 *
 * @param {string} w - The word to normalize
 * @returns {string} The normalized word
 */
declare const normalizeWord: (w: string) => string;
/**
 * Creates a map of hints organized by their first word.
 *
 * Takes multiple hint strings, splits each into words, and organizes them into
 * a map where the keys are the first words and values are arrays of word arrays.
 * The result can be passed as the `hints` option of `MarkTokensWithDividersOptions`.
 *
 * @param {...string} hints - One or more hint strings to process
 * @returns {Hints} A map of hints organized by their first word
 */
declare const createHints: (...hints: string[]) => Hints;

// Public surface of the module: type-only exports first, followed by the functions.
// NOTE(review): SEGMENT_BREAK and ALWAYS_BREAK are declared in this file but are not
// part of this export list - confirm that is intentional.
export { type GroundedSegment, type GroundedToken, type Hints, type MarkTokensWithDividersOptions, type MarkedSegment, type MarkedToken, type Segment, type Token, applyGroundTruthToSegment, cleanupIsolatedTokens, createHints, estimateSegmentFromToken, formatSecondsToTimestamp, formatSegmentsToTimestampedTranscript, getFirstMatchingToken, getFirstTokenForSelection, groupMarkedTokensIntoSegments, isEndingWithPunctuation, mapSegmentsIntoFormattedSegments, markAndCombineSegments, markTokensWithDividers, mergeSegments, mergeShortSegmentsWithPrevious, normalizeWord, splitSegment, updateSegmentWithGroundTruth };
