/*!
 * Copyright (c) 2025-present, Vanilagy and contributors
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

import {
	AUDIO_CODECS,
	AudioCodec,
	getFirstEncodableVideoCodec,
	getEncodableAudioCodecs,
	NON_PCM_AUDIO_CODECS,
	Quality,
	QUALITY_HIGH,
	VIDEO_CODECS,
	VideoCodec,
} from './codec';
import { Input } from './input';
import { InputAudioTrack, InputTrack, InputVideoTrack } from './input-track';
import {
	AudioSampleSink,
	CanvasSink,
	EncodedPacketSink,
	VideoSampleSink,
} from './media-sink';
import {
	AudioEncodingConfig,
	AudioSource,
	EncodedVideoPacketSource,
	EncodedAudioPacketSource,
	VideoEncodingConfig,
	VideoSource,
	VideoSampleSource,
	AudioSampleSource,
} from './media-source';
import { assert, clamp, MaybePromise, normalizeRotation, promiseWithResolvers, Rotation } from './misc';
import { Output, TrackType } from './output';
import { AudioSample, VideoSample } from './sample';

/**
 * Video-specific options.
 * @public
 */
export type ConversionVideoOptions = {
	/** If true, all video tracks will be discarded and will not be present in the output. */
	discard?: boolean;
	/**
	 * The desired width of the output video in pixels, defaulting to the video's natural display width. If height
	 * is not set, it will be deduced automatically based on aspect ratio.
	 */
	width?: number;
	/**
	 * The desired height of the output video in pixels, defaulting to the video's natural display height. If width
	 * is not set, it will be deduced automatically based on aspect ratio.
	 */
	height?: number;
	/**
	 * The fitting algorithm in case both width and height are set.
	 *
	 * - 'fill' will stretch the image to fill the entire box, potentially altering aspect ratio.
	 * - 'contain' will contain the entire image within the box while preserving aspect ratio. This may lead to
	 * letterboxing.
	 * - 'cover' will scale the image until the entire box is filled, while preserving aspect ratio.
	 */
	fit?: 'fill' | 'contain' | 'cover';
	/**
	 * The angle in degrees to rotate the input video by, clockwise. Rotation is applied before resizing. This
	 * rotation is _in addition to_ the natural rotation of the input video as specified in input file's metadata.
	 */
	rotate?: Rotation;
	/**
	 * The desired frame rate of the output video, in hertz. If not specified, the original input frame rate will
	 * be used (which may be variable).
	 */
	frameRate?: number;
	/** The desired output video codec. */
	codec?: VideoCodec;
	/** The desired bitrate of the output video. */
	bitrate?: VideoEncodingConfig['bitrate'];
	/** When true, video will always be re-encoded instead of directly copying over the encoded samples. */
	forceTranscode?: boolean;
};

/**
 * Audio-specific options.
 * @public
 */
export type ConversionAudioOptions = {
	/** If true, all audio tracks will be discarded and will not be present in the output. */
	discard?: boolean;
	/** The desired channel count of the output audio. */
	numberOfChannels?: number;
	/** The desired sample rate of the output audio, in hertz. */
	sampleRate?: number;
	/** The desired output audio codec. */
	codec?: AudioCodec;
	/** The desired bitrate of the output audio. */
	bitrate?: AudioEncodingConfig['bitrate'];
	/** When true, audio will always be re-encoded instead of directly copying over the encoded samples. */
	forceTranscode?: boolean;
};

const validateVideoOptions = (videoOptions: ConversionVideoOptions | undefined) => {
	if (videoOptions !== undefined && (!videoOptions || typeof videoOptions !== 'object')) {
		throw new TypeError('options.video, when provided, must be an object.');
	}
	if (videoOptions?.discard !== undefined && typeof videoOptions.discard !== 'boolean') {
		throw new TypeError('options.video.discard, when provided, must be a boolean.');
	}
	if (videoOptions?.forceTranscode !== undefined && typeof videoOptions.forceTranscode !== 'boolean') {
		throw new TypeError('options.video.forceTranscode, when provided, must be a boolean.');
	}
	if (videoOptions?.codec !== undefined && !VIDEO_CODECS.includes(videoOptions.codec)) {
		throw new TypeError(
			`options.video.codec, when provided, must be one of: ${VIDEO_CODECS.join(', ')}.`,
		);
	}
	if (
		videoOptions?.bitrate !== undefined
		&& !(videoOptions.bitrate instanceof Quality)
		&& (!Number.isInteger(videoOptions.bitrate) || videoOptions.bitrate <= 0)
	) {
		throw new TypeError('options.video.bitrate, when provided, must be a positive integer or a quality.');
	}
	if (
		videoOptions?.width !== undefined
		&& (!Number.isInteger(videoOptions.width) || videoOptions.width <= 0)
	) {
		throw new TypeError('options.video.width, when provided, must be a positive integer.');
	}
	if (
		videoOptions?.height !== undefined
		&& (!Number.isInteger(videoOptions.height) || videoOptions.height <= 0)
	) {
		throw new TypeError('options.video.height, when provided, must be a positive integer.');
	}
	if (videoOptions?.fit !== undefined && !['fill', 'contain', 'cover'].includes(videoOptions.fit)) {
		throw new TypeError('options.video.fit, when provided, must be one of "fill", "contain", or "cover".');
	}
	if (
		videoOptions?.width !== undefined
		&& videoOptions.height !== undefined
		&& videoOptions.fit === undefined
	) {
		throw new TypeError(
			'When both options.video.width and options.video.height are provided, options.video.fit must also be'
			+ ' provided.',
		);
	}
	if (videoOptions?.rotate !== undefined && ![0, 90, 180, 270].includes(videoOptions.rotate)) {
		throw new TypeError('options.video.rotate, when provided, must be 0, 90, 180 or 270.');
	}
	if (
		videoOptions?.frameRate !== undefined
		&& (!Number.isFinite(videoOptions.frameRate) || videoOptions.frameRate <= 0)
	) {
		throw new TypeError('options.video.frameRate, when provided, must be a finite positive number.');
	}
};

/**
 * The options for media file conversion.
 * @public
 */
export type ConversionOptions = {
	/** The input file. */
	input: Input;
	/** The output file. */
	output: Output;

	/**
	 * Video-specific options. When passing an object, the same options are applied to all video tracks. When passing a
	 * function, it will be invoked for each video track and is expected to return or resolve to the options
	 * for that specific track. The function is passed an instance of `InputVideoTrack` as well as a number `n`, which
	 * is the 1-based index of the track in the list of all video tracks.
	 */
	video?: ConversionVideoOptions
		| ((track: InputVideoTrack, n: number) => MaybePromise<ConversionVideoOptions | undefined>);

	/**
	 * Audio-specific options. When passing an object, the same options are applied to all audio tracks. When passing a
	 * function, it will be invoked for each audio track and is expected to return or resolve to the options
	 * for that specific track. The function is passed an instance of `InputAudioTrack` as well as a number `n`, which
	 * is the 1-based index of the track in the list of all audio tracks.
	 */
	audio?: ConversionAudioOptions
		| ((track: InputAudioTrack, n: number) => MaybePromise<ConversionAudioOptions | undefined>);

	/** Options to trim the input file. */
	trim?: {
		/** The time in the input file in seconds at which the output file should start. Must be less than `end`.  */
		start: number;
		/** The time in the input file in seconds at which the output file should end. Must be greater than `start`. */
		end: number;
	};
};

const validateAudioOptions = (audioOptions: ConversionAudioOptions | undefined) => {
	if (audioOptions !== undefined && (!audioOptions || typeof audioOptions !== 'object')) {
		throw new TypeError('options.audio, when provided, must be an object.');
	}
	if (audioOptions?.discard !== undefined && typeof audioOptions.discard !== 'boolean') {
		throw new TypeError('options.audio.discard, when provided, must be a boolean.');
	}
	if (audioOptions?.forceTranscode !== undefined && typeof audioOptions.forceTranscode !== 'boolean') {
		throw new TypeError('options.audio.forceTranscode, when provided, must be a boolean.');
	}
	if (audioOptions?.codec !== undefined && !AUDIO_CODECS.includes(audioOptions.codec)) {
		throw new TypeError(
			`options.audio.codec, when provided, must be one of: ${AUDIO_CODECS.join(', ')}.`,
		);
	}
	if (
		audioOptions?.bitrate !== undefined
		&& !(audioOptions.bitrate instanceof Quality)
		&& (!Number.isInteger(audioOptions.bitrate) || audioOptions.bitrate <= 0)
	) {
		throw new TypeError('options.audio.bitrate, when provided, must be a positive integer or a quality.');
	}
	if (
		audioOptions?.numberOfChannels !== undefined
		&& (!Number.isInteger(audioOptions.numberOfChannels) || audioOptions.numberOfChannels <= 0)
	) {
		throw new TypeError('options.audio.numberOfChannels, when provided, must be a positive integer.');
	}
	if (
		audioOptions?.sampleRate !== undefined
		&& (!Number.isInteger(audioOptions.sampleRate) || audioOptions.sampleRate <= 0)
	) {
		throw new TypeError('options.audio.sampleRate, when provided, must be a positive integer.');
	}
};

const FALLBACK_NUMBER_OF_CHANNELS = 2;
const FALLBACK_SAMPLE_RATE = 48000;

/**
 * Represents a media file conversion process, used to convert one media file into another. In addition to conversion,
 * this class can be used to resize and rotate video, resample audio, drop tracks, or trim to a specific time range.
 * @public
 */
export class Conversion {
	/** The input file. */
	readonly input: Input;
	/** The output file. */
	readonly output: Output;

	/** @internal */
	_options: ConversionOptions;
	/** @internal */
	_startTimestamp: number;
	/** @internal */
	_endTimestamp: number;

	/** @internal */
	_addedCounts: Record<TrackType, number> = {
		video: 0,
		audio: 0,
		subtitle: 0,
	};

	/** @internal */
	_totalTrackCount = 0;

	/** @internal */
	_trackPromises: Promise<void>[] = [];

	/** @internal */
	_started: Promise<void>;
	/** @internal */
	_start: () => void;
	/** @internal */
	_executed = false;

	/** @internal */
	_synchronizer = new TrackSynchronizer();

	/** @internal */
	_totalDuration: number | null = null;
	/** @internal */
	_maxTimestamps = new Map<number, number>(); // Track ID -> timestamp

	/** @internal */
	_canceled = false;

	/**
	 * A callback that is fired whenever the conversion progresses. Returns a number between 0 and 1, indicating the
	 * completion of the conversion. Note that a progress of 1 doesn't necessarily mean the conversion is complete;
	 * the conversion is complete once `execute` resolves.
	 *
	 * In order for progress to be computed, this property must be set before `execute` is called.
	 */
	onProgress?: (progress: number) => unknown = undefined;
	/** @internal */
	_computeProgress = false;
	/** @internal */
	_lastProgress = 0;

	/** The list of tracks that are included in the output file. */
	readonly utilizedTracks: InputTrack[] = [];
	/** The list of tracks from the input file that have been discarded, alongside the discard reason. */
	readonly discardedTracks: {
		/** The track that was discarded. */
		track: InputTrack;
		/** The reason for discarding the track. */
		reason:
			| 'discarded_by_user'
			| 'max_track_count_reached'
			| 'max_track_count_of_type_reached'
			| 'unknown_source_codec'
			| 'undecodable_source_codec'
			| 'no_encodable_target_codec';
	}[] = [];

	/** Initializes a new conversion process without starting the conversion. */
	static async init(options: ConversionOptions) {
		const conversion = new Conversion(options);
		await conversion._init();

		return conversion;
	}

	private constructor(options: ConversionOptions) {
		if (!options || typeof options !== 'object') {
			throw new TypeError('options must be an object.');
		}
		if (!(options.input instanceof Input)) {
			throw new TypeError('options.input must be an Input.');
		}
		if (!(options.output instanceof Output)) {
			throw new TypeError('options.output must be an Output.');
		}
		if (options.output._tracks.length > 0 || options.output.state !== 'pending') {
			throw new TypeError('options.output must be fresh: no tracks added and not started.');
		}

		if (typeof options.video !== 'function') {
			validateVideoOptions(options.video);
		} else {
			// We'll validate the return value later
		}

		if (typeof options.audio !== 'function') {
			validateAudioOptions(options.audio);
		} else {
			// We'll validate the return value later
		}

		if (options.trim !== undefined && (!options.trim || typeof options.trim !== 'object')) {
			throw new TypeError('options.trim, when provided, must be an object.');
		}
		if (options.trim?.start !== undefined && (!Number.isFinite(options.trim.start) || options.trim.start < 0)) {
			throw new TypeError('options.trim.start, when provided, must be a non-negative number.');
		}
		if (options.trim?.end !== undefined && (!Number.isFinite(options.trim.end) || options.trim.end < 0)) {
			throw new TypeError('options.trim.end, when provided, must be a non-negative number.');
		}
		if (
			options.trim?.start !== undefined
			&& options.trim.end !== undefined
			&& options.trim.start >= options.trim.end) {
			throw new TypeError('options.trim.start must be less than options.trim.end.');
		}

		this._options = options;
		this.input = options.input;
		this.output = options.output;

		this._startTimestamp = options.trim?.start ?? 0;
		this._endTimestamp = options.trim?.end ?? Infinity;

		const { promise: started, resolve: start } = promiseWithResolvers();
		this._started = started;
		this._start = start;
	}

	/** @internal */
	async _init() {
		const inputTracks = await this.input.getTracks();
		const outputTrackCounts = this.output.format.getSupportedTrackCounts();

		let nVideo = 1;
		let nAudio = 1;

		for (const track of inputTracks) {
			let trackOptions: ConversionVideoOptions | ConversionAudioOptions | undefined = undefined;
			if (track.isVideoTrack()) {
				if (this._options.video) {
					if (typeof this._options.video === 'function') {
						trackOptions = await this._options.video(track, nVideo);
						validateVideoOptions(trackOptions);
						nVideo++;
					} else {
						trackOptions = this._options.video;
					}
				}
			} else if (track.isAudioTrack()) {
				if (this._options.audio) {
					if (typeof this._options.audio === 'function') {
						trackOptions = await this._options.audio(track, nAudio);
						validateAudioOptions(trackOptions);
						nAudio++;
					} else {
						trackOptions = this._options.audio;
					}
				}
			} else {
				assert(false);
			}

			if (trackOptions?.discard) {
				this.discardedTracks.push({
					track,
					reason: 'discarded_by_user',
				});
				continue;
			}

			if (this._totalTrackCount === outputTrackCounts.total.max) {
				this.discardedTracks.push({
					track,
					reason: 'max_track_count_reached',
				});
				continue;
			}

			if (this._addedCounts[track.type] === outputTrackCounts[track.type].max) {
				this.discardedTracks.push({
					track,
					reason: 'max_track_count_of_type_reached',
				});
				continue;
			}

			if (track.isVideoTrack()) {
				await this._processVideoTrack(track, (trackOptions ?? {}) as ConversionVideoOptions);
			} else if (track.isAudioTrack()) {
				await this._processAudioTrack(track, (trackOptions ?? {}) as ConversionAudioOptions);
			}
		}

		const unintentionallyDiscardedTracks = this.discardedTracks.filter(x => x.reason !== 'discarded_by_user');
		if (unintentionallyDiscardedTracks.length > 0) {
			// Let's give the user a notice/warning about discarded tracks so they aren't confused
			console.warn('Some tracks had to be discarded from the conversion:', unintentionallyDiscardedTracks);
		}
	}

	/** Executes the conversion process. Resolves once conversion is complete. */
	async execute() {
		if (this._executed) {
			throw new Error('Conversion cannot be executed twice.');
		}

		this._executed = true;

		if (this.onProgress) {
			this._computeProgress = true;
			this._totalDuration = Math.min(
				await this.input.computeDuration() - this._startTimestamp,
				this._endTimestamp - this._startTimestamp,
			);
			this.onProgress?.(0);
		}

		await this.output.start();
		this._start();

		try {
			await Promise.all(this._trackPromises);
		} catch (error) {
			if (!this._canceled) {
				// Make sure to cancel to stop other encoding processes and clean up resources
				void this.cancel();
			}

			throw error;
		}

		if (this._canceled) {
			await new Promise(() => {}); // Never resolve
		}

		await this.output.finalize();

		if (this._computeProgress) {
			this.onProgress?.(1);
		}
	}

	/** Cancels the conversion process. Does nothing if the conversion is already complete. */
	async cancel() {
		if (this.output.state === 'finalizing' || this.output.state === 'finalized') {
			return;
		}

		if (this._canceled) {
			console.warn('Conversion already canceled.');
			return;
		}

		this._canceled = true;
		await this.output.cancel();
	}

	/** @internal */
	async _processVideoTrack(track: InputVideoTrack, trackOptions: ConversionVideoOptions) {
		const sourceCodec = track.codec;
		if (!sourceCodec) {
			this.discardedTracks.push({
				track,
				reason: 'unknown_source_codec',
			});
			return;
		}

		let videoSource: VideoSource;

		const totalRotation = normalizeRotation(track.rotation + (trackOptions.rotate ?? 0));
		const outputSupportsRotation = this.output.format.supportsVideoRotationMetadata;

		const [originalWidth, originalHeight] = totalRotation % 180 === 0
			? [track.codedWidth, track.codedHeight]
			: [track.codedHeight, track.codedWidth];

		let width = originalWidth;
		let height = originalHeight;
		const aspectRatio = width / height;

		// A lot of video encoders require that the dimensions be multiples of 2
		const ceilToMultipleOfTwo = (value: number) => Math.ceil(value / 2) * 2;

		if (trackOptions.width !== undefined && trackOptions.height === undefined) {
			width = ceilToMultipleOfTwo(trackOptions.width);
			height = ceilToMultipleOfTwo(Math.round(width / aspectRatio));
		} else if (trackOptions.width === undefined && trackOptions.height !== undefined) {
			height = ceilToMultipleOfTwo(trackOptions.height);
			width = ceilToMultipleOfTwo(Math.round(height * aspectRatio));
		} else if (trackOptions.width !== undefined && trackOptions.height !== undefined) {
			width = ceilToMultipleOfTwo(trackOptions.width);
			height = ceilToMultipleOfTwo(trackOptions.height);
		}

		const firstTimestamp = await track.getFirstTimestamp();
		const needsTranscode = !!trackOptions.forceTranscode
			|| this._startTimestamp > 0
			|| firstTimestamp < 0
			|| !!trackOptions.frameRate;
		const needsRerender = width !== originalWidth
			|| height !== originalHeight
			|| (totalRotation !== 0 && !outputSupportsRotation);

		let videoCodecs = this.output.format.getSupportedVideoCodecs();
		if (
			!needsTranscode
			&& !trackOptions.bitrate
			&& !needsRerender
			&& videoCodecs.includes(sourceCodec)
			&& (!trackOptions.codec || trackOptions.codec === sourceCodec)
		) {
			// Fast path, we can simply copy over the encoded packets

			const source = new EncodedVideoPacketSource(sourceCodec);
			videoSource = source;

			this._trackPromises.push((async () => {
				await this._started;

				const sink = new EncodedPacketSink(track);
				const decoderConfig = await track.getDecoderConfig();
				const meta: EncodedVideoChunkMetadata = { decoderConfig: decoderConfig ?? undefined };
				const endPacket = Number.isFinite(this._endTimestamp)
					? await sink.getPacket(this._endTimestamp, { metadataOnly: true }) ?? undefined
					: undefined;

				for await (const packet of sink.packets(undefined, endPacket, { verifyKeyPackets: true })) {
					if (this._synchronizer.shouldWait(track.id, packet.timestamp)) {
						await this._synchronizer.wait(packet.timestamp);
					}

					if (this._canceled) {
						return;
					}

					await source.add(packet, meta);
					this._reportProgress(track.id, packet.timestamp + packet.duration);
				}

				source.close();
				this._synchronizer.closeTrack(track.id);
			})());
		} else {
			// We need to decode & reencode the video

			const canDecode = await track.canDecode();
			if (!canDecode) {
				this.discardedTracks.push({
					track,
					reason: 'undecodable_source_codec',
				});
				return;
			}

			if (trackOptions.codec) {
				videoCodecs = videoCodecs.filter(codec => codec === trackOptions.codec);
			}

			const bitrate = trackOptions.bitrate ?? QUALITY_HIGH;

			const encodableCodec = await getFirstEncodableVideoCodec(videoCodecs, { width, height, bitrate });
			if (!encodableCodec) {
				this.discardedTracks.push({
					track,
					reason: 'no_encodable_target_codec',
				});
				return;
			}

			const encodingConfig: VideoEncodingConfig = {
				codec: encodableCodec,
				bitrate,
				onEncodedPacket: sample => this._reportProgress(track.id, sample.timestamp + sample.duration),
			};

			const source = new VideoSampleSource(encodingConfig);
			videoSource = source;

			if (needsRerender) {
				this._trackPromises.push((async () => {
					await this._started;

					const sink = new CanvasSink(track, {
						width,
						height,
						fit: trackOptions.fit ?? 'fill',
						rotation: totalRotation, // Bake the rotation into the output
						poolSize: 1,
					});
					const iterator = sink.canvases(this._startTimestamp, this._endTimestamp);
					const frameRate = trackOptions.frameRate;

					let lastCanvas: HTMLCanvasElement | OffscreenCanvas | null = null;
					let lastCanvasTimestamp: number | null = null;
					let lastCanvasEndTimestamp: number | null = null;

					/** Repeats the last sample to pad out the time until the specified timestamp. */
					const padFrames = async (until: number) => {
						assert(lastCanvas);
						assert(frameRate !== undefined);

						const frameDifference = Math.round((until - lastCanvasTimestamp!) * frameRate);

						for (let i = 1; i < frameDifference; i++) {
							const sample = new VideoSample(lastCanvas, {
								timestamp: lastCanvasTimestamp! + i / frameRate,
								duration: 1 / frameRate,
							});
							await source.add(sample);
						}
					};

					for await (const { canvas, timestamp, duration } of iterator) {
						if (this._synchronizer.shouldWait(track.id, timestamp)) {
							await this._synchronizer.wait(timestamp);
						}

						if (this._canceled) {
							return;
						}

						let adjustedSampleTimestamp = Math.max(timestamp - this._startTimestamp, 0);
						lastCanvasEndTimestamp = timestamp + duration;

						if (frameRate !== undefined) {
							// Logic for skipping/repeating frames when a frame rate is set
							const alignedTimestamp = Math.floor(adjustedSampleTimestamp * frameRate) / frameRate;

							if (lastCanvas !== null) {
								if (alignedTimestamp <= lastCanvasTimestamp!) {
									lastCanvas = canvas;
									lastCanvasTimestamp = alignedTimestamp;

									// Skip this sample, since we already added one for this frame
									continue;
								} else {
									// Check if we may need to repeat the previous frame
									await padFrames(alignedTimestamp);
								}
							}

							adjustedSampleTimestamp = alignedTimestamp;
						}

						const sample = new VideoSample(canvas, {
							timestamp: adjustedSampleTimestamp,
							duration: frameRate !== undefined ? 1 / frameRate : duration,
						});

						await source.add(sample);

						if (frameRate !== undefined) {
							lastCanvas = canvas;
							lastCanvasTimestamp = adjustedSampleTimestamp;
						} else {
							sample.close();
						}
					}

					if (lastCanvas) {
						assert(lastCanvasEndTimestamp !== null);
						assert(frameRate !== undefined);

						// If necessary, pad until the end timestamp of the last sample
						await padFrames(Math.floor(lastCanvasEndTimestamp * frameRate) / frameRate);
					}

					source.close();
					this._synchronizer.closeTrack(track.id);
				})());
			} else {
				this._trackPromises.push((async () => {
					await this._started;

					const sink = new VideoSampleSink(track);
					const frameRate = trackOptions.frameRate;

					let lastSample: VideoSample | null = null;
					let lastSampleTimestamp: number | null = null;
					let lastSampleEndTimestamp: number | null = null;

					/** Repeats the last sample to pad out the time until the specified timestamp. */
					const padFrames = async (until: number) => {
						assert(lastSample);
						assert(frameRate !== undefined);

						const frameDifference = Math.round((until - lastSampleTimestamp!) * frameRate);

						for (let i = 1; i < frameDifference; i++) {
							lastSample.setTimestamp(lastSampleTimestamp! + i / frameRate);
							lastSample.setDuration(1 / frameRate);
							await source.add(lastSample);
						}

						lastSample.close();
					};

					for await (const sample of sink.samples(this._startTimestamp, this._endTimestamp)) {
						if (this._synchronizer.shouldWait(track.id, sample.timestamp)) {
							await this._synchronizer.wait(sample.timestamp);
						}

						if (this._canceled) {
							lastSample?.close();
							return;
						}

						let adjustedSampleTimestamp = Math.max(sample.timestamp - this._startTimestamp, 0);
						lastSampleEndTimestamp = sample.timestamp + sample.duration;

						if (frameRate !== undefined) {
							// Logic for skipping/repeating frames when a frame rate is set
							const alignedTimestamp = Math.floor(adjustedSampleTimestamp * frameRate) / frameRate;

							if (lastSample !== null) {
								if (alignedTimestamp <= lastSampleTimestamp!) {
									lastSample.close();
									lastSample = sample;
									lastSampleTimestamp = alignedTimestamp;

									// Skip this sample, since we already added one for this frame
									continue;
								} else {
									// Check if we may need to repeat the previous frame
									await padFrames(alignedTimestamp);
								}
							}

							adjustedSampleTimestamp = alignedTimestamp;
							sample.setDuration(1 / frameRate);
						}

						sample.setTimestamp(adjustedSampleTimestamp);
						await source.add(sample);

						if (frameRate !== undefined) {
							lastSample = sample;
							lastSampleTimestamp = adjustedSampleTimestamp;
						} else {
							sample.close();
						}
					}

					if (lastSample) {
						assert(lastSampleEndTimestamp !== null);
						assert(frameRate !== undefined);

						// If necessary, pad until the end timestamp of the last sample
						await padFrames(Math.floor(lastSampleEndTimestamp * frameRate) / frameRate);
					}

					source.close();
					this._synchronizer.closeTrack(track.id);
				})());
			}
		}

		this.output.addVideoTrack(videoSource, {
			frameRate: trackOptions.frameRate,
			languageCode: track.languageCode,
			rotation: needsRerender ? 0 : totalRotation, // Rerendering will bake the rotation into the output
		});
		this._addedCounts.video++;
		this._totalTrackCount++;

		this.utilizedTracks.push(track);
	}

	/** @internal */
	async _processAudioTrack(track: InputAudioTrack, trackOptions: ConversionAudioOptions) {
		const sourceCodec = track.codec;
		if (!sourceCodec) {
			this.discardedTracks.push({
				track,
				reason: 'unknown_source_codec',
			});
			return;
		}

		let audioSource: AudioSource;

		const originalNumberOfChannels = track.numberOfChannels;
		const originalSampleRate = track.sampleRate;

		const firstTimestamp = await track.getFirstTimestamp();

		let numberOfChannels = trackOptions.numberOfChannels ?? originalNumberOfChannels;
		let sampleRate = trackOptions.sampleRate ?? originalSampleRate;
		let needsResample = numberOfChannels !== originalNumberOfChannels
			|| sampleRate !== originalSampleRate
			|| this._startTimestamp > 0
			|| firstTimestamp < 0;

		let audioCodecs = this.output.format.getSupportedAudioCodecs();
		if (
			!trackOptions.forceTranscode
			&& !trackOptions.bitrate
			&& !needsResample
			&& audioCodecs.includes(sourceCodec)
			&& (!trackOptions.codec || trackOptions.codec === sourceCodec)
		) {
			// Fast path, we can simply copy over the encoded packets

			const source = new EncodedAudioPacketSource(sourceCodec);
			audioSource = source;

			this._trackPromises.push((async () => {
				await this._started;

				const sink = new EncodedPacketSink(track);
				const decoderConfig = await track.getDecoderConfig();
				const meta: EncodedAudioChunkMetadata = { decoderConfig: decoderConfig ?? undefined };
				const endPacket = Number.isFinite(this._endTimestamp)
					? await sink.getPacket(this._endTimestamp, { metadataOnly: true }) ?? undefined
					: undefined;

				for await (const packet of sink.packets(undefined, endPacket)) {
					if (this._synchronizer.shouldWait(track.id, packet.timestamp)) {
						await this._synchronizer.wait(packet.timestamp);
					}

					if (this._canceled) {
						return;
					}

					await source.add(packet, meta);
					this._reportProgress(track.id, packet.timestamp + packet.duration);
				}

				source.close();
				this._synchronizer.closeTrack(track.id);
			})());
		} else {
			// We need to decode & reencode the audio

			const canDecode = await track.canDecode();
			if (!canDecode) {
				this.discardedTracks.push({
					track,
					reason: 'undecodable_source_codec',
				});
				return;
			}

			let codecOfChoice: AudioCodec | null = null;

			if (trackOptions.codec) {
				audioCodecs = audioCodecs.filter(codec => codec === trackOptions.codec);
			}

			const bitrate = trackOptions.bitrate ?? QUALITY_HIGH;

			const encodableCodecs = await getEncodableAudioCodecs(audioCodecs, {
				numberOfChannels,
				sampleRate,
				bitrate,
			});

			if (
				!encodableCodecs.some(codec => (NON_PCM_AUDIO_CODECS as readonly string[]).includes(codec))
				&& audioCodecs.some(codec => (NON_PCM_AUDIO_CODECS as readonly string[]).includes(codec))
				&& (numberOfChannels !== FALLBACK_NUMBER_OF_CHANNELS || sampleRate !== FALLBACK_SAMPLE_RATE)
			) {
				// We could not find a compatible non-PCM codec despite the container supporting them. This can be
				// caused by strange channel count or sample rate configurations. Therefore, let's try again but with
				// fallback parameters.

				const encodableCodecsWithDefaultParams = await getEncodableAudioCodecs(audioCodecs, {
					numberOfChannels: FALLBACK_NUMBER_OF_CHANNELS,
					sampleRate: FALLBACK_SAMPLE_RATE,
					bitrate,
				});

				const nonPcmCodec = encodableCodecsWithDefaultParams
					.find(codec => (NON_PCM_AUDIO_CODECS as readonly string[]).includes(codec));
				if (nonPcmCodec) {
					// We are able to encode using a non-PCM codec, but it'll require resampling
					needsResample = true;
					codecOfChoice = nonPcmCodec;
					numberOfChannels = FALLBACK_NUMBER_OF_CHANNELS;
					sampleRate = FALLBACK_SAMPLE_RATE;
				}
			} else {
				codecOfChoice = encodableCodecs[0] ?? null;
			}

			if (codecOfChoice === null) {
				this.discardedTracks.push({
					track,
					reason: 'no_encodable_target_codec',
				});
				return;
			}

			if (needsResample) {
				audioSource = this._resampleAudio(track, codecOfChoice, numberOfChannels, sampleRate, bitrate);
			} else {
				const source = new AudioSampleSource({
					codec: codecOfChoice,
					bitrate,
					onEncodedPacket: packet => this._reportProgress(track.id, packet.timestamp + packet.duration),
				});
				audioSource = source;

				this._trackPromises.push((async () => {
					await this._started;

					const sink = new AudioSampleSink(track);
					for await (const sample of sink.samples(undefined, this._endTimestamp)) {
						if (this._synchronizer.shouldWait(track.id, sample.timestamp)) {
							await this._synchronizer.wait(sample.timestamp);
						}

						if (this._canceled) {
							return;
						}

						await source.add(sample);
						sample.close();
					}

					source.close();
					this._synchronizer.closeTrack(track.id);
				})());
			}
		}

		this.output.addAudioTrack(audioSource, {
			languageCode: track.languageCode,
		});
		this._addedCounts.audio++;
		this._totalTrackCount++;

		this.utilizedTracks.push(track);
	}

	/** @internal */
	_resampleAudio(
		track: InputAudioTrack,
		codec: AudioCodec,
		targetNumberOfChannels: number,
		targetSampleRate: number,
		bitrate: number | Quality,
	) {
		const source = new AudioSampleSource({
			codec,
			bitrate,
			onEncodedPacket: packet => this._reportProgress(track.id, packet.timestamp + packet.duration),
		});

		this._trackPromises.push((async () => {
			await this._started;

			const resampler = new AudioResampler({
				sourceNumberOfChannels: track.numberOfChannels,
				sourceSampleRate: track.sampleRate,
				targetNumberOfChannels,
				targetSampleRate,
				startTime: this._startTimestamp,
				endTime: this._endTimestamp,
				onSample: sample => source.add(sample),
			});

			const sink = new AudioSampleSink(track);
			const iterator = sink.samples(this._startTimestamp, this._endTimestamp);

			for await (const sample of iterator) {
				if (this._synchronizer.shouldWait(track.id, sample.timestamp)) {
					await this._synchronizer.wait(sample.timestamp);
				}

				if (this._canceled) {
					return;
				}

				await resampler.add(sample);
			}

			await resampler.finalize();

			source.close();
			this._synchronizer.closeTrack(track.id);
		})());

		return source;
	}

	/** @internal */
	_reportProgress(trackId: number, endTimestamp: number) {
		if (!this._computeProgress) {
			return;
		}
		assert(this._totalDuration !== null);

		this._maxTimestamps.set(trackId, Math.max(endTimestamp, this._maxTimestamps.get(trackId) ?? -Infinity));

		let totalTimestamps = 0;
		for (const [, timestamp] of this._maxTimestamps) {
			totalTimestamps += timestamp;
		}

		const averageTimestamp = totalTimestamps / this._totalTrackCount;
		const newProgress = clamp(averageTimestamp / this._totalDuration, 0, 1);

		if (newProgress !== this._lastProgress) {
			this._lastProgress = newProgress;
			this.onProgress?.(newProgress);
		}
	}
}

const MAX_TIMESTAMP_GAP = 5;

/**
 * Utility class for synchronizing multiple track packet consumers with one another. We don't want one consumer to get
 * too out-of-sync with the others, as that may lead to a large number of packets that need to be internally buffered
 * before they can be written. Therefore, we use this class to slow down a consumer if it is too far ahead of the
 * slowest consumer.
 */
class TrackSynchronizer {
	maxTimestamps = new Map<number, number>(); // Track ID -> timestamp
	resolvers: {
		timestamp: number;
		resolve: () => void;
	}[] = [];

	computeMinAndMaybeResolve() {
		let newMin = Infinity;
		for (const [, timestamp] of this.maxTimestamps) {
			newMin = Math.min(newMin, timestamp);
		}

		for (let i = 0; i < this.resolvers.length; i++) {
			const entry = this.resolvers[i]!;

			if (entry.timestamp - newMin < MAX_TIMESTAMP_GAP) {
				// The gap has gotten small enough again, the consumer can continue again
				entry.resolve();
				this.resolvers.splice(i, 1);
				i--;
			}
		}

		return newMin;
	}

	shouldWait(trackId: number, timestamp: number) {
		this.maxTimestamps.set(trackId, Math.max(timestamp, this.maxTimestamps.get(trackId) ?? -Infinity));

		const newMin = this.computeMinAndMaybeResolve();
		return timestamp - newMin >= MAX_TIMESTAMP_GAP; // Should wait if it is too far ahead of the slowest consumer
	}

	wait(timestamp: number) {
		const { promise, resolve } = promiseWithResolvers();

		this.resolvers.push({
			timestamp,
			resolve,
		});

		return promise;
	}

	closeTrack(trackId: number) {
		this.maxTimestamps.delete(trackId);
		this.computeMinAndMaybeResolve();
	}
}

/**
 * Utility class to handle audio resampling, handling both sample rate resampling as well as channel up/downmixing.
 * The advantage over doing this manually rather than using OfflineAudioContext to do it for us is the artifact-free
 * handling of putting multiple resampled audio samples back to back, which produces flaky results using
 * OfflineAudioContext.
 */
export class AudioResampler {
	sourceSampleRate: number;
	targetSampleRate: number;
	sourceNumberOfChannels: number;
	targetNumberOfChannels: number;
	startTime: number;
	endTime: number;
	onSample: (sample: AudioSample) => Promise<void>;

	bufferSizeInFrames: number;
	bufferSizeInSamples: number;
	outputBuffer: Float32Array;
	/** Start frame of current buffer */
	bufferStartFrame: number;
	/** The highest index written to in the current buffer */
	maxWrittenFrame: number;
	channelMixer!: (sourceData: Float32Array, sourceFrameIndex: number, targetChannelIndex: number) => number;
	tempSourceBuffer: Float32Array;

	constructor(options: {
		sourceSampleRate: number;
		targetSampleRate: number;
		sourceNumberOfChannels: number;
		targetNumberOfChannels: number;
		startTime: number;
		endTime: number;
		onSample: (sample: AudioSample) => Promise<void>;
	}) {
		this.sourceSampleRate = options.sourceSampleRate;
		this.targetSampleRate = options.targetSampleRate;
		this.sourceNumberOfChannels = options.sourceNumberOfChannels;
		this.targetNumberOfChannels = options.targetNumberOfChannels;
		this.startTime = options.startTime;
		this.endTime = options.endTime;
		this.onSample = options.onSample;

		this.bufferSizeInFrames = Math.floor(this.targetSampleRate * 5.0); // 5 seconds
		this.bufferSizeInSamples = this.bufferSizeInFrames * this.targetNumberOfChannels;

		this.outputBuffer = new Float32Array(this.bufferSizeInSamples);
		this.bufferStartFrame = 0;
		this.maxWrittenFrame = -1;

		this.setupChannelMixer();

		// Pre-allocate temporary buffer for source data
		this.tempSourceBuffer = new Float32Array(this.sourceSampleRate * this.sourceNumberOfChannels);
	}

	/**
	 * Sets up the channel mixer to handle up/downmixing in the case where input and output channel counts don't match.
	 */
	setupChannelMixer(): void {
		const sourceNum = this.sourceNumberOfChannels;
		const targetNum = this.targetNumberOfChannels;

		// Logic taken from
		// https://developer.mozilla.org/en-US/docs/Web/API/Web_Audio_API/Basic_concepts_behind_Web_Audio_API
		// Most of the mapping functions are branchless.

		if (sourceNum === 1 && targetNum === 2) {
			// Mono to Stereo: M -> L, M -> R
			this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number) => {
				return sourceData[sourceFrameIndex * sourceNum]!;
			};
		} else if (sourceNum === 1 && targetNum === 4) {
			// Mono to Quad: M -> L, M -> R, 0 -> SL, 0 -> SR
			this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number, targetChannelIndex: number) => {
				return sourceData[sourceFrameIndex * sourceNum]! * +(targetChannelIndex < 2);
			};
		} else if (sourceNum === 1 && targetNum === 6) {
			// Mono to 5.1: 0 -> L, 0 -> R, M -> C, 0 -> LFE, 0 -> SL, 0 -> SR
			this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number, targetChannelIndex: number) => {
				return sourceData[sourceFrameIndex * sourceNum]! * +(targetChannelIndex === 2);
			};
		} else if (sourceNum === 2 && targetNum === 1) {
			// Stereo to Mono: 0.5 * (L + R)
			this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number) => {
				const baseIdx = sourceFrameIndex * sourceNum;
				return 0.5 * (sourceData[baseIdx]! + sourceData[baseIdx + 1]!);
			};
		} else if (sourceNum === 2 && targetNum === 4) {
			// Stereo to Quad: L -> L, R -> R, 0 -> SL, 0 -> SR
			this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number, targetChannelIndex: number) => {
				return sourceData[sourceFrameIndex * sourceNum + targetChannelIndex]! * +(targetChannelIndex < 2);
			};
		} else if (sourceNum === 2 && targetNum === 6) {
			// Stereo to 5.1: L -> L, R -> R, 0 -> C, 0 -> LFE, 0 -> SL, 0 -> SR
			this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number, targetChannelIndex: number) => {
				return sourceData[sourceFrameIndex * sourceNum + targetChannelIndex]! * +(targetChannelIndex < 2);
			};
		} else if (sourceNum === 4 && targetNum === 1) {
			// Quad to Mono: 0.25 * (L + R + SL + SR)
			this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number) => {
				const baseIdx = sourceFrameIndex * sourceNum;
				return 0.25 * (
					sourceData[baseIdx]! + sourceData[baseIdx + 1]!
					+ sourceData[baseIdx + 2]! + sourceData[baseIdx + 3]!
				);
			};
		} else if (sourceNum === 4 && targetNum === 2) {
			// Quad to Stereo: 0.5 * (L + SL), 0.5 * (R + SR)
			this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number, targetChannelIndex: number) => {
				const baseIdx = sourceFrameIndex * sourceNum;
				return 0.5 * (
					sourceData[baseIdx + targetChannelIndex]!
					+ sourceData[baseIdx + targetChannelIndex + 2]!
				);
			};
		} else if (sourceNum === 4 && targetNum === 6) {
			// Quad to 5.1: L -> L, R -> R, 0 -> C, 0 -> LFE, SL -> SL, SR -> SR
			this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number, targetChannelIndex: number) => {
				const baseIdx = sourceFrameIndex * sourceNum;

				// It's a bit harder to do this one branchlessly
				if (targetChannelIndex < 2) return sourceData[baseIdx + targetChannelIndex]!; // L, R
				if (targetChannelIndex === 2 || targetChannelIndex === 3) return 0; // C, LFE
				return sourceData[baseIdx + targetChannelIndex - 2]!; // SL, SR
			};
		} else if (sourceNum === 6 && targetNum === 1) {
			// 5.1 to Mono: sqrt(1/2) * (L + R) + C + 0.5 * (SL + SR)
			this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number) => {
				const baseIdx = sourceFrameIndex * sourceNum;
				return Math.SQRT1_2 * (sourceData[baseIdx]! + sourceData[baseIdx + 1]!)
					+ sourceData[baseIdx + 2]!
					+ 0.5 * (sourceData[baseIdx + 4]! + sourceData[baseIdx + 5]!);
			};
		} else if (sourceNum === 6 && targetNum === 2) {
			// 5.1 to Stereo: L + sqrt(1/2) * (C + SL), R + sqrt(1/2) * (C + SR)
			this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number, targetChannelIndex: number) => {
				const baseIdx = sourceFrameIndex * sourceNum;
				return sourceData[baseIdx + targetChannelIndex]!
					+ Math.SQRT1_2 * (sourceData[baseIdx + 2]! + sourceData[baseIdx + targetChannelIndex + 4]!);
			};
		} else if (sourceNum === 6 && targetNum === 4) {
			// 5.1 to Quad: L + sqrt(1/2) * C, R + sqrt(1/2) * C, SL, SR
			this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number, targetChannelIndex: number) => {
				const baseIdx = sourceFrameIndex * sourceNum;

				// It's a bit harder to do this one branchlessly
				if (targetChannelIndex < 2) {
					return sourceData[baseIdx + targetChannelIndex]! + Math.SQRT1_2 * sourceData[baseIdx + 2]!;
				}
				return sourceData[baseIdx + targetChannelIndex + 2]!; // SL, SR
			};
		} else {
			// Discrete fallback: direct mapping with zero-fill or drop
			this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number, targetChannelIndex: number) => {
				return targetChannelIndex < sourceNum
					? sourceData[sourceFrameIndex * sourceNum + targetChannelIndex]!
					: 0;
			};
		}
	}

	ensureTempBufferSize(requiredSamples: number): void {
		let length = this.tempSourceBuffer.length;

		while (length < requiredSamples) {
			length *= 2;
		}

		if (length !== this.tempSourceBuffer.length) {
			const newBuffer = new Float32Array(length);
			newBuffer.set(this.tempSourceBuffer);
			this.tempSourceBuffer = newBuffer;
		}
	}

	async add(audioSample: AudioSample) {
		if (!audioSample || audioSample._closed) {
			return;
		}

		const requiredSamples = audioSample.numberOfFrames * audioSample.numberOfChannels;
		this.ensureTempBufferSize(requiredSamples);

		// Copy the audio data to the temp buffer
		const sourceDataSize = audioSample.allocationSize({ planeIndex: 0, format: 'f32' });
		const sourceView = new Float32Array(this.tempSourceBuffer.buffer, 0, sourceDataSize / 4);
		audioSample.copyTo(sourceView, { planeIndex: 0, format: 'f32' });

		const inputStartTime = audioSample.timestamp - this.startTime;
		const inputDuration = audioSample.numberOfFrames / this.sourceSampleRate;
		const inputEndTime = Math.min(inputStartTime + inputDuration, this.endTime - this.startTime);

		// Compute which output frames are affected by this sample
		const outputStartFrame = Math.floor(inputStartTime * this.targetSampleRate);
		const outputEndFrame = Math.ceil(inputEndTime * this.targetSampleRate);

		for (let outputFrame = outputStartFrame; outputFrame < outputEndFrame; outputFrame++) {
			if (outputFrame < this.bufferStartFrame) {
				continue; // Skip writes to the past
			}

			while (outputFrame >= this.bufferStartFrame + this.bufferSizeInFrames) {
				// The write is after the current buffer, so finalize it
				await this.finalizeCurrentBuffer();
				this.bufferStartFrame += this.bufferSizeInFrames;
			}

			const bufferFrameIndex = outputFrame - this.bufferStartFrame;
			assert(bufferFrameIndex < this.bufferSizeInFrames);

			const outputTime = outputFrame / this.targetSampleRate;
			const inputTime = outputTime - inputStartTime;
			const sourcePosition = inputTime * this.sourceSampleRate;

			const sourceLowerFrame = Math.floor(sourcePosition);
			const sourceUpperFrame = Math.ceil(sourcePosition);
			const fraction = sourcePosition - sourceLowerFrame;

			// Process each output channel
			for (let targetChannel = 0; targetChannel < this.targetNumberOfChannels; targetChannel++) {
				let lowerSample = 0;
				let upperSample = 0;

				if (sourceLowerFrame >= 0 && sourceLowerFrame < audioSample.numberOfFrames) {
					lowerSample = this.channelMixer(sourceView, sourceLowerFrame, targetChannel);
				}

				if (sourceUpperFrame >= 0 && sourceUpperFrame < audioSample.numberOfFrames) {
					upperSample = this.channelMixer(sourceView, sourceUpperFrame, targetChannel);
				}

				// For resampling, we do naive linear interpolation to find the in-between sample. This produces
				// suboptimal results especially for downsampling (for which a low-pass filter would first need to be
				// applied), but AudioContext doesn't do this either, so, whatever, for now.
				const outputSample = lowerSample + fraction * (upperSample - lowerSample);

				// Write to output buffer (interleaved)
				const outputIndex = bufferFrameIndex * this.targetNumberOfChannels + targetChannel;
				this.outputBuffer[outputIndex]! += outputSample; // Add in case of overlapping samples
			}

			this.maxWrittenFrame = Math.max(this.maxWrittenFrame, bufferFrameIndex);
		}
	}

	async finalizeCurrentBuffer() {
		if (this.maxWrittenFrame < 0) {
			return; // Nothing to finalize
		}

		const samplesWritten = (this.maxWrittenFrame + 1) * this.targetNumberOfChannels;

		const outputData = new Float32Array(samplesWritten);
		outputData.set(this.outputBuffer.subarray(0, samplesWritten));

		const timestampSeconds = this.bufferStartFrame / this.targetSampleRate;
		const audioSample = new AudioSample({
			format: 'f32',
			sampleRate: this.targetSampleRate,
			numberOfChannels: this.targetNumberOfChannels,
			timestamp: timestampSeconds,
			data: outputData,
		});

		await this.onSample(audioSample);

		this.outputBuffer.fill(0);
		this.maxWrittenFrame = -1;
	}

	finalize() {
		return this.finalizeCurrentBuffer();
	}
}
