/*!
 * Copyright (c) 2025-present, Vanilagy and contributors
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

import { Box, ftyp, IsobmffBoxWriter, mdat, mfra, moof, moov, vtta, vttc, vtte } from './isobmff-boxes';
import { Muxer } from '../muxer';
import { Output, OutputAudioTrack, OutputSubtitleTrack, OutputTrack, OutputVideoTrack } from '../output';
import { BufferTargetWriter, Writer } from '../writer';
import { assert, computeRationalApproximation, last, promiseWithResolvers } from '../misc';
import { IsobmffOutputFormatOptions, IsobmffOutputFormat, MovOutputFormat } from '../output-format';
import { inlineTimestampRegex, SubtitleConfig, SubtitleCue, SubtitleMetadata } from '../subtitles';
import {
	parsePcmCodec,
	PCM_AUDIO_CODECS,
	PcmAudioCodec,
	SubtitleCodec,
	validateAudioChunkMetadata,
	validateSubtitleMetadata,
	validateVideoChunkMetadata,
} from '../codec';
import { BufferTarget } from '../target';
import { EncodedPacket, PacketType } from '../packet';
import {
	extractAvcDecoderConfigurationRecord,
	extractHevcDecoderConfigurationRecord,
	serializeAvcDecoderConfigurationRecord,
	serializeHevcDecoderConfigurationRecord,
	transformAnnexBToLengthPrefixed,
} from '../codec-data';
import { buildIsobmffMimeType } from './isobmff-misc';
import { MAX_BOX_HEADER_SIZE, MIN_BOX_HEADER_SIZE } from './isobmff-reader';

/**
 * A timescale of 1000 units per second (i.e. millisecond resolution).
 * NOTE(review): not referenced in this part of the file; presumably used for movie-level timing when building
 * boxes - confirm against the importing modules.
 */
export const GLOBAL_TIMESCALE = 1000;
/** Added to a Unix timestamp (in seconds) to express it relative to Jan 1 1904, as done for `creationTime`. */
const TIMESTAMP_OFFSET = 2_082_844_800; // Seconds between Jan 1 1904 and Jan 1 1970

/** A single media sample (one packet, padding block or subtitle sample) as tracked internally by the muxer. */
export type Sample = {
	/** Presentation timestamp, in seconds. */
	timestamp: number;
	/** Decode timestamp, in seconds. Starts out equal to `timestamp` and is refined during timestamp processing. */
	decodeTimestamp: number;
	/** Duration, in seconds. */
	duration: number;
	/** The sample's payload. Set to `null` once the bytes have been written out, so they can be GC'd. */
	data: Uint8Array | null;
	/** Byte length of the payload, captured at creation so it survives `data` being nulled. */
	size: number;
	type: PacketType;
	/**
	 * Distance to the following sample in the track's timescale. Initialized from `duration` and later replaced by
	 * the actual decode timestamp delta once the following sample is known.
	 */
	timescaleUnitsToNextSample: number;
};

/** A contiguous run of samples of a single track that is written out together. */
type Chunk = {
	/** The lowest presentation timestamp in this chunk */
	startTimestamp: number;
	/** The samples of this chunk, in the order they were added. */
	samples: Sample[];
	/**
	 * Position in the output at which this chunk's sample data begins. `null` until the chunk is finalized; for
	 * in-memory fast start it is temporarily set to 0 and computed properly later.
	 */
	offset: number | null;
	// In the case of a fragmented file, this indicates the position of the moof box pointing to the data in this chunk
	moofOffset: number | null;
};

/**
 * Per-track muxing state. The first part is shared by all tracks; the second part is a union discriminated by
 * `type` that carries the track-kind-specific information.
 */
export type IsobmffTrackData = {
	/** The muxer that owns this track data. */
	muxer: IsobmffMuxer;
	/** Number of time units per second used for this track's timing values. */
	timescale: number;
	/** All samples added to the track so far. Only populated for non-fragmented files. */
	samples: Sample[];
	sampleQueue: Sample[]; // For fragmented files
	/** Samples whose decode timestamps have not been computed yet; drained whenever a key frame arrives. */
	timestampProcessingQueue: Sample[];

	/** Run-length encoded deltas between consecutive samples, in timescale units. */
	timeToSampleTable: { sampleCount: number; sampleDelta: number }[];
	/** Run-length encoded offsets between presentation and decode timestamps, in timescale units. */
	compositionTimeOffsetTable: { sampleCount: number; sampleCompositionTimeOffset: number }[];
	/** Decode timestamp of the last processed sample in timescale units, or `null` before the first sample. */
	lastTimescaleUnits: number | null;
	/** The last sample that went through timestamp processing, or `null` before the first one. */
	lastSample: Sample | null;

	/** Chunks of this track that have already been finalized. */
	finalizedChunks: Chunk[];
	/** The chunk currently being filled, or `null` if none has been started. */
	currentChunk: Chunk | null;
	/**
	 * Chunk-to-sample-count mapping in which an entry is only added when the number of samples per chunk changes.
	 * `firstChunk` is 1-indexed.
	 */
	compactlyCodedChunkTable: {
		firstChunk: number;
		samplesPerChunk: number;
	}[];
} & ({
	track: OutputVideoTrack;
	type: 'video';
	info: {
		/** Taken from the decoder config's `codedWidth`. */
		width: number;
		/** Taken from the decoder config's `codedHeight`. */
		height: number;
		decoderConfig: VideoDecoderConfig;
		/**
		 * The "Annex B transformation" involves converting the raw packet data from Annex B to
		 * "MP4" (length-prefixed) format.
		 * https://stackoverflow.com/questions/24884827
		 */
		requiresAnnexBTransformation: boolean;
	};
} | {
	track: OutputAudioTrack;
	type: 'audio';
	info: {
		numberOfChannels: number;
		sampleRate: number;
		decoderConfig: AudioDecoderConfig;
		/**
		 * The "PCM transformation" is making every sample in the sample table be exactly one PCM audio sample long.
		 * Some players expect this for PCM audio.
		 */
		requiresPcmTransformation: boolean;
	};
} | {
	track: OutputSubtitleTrack;
	type: 'subtitle';
	info: {
		config: SubtitleConfig;
	};
	/** End timestamp (in seconds) of the last subtitle sample that was emitted. */
	lastCueEndTimestamp: number;
	/** Cues that have not been fully written into samples yet. */
	cueQueue: SubtitleCue[];
	/** The next ID to hand out to a cue that spans more than one sample. */
	nextSourceId: number;
	/** IDs already assigned to cues that span more than one sample. */
	cueToSourceId: WeakMap<SubtitleCue, number>;
});

// Narrowed views of IsobmffTrackData, one for each value of the `type` discriminant
export type IsobmffVideoTrackData = IsobmffTrackData & { type: 'video' };
export type IsobmffAudioTrackData = IsobmffTrackData & { type: 'audio' };
export type IsobmffSubtitleTrackData = IsobmffTrackData & { type: 'subtitle' };

/**
 * Converts a time in seconds into units of the given timescale.
 *
 * @param timeInSeconds The time to convert.
 * @param timescale The number of units per second.
 * @param round Whether the result is rounded to the nearest integer (the default) or returned as-is.
 */
export const intoTimescale = (timeInSeconds: number, timescale: number, round = true) => {
	const scaled = timeInSeconds * timescale;
	if (!round) {
		return scaled;
	}

	return Math.round(scaled);
};

export class IsobmffMuxer extends Muxer {
	/** The output format this muxer was created for; its `_options` supply the fast start mode and callbacks. */
	private format: IsobmffOutputFormat;
	/** The output's writer, which all file data is written to. */
	private writer: Writer;
	/** Serializes boxes into `writer`. */
	private boxWriter: IsobmffBoxWriter;
	/** The effective fast start mode: the user-supplied option, or a default chosen based on the writer type. */
	private fastStart: NonNullable<IsobmffOutputFormatOptions['fastStart']>;
	/** Whether `fastStart` is 'fragmented'. */
	private isFragmented: boolean;

	/** Whether the format is a `MovOutputFormat`. */
	isQuickTime: boolean;

	// Scratch buffer; used to serialize the boxes that make up WebVTT samples before turning them into sample data
	private auxTarget = new BufferTarget();
	private auxWriter = this.auxTarget._createWriter();
	private auxBoxWriter = new IsobmffBoxWriter(this.auxWriter);

	/** The mdat box created in `start()` for non-fragmented files; stays `null` for fragmented files. */
	private mdat: Box | null = null;

	/** State for every track that has received data so far, kept sorted by track ID. */
	private trackDatas: IsobmffTrackData[] = [];
	/** Resolved once every track that is still open has track data; awaited by `getMimeType()`. */
	private allTracksKnown = promiseWithResolvers();

	/** Creation time of this muxer, in seconds since Jan 1 1904. */
	private creationTime = Math.floor(Date.now() / 1000) + TIMESTAMP_OFFSET;
	/** Finalized chunks of all tracks, in the order they were finalized. */
	private finalizedChunks: Chunk[] = [];

	/** Number assigned to the next fragment that is finalized; starts at 1. */
	private nextFragmentNumber = 1;
	// Only relevant for fragmented files, to make sure new fragments start with the highest timestamp seen so far
	private maxWrittenTimestamp = -Infinity;
	/** Minimum timestamp span (in seconds) a fragment must cover before a new one may begin. Defaults to 1. */
	private minimumFragmentDuration: number;

	/**
	 * Sets up the muxer for the given output and format, deriving the fast start mode and the fragment settings from
	 * the format's options.
	 */
	constructor(output: Output, format: IsobmffOutputFormat) {
		super(output);

		const targetWriter = output._writer;

		this.format = format;
		this.writer = targetWriter;
		this.boxWriter = new IsobmffBoxWriter(targetWriter);
		this.isQuickTime = format instanceof MovOutputFormat;

		// Without an explicit fastStart option, fall back to in-memory fast start when writing into an ArrayBuffer
		// (the memory usage is the same either way), and to no fast start for every other target
		this.fastStart = format._options.fastStart
			?? (targetWriter instanceof BufferTargetWriter ? 'in-memory' : false);
		this.isFragmented = this.fastStart === 'fragmented';

		const writesMonotonically = this.isFragmented || this.fastStart === 'in-memory';
		if (writesMonotonically) {
			targetWriter.ensureMonotonicity = true;
		}

		this.minimumFragmentDuration = format._options.minimumFragmentDuration ?? 1;
	}

	/**
	 * Writes the start of the file: the ftyp box, and - for files that use neither in-memory fast start nor
	 * fragmentation - an mdat box with a large size field that the sample data will be written into.
	 *
	 * The muxer's mutex is held for the duration of the call and is released even if writing fails, so that a failed
	 * start cannot leave every later operation waiting on the mutex forever.
	 */
	async start() {
		const release = await this.mutex.acquire();

		try {
			const holdsAvc = this.output._tracks.some(x => x.type === 'video' && x.source._codec === 'avc');

			// Write the header
			{
				if (this.format._options.onFtyp) {
					this.writer.startTrackingWrites();
				}

				this.boxWriter.writeBox(ftyp({
					isQuickTime: this.isQuickTime,
					holdsAvc: holdsAvc,
					fragmented: this.isFragmented,
				}));

				if (this.format._options.onFtyp) {
					const { data, start } = this.writer.stopTrackingWrites();
					this.format._options.onFtyp(data, start);
				}
			}

			if (this.fastStart === 'in-memory') {
				// The box is only created here, not written; nothing is emitted for it at this point
				this.mdat = mdat(false);
			} else if (this.isFragmented) {
				// We write the moov box once we write out the first fragment to make sure we get the decoder configs
			} else {
				if (this.format._options.onMdat) {
					// Tracking is deliberately left running; it is not stopped within this method
					this.writer.startTrackingWrites();
				}

				this.mdat = mdat(true); // Reserve large size by default, can refine this when finalizing.
				this.boxWriter.writeBox(this.mdat);
			}

			await this.writer.flush();
		} finally {
			release();
		}
	}

	/**
	 * Returns whether every track of the output is accounted for, meaning it either already has track data (we've
	 * seen a sample from it) or its source has been closed.
	 */
	private allTracksAreKnown() {
		return this.output._tracks.every((track) => {
			if (track.source._closed) {
				return true;
			}

			// An open track only counts once we've seen a sample from it
			return this.trackDatas.some(trackData => trackData.track === track);
		});
	}

	/**
	 * Builds the MIME type of the output from the codec strings of all tracks. Waits until all tracks are known
	 * before doing so.
	 */
	async getMimeType() {
		await this.allTracksKnown.promise;

		const subtitleCodecStrings: Record<SubtitleCodec, string> = {
			webvtt: 'wvtt',
		};

		const codecStrings: string[] = [];
		let hasVideo = false;
		let hasAudio = false;

		for (const trackData of this.trackDatas) {
			switch (trackData.type) {
				case 'video':
					hasVideo = true;
					codecStrings.push(trackData.info.decoderConfig.codec);
					break;
				case 'audio':
					hasAudio = true;
					codecStrings.push(trackData.info.decoderConfig.codec);
					break;
				case 'subtitle':
					codecStrings.push(subtitleCodecStrings[trackData.track.source._codec]);
					break;
			}
		}

		return buildIsobmffMimeType({
			isQuickTime: this.isQuickTime,
			hasVideo,
			hasAudio,
			codecStrings,
		});
	}

	/**
	 * Returns the track data of the given video track, creating and registering it on first use.
	 *
	 * On creation, the metadata must carry a decoder config with coded dimensions. If an AVC or HEVC track comes
	 * without a description, the decoder configuration record is extracted from `packet` instead and the track is
	 * flagged so that its packets get converted from Annex B to the length-prefixed format.
	 *
	 * @throws If no decoder configuration record can be extracted from the packet when one is needed.
	 */
	private getVideoTrackData(track: OutputVideoTrack, packet: EncodedPacket, meta?: EncodedVideoChunkMetadata) {
		const known = this.trackDatas.find(candidate => candidate.track === track);
		if (known) {
			return known as IsobmffVideoTrackData;
		}

		validateVideoChunkMetadata(meta);

		assert(meta);
		assert(meta.decoderConfig);

		// Work on a shallow copy, since a description may get filled in below
		const decoderConfig = { ...meta.decoderConfig };
		assert(decoderConfig.codedWidth !== undefined);
		assert(decoderConfig.codedHeight !== undefined);

		const codec = track.source._codec;
		let requiresAnnexBTransformation = false;

		if (!decoderConfig.description) {
			// ISOBMFF can't hold AVC or HEVC in Annex B format, but a missing description indicates exactly that
			// format. We therefore derive the description from the packet and convert all packets later on.
			if (codec === 'avc') {
				const avcRecord = extractAvcDecoderConfigurationRecord(packet.data);
				if (!avcRecord) {
					throw new Error(
						'Couldn\'t extract an AVCDecoderConfigurationRecord from the AVC packet. Make sure the packets are'
						+ ' in Annex B format (as specified in ITU-T-REC-H.264) when not providing a description, or'
						+ ' provide a description (must be an AVCDecoderConfigurationRecord as specified in ISO 14496-15)'
						+ ' and ensure the packets are in AVCC format.',
					);
				}

				decoderConfig.description = serializeAvcDecoderConfigurationRecord(avcRecord);
				requiresAnnexBTransformation = true;
			} else if (codec === 'hevc') {
				const hevcRecord = extractHevcDecoderConfigurationRecord(packet.data);
				if (!hevcRecord) {
					throw new Error(
						'Couldn\'t extract an HEVCDecoderConfigurationRecord from the HEVC packet. Make sure the packets'
						+ ' are in Annex B format (as specified in ITU-T-REC-H.265) when not providing a description, or'
						+ ' provide a description (must be an HEVCDecoderConfigurationRecord as specified in ISO 14496-15)'
						+ ' and ensure the packets are in HEVC format.',
					);
				}

				decoderConfig.description = serializeHevcDecoderConfigurationRecord(hevcRecord);
				requiresAnnexBTransformation = true;
			}
		}

		// The timescale has to be an integer, but the user's frame rate might not be one. So, approximate the frame
		// time (the inverse of the frame rate) with a rational number and take its denominator as the timescale.
		const frameRate = track.metadata.frameRate ?? 57600;
		const frameTimeApproximation = computeRationalApproximation(1 / frameRate, 1e6);

		const created: IsobmffVideoTrackData = {
			muxer: this,
			track,
			type: 'video',
			info: {
				width: decoderConfig.codedWidth,
				height: decoderConfig.codedHeight,
				decoderConfig,
				requiresAnnexBTransformation,
			},
			timescale: frameTimeApproximation.denominator,
			samples: [],
			sampleQueue: [],
			timestampProcessingQueue: [],
			timeToSampleTable: [],
			compositionTimeOffsetTable: [],
			lastTimescaleUnits: null,
			lastSample: null,
			finalizedChunks: [],
			currentChunk: null,
			compactlyCodedChunkTable: [],
		};

		// Register the track, keeping the list ordered by track ID
		this.trackDatas.push(created);
		this.trackDatas.sort((left, right) => left.track.id - right.track.id);

		if (this.allTracksAreKnown()) {
			this.allTracksKnown.resolve();
		}

		return created;
	}

	/**
	 * Returns the track data of the given audio track, creating and registering it on first use. On creation, the
	 * metadata must carry a decoder config; its sample rate becomes the track's timescale.
	 */
	private getAudioTrackData(track: OutputAudioTrack, meta?: EncodedAudioChunkMetadata) {
		const known = this.trackDatas.find(candidate => candidate.track === track);
		if (known) {
			return known as IsobmffAudioTrackData;
		}

		validateAudioChunkMetadata(meta);

		assert(meta);
		const audioConfig = meta.decoderConfig;
		assert(audioConfig);

		// The PCM transformation only applies to PCM codecs in non-fragmented files
		const requiresPcmTransformation
			= !this.isFragmented
			&& (PCM_AUDIO_CODECS as readonly string[]).includes(track.source._codec);

		const created: IsobmffAudioTrackData = {
			muxer: this,
			track,
			type: 'audio',
			info: {
				numberOfChannels: audioConfig.numberOfChannels,
				sampleRate: audioConfig.sampleRate,
				decoderConfig: audioConfig,
				requiresPcmTransformation,
			},
			timescale: audioConfig.sampleRate,
			samples: [],
			sampleQueue: [],
			timestampProcessingQueue: [],
			timeToSampleTable: [],
			compositionTimeOffsetTable: [],
			lastTimescaleUnits: null,
			lastSample: null,
			finalizedChunks: [],
			currentChunk: null,
			compactlyCodedChunkTable: [],
		};

		// Register the track, keeping the list ordered by track ID
		this.trackDatas.push(created);
		this.trackDatas.sort((left, right) => left.track.id - right.track.id);

		if (this.allTracksAreKnown()) {
			this.allTracksKnown.resolve();
		}

		return created;
	}

	/**
	 * Returns the track data of the given subtitle track, creating and registering it on first use. On creation, the
	 * metadata must carry a subtitle config.
	 */
	private getSubtitleTrackData(track: OutputSubtitleTrack, meta?: SubtitleMetadata) {
		const known = this.trackDatas.find(candidate => candidate.track === track);
		if (known) {
			return known as IsobmffSubtitleTrackData;
		}

		validateSubtitleMetadata(meta);

		assert(meta);
		const subtitleConfig = meta.config;
		assert(subtitleConfig);

		const created: IsobmffSubtitleTrackData = {
			muxer: this,
			track,
			type: 'subtitle',
			info: {
				config: subtitleConfig,
			},
			timescale: 1000, // Reasonable
			samples: [],
			sampleQueue: [],
			timestampProcessingQueue: [],
			timeToSampleTable: [],
			compositionTimeOffsetTable: [],
			lastTimescaleUnits: null,
			lastSample: null,
			finalizedChunks: [],
			currentChunk: null,
			compactlyCodedChunkTable: [],

			// Cue processing state
			lastCueEndTimestamp: 0,
			cueQueue: [],
			nextSourceId: 0,
			cueToSourceId: new WeakMap(),
		};

		// Register the track, keeping the list ordered by track ID
		this.trackDatas.push(created);
		this.trackDatas.sort((left, right) => left.track.id - right.track.id);

		if (this.allTracksAreKnown()) {
			this.allTracksKnown.resolve();
		}

		return created;
	}

	/**
	 * Adds an encoded video packet to the given track while holding the muxer's mutex. Packets of tracks that
	 * require it are converted from Annex B to the length-prefixed format first.
	 *
	 * @throws If the Annex B conversion of the packet data fails.
	 */
	async addEncodedVideoPacket(track: OutputVideoTrack, packet: EncodedPacket, meta?: EncodedVideoChunkMetadata) {
		const release = await this.mutex.acquire();

		try {
			const trackData = this.getVideoTrackData(track, packet, meta);

			const payload = trackData.info.requiresAnnexBTransformation
				? transformAnnexBToLengthPrefixed(packet.data)
				: packet.data;
			if (!payload) {
				// Only the transformation can yield an empty result
				throw new Error(
					'Failed to transform packet data. Make sure all packets are provided in Annex B format, as'
					+ ' specified in ITU-T-REC-H.264 and ITU-T-REC-H.265.',
				);
			}

			const isKeyPacket = packet.type === 'key';
			const normalizedTimestamp = this.validateAndNormalizeTimestamp(
				trackData.track,
				packet.timestamp,
				isKeyPacket,
			);

			await this.registerSample(
				trackData,
				this.createSampleForTrack(trackData, payload, normalizedTimestamp, packet.duration, packet.type),
			);
		} finally {
			release();
		}
	}

	/**
	 * Adds an encoded audio packet to the given track while holding the muxer's mutex. For tracks that require the
	 * PCM transformation, any gap before the packet is filled with silence first.
	 */
	async addEncodedAudioPacket(track: OutputAudioTrack, packet: EncodedPacket, meta?: EncodedAudioChunkMetadata) {
		const release = await this.mutex.acquire();

		try {
			const trackData = this.getAudioTrackData(track, meta);

			const isKeyPacket = packet.type === 'key';
			const normalizedTimestamp = this.validateAndNormalizeTimestamp(
				trackData.track,
				packet.timestamp,
				isKeyPacket,
			);
			const audioSample = this.createSampleForTrack(
				trackData,
				packet.data,
				normalizedTimestamp,
				packet.duration,
				packet.type,
			);

			// The padding must be registered before the actual sample
			if (trackData.info.requiresPcmTransformation) {
				await this.maybePadWithSilence(trackData, normalizedTimestamp);
			}

			await this.registerSample(trackData, audioSample);
		} finally {
			release();
		}
	}

	/**
	 * Registers a sample of silence that spans from the end of the track's last sample (or from 0 if there is none)
	 * up to `untilTimestamp`, provided that the gap is at least one unit of the track's timescale after rounding.
	 *
	 * This is needed because the PCM transformation assumes that all samples are contiguous, which isn't enforced
	 * anywhere. "Holes" between samples (and before the first one) must therefore be filled.
	 */
	private async maybePadWithSilence(trackData: IsobmffAudioTrackData, untilTimestamp: number) {
		const previousSample = last(trackData.samples);
		const gapStart = previousSample
			? previousSample.timestamp + previousSample.duration
			: 0;

		const gapDuration = untilTimestamp - gapStart;
		const gapInTimescale = intoTimescale(gapDuration, trackData.timescale);

		if (!(gapInTimescale > 0)) {
			return; // No hole to fill
		}

		const pcmInfo = parsePcmCodec(
			trackData.info.decoderConfig.codec as PcmAudioCodec,
		);
		const valueCount = gapInTimescale * trackData.info.numberOfChannels;
		const silence = new Uint8Array(pcmInfo.sampleSize * valueCount).fill(pcmInfo.silentValue);

		const silenceSample = this.createSampleForTrack(
			trackData,
			new Uint8Array(silence.buffer),
			gapStart,
			gapDuration,
			'key',
		);
		await this.registerSample(trackData, silenceSample);
	}

	/**
	 * Adds a subtitle cue to the given track while holding the muxer's mutex. WebVTT cues are queued and then
	 * processed into samples up to the cue's timestamp; other codecs are not handled yet.
	 */
	async addSubtitleCue(track: OutputSubtitleTrack, cue: SubtitleCue, meta?: SubtitleMetadata) {
		const release = await this.mutex.acquire();

		try {
			const trackData = this.getSubtitleTrackData(track, meta);

			// The return value isn't needed here
			this.validateAndNormalizeTimestamp(trackData.track, cue.timestamp, true);

			const isWebVTT = track.source._codec === 'webvtt';
			if (!isWebVTT) {
				// TODO
				return;
			}

			trackData.cueQueue.push(cue);
			await this.processWebVTTCues(trackData, cue.timestamp);
		} finally {
			release();
		}
	}

	/**
	 * Turns queued WebVTT cues into samples, as far as that is possible given that cues are only known up to the
	 * timestamp `until`.
	 *
	 * Each iteration emits one sample covering the span between the two earliest cue start/end timestamps that are
	 * still pending, preceded by an empty (vtte) sample if there is a gap after the previously emitted sample. The
	 * loop stops when the queue is empty or when the next sample would end after `until`.
	 *
	 * @param trackData The subtitle track whose cue queue is processed.
	 * @param until The timestamp up to which samples may be emitted. Every queued cue must start at or before it.
	 */
	private async processWebVTTCues(trackData: IsobmffSubtitleTrackData, until: number) {
		// WebVTT cues need to undergo special processing as empty sections need to be padded out with samples, and
		// overlapping samples require special logic. The algorithm produces the format specified in ISO 14496-30.

		while (trackData.cueQueue.length > 0) {
			// Collect all points in time at which the set of active cues changes
			const timestamps = new Set<number>([]);
			for (const cue of trackData.cueQueue) {
				assert(cue.timestamp <= until);
				assert(trackData.lastCueEndTimestamp <= cue.timestamp + cue.duration);

				// Start timestamps are clamped, since the part before lastCueEndTimestamp has already been emitted
				timestamps.add(Math.max(cue.timestamp, trackData.lastCueEndTimestamp)); // Start timestamp
				timestamps.add(cue.timestamp + cue.duration); // End timestamp
			}

			const sortedTimestamps = [...timestamps].sort((a, b) => a - b);

			// These are the timestamps of the next sample we'll create:
			const sampleStart = sortedTimestamps[0]!;
			const sampleEnd = sortedTimestamps[1] ?? sampleStart;

			if (until < sampleEnd) {
				// Cues added later could still change what this sample has to contain, so it can't be emitted yet
				break;
			}

			// We may need to pad out empty space with a vtte box
			if (trackData.lastCueEndTimestamp < sampleStart) {
				this.auxWriter.seek(0);
				const box = vtte();
				this.auxBoxWriter.writeBox(box);

				const body = this.auxWriter.getSlice(0, this.auxWriter.getPos());
				const sample = this.createSampleForTrack(
					trackData,
					body,
					trackData.lastCueEndTimestamp,
					sampleStart - trackData.lastCueEndTimestamp,
					'key',
				);

				await this.registerSample(trackData, sample);
				trackData.lastCueEndTimestamp = sampleStart;
			}

			// Rewind the scratch writer; the boxes of all cues in this sample are written into it back to back
			this.auxWriter.seek(0);

			for (let i = 0; i < trackData.cueQueue.length; i++) {
				const cue = trackData.cueQueue[i]!;

				if (cue.timestamp >= sampleEnd) {
					break;
				}

				// Reset lastIndex before testing, as it is being set explicitly for this shared regex
				inlineTimestampRegex.lastIndex = 0;
				const containsTimestamp = inlineTimestampRegex.test(cue.text);

				const endTimestamp = cue.timestamp + cue.duration;
				let sourceId = trackData.cueToSourceId.get(cue);
				if (sourceId === undefined && sampleEnd < endTimestamp) {
					// We know this cue will appear in more than one sample, therefore we need to mark it with a
					// unique ID
					sourceId = trackData.nextSourceId++;
					trackData.cueToSourceId.set(cue, sourceId);
				}

				if (cue.notes) {
					// Any notes/comments are included in a special vtta box
					const box = vtta(cue.notes);
					this.auxBoxWriter.writeBox(box);
				}

				const box = vttc(
					cue.text,
					containsTimestamp ? sampleStart : null,
					cue.identifier ?? null,
					cue.settings ?? null,
					sourceId ?? null,
				);
				this.auxBoxWriter.writeBox(box);

				if (endTimestamp === sampleEnd) {
					// The cue won't appear in any future sample, so we're done with it
					// (the index is decremented so that the element moving into this slot isn't skipped)
					trackData.cueQueue.splice(i--, 1);
				}
			}

			const body = this.auxWriter.getSlice(0, this.auxWriter.getPos());
			const sample = this.createSampleForTrack(trackData, body, sampleStart, sampleEnd - sampleStart, 'key');

			await this.registerSample(trackData, sample);
			trackData.lastCueEndTimestamp = sampleEnd;
		}
	}

	/**
	 * Creates the internal sample object for a piece of data belonging to the given track. Nothing is registered
	 * with the track yet.
	 *
	 * @param trackData The track the sample belongs to; only its timescale is used.
	 * @param data The sample's payload.
	 * @param timestamp The presentation timestamp in seconds.
	 * @param duration The duration in seconds.
	 * @param type The packet type of the sample.
	 */
	private createSampleForTrack(
		trackData: IsobmffTrackData,
		data: Uint8Array,
		timestamp: number,
		duration: number,
		type: PacketType,
	): Sample {
		// Preliminary value based on the duration alone, will be refined
		const timescaleUnitsToNextSample = intoTimescale(duration, trackData.timescale);

		return {
			timestamp,
			decodeTimestamp: timestamp, // This may be refined later
			duration,
			data,
			size: data.byteLength,
			type,
			timescaleUnitsToNextSample,
		};
	}

	/**
	 * Processes all samples in the track's timestamp processing queue and empties it: decode timestamps are
	 * assigned, the delta to the next sample is stored on each preceding sample and, for non-fragmented files, the
	 * time-to-sample and composition time offset tables are extended.
	 *
	 * Does nothing if the queue is empty. For audio tracks that require the PCM transformation, only the
	 * time-to-sample table is updated (as a single run of entries with a delta of 1).
	 *
	 * @param trackData The track whose queued samples are processed.
	 * @param nextSample The sample that follows the queued ones, if known. It is used to estimate the distance from
	 * the last queued sample to the next one in case that distance is still 0, and must be a key frame then.
	 */
	private processTimestamps(trackData: IsobmffTrackData, nextSample?: Sample) {
		if (trackData.timestampProcessingQueue.length === 0) {
			return;
		}

		if (trackData.type === 'audio' && trackData.info.requiresPcmTransformation) {
			let totalDuration = 0;

			// Compute the total duration in the track timescale (which is equal to the amount of PCM audio samples)
			// and simply say that's how many new samples there are.

			for (let i = 0; i < trackData.timestampProcessingQueue.length; i++) {
				const sample = trackData.timestampProcessingQueue[i]!;
				const duration = intoTimescale(sample.duration, trackData.timescale);
				totalDuration += duration;
			}

			// All entries have a delta of 1, so a single table entry is enough and only its count needs to grow
			if (trackData.timeToSampleTable.length === 0) {
				trackData.timeToSampleTable.push({
					sampleCount: totalDuration,
					sampleDelta: 1,
				});
			} else {
				const lastEntry = last(trackData.timeToSampleTable)!;
				lastEntry.sampleCount += totalDuration;
			}

			trackData.timestampProcessingQueue.length = 0;
			return;
		}

		// The presentation timestamps of the queued samples in ascending order; the i-th one becomes the decode
		// timestamp of the i-th sample in the queue
		const sortedTimestamps = trackData.timestampProcessingQueue.map(x => x.timestamp).sort((a, b) => a - b);

		for (let i = 0; i < trackData.timestampProcessingQueue.length; i++) {
			const sample = trackData.timestampProcessingQueue[i]!;

			// Since the user only supplies presentation time, but these may be out of order, we reverse-engineer from
			// that a sensible decode timestamp. The notion of a decode timestamp doesn't really make sense
			// (presentation timestamp & decode order are all you need), but it is a concept in ISOBMFF so we need to
			// model it.
			sample.decodeTimestamp = sortedTimestamps[i]!;

			if (!this.isFragmented && trackData.lastTimescaleUnits === null) {
				// In non-fragmented files, the first decode timestamp is always zero. If the first presentation
				// timestamp isn't zero, we'll simply use the composition time offset to achieve it.
				sample.decodeTimestamp = 0;
			}

			const sampleCompositionTimeOffset
				= intoTimescale(sample.timestamp - sample.decodeTimestamp, trackData.timescale);
			const durationInTimescale = intoTimescale(sample.duration, trackData.timescale);

			if (trackData.lastTimescaleUnits !== null) {
				assert(trackData.lastSample);

				// The delta is computed against the running integer total rather than from two individually rounded
				// timestamps
				const timescaleUnits = intoTimescale(sample.decodeTimestamp, trackData.timescale, false);
				const delta = Math.round(timescaleUnits - trackData.lastTimescaleUnits);
				assert(delta >= 0);

				trackData.lastTimescaleUnits += delta;
				trackData.lastSample.timescaleUnitsToNextSample = delta;

				if (!this.isFragmented) {
					// First, correct the table entry of the previous sample: it was added using that sample's
					// duration, but now that the current sample is known, its delta is what counts
					let lastTableEntry = last(trackData.timeToSampleTable);
					assert(lastTableEntry);

					if (lastTableEntry.sampleCount === 1) {
						lastTableEntry.sampleDelta = delta;

						const entryBefore = trackData.timeToSampleTable[trackData.timeToSampleTable.length - 2];
						if (entryBefore && entryBefore.sampleDelta === delta) {
							// If the delta is the same as the previous one, merge the two entries
							entryBefore.sampleCount++;
							trackData.timeToSampleTable.pop();
							lastTableEntry = entryBefore;
						}
					} else if (lastTableEntry.sampleDelta !== delta) {
						// The delta has changed, so we need a new entry to reach the current sample
						lastTableEntry.sampleCount--;
						trackData.timeToSampleTable.push(lastTableEntry = {
							sampleCount: 1,
							sampleDelta: delta,
						});
					}

					// Then, account for the current sample itself, using its duration for the time being
					if (lastTableEntry.sampleDelta === durationInTimescale) {
						// The sample's duration matches the delta, so we can increment the count
						lastTableEntry.sampleCount++;
					} else {
						// Add a new entry in order to maintain the last sample's true duration
						trackData.timeToSampleTable.push({
							sampleCount: 1,
							sampleDelta: durationInTimescale,
						});
					}

					const lastCompositionTimeOffsetTableEntry = last(trackData.compositionTimeOffsetTable);
					assert(lastCompositionTimeOffsetTableEntry);

					if (
						lastCompositionTimeOffsetTableEntry.sampleCompositionTimeOffset === sampleCompositionTimeOffset
					) {
						// Simply increment the count
						lastCompositionTimeOffsetTableEntry.sampleCount++;
					} else {
						// The composition time offset has changed, so create a new entry with the new composition time
						// offset
						trackData.compositionTimeOffsetTable.push({
							sampleCount: 1,
							sampleCompositionTimeOffset: sampleCompositionTimeOffset,
						});
					}
				}
			} else {
				// Decode timestamp of the first sample
				trackData.lastTimescaleUnits = intoTimescale(sample.decodeTimestamp, trackData.timescale, false);

				if (!this.isFragmented) {
					trackData.timeToSampleTable.push({
						sampleCount: 1,
						sampleDelta: durationInTimescale,
					});
					trackData.compositionTimeOffsetTable.push({
						sampleCount: 1,
						sampleCompositionTimeOffset: sampleCompositionTimeOffset,
					});
				}
			}

			trackData.lastSample = sample;
		}

		trackData.timestampProcessingQueue.length = 0;

		assert(trackData.lastSample);
		assert(trackData.lastTimescaleUnits !== null);

		if (nextSample !== undefined && trackData.lastSample.timescaleUnitsToNextSample === 0) {
			assert(nextSample.type === 'key');

			// Given the next sample, we can make a guess about the duration of the last sample. This avoids having
			// the last sample's duration in each fragment be "0" for fragmented files. The guess we make here is
			// actually correct most of the time, since typically, no delta frame with a lower timestamp follows the key
			// frame (although it can happen).
			const timescaleUnits = intoTimescale(nextSample.timestamp, trackData.timescale, false);
			const delta = Math.round(timescaleUnits - trackData.lastTimescaleUnits);
			trackData.lastSample.timescaleUnitsToNextSample = delta;
		}
	}

	/**
	 * Registers a new sample with its track. A key frame first triggers timestamp processing of the samples queued
	 * before it. Afterwards, the sample is queued for timestamp processing itself and then either queued for
	 * interleaving (fragmented files) or added to the track right away (all other files).
	 */
	private async registerSample(trackData: IsobmffTrackData, sample: Sample) {
		const isKeyFrame = sample.type === 'key';
		if (isKeyFrame) {
			this.processTimestamps(trackData, sample);
		}
		trackData.timestampProcessingQueue.push(sample);

		if (!this.isFragmented) {
			await this.addSampleToTrack(trackData, sample);
			return;
		}

		trackData.sampleQueue.push(sample);
		await this.interleaveSamples();
	}

	/**
	 * Appends the sample to the track's current chunk, beginning a new chunk first if necessary.
	 *
	 * A new chunk is begun when the track has none, or:
	 * - for non-fragmented files, when the current chunk spans at least 0.5 seconds; the current chunk is then
	 *   finalized first.
	 * - for fragmented files, when the current chunk spans at least the minimum fragment duration, every track can
	 *   start the next fragment with a key frame, and the sample's timestamp exceeds all timestamps written so far;
	 *   the current fragment is then finalized first.
	 */
	private async addSampleToTrack(trackData: IsobmffTrackData, sample: Sample) {
		if (!this.isFragmented) {
			trackData.samples.push(sample);
		}

		let beginNewChunk = false;
		if (!trackData.currentChunk) {
			beginNewChunk = true;
		} else {
			// Timestamp don't need to be monotonic (think B-frames), so we may need to update the start timestamp of
			// the chunk
			trackData.currentChunk.startTimestamp = Math.min(
				trackData.currentChunk.startTimestamp,
				sample.timestamp,
			);

			const currentChunkDuration = sample.timestamp - trackData.currentChunk.startTimestamp;

			if (this.isFragmented) {
				// We can only finalize this fragment (and begin a new one) if we know that each track will be able to
				// start the new one with a key frame.
				const keyFrameQueuedEverywhere = this.trackDatas.every((otherTrackData) => {
					if (trackData === otherTrackData) {
						return sample.type === 'key';
					}

					const firstQueuedSample = otherTrackData.sampleQueue[0];
					if (firstQueuedSample) {
						return firstQueuedSample.type === 'key';
					}

					// Nothing is queued for this track; that's only acceptable if its source has been closed
					return otherTrackData.track.source._closed;
				});

				if (
					currentChunkDuration >= this.minimumFragmentDuration
					&& keyFrameQueuedEverywhere
					&& sample.timestamp > this.maxWrittenTimestamp
				) {
					beginNewChunk = true;
					// This resets the current chunk of every track that is part of the fragment
					await this.finalizeFragment();
				}
			} else {
				beginNewChunk = currentChunkDuration >= 0.5; // Chunk is long enough, we need a new one
			}
		}

		if (beginNewChunk) {
			if (trackData.currentChunk) {
				await this.finalizeCurrentChunk(trackData);
			}

			trackData.currentChunk = {
				startTimestamp: sample.timestamp,
				samples: [],
				offset: null,
				moofOffset: null,
			};
		}

		assert(trackData.currentChunk);
		trackData.currentChunk.samples.push(sample);

		if (this.isFragmented) {
			this.maxWrittenTimestamp = Math.max(this.maxWrittenTimestamp, sample.timestamp);
		}
	}

	/**
	 * Finalizes the track's current chunk (if there is one): records it in the chunk lists and the compact chunk
	 * table and, unless in-memory fast start is used, writes its sample data to the output.
	 *
	 * Must only be called for non-fragmented files. The track's `currentChunk` is not reset here.
	 */
	private async finalizeCurrentChunk(trackData: IsobmffTrackData) {
		assert(!this.isFragmented);

		const chunk = trackData.currentChunk;
		if (!chunk) {
			return;
		}

		trackData.finalizedChunks.push(chunk);
		this.finalizedChunks.push(chunk);

		// With the PCM transformation, each unit of the timescale counts as one sample
		let samplesInChunk = chunk.samples.length;
		if (trackData.type === 'audio' && trackData.info.requiresPcmTransformation) {
			samplesInChunk = 0;
			for (const sample of chunk.samples) {
				samplesInChunk += intoTimescale(sample.duration, trackData.timescale);
			}
		}

		// A new table entry is only needed when the number of samples per chunk changes
		const previousEntry = last(trackData.compactlyCodedChunkTable);
		if (!previousEntry || previousEntry.samplesPerChunk !== samplesInChunk) {
			trackData.compactlyCodedChunkTable.push({
				firstChunk: trackData.finalizedChunks.length, // 1-indexed
				samplesPerChunk: samplesInChunk,
			});
		}

		if (this.fastStart === 'in-memory') {
			chunk.offset = 0; // We'll compute the proper offset when finalizing
			return;
		}

		// Write out the data
		chunk.offset = this.writer.getPos();
		for (const sample of chunk.samples) {
			assert(sample.data);
			this.writer.write(sample.data);
			sample.data = null; // Can be GC'd
		}

		await this.writer.flush();
	}

	/**
	 * Moves queued samples into their tracks in order of ascending timestamp, always picking the track whose first
	 * queued sample has the lowest timestamp. Must only be called for fragmented files.
	 *
	 * @param isFinalCall If false, nothing happens until all tracks are known, and interleaving pauses as soon as
	 * a track whose source is still open has nothing queued. If true, all queues are drained unconditionally.
	 */
	private async interleaveSamples(isFinalCall = false) {
		assert(this.isFragmented);

		if (!isFinalCall && !this.allTracksAreKnown()) {
			return; // We can't interleave yet as we don't yet know how many tracks we'll truly have
		}

		while (true) {
			let earliestTrack: IsobmffTrackData | null = null;
			let earliestTimestamp = Infinity;
			let mustWait = false;

			for (const trackData of this.trackDatas) {
				const head = trackData.sampleQueue[0];

				if (!head) {
					if (!isFinalCall && !trackData.track.source._closed) {
						// This track may still deliver a sample with a lower timestamp than what is queued
						mustWait = true;
						break;
					}

					continue;
				}

				if (head.timestamp < earliestTimestamp) {
					earliestTrack = trackData;
					earliestTimestamp = head.timestamp;
				}
			}

			if (mustWait || !earliestTrack) {
				return;
			}

			const sample = earliestTrack.sampleQueue.shift()!;
			await this.addSampleToTrack(earliestTrack, sample);
		}
	}

	/**
	 * Writes out the current fragment: a moof box followed by an mdat box containing the sample data of the current
	 * chunk of every track that has one. The very first fragment is preceded by the moov box. Afterwards, the
	 * written chunks are recorded as finalized and the tracks' current chunks are reset.
	 *
	 * Must only be called for fragmented files.
	 *
	 * @param flushWriter Whether to flush the writer at the end.
	 */
	private async finalizeFragment(flushWriter = true) {
		assert(this.isFragmented);

		const fragmentNumber = this.nextFragmentNumber++;

		if (fragmentNumber === 1) {
			if (this.format._options.onMoov) {
				this.writer.startTrackingWrites();
			}

			// Write the moov box now that we have all decoder configs
			const movieBox = moov(this.trackDatas, this.creationTime, true);
			this.boxWriter.writeBox(movieBox);

			if (this.format._options.onMoov) {
				const { data, start } = this.writer.stopTrackingWrites();
				this.format._options.onMoov(data, start);
			}
		}

		// Not all tracks need to be present in every fragment
		const tracksInFragment = this.trackDatas.filter(x => x.currentChunk);

		// Create an initial moof box and measure it; we need this to know where the following mdat box will begin
		const moofBox = moof(fragmentNumber, tracksInFragment);
		const moofOffset = this.writer.getPos();
		const mdatStartPos = moofOffset + this.boxWriter.measureBox(moofBox);

		// Lay out the chunks back to back behind the mdat header, assuming the small header size for now
		let currentPos = mdatStartPos + MIN_BOX_HEADER_SIZE;
		let fragmentStartTimestamp = Infinity;
		for (const trackData of tracksInFragment) {
			trackData.currentChunk!.offset = currentPos;
			trackData.currentChunk!.moofOffset = moofOffset;

			for (const sample of trackData.currentChunk!.samples) {
				currentPos += sample.size;
			}

			fragmentStartTimestamp = Math.min(fragmentStartTimestamp, trackData.currentChunk!.startTimestamp);
		}

		// This size includes the small (MIN_BOX_HEADER_SIZE) header
		const mdatSize = currentPos - mdatStartPos;
		const needsLargeMdatSize = mdatSize >= 2 ** 32;

		if (needsLargeMdatSize) {
			// Shift all offsets by 8. Previously, all chunks were shifted assuming the large box size, but due to what
			// I suspect is a bug in WebKit, it failed in Safari (when livestreaming with MSE, not for static playback).
			for (const trackData of tracksInFragment) {
				trackData.currentChunk!.offset! += MAX_BOX_HEADER_SIZE - MIN_BOX_HEADER_SIZE;
			}
		}

		if (this.format._options.onMoof) {
			this.writer.startTrackingWrites();
		}

		// The moof box is created anew so that it picks up the chunk offsets that were just computed
		const newMoofBox = moof(fragmentNumber, tracksInFragment);
		this.boxWriter.writeBox(newMoofBox);

		if (this.format._options.onMoof) {
			const { data, start } = this.writer.stopTrackingWrites();
			this.format._options.onMoof(data, start, fragmentStartTimestamp);
		}

		// The final moof box must be exactly as large as the one measured above, or all offsets would be off
		assert(this.writer.getPos() === mdatStartPos);

		if (this.format._options.onMdat) {
			this.writer.startTrackingWrites();
		}

		const mdatBox = mdat(needsLargeMdatSize);
		// NOTE(review): mdatSize was computed with the small header in mind. If the large header is used, the box
		// presumably occupies MAX_BOX_HEADER_SIZE - MIN_BOX_HEADER_SIZE more bytes than this - TODO confirm how
		// IsobmffBoxWriter interprets `size`.
		mdatBox.size = mdatSize;
		this.boxWriter.writeBox(mdatBox);

		this.writer.seek(mdatStartPos + (needsLargeMdatSize ? MAX_BOX_HEADER_SIZE : MIN_BOX_HEADER_SIZE));

		// Write sample data
		for (const trackData of tracksInFragment) {
			for (const sample of trackData.currentChunk!.samples) {
				this.writer.write(sample.data!);
				sample.data = null; // Can be GC'd
			}
		}

		if (this.format._options.onMdat) {
			const { data, start } = this.writer.stopTrackingWrites();
			this.format._options.onMdat(data, start);
		}

		for (const trackData of tracksInFragment) {
			trackData.finalizedChunks.push(trackData.currentChunk!);
			this.finalizedChunks.push(trackData.currentChunk!);
			trackData.currentChunk = null;
		}

		if (flushWriter) {
			await this.writer.flush();
		}
	}

	/**
	 * Handles a track being closed. For WebVTT subtitle tracks, all remaining cues are processed. If all tracks are
	 * now known, `allTracksKnown` is resolved. For fragmented files, sample interleaving is attempted again.
	 *
	 * The muxer mutex is held for the duration of the call and is released even if one of the steps throws, so
	 * that a failure here cannot block subsequent operations waiting on the mutex.
	 *
	 * @param track - The track that was closed.
	 */
	// eslint-disable-next-line @typescript-eslint/no-misused-promises
	override async onTrackClose(track: OutputTrack) {
		const release = await this.mutex.acquire();

		try {
			if (track.type === 'subtitle' && track.source._codec === 'webvtt') {
				const trackData = this.trackDatas.find(x => x.track === track) as IsobmffSubtitleTrackData;
				if (trackData) {
					// No more cues will follow, so process everything that is left
					await this.processWebVTTCues(trackData, Infinity);
				}
			}

			if (this.allTracksAreKnown()) {
				this.allTracksKnown.resolve();
			}

			if (this.isFragmented) {
				// Since a track is now closed, we may be able to write out chunks that were previously waiting
				await this.interleaveSamples();
			}
		} finally {
			release();
		}
	}

	/**
	 * Finalizes the file, making it ready for use. Must be called after all video and audio chunks have been added.
	 *
	 * Remaining WebVTT cues and pending chunks are written out first. What follows depends on the file layout:
	 * - With in-memory fast start, the chunk offsets are computed, then the moov box, the mdat box and all
	 *   buffered sample data are written.
	 * - For fragmented files, the mfra box is appended.
	 * - Otherwise, the already written mdat box is patched with its size and the moov box is appended.
	 *
	 * The muxer mutex is held for the duration of the call and is released even if finalization throws.
	 */
	async finalize() {
		const release = await this.mutex.acquire();

		try {
			this.allTracksKnown.resolve();

			for (const trackData of this.trackDatas) {
				if (trackData.type === 'subtitle' && trackData.track.source._codec === 'webvtt') {
					await this.processWebVTTCues(trackData, Infinity);
				}
			}

			if (this.isFragmented) {
				await this.interleaveSamples(true);

				for (const trackData of this.trackDatas) {
					this.processTimestamps(trackData);
				}

				// Don't flush the last fragment as we will flush it with the mfra box
				await this.finalizeFragment(false);
			} else {
				for (const trackData of this.trackDatas) {
					this.processTimestamps(trackData);
					await this.finalizeCurrentChunk(trackData);
				}
			}

			if (this.fastStart === 'in-memory') {
				assert(this.mdat);
				let mdatSize: number;

				// We know how many chunks there are, but computing the chunk positions requires an iterative
				// approach: In order to know where the first chunk should go, we first need to know the size of
				// the moov box. But we cannot write a proper moov box without first knowing all chunk positions.
				// So, we generate a tentative moov box with placeholder values (0) for the chunk offsets to be
				// able to compute its size. If it then turns out that appending all chunks exceeds 4 GiB, we need
				// to repeat this process, now with the co64 box being used in the moov box instead, which will
				// make it larger. After that, we definitely know the final size of the moov box and can compute
				// the proper chunk positions.

				for (let i = 0; i < 2; i++) {
					const movieBox = moov(this.trackDatas, this.creationTime);
					const movieBoxSize = this.boxWriter.measureBox(movieBox);
					mdatSize = this.boxWriter.measureBox(this.mdat);
					let currentChunkPos = this.writer.getPos() + movieBoxSize + mdatSize;

					for (const chunk of this.finalizedChunks) {
						chunk.offset = currentChunkPos;
						for (const { data } of chunk.samples) {
							assert(data);
							currentChunkPos += data.byteLength;
							mdatSize += data.byteLength;
						}
					}

					if (currentChunkPos < 2 ** 32) break;
					if (mdatSize >= 2 ** 32) this.mdat.largeSize = true;
				}

				if (this.format._options.onMoov) {
					this.writer.startTrackingWrites();
				}

				const movieBox = moov(this.trackDatas, this.creationTime);
				this.boxWriter.writeBox(movieBox);

				if (this.format._options.onMoov) {
					const { data, start } = this.writer.stopTrackingWrites();
					this.format._options.onMoov(data, start);
				}

				if (this.format._options.onMdat) {
					this.writer.startTrackingWrites();
				}

				this.mdat.size = mdatSize!;
				this.boxWriter.writeBox(this.mdat);

				for (const chunk of this.finalizedChunks) {
					for (const sample of chunk.samples) {
						assert(sample.data);
						this.writer.write(sample.data);
						sample.data = null; // Can be GC'd
					}
				}

				if (this.format._options.onMdat) {
					const { data, start } = this.writer.stopTrackingWrites();
					this.format._options.onMdat(data, start);
				}
			} else if (this.isFragmented) {
				// Append the mfra box to the end of the file for better random access
				const startPos = this.writer.getPos();
				const mfraBox = mfra(this.trackDatas);
				this.boxWriter.writeBox(mfraBox);

				// Patch the 'size' field of the mfro box at the end of the mfra box now that we know its actual
				// size
				const mfraBoxSize = this.writer.getPos() - startPos;
				this.writer.seek(this.writer.getPos() - 4);
				this.boxWriter.writeU32(mfraBoxSize);
			} else {
				assert(this.mdat);

				// Everything written since the start of the mdat box belongs to it
				const mdatPos = this.boxWriter.offsets.get(this.mdat);
				assert(mdatPos !== undefined);
				const mdatSize = this.writer.getPos() - mdatPos;
				this.mdat.size = mdatSize;
				this.mdat.largeSize = mdatSize >= 2 ** 32; // Only use the large size if we need it
				this.boxWriter.patchBox(this.mdat);

				if (this.format._options.onMdat) {
					// NOTE(review): the matching startTrackingWrites() call is not in this method; presumably it
					// happens where the mdat box is first written - confirm
					const { data, start } = this.writer.stopTrackingWrites();
					this.format._options.onMdat(data, start);
				}

				if (this.format._options.onMoov) {
					this.writer.startTrackingWrites();
				}

				const movieBox = moov(this.trackDatas, this.creationTime);
				this.boxWriter.writeBox(movieBox);

				if (this.format._options.onMoov) {
					const { data, start } = this.writer.stopTrackingWrites();
					this.format._options.onMoov(data, start);
				}
			}
		} finally {
			// Release the mutex on failure too, otherwise every later call waiting on it would hang
			release();
		}
	}
}
