/* eslint no-use-before-define:0 */

// Import
import type Buffer from 'buffer'
import * as pathUtil from 'path'
import textExtensions from 'textextensions'
import binaryExtensions from 'binaryextensions'

/** Options accepted by `getEncoding` to control which part of the buffer is inspected. */
export interface EncodingOpts {
	/** How many bytes each inspected chunk spans, before the chunk is adjusted to character boundaries. Defaults to 24. */
	chunkLength?: number

	/** Byte offset at which the inspected chunk begins. If not provided, the start, middle, and end of the buffer are each checked. */
	chunkBegin?: number
}

/**
 * Decide whether a file is text, using its filename and/or its contents.
 * Every dot-separated segment of the basename is compared, last segment first, against the known text and binary extension lists (https://github.com/bevry/textextensions and https://github.com/bevry/binaryextensions); the first segment found in either list settles the answer.
 * Only when no segment is recognised (or no filename was given) is the buffer inspected, as extension lookups are quicker and encoding detection cannot guarantee accuracy for chars between utf8 and utf16.
 * @param filename The filename for the file/buffer if available
 * @param buffer The buffer for the file if available
 * @returns `true` for text, `false` for binary, or `null` when the extension check was inconclusive and no `buffer` was provided.
 */
export function isText(
	filename?: string | null,
	buffer?: Buffer | null
): boolean | null {
	if (filename) {
		// Consume the basename's segments from the right, so the final extension is consulted first
		const segments = pathUtil.basename(filename).split('.')
		let segment = segments.pop()
		while (segment !== undefined) {
			// The text list takes precedence over the binary list for the same segment
			if (textExtensions.indexOf(segment) >= 0) return true
			if (binaryExtensions.indexOf(segment) >= 0) return false
			segment = segments.pop()
		}
	}

	// Extensions were inconclusive: inspect the content if there is any, otherwise give up
	return buffer ? getEncoding(buffer) === 'utf8' : null
}

/**
 * Decide whether a file is binary, using its filename and/or its contents.
 * This is the logical inverse of `isText`, which is called with the same arguments: the extension check runs first (if a filename is available), and the slower buffer encoding detection is only used when the extension is unknown or no filename was given.
 * The extension checks are performed using the resources https://github.com/bevry/textextensions and https://github.com/bevry/binaryextensions
 * @param filename The filename for the file/buffer if available
 * @param buffer The buffer for the file if available
 * @returns `true` for binary, `false` for text, or `null` when `isText` could not reach a verdict.
 */
export function isBinary(filename?: string | null, buffer?: Buffer | null) {
	const textVerdict = isText(filename, buffer)

	// Pass an undetermined result through untouched, otherwise flip it
	return textVerdict == null ? null : !textVerdict
}

/**
 * Classify a buffer as `'utf8'` (text) or `'binary'`.
 * When `opts.chunkBegin` is omitted, a chunk at the start, the middle, and the end of the buffer is inspected in turn, stopping at the first chunk that is not text; history has shown that inspection at all three locations is necessary.
 * When `opts.chunkBegin` is given, only the chunk starting there is inspected. The chunk is first adjusted so it does not split a multibyte UTF8 character, then decoded as UTF8 and scanned for the replacement character (65533) or a control character with code 8 or below.
 * @param buffer The buffer to inspect
 * @param opts Chunk length and, optionally, the offset of the single chunk to inspect
 * @returns Will be `null` if `buffer` was not provided. Otherwise will be either `'utf8'` or `'binary'`
 */
export function getEncoding(
	buffer: Buffer | null,
	opts?: EncodingOpts
): 'utf8' | 'binary' | null {
	// Nothing to inspect
	if (!buffer) return null

	const textEncoding = 'utf8'
	const binaryEncoding = 'binary'
	const chunkLength = opts?.chunkLength ?? 24
	const requestedBegin = opts?.chunkBegin

	// No explicit offset: probe the start, middle, and end, bailing out on the first non-text chunk
	if (requestedBegin == null) {
		const size = buffer.length
		const offsets = [
			0,
			Math.max(0, Math.floor(size / 2) - chunkLength),
			Math.max(0, size - chunkLength),
		]
		for (const chunkBegin of offsets) {
			const verdict = getEncoding(buffer, { chunkLength, chunkBegin })
			if (verdict !== textEncoding) return verdict
		}
		return textEncoding
	}

	// Explicit offset: move the chunk's start back onto a character boundary
	const begin = getChunkBegin(buffer, requestedBegin)
	if (begin === -1) {
		// Started on a continuation byte that has no lead byte before it
		return binaryEncoding
	}

	// Stretch the chunk's end forward so the final character is complete
	const end = getChunkEnd(buffer, Math.min(buffer.length, begin + chunkLength))
	if (end > buffer.length) {
		// The final character would need bytes beyond the end of the buffer
		return binaryEncoding
	}

	// Decode the chunk and look for code units that do not belong in text
	const decoded = buffer.toString(textEncoding, begin, end)
	for (let offset = 0; offset < decoded.length; offset += 1) {
		const codeUnit = decoded.charCodeAt(offset)
		// 8 and below are control characters (e.g. backspace, null, eof, etc.)
		const isLowControl = codeUnit <= 8
		// 65533 is the unknown character
		const isUnknownChar = codeUnit === 65533
		if (isUnknownChar || isLowControl) {
			return binaryEncoding
		}
	}

	return textEncoding
}

// ====================================
// The functions below are created to handle multibyte utf8 characters.
// To understand how the encoding works, check this article: https://en.wikipedia.org/wiki/UTF-8#Encoding
// @todo add documentation for these

/**
 * Move a chunk's start offset back so that it does not begin in the middle of a multibyte UTF8 character.
 * @param buf The buffer being inspected
 * @param chunkBegin The proposed start offset of the chunk
 * @returns `chunkBegin` itself when it is 0 or does not point at a continuation byte; otherwise the offset of the lead byte found up to three bytes earlier, or `-1` if no such lead byte exists.
 */
function getChunkBegin(buf: Buffer, chunkBegin: number) {
	// The very start of the buffer cannot be inside a character
	if (chunkBegin === 0) {
		return 0
	}

	// Not a continuation byte, so the offset is used as is
	if (!isLaterByteOfUtf8(buf[chunkBegin])) {
		return chunkBegin
	}

	// Look back 3, then 2, then 1 byte for a lead byte whose character is long enough to reach chunkBegin
	for (let distance = 3; distance >= 1; --distance) {
		const candidate = chunkBegin - distance
		if (candidate < 0) continue

		const byte = buf[candidate]
		const reachesChunkBegin =
			isFirstByteOf4ByteChar(byte) ||
			(distance <= 2 && isFirstByteOf3ByteChar(byte)) ||
			(distance === 1 && isFirstByteOf2ByteChar(byte))
		if (reachesChunkBegin) {
			return candidate
		}
	}

	// A continuation byte with no lead byte in range
	return -1
}

/**
 * Move a chunk's (exclusive) end offset forward so that it does not cut a multibyte UTF8 character in half.
 * @param buf The buffer being inspected
 * @param chunkEnd The proposed end offset of the chunk
 * @returns `chunkEnd` itself when it equals the buffer length or no character straddles it; otherwise `chunkEnd` plus the number of bytes needed to complete the straddling character. The result may exceed `buf.length`.
 */
function getChunkEnd(buf: Buffer, chunkEnd: number) {
	// Ending exactly at the end of the buffer needs no adjustment
	if (chunkEnd === buf.length) {
		return chunkEnd
	}

	// Inspect the bytes 3, 2, and 1 positions before the end for a lead byte whose character extends past the end
	for (let distance = 3; distance >= 1; --distance) {
		const position = chunkEnd - distance
		if (position < 0) continue

		// Length of the character announced by this byte, or 0 if it is not a multibyte lead byte
		const byte = buf[position]
		let charLength = 0
		if (isFirstByteOf4ByteChar(byte)) {
			charLength = 4
		} else if (isFirstByteOf3ByteChar(byte)) {
			charLength = 3
		} else if (isFirstByteOf2ByteChar(byte)) {
			charLength = 2
		}

		// Only a character longer than the distance back actually crosses chunkEnd
		if (charLength > distance) {
			return chunkEnd + (charLength - distance)
		}
	}

	return chunkEnd
}

/** Whether `byte` has the bit layout `11110xxx`, which opens a four-byte UTF8 character. */
function isFirstByteOf4ByteChar(byte: number) {
	// eslint-disable-next-line no-bitwise
	const marker = byte >> 3 // discard the three trailing bits
	return marker === 0b11110
}

/** Whether `byte` has the bit layout `1110xxxx`, which opens a three-byte UTF8 character. */
function isFirstByteOf3ByteChar(byte: number) {
	// eslint-disable-next-line no-bitwise
	const marker = byte >> 4 // discard the four trailing bits
	return marker === 0b1110
}

/** Whether `byte` has the bit layout `110xxxxx`, which opens a two-byte UTF8 character. */
function isFirstByteOf2ByteChar(byte: number) {
	// eslint-disable-next-line no-bitwise
	const marker = byte >> 5 // discard the five trailing bits
	return marker === 0b110
}

/** Whether `byte` has the bit layout `10xxxxxx`, i.e. is a continuation (non-leading) byte of a multibyte UTF8 character. */
function isLaterByteOfUtf8(byte: number) {
	// eslint-disable-next-line no-bitwise
	const marker = byte >> 6 // discard the six trailing bits
	return marker === 0b10
}