import type { MongoClientWithMappings } from "$lib/server/mongo";
import { ReadPreference } from "mongodb";
import { z } from "zod";

/**
 * Validator configuration of a single collection, as returned by
 * `getCollectionSchema()`.
 */
export interface CollectionSchemaInfo {
	/** True when `validator` is an object with at least one key. */
	hasSchema: boolean;
	/** Raw validator document from the collection options; null when none is set or the collection was not found. */
	validator: Record<string, unknown> | null;
	/** `validationLevel` collection option ("strict" when the option is absent); null when the collection was not found. */
	validationLevel: string | null;
	/** `validationAction` collection option ("error" when the option is absent); null when the collection was not found. */
	validationAction: string | null;
}

/**
 * Outcome of `auditSchemaCompliance()` for one collection.
 */
export interface SchemaAuditResult {
	/** Number of documents counted in the collection (0 when no audit could be run). */
	nrecords: number;
	/** Number of documents that do not match the collection's `$jsonSchema`. */
	nInvalidDocuments: number;
	/** Number of counted documents that are not invalid. */
	nValidDocuments: number;
	/** Share of valid documents as a percentage; 100 when nothing was counted or audited. */
	compliancePct: number;
	/** One entry per sampled non-conforming document (a bounded sample, not the full set). */
	errors: Array<{
		/** Human-readable description of why the document failed, or a fallback text. */
		message: string;
		/** The `_id` of the offending document. */
		docId?: unknown;
		/** The full document that failed validation (encoded via JsonEncoder) */
		document?: unknown;
	}>;
	/** Non-fatal problems that limit how far the numbers above can be trusted. */
	warnings: string[];
	/** False only when the collection has no (non-empty) validator. */
	hasSchema: boolean;
	/** Elapsed time of the audit queries in whole milliseconds; 0 when no query was run. */
	tookMs: number;
}

/**
 * Look up the validator settings of a collection through `listCollections`.
 *
 * @param client  Client used to reach the database.
 * @param dbName  Database that contains the collection.
 * @param colName Collection whose options are inspected.
 * @returns All-null fields with `hasSchema: false` when the collection is not
 *   listed. Otherwise the stored validator (or null), with `validationLevel`
 *   falling back to "strict" and `validationAction` to "error" when unset.
 */
export async function getCollectionSchema(
	client: MongoClientWithMappings,
	dbName: string,
	colName: string,
): Promise<CollectionSchemaInfo> {
	const [info] = await client
		.db(dbName)
		.listCollections({ name: colName }, { nameOnly: false })
		.toArray();

	if (!info) {
		return { hasSchema: false, validator: null, validationLevel: null, validationAction: null };
	}

	// `options` is only present on full (nameOnly: false) listings.
	const { options } = info as { options?: Record<string, unknown> };
	const collectionOptions = options ?? {};
	const validator = (collectionOptions.validator as Record<string, unknown>) ?? null;

	return {
		// An empty `{}` validator constrains nothing, so it does not count as a schema.
		hasSchema: validator !== null && Object.keys(validator).length > 0,
		validator,
		validationLevel: (collectionOptions.validationLevel as string) ?? "strict",
		validationAction: (collectionOptions.validationAction as string) ?? "error",
	};
}

/**
 * Extract the inner JSON Schema object from a MongoDB validator document.
 *
 * Two shapes are recognised:
 *   - the direct form `{ $jsonSchema: { ... } }`, and
 *   - a `$jsonSchema` nested (at any depth) inside `$and` clauses, where the
 *     first one found in clause order wins.
 *
 * Only `$and` is searched; `$jsonSchema` under `$or` or any other operator
 * is ignored.
 *
 * @param validator Validator document, or one `$and` clause during recursion.
 *   Clauses are untyped at runtime, so non-object values are tolerated.
 * @returns The schema object, or null when no `$jsonSchema` could be located.
 */
function extractJsonSchema(validator: Record<string, unknown>): Record<string, unknown> | null {
	// `$and` entries are cast, not checked, before recursing — a null or
	// primitive clause must yield "nothing found" rather than a TypeError.
	if (typeof validator !== "object" || validator === null) {
		return null;
	}

	// Direct $jsonSchema (most common). An array is not a schema document.
	const direct = validator.$jsonSchema;
	if (direct && typeof direct === "object" && !Array.isArray(direct)) {
		return direct as Record<string, unknown>;
	}

	// $and: [{ $jsonSchema: ... }, ...]
	if (Array.isArray(validator.$and)) {
		for (const clause of validator.$and) {
			const extracted = extractJsonSchema(clause as Record<string, unknown>);
			if (extracted) {
				return extracted;
			}
		}
	}
	return null;
}

/**
 * Standard JSON Schema type names that zod's fromJSONSchema supports.
 * A `bsonType` that is already one of these names is used as `type` unchanged.
 */
const STANDARD_TYPES = new Set(["string", "number", "integer", "boolean", "object", "array", "null"]);

/**
 * Map MongoDB bsonType aliases to standard JSON Schema types.
 *
 * `objectId` and `date` map to themselves: they are not JSON Schema types but
 * markers that `bsonSchemaToStandard()` expands into EJSON wrapper object
 * schemas. A bsonType that appears neither here nor in `STANDARD_TYPES`
 * (binData, regex, timestamp, …) has no mapping and is not enforced.
 */
const BSON_TYPE_MAP: Record<string, string> = {
	int: "integer",
	long: "integer",
	double: "number",
	bool: "boolean",
	decimal: "number",
	objectId: "objectId",
	date: "date",
};

/**
 * JSON Schema keywords that, when present, give the schema actual semantics.
 * If none of these are set, the schema is `z.any()`-equivalent under zod's
 * `fromJSONSchema`, which means it silently accepts `undefined` even when
 * the parent's `required` list includes the property.
 *
 * Consulted by `isEffectivelyAnySchema()`; a keyword only counts when its
 * value is not `undefined`.
 */
const CONSTRAINT_KEYWORDS = [
	"type",
	"bsonType",
	"enum",
	"const",
	"anyOf",
	"oneOf",
	"allOf",
	"$ref",
	"properties",
	"patternProperties",
	"additionalProperties",
	"items",
	"prefixItems",
	"additionalItems",
	"not",
	"required",
	"propertyNames",
];

/**
 * Tell whether `schema` is an object that sets none of the
 * `CONSTRAINT_KEYWORDS` (a keyword set to `undefined` counts as unset).
 * Anything that is not a non-null object is never treated as "any".
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function isEffectivelyAnySchema(schema: any): boolean {
	const isObjectLike = schema !== null && typeof schema === "object";
	if (!isObjectLike) {
		return false;
	}
	for (const keyword of CONSTRAINT_KEYWORDS) {
		if (schema[keyword] !== undefined) {
			return false;
		}
	}
	return true;
}

/**
 * Replacement for empty `{}` sub-schemas under a required property: a union
 * of every JSON-representable type. This lets the value be anything (mirroring
 * the original "any" intent) while still rejecting `undefined`, so the parent
 * object's `required` check fires when the field is missing.
 *
 * The same object instance is handed out for every replaced property, so it
 * must be treated as read-only.
 */
const ANY_NON_UNDEFINED_SCHEMA = {
	anyOf: [
		{ type: "string" },
		{ type: "number" },
		{ type: "boolean" },
		{ type: "object" },
		{ type: "array" },
		{ type: "null" },
	],
};

/** Schema for an EJSON wrapper object such as `{ $oid: "..." }`. */
function ejsonWrapperSchema(key: string): Record<string, unknown> {
	return { type: "object", required: [key], properties: { [key]: { type: "string" } } };
}

/**
 * Standard JSON Schema alternatives that represent one `bsonType` name, or
 * null when the type cannot be expressed (binData, regex, timestamp, …).
 */
function bsonTypeAlternatives(bsonType: unknown): Array<Record<string, unknown>> | null {
	if (typeof bsonType !== "string") {
		return null;
	}
	switch (bsonType) {
		case "objectId":
			return [ejsonWrapperSchema("$oid")];
		case "date":
			return [ejsonWrapperSchema("$date")];
		case "decimal":
			// normalizeBsonValue() turns Decimal128 instances into
			// `{ $numberDecimal }`, so the wrapper has to be accepted next to
			// a plain number or every real decimal would be reported.
			return [{ type: "number" }, ejsonWrapperSchema("$numberDecimal")];
	}
	// Own-property lookup only: "constructor"/"toString" must not resolve
	// to members inherited from Object.prototype.
	const alias = Object.prototype.hasOwnProperty.call(BSON_TYPE_MAP, bsonType)
		? BSON_TYPE_MAP[bsonType]
		: undefined;
	const standard = alias ?? (STANDARD_TYPES.has(bsonType) ? bsonType : null);
	return standard === null ? null : [{ type: standard }];
}

/**
 * Convert a MongoDB $jsonSchema (which uses `bsonType` instead of `type`) into
 * standard JSON Schema Draft-07 that zod's `fromJSONSchema` can enforce.
 *
 * MongoDB-specific types are mapped to EJSON wrapper shapes so zod can
 * perform precise structural validation:
 *   bsonType: "objectId"  →  { type: "object", required: ["$oid"], properties: { $oid: { type: "string" } } }
 *   bsonType: "date"      →  { type: "object", required: ["$date"], properties: { $date: { type: "string" } } }
 *   bsonType: "decimal"   →  anyOf [ { type: "number" }, the `$numberDecimal` wrapper ]
 *
 * Documents are likewise normalized via `normalizeBsonValue()` so ObjectId
 * instances become `{ $oid: "hex" }` and Date instances become `{ $date: "ISO" }`
 * before validation.
 *
 * `bsonType` may be a string or an array (e.g. `["string", "null"]`). Rules:
 *   - one resulting alternative        → merged into the schema itself;
 *   - several alternatives             → `anyOf`, or an extra `allOf` entry
 *                                        when the schema already has `anyOf`;
 *   - any member without an equivalent → no type constraint at all, because
 *                                        keeping only the known members would
 *                                        reject values MongoDB accepts.
 *
 * Sub-schemas under `properties`, `patternProperties`, `additionalProperties`,
 * `additionalItems`, `not`, `oneOf`/`anyOf`/`allOf` and `items` are converted
 * recursively. A required property whose converted schema has no constraint
 * is replaced by `ANY_NON_UNDEFINED_SCHEMA`.
 *
 * @param schema A `$jsonSchema` node. Non-object values are returned as-is.
 * @returns A converted shallow copy; the input is never mutated.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function bsonSchemaToStandard(schema: any): any {
	if (typeof schema !== "object" || schema === null) {
		return schema;
	}
	const out: Record<string, unknown> = { ...schema };

	if (out.bsonType) {
		const declared: unknown[] = Array.isArray(out.bsonType) ? out.bsonType : [out.bsonType];
		delete out.bsonType;

		const perType = declared.map((t) => bsonTypeAlternatives(t));
		// A single inexpressible member invalidates the whole union.
		const allKnown = perType.every((alts) => alts !== null);
		const alternatives = allKnown ? perType.flatMap((alts) => alts ?? []) : [];

		const [only] = alternatives;
		if (only !== undefined && alternatives.length === 1) {
			Object.assign(out, only);
			if (only.properties !== undefined) {
				// EJSON wrapper: its `properties` are already standard.
				return out;
			}
		} else if (alternatives.length > 1) {
			if (out.anyOf === undefined) {
				out.anyOf = alternatives;
			} else {
				// Keep the author's own `anyOf`; require the type union in addition.
				const existing = Array.isArray(out.allOf) ? (out.allOf as unknown[]) : [];
				out.allOf = [...existing, { anyOf: alternatives }];
			}
		}
		// Otherwise: nothing enforceable — leave the node untyped (z.any()).
	}

	if (out.properties) {
		const requiredKeys = new Set(Array.isArray(out.required) ? (out.required as string[]) : []);
		out.properties = Object.fromEntries(
			Object.entries(out.properties as Record<string, unknown>).map(([k, v]) => {
				const converted = bsonSchemaToStandard(v);
				// If a required property has no real constraints, zod's
				// fromJSONSchema collapses it to `z.any()` — which silently
				// accepts `undefined` and defeats the `required` check.
				// Replace with an explicit any-of-any-non-undefined union.
				if (requiredKeys.has(k) && isEffectivelyAnySchema(converted)) {
					return [k, ANY_NON_UNDEFINED_SCHEMA];
				}
				return [k, converted];
			}),
		);
	}
	if (out.patternProperties && typeof out.patternProperties === "object") {
		out.patternProperties = Object.fromEntries(
			Object.entries(out.patternProperties as Record<string, unknown>).map(([pattern, v]) => [
				pattern,
				bsonSchemaToStandard(v),
			]),
		);
	}
	// Keywords holding a single sub-schema (boolean forms are left untouched).
	for (const key of ["additionalProperties", "additionalItems", "not"]) {
		const sub = out[key];
		if (sub && typeof sub === "object") {
			out[key] = bsonSchemaToStandard(sub);
		}
	}
	// Keywords holding a list of sub-schemas.
	for (const key of ["oneOf", "anyOf", "allOf"]) {
		const list = out[key];
		if (Array.isArray(list)) {
			out[key] = list.map((v) => bsonSchemaToStandard(v));
		}
	}
	if (out.items) {
		out.items = Array.isArray(out.items)
			? (out.items as Array<unknown>).map((v) => bsonSchemaToStandard(v))
			: bsonSchemaToStandard(out.items);
	}
	return out;
}

/**
 * Produce a deep copy of `value` in which BSON instances are swapped for
 * their EJSON wrapper objects, so the result can be checked structurally
 * against a schema that expects those wrappers.
 *
 *   Date       →  { $date: "...ISO..." }
 *   ObjectId   →  { $oid: "...hex..." }
 *   Decimal128 →  { $numberDecimal: "...string..." }
 *   Long       →  { $numberLong: "...string..." }
 *
 * Arrays are mapped element by element; every other object is rebuilt from
 * its own enumerable entries. Primitives, null and undefined are returned
 * unchanged.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function normalizeBsonValue(value: any): any {
	// Covers null, undefined and all primitives in one guard.
	if (value == null || typeof value !== "object") {
		return value;
	}
	if (value instanceof Date) {
		return { $date: value.toISOString() };
	}

	// BSON classes are recognised by constructor name plus the method that
	// will be called; a name match without the method falls through.
	switch (value.constructor?.name) {
		case "ObjectId":
			if (typeof value.toHexString === "function") {
				return { $oid: value.toHexString() };
			}
			break;
		case "Decimal128":
			if (typeof value.toString === "function") {
				return { $numberDecimal: value.toString() };
			}
			break;
		case "Long":
			if (typeof value.toString === "function") {
				return { $numberLong: value.toString() };
			}
			break;
	}

	if (Array.isArray(value)) {
		return value.map((element) => normalizeBsonValue(element));
	}

	const copy: Record<string, unknown> = {};
	Object.entries(value).forEach(([key, inner]) => {
		copy[key] = normalizeBsonValue(inner);
	});
	return copy;
}

/**
 * BSON numeric types that have no faithful representation in JavaScript:
 * MongoDB distinguishes int/long/double/decimal at the storage layer, but
 * when a document is returned through the Node driver they all surface as
 * `number` (or wrapped Long/Decimal128 for ranges that don't fit).
 *
 * If a validator constrains a field to one of these, our zod-based audit
 * cannot reliably detect violations — so we use this to produce a more
 * helpful fallback message when we know specific feedback isn't possible.
 *
 * Entries are `bsonType` names exactly as written in a `$jsonSchema`;
 * `schemaUsesPreciseBsonTypes()` looks them up in this set.
 */
const PRECISE_BSON_NUMERIC_TYPES = new Set(["int", "long", "double", "decimal"]);

/**
 * Walk the schema tree and check whether any node uses a precise BSON numeric
 * type (see `PRECISE_BSON_NUMERIC_TYPES`).
 *
 * Visited locations: the node's own `bsonType` (string or array), every value
 * of `properties` / `patternProperties`, the sub-schemas under `items`
 * (single schema or tuple-form array), `additionalProperties`,
 * `additionalItems` and `not`, and each member of `oneOf` / `anyOf` / `allOf`.
 *
 * @param schema A `$jsonSchema` node, or an array of such nodes.
 * @returns True as soon as one matching `bsonType` is found; false for
 *   non-object input or when nothing matches.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function schemaUsesPreciseBsonTypes(schema: any): boolean {
	if (typeof schema !== "object" || schema === null) {
		return false;
	}
	// Tuple-form `items: [ ... ]` and composition lists arrive as arrays; an
	// array has no keywords of its own, so inspect its members instead.
	if (Array.isArray(schema)) {
		return schema.some((entry) => schemaUsesPreciseBsonTypes(entry));
	}

	if (schema.bsonType) {
		const types: unknown[] = Array.isArray(schema.bsonType) ? schema.bsonType : [schema.bsonType];
		if (types.some((t) => typeof t === "string" && PRECISE_BSON_NUMERIC_TYPES.has(t))) {
			return true;
		}
	}

	// Keyword → map of name/pattern → sub-schema.
	for (const key of ["properties", "patternProperties"]) {
		const children = schema[key];
		if (
			children &&
			typeof children === "object" &&
			Object.values(children).some((child) => schemaUsesPreciseBsonTypes(child))
		) {
			return true;
		}
	}

	// Keyword → one sub-schema (`items` may also hold an array of them).
	for (const key of ["items", "additionalProperties", "additionalItems", "not"]) {
		if (schemaUsesPreciseBsonTypes(schema[key])) {
			return true;
		}
	}

	// Keyword → list of sub-schemas.
	for (const key of ["oneOf", "anyOf", "allOf"]) {
		const members = schema[key];
		if (Array.isArray(members) && schemaUsesPreciseBsonTypes(members)) {
			return true;
		}
	}
	return false;
}

/**
 * Compile a MongoDB `$jsonSchema` once and hand back a closure that checks
 * individual documents against it.
 *
 * Converting the schema and running `z.fromJSONSchema` is the expensive part,
 * so it happens a single time here and the result is shared by every call of
 * the returned function.
 *
 * @param schema The `$jsonSchema` object of the collection's validator.
 * @returns A function taking a raw MongoDB document and returning failure
 *   messages: an empty array when the document matches, one prettified zod
 *   message when it does not, or one generic fallback message when the schema
 *   could not be compiled or validation itself threw.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function buildDocumentValidator(schema: Record<string, unknown>): (doc: any) => string[] {
	const fallback = "document does not match schema (could not parse schema with zod)";

	const compile = (): ReturnType<typeof z.fromJSONSchema> | null => {
		try {
			return z.fromJSONSchema(bsonSchemaToStandard(schema));
		} catch {
			// Conversion / compilation failed — every doc will get the fallback.
			return null;
		}
	};
	const compiled = compile();

	return (doc) => {
		if (!compiled) {
			return [fallback];
		}
		try {
			const outcome = compiled.safeParse(normalizeBsonValue(doc));
			return outcome.success ? [] : [z.prettifyError(outcome.error)];
		} catch {
			return [fallback];
		}
	};
}

/**
 * Audit schema compliance for a collection.
 *
 * Uses aggregation with the `$jsonSchema` operator rather than `db.validate()`
 * because validate() does not reliably return `nInvalidDocuments` counts in
 * MongoDB 8.x (always returns 0, only logs a warning to the server log).
 *
 * Steps: read the collection's validator, count all documents, count those
 * matching `$nor: [{ $jsonSchema }]`, then fetch a bounded sample of the
 * offenders and describe each one with a per-document validator.
 *
 * Failure handling is best-effort once the total is known:
 *   - if the invalid count cannot be computed, the result reports the total
 *     with a warning (and no invalid documents);
 *   - if only the sample cannot be fetched, the counts are still returned
 *     and a warning is added.
 * Errors from reading the validator or from the total count propagate.
 *
 * @param client  Client used to reach the database.
 * @param dbName  Database that contains the collection.
 * @param colName Collection to audit.
 * @param opts    Optional `readPreference` / `maxTimeMS`, forwarded to every query.
 * @returns Counts, compliance percentage, sampled errors and warnings.
 */
export async function auditSchemaCompliance(
	client: MongoClientWithMappings,
	dbName: string,
	colName: string,
	opts?: {
		readPreference?: ReadPreference;
		maxTimeMS?: number;
	},
): Promise<SchemaAuditResult> {
	// Upper bound on the number of offending documents returned as examples.
	const sampleLimit = 20;

	const coll = client.db(dbName).collection(colName);

	const schemaInfo = await getCollectionSchema(client, dbName, colName);

	if (!schemaInfo.hasSchema || !schemaInfo.validator) {
		return {
			nrecords: 0,
			nInvalidDocuments: 0,
			nValidDocuments: 0,
			compliancePct: 100,
			errors: [],
			warnings: [],
			hasSchema: false,
			tookMs: 0,
		};
	}

	const jsonSchema = extractJsonSchema(schemaInfo.validator);
	if (!jsonSchema) {
		return {
			nrecords: 0,
			nInvalidDocuments: 0,
			nValidDocuments: 0,
			compliancePct: 100,
			errors: [],
			warnings: [
				"Validator is present but could not extract a $jsonSchema for auditing — validator may use non-schema operators",
			],
			hasSchema: true,
			tookMs: 0,
		};
	}

	const start = performance.now();
	const elapsedMs = (): number => Math.round(performance.now() - start);

	const aggOptions: Record<string, unknown> = {};
	if (opts?.readPreference) {
		aggOptions.readPreference = opts.readPreference;
	}
	if (opts?.maxTimeMS) {
		aggOptions.maxTimeMS = opts.maxTimeMS;
	}

	// $nor + $jsonSchema identifies docs that don't conform.
	const nonMatchingStage = { $match: { $nor: [{ $jsonSchema: jsonSchema }] } };

	const total = await coll.countDocuments({}, aggOptions);

	// Count non-matching documents. `$count` emits no row at all when
	// nothing matches, hence the `?? 0`.
	let invalidCount: number;
	try {
		const row = await coll.aggregate([nonMatchingStage, { $count: "c" }], aggOptions).next();
		invalidCount = (row as { c: number } | null)?.c ?? 0;
	} catch {
		return {
			nrecords: total,
			nInvalidDocuments: 0,
			nValidDocuments: total,
			compliancePct: 100,
			errors: [],
			warnings: ["Unable to count non-matching documents (aggregation failed)"],
			hasSchema: true,
			tookMs: elapsedMs(),
		};
	}

	// The two counts are separate queries; writes in between can make the
	// invalid count exceed the earlier total. Widen the total rather than
	// report a negative number of valid documents.
	const nrecords = Math.max(total, invalidCount);
	const nInvalidDocuments = invalidCount;
	const nValidDocuments = nrecords - nInvalidDocuments;
	const compliancePct = nrecords > 0 ? (nValidDocuments * 100) / nrecords : 100;

	const warnings: string[] = [];

	// Sample non-matching documents. Skipped when the count found none, which
	// saves a second pass over the collection. A failure here must not discard
	// the counts that were already obtained.
	let sampleDocs: Array<Record<string, unknown>> = [];
	if (nInvalidDocuments > 0) {
		try {
			sampleDocs = await coll.aggregate([nonMatchingStage, { $limit: sampleLimit }], aggOptions).toArray();
		} catch {
			warnings.push("Unable to sample non-matching documents (aggregation failed)");
		}
	}

	// Compare each non-matching doc against individual $jsonSchema constraints
	// to produce specific error messages rather than a generic "does not match".
	// When zod can't pinpoint the failure (returns no issues), it usually means
	// the violation is a BSON-specific type distinction that isn't visible from
	// the JS value alone (e.g. `bsonType: "double"` where the doc has an int).
	const fallbackMessage = schemaUsesPreciseBsonTypes(jsonSchema)
		? "The validator uses BSON-specific numeric types (int/long/double/decimal) which cannot be distinguished from a JavaScript value alone — try inspecting the document directly in MongoDB."
		: "Failed to detect validation error";

	// Build the per-document validator once: the schema is identical across
	// every sample, so there's no need to recompile it for each doc.
	const validateOne = buildDocumentValidator(jsonSchema);
	const errors: SchemaAuditResult["errors"] = sampleDocs.map((doc) => {
		const failures = validateOne(doc);
		return {
			message: failures.length > 0 ? failures.join("; ") : fallbackMessage,
			docId: doc._id,
			document: doc,
		};
	});

	return {
		nrecords,
		nInvalidDocuments,
		nValidDocuments,
		compliancePct,
		errors,
		warnings,
		hasSchema: true,
		tookMs: elapsedMs(),
	};
}
