import { EventType } from '@ag-ui/core'
import { toRunErrorPayload } from '../error-payload'
import { MAX_TOKENS_KEYS } from '../../utilities/sampling-keys'
import { BaseSummarizeAdapter } from './adapter'
import type {
  StreamChunk,
  SummarizationOptions,
  SummarizationResult,
  TextOptions,
} from '../../types'

/**
 * Minimal contract for a text adapter that supports `chatStream`. Lets
 * `ChatStreamSummarizeAdapter` work with any text adapter without coupling
 * to a specific implementation.
 *
 * The provider-options shape is intentionally `any` here — the wrapper only
 * forwards `modelOptions` straight through, so a text adapter with a richer
 * per-model options type (e.g. `ResolveProviderOptions<TModel>`) is still
 * acceptable. Summarize-level type safety is enforced via
 * `SummarizationOptions<TProviderOptions>` on the wrapper itself.
 */
export interface ChatStreamCapable {
  /**
   * Run a chat request and stream back its chunks. The summarize wrapper in
   * this file reacts to `TEXT_MESSAGE_CONTENT`, `RUN_FINISHED` and `RUN_ERROR`
   * chunks; every other chunk type is ignored by `summarize()` and passed
   * through untouched by `summarizeStream()`.
   */
  chatStream: (options: TextOptions<any>) => AsyncIterable<StreamChunk>
}

/**
 * Provider-native max-output-tokens key per summarize-adapter `name`. summarize
 * is provider-agnostic and forwards `modelOptions` opaquely to the wrapped text
 * adapter, so `maxLength` must be written under the exact key the underlying
 * provider reads — no adapter reads a generic `maxTokens`. Ollama is the one
 * exception: it nests sampling under `options`, so it has no entry here and is
 * handled as a special nested case in `applyMaxLength`/`applyDefaultTemperature`.
 *
 * Keep in sync with each adapter's wire mapping:
 * - OpenAI (Responses): `max_output_tokens`
 * - Anthropic / Grok: `max_tokens`
 * - Groq: `max_completion_tokens`
 * - Gemini: `maxOutputTokens`
 * - OpenRouter: `maxCompletionTokens`
 * - Ollama: nested `options.num_predict` (no entry — see `applyMaxLength`)
 *
 * The table is built on a null-prototype object because it is indexed with an
 * arbitrary, caller-chosen adapter name. On a plain object literal, names such
 * as `toString`, `constructor` or `__proto__` would resolve to inherited
 * `Object.prototype` members and be mistaken for a configured provider key.
 * With no prototype, every name that is not listed below reads as `undefined`.
 */
const MAX_TOKENS_KEY_BY_ADAPTER: Record<string, string> = Object.assign(
  Object.create(null) as Record<string, string>,
  {
    openai: 'max_output_tokens',
    anthropic: 'max_tokens',
    grok: 'max_tokens',
    groq: 'max_completion_tokens',
    gemini: 'maxOutputTokens',
    openrouter: 'maxCompletionTokens',
  },
)

/**
 * Every flat key any supported provider uses to cap output tokens (plus the
 * generic `maxTokens` spelling no adapter reads). Used to detect a
 * caller-supplied token limit so the summarize default never overrides an
 * explicit caller value. Shared with the OTel middleware via
 * `MAX_TOKENS_KEYS` so the two spelling sets cannot drift.
 *
 * This is a plain alias of the imported `MAX_TOKENS_KEYS`, not a copy:
 * `applyMaxLength` scans it with `.some(...)` against the caller's
 * `modelOptions`.
 */
const KNOWN_MAX_TOKENS_KEYS = MAX_TOKENS_KEYS

/**
 * Report whether a token cap can be placed for the given adapter `name`.
 *
 * Two shapes are supported: Ollama, whose limit lives in a nested options
 * object and therefore has no table entry, and any provider that has a flat
 * key registered in `MAX_TOKENS_KEY_BY_ADAPTER`. Callers use a `false` result
 * to warn that `maxLength` would otherwise be dropped without notice.
 *
 * @param adapterName The summarize adapter's own `name`.
 * @returns `true` when `applyMaxLength` has a placement for this name.
 */
function isKnownMaxTokensAdapter(adapterName: string): boolean {
  // Ollama is handled as a nested special case and is never in the table.
  if (adapterName === 'ollama') return true

  const flatKey = MAX_TOKENS_KEY_BY_ADAPTER[adapterName]
  return flatKey !== undefined
}

/**
 * Return a shallow copy of `modelOptions` with the summarize temperature
 * default filled in at the location the provider reads it from.
 *
 * - Ollama: the value goes into the nested `options` object. Any other nested
 *   settings the caller supplied are kept, and a non-object `options` value is
 *   replaced by a fresh object.
 * - Every other adapter name: the value goes on the top level.
 *
 * A `temperature` property that is already present in the relevant location
 * is never replaced — presence is what counts, so an explicit `undefined`
 * also suppresses the default. The input object is not mutated.
 *
 * @param adapterName The summarize adapter's own `name`.
 * @param temperature Default to apply when the caller supplied none.
 * @param modelOptions Caller-supplied provider options.
 * @returns A new options object with the default applied where needed.
 */
function applyDefaultTemperature(
  adapterName: string,
  temperature: number,
  modelOptions: Record<string, unknown>,
): Record<string, unknown> {
  const result: Record<string, unknown> = { ...modelOptions }

  // Flat placement: every provider except Ollama.
  if (adapterName !== 'ollama') {
    if (!('temperature' in result)) {
      result.temperature = temperature
    }
    return result
  }

  // Nested placement: only treat `options` as a bag of settings when it is a
  // real (non-null) object.
  const nested = result.options
  const sampling =
    typeof nested === 'object' && nested !== null
      ? (nested as Record<string, unknown>)
      : undefined

  if (sampling === undefined || !('temperature' in sampling)) {
    result.options = { temperature, ...sampling }
  }
  return result
}

/**
 * Resolve `maxLength` to the provider-native max-output-tokens key for the
 * given summarize-adapter `name` (this wrapper's OWN `name`, not the wrapped
 * text adapter's) and merge it into a working copy of the caller's
 * `modelOptions`. The caller always wins: if they already set any recognised
 * token-limit key to a number (flat or, for Ollama, nested
 * `options.num_predict`), the default is not applied. Unknown/unrecognised
 * adapter names fall back to NOT setting a token key (the prompt hint still
 * asks the model to stay under `maxLength`) rather than writing a dead key no
 * provider reads.
 *
 * Only a numeric value counts as "caller-set". A limit key that is present but
 * not a number (explicit `undefined`, `null`, a string, ...) is overwritten by
 * the default in both the flat and the nested Ollama shape, so the cap is never
 * silently lost to a placeholder value.
 *
 * Caveat (intentional): "caller wins" keys off ANY recognised spelling in
 * `KNOWN_MAX_TOKENS_KEYS`, but only the adapter's native key is read on the
 * wire. So a caller who sets a NON-native spelling for this provider — e.g.
 * `maxTokens`, or Anthropic's `max_tokens` against an OpenAI adapter — suppresses
 * the summarize default WITHOUT getting their own value applied either: neither
 * cap reaches the wire. This favours never clobbering a migration leftover over
 * guaranteeing a cap; the prompt-level hint still asks the model to stay under
 * `maxLength`. Rename the key to the provider-native spelling to forward it.
 *
 * @param adapterName The summarize adapter's own `name`.
 * @param maxLength Token cap to apply when the caller supplied none.
 * @param modelOptions Caller-supplied provider options (not mutated).
 * @returns A new options object, with the cap added where applicable.
 */
function applyMaxLength(
  adapterName: string,
  maxLength: number,
  modelOptions: Record<string, unknown>,
): Record<string, unknown> {
  const merged: Record<string, unknown> = { ...modelOptions }

  // A numeric value under any recognised flat spelling counts as a caller-set
  // limit for every adapter, including Ollama (e.g. a migration leftover).
  const callerSetFlatLimit = KNOWN_MAX_TOKENS_KEYS.some(
    (k) => typeof merged[k] === 'number',
  )

  if (adapterName === 'ollama') {
    const existing =
      merged.options && typeof merged.options === 'object'
        ? (merged.options as Record<string, unknown>)
        : undefined
    if (
      callerSetFlatLimit ||
      (existing && typeof existing.num_predict === 'number')
    ) {
      return merged
    }
    // Spread the caller's nested settings FIRST so a present-but-non-numeric
    // `num_predict` cannot overwrite the default being applied here.
    merged.options = { ...existing, num_predict: maxLength }
    return merged
  }

  const key = MAX_TOKENS_KEY_BY_ADAPTER[adapterName]
  if (key === undefined || callerSetFlatLimit) return merged

  merged[key] = maxLength
  return merged
}

/**
 * Extract the per-model `modelOptions` type a text adapter accepts. Used by
 * provider summarize factories so their `modelOptions` IntelliSense matches
 * what the underlying text adapter actually understands.
 *
 * Resolution:
 * - If `TAdapter` has a `'~types'` member carrying `providerOptions`, and that
 *   type is assignable to `object`, the result is that type.
 * - In every other case (no `'~types'` member, or a non-object
 *   `providerOptions`) the result falls back to plain `object`.
 */
export type InferTextProviderOptions<TAdapter> = TAdapter extends {
  '~types': { providerOptions: infer P }
}
  ? P extends object
    ? P
    : object
  : object

/**
 * Summarize adapter that wraps any `ChatStreamCapable` text adapter and
 * prompts it for summarization. Not tied to any wire format.
 *
 * The adapter's own `name` (a constructor argument) decides where the
 * temperature default and the `maxLength` token cap are placed inside
 * `modelOptions`, so it should be the provider name (`openai`, `anthropic`,
 * `ollama`, ...) whenever the cap is meant to reach the wire.
 */
export class ChatStreamSummarizeAdapter<
  TModel extends string,
  TProviderOptions extends object = Record<string, unknown>,
> extends BaseSummarizeAdapter<TModel, TProviderOptions> {
  readonly name: string

  private readonly textAdapter: ChatStreamCapable

  /**
   * @param textAdapter Text adapter whose `chatStream` performs the request.
   * @param model Model identifier handed to the base adapter.
   * @param name Adapter name used in log lines and to pick the provider-native
   *   placement for sampling options. The default is not a recognised provider
   *   name, so with it a `maxLength` only reaches the prompt (and a warning is
   *   logged).
   */
  constructor(
    textAdapter: ChatStreamCapable,
    model: TModel,
    name: string = 'chat-stream-summarize',
  ) {
    super({}, model)
    this.name = name
    this.textAdapter = textAdapter
  }

  /**
   * Run a summarization to completion and return the collected result.
   *
   * Text is accumulated from `TEXT_MESSAGE_CONTENT` chunks (a chunk's
   * `content` replaces the running summary, otherwise its `delta` is
   * appended). Usage and the reported model are taken from `RUN_FINISHED`,
   * the same way `summarizeStream()` does, so both entry points report the
   * same result for the same run.
   *
   * @throws Error when the stream emits `RUN_ERROR` (carrying the chunk's
   *   message and, when present, its string `code`), or whatever the wrapped
   *   `chatStream` throws. Every failure is logged in sanitised form first.
   */
  async summarize(
    options: SummarizationOptions<TProviderOptions>,
  ): Promise<SummarizationResult> {
    const systemPrompt = this.buildSummarizationPrompt(options)

    let summary = ''
    const id = this.generateId()
    let model = options.model
    let usage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 }

    options.logger.request(
      `activity=summarize provider=${this.name} model=${options.model} text-length=${options.text.length} maxLength=${options.maxLength ?? 'unset'}`,
      { provider: this.name, model: options.model },
    )

    try {
      for await (const chunk of this.textAdapter.chatStream(
        this.buildTextOptions(options, systemPrompt),
      )) {
        if (chunk.type === 'TEXT_MESSAGE_CONTENT') {
          if (chunk.content) {
            summary = chunk.content
          } else if (chunk.delta) {
            // Append delta only when present — a content-less chunk with no
            // delta would otherwise concat literal `'undefined'`.
            summary += chunk.delta
          }
          model = chunk.model || model
        }
        if (chunk.type === 'RUN_FINISHED') {
          if (chunk.usage) {
            usage = chunk.usage
          }
          // Keep parity with `summarizeStream()`, which also trusts the model
          // reported on the terminal chunk (it may be the only chunk that
          // carries one, e.g. when no text chunk was emitted).
          if (chunk.model) {
            model = chunk.model
          }
        }
        // Surface failures: the underlying chatStream emits RUN_ERROR instead
        // of throwing, so without this branch summarize() would return an
        // empty summary and pretend a failed run succeeded.
        if (chunk.type === 'RUN_ERROR') {
          const message =
            (chunk.error && typeof chunk.error.message === 'string'
              ? chunk.error.message
              : null) ?? 'Summarization failed'
          const code =
            chunk.error && typeof chunk.error.code === 'string'
              ? chunk.error.code
              : undefined
          const err = new Error(message)
          if (code) {
            ;(err as Error & { code?: string }).code = code
          }
          throw err
        }
      }
    } catch (error: unknown) {
      // Narrow before logging: raw SDK errors can carry request metadata
      // (including auth headers) which we must never surface to user loggers.
      options.logger.errors(`${this.name}.summarize fatal`, {
        error: toRunErrorPayload(error, `${this.name}.summarize failed`),
        source: `${this.name}.summarize`,
      })
      throw error
    }

    return { id, model, summary, usage }
  }

  /**
   * Streaming variant of `summarize()`. Every chunk from the wrapped
   * `chatStream` is re-yielded unchanged; in addition, a `CUSTOM`
   * `generation:result` event holding the accumulated `SummarizationResult`
   * is yielded immediately before each `RUN_FINISHED` chunk.
   *
   * Unlike `summarize()`, a `RUN_ERROR` chunk is passed through to the
   * consumer rather than converted into a thrown error. Errors thrown by the
   * wrapped stream are logged in sanitised form and rethrown.
   */
  override async *summarizeStream(
    options: SummarizationOptions<TProviderOptions>,
  ): AsyncIterable<StreamChunk> {
    const systemPrompt = this.buildSummarizationPrompt(options)

    options.logger.request(
      `activity=summarizeStream provider=${this.name} model=${options.model} text-length=${options.text.length} maxLength=${options.maxLength ?? 'unset'}`,
      { provider: this.name, model: options.model },
    )

    const id = this.generateId()
    let summary = ''
    let model = options.model
    let usage: SummarizationResult['usage'] = {
      promptTokens: 0,
      completionTokens: 0,
      totalTokens: 0,
    }

    try {
      for await (const chunk of this.textAdapter.chatStream(
        this.buildTextOptions(options, systemPrompt),
      )) {
        // Accumulate the same way `summarize()` does so consumers see deltas
        // AND the terminal `generation:result` event below carries the same
        // final summary that non-streaming returns.
        if (chunk.type === 'TEXT_MESSAGE_CONTENT') {
          if (chunk.content) {
            summary = chunk.content
          } else if (chunk.delta) {
            summary += chunk.delta
          }
          if (chunk.model) model = chunk.model
        }

        // Emit the GenerationClient-shaped result event just before the
        // terminal RUN_FINISHED so subscribers (useSummarize) populate
        // `result` before flipping `status` to success.
        if (chunk.type === 'RUN_FINISHED') {
          if (chunk.usage) usage = chunk.usage
          if (chunk.model) model = chunk.model
          yield {
            type: EventType.CUSTOM,
            name: 'generation:result',
            value: { id, model, summary, usage } satisfies SummarizationResult,
            model,
            timestamp: Date.now(),
          }
        }

        yield chunk
      }
    } catch (error: unknown) {
      // Same sanitising rule as `summarize()`: never hand the raw error to the
      // user-supplied logger.
      options.logger.errors(`${this.name}.summarizeStream fatal`, {
        error: toRunErrorPayload(error, `${this.name}.summarizeStream failed`),
        source: `${this.name}.summarizeStream`,
      })
      throw error
    }
  }

  /**
   * Build the TextOptions passed to the underlying chatStream. Provider
   * `modelOptions` from the summarize call are forwarded so knobs like
   * Anthropic cache headers, Gemini safety settings, or Ollama tuning params
   * still reach the wire layer. Two summarize defaults are layered on top
   * without overriding caller values: a temperature of `0.3` and, when
   * `maxLength` is set, a provider-native output-token cap.
   *
   * The text to summarize becomes the single user message and `systemPrompt`
   * the single system prompt.
   */
  protected buildTextOptions(
    options: SummarizationOptions<TProviderOptions>,
    systemPrompt: string,
  ): TextOptions<TProviderOptions> {
    // Sampling knobs now live in provider-native `modelOptions`. Apply the
    // low-temperature default where the wrapped provider actually reads it
    // (nested under `options` for Ollama, flat otherwise) so callers can still
    // override it. Resolving the placement from this summarize adapter's OWN
    // `name` keeps the default off the wire correctly per provider — a flat
    // `temperature` would be silently dropped by Ollama while still showing up
    // in OTel.
    let working: Record<string, unknown> = {
      ...(options.modelOptions as Record<string, unknown> | undefined),
    }
    working = applyDefaultTemperature(this.name, 0.3, working)
    // `maxLength` must reach the wire under the provider-native token key (it
    // differs per provider, and no adapter reads a generic `maxTokens`).
    // Resolve it from this summarize adapter's `name` (the constructor arg,
    // not the wrapped text adapter's name), never overriding a caller-supplied
    // token limit.
    if (options.maxLength !== undefined) {
      if (!isKnownMaxTokensAdapter(this.name)) {
        options.logger.warn(
          `summarize: maxLength=${options.maxLength} could not be mapped to a provider token key for adapter name "${this.name}" — it was dropped from modelOptions (the prompt still asks the model to stay under it). Construct ChatStreamSummarizeAdapter with a recognised provider name to forward the cap.`,
          { provider: this.name },
        )
      }
      working = applyMaxLength(this.name, options.maxLength, working)
    }
    const modelOptions = working as TProviderOptions

    return {
      model: options.model,
      messages: [{ role: 'user', content: options.text }],
      systemPrompts: [systemPrompt],
      modelOptions,
      logger: options.logger,
    }
  }

  /**
   * Compose the system prompt from the requested `style`, the optional
   * `focus` list and the optional `maxLength` hint. An unset or unrecognised
   * style falls back to the generic "clear and concise" instruction. The
   * length hint is only added for a truthy `maxLength`.
   */
  protected buildSummarizationPrompt(
    options: SummarizationOptions<TProviderOptions>,
  ): string {
    let prompt = 'You are a professional summarizer. '

    switch (options.style) {
      case 'bullet-points':
        prompt += 'Provide a summary in bullet point format. '
        break
      case 'paragraph':
        prompt += 'Provide a summary in paragraph format. '
        break
      case 'concise':
        prompt += 'Provide a very concise summary in 1-2 sentences. '
        break
      case undefined:
        prompt += 'Provide a clear and concise summary. '
        break
      default:
        prompt += 'Provide a clear and concise summary. '
    }

    if (options.focus && options.focus.length > 0) {
      prompt += `Focus on the following aspects: ${options.focus.join(', ')}. `
    }

    if (options.maxLength) {
      prompt += `Keep the summary under ${options.maxLength} tokens. `
    }

    return prompt
  }
}
