import {routes} from "@lodestar/api";
import {BeaconConfig} from "@lodestar/config";
import {IBeaconStateView, computeStartSlotAtEpoch} from "@lodestar/state-transition";
import {Epoch, RootHex, phase0} from "@lodestar/types";
import {Logger, MapDef, fromHex, sleep, toHex, toRootHex} from "@lodestar/utils";
import {Metrics} from "../../metrics/index.js";
import {AllocSource, BufferPool, BufferWithKey} from "../../util/bufferPool.js";
import {IClock} from "../../util/clock.js";
import {serializeState} from "../serializeState.js";
import {CPStateDatastore, DatastoreKey} from "./datastore/index.js";
import {MapTracker} from "./mapMetrics.js";
import {BlockStateCache, CacheItemType, CheckpointHex, CheckpointStateCache} from "./types.js";

/**
 * Tunable limits of `PersistentCheckpointStateCache`.
 * Both values are optional; the constructor rejects negative numbers and falls back to
 * `DEFAULT_MAX_CP_STATE_EPOCHS_IN_MEMORY` / `DEFAULT_MAX_CP_STATE_ON_DISK` when a value is omitted.
 */
export type PersistentCheckpointStateCacheOpts = {
  /** Keep checkpoint states of at most n epochs in memory, persist the rest to disk. Must be >= 0 */
  maxCPStateEpochsInMemory?: number;
  /** Keep checkpoint states of at most n epochs on disk. Must be >= 0 */
  maxCPStateEpochsOnDisk?: number;
};

/** Collaborators injected into the `PersistentCheckpointStateCache` constructor. */
type PersistentCheckpointStateCacheModules = {
  /** Used to derive the slot-relative time at which checkpoint states are processed */
  config: BeaconConfig;
  /** Optional; when absent no metrics are recorded */
  metrics?: Metrics | null;
  logger: Logger;
  /** Optional; when absent `processState()` does not wait before persisting */
  clock?: IClock | null;
  /** Passed to `sleep()` while waiting inside `processState()` */
  signal?: AbortSignal;
  /** Storage backend that persisted checkpoint states are written to and read from */
  datastore: CPStateDatastore;
  /** Provides the fallback seed state when reloading a persisted state */
  blockStateCache: BlockStateCache;
  /** Optional buffer pool handed to `serializeState()` when persisting a state */
  bufferPool?: BufferPool;
};

/** checkpoint serialized as a string */
type CacheKey = string;

/** Cache entry whose state object is currently held in memory. */
type InMemoryCacheItem = {
  type: CacheItemType.inMemory;
  state: IBeaconStateView;
  // Set when this cp state also has a copy in the datastore (e.g. it was reloaded from disk).
  // It allows us to remove the copy from disk later, and to skip persisting the same state again.
  persistedKey?: DatastoreKey;
};

/** Cache entry whose state is no longer in memory; only its datastore key is kept. */
type PersistedCacheItem = {
  type: CacheItemType.persisted;
  /** Key to read the serialized state back from the datastore */
  value: DatastoreKey;
};

type CacheItem = InMemoryCacheItem | PersistedCacheItem;

/** Serialized state bytes read from the datastore, together with the key they were read with */
type LoadedStateBytesData = {persistedKey: DatastoreKey; stateBytes: Uint8Array};

/**
 * Before n-historical states, lodestar keeps all checkpoint states since finalized
 * Since Sep 2024, lodestar stores 3 most recent checkpoint states in memory and the rest on disk. The finalized state
 * may not be available in memory, and stay on disk instead.
 */
export const DEFAULT_MAX_CP_STATE_EPOCHS_IN_MEMORY = 3;

/**
 * By default we don't prune any persisted checkpoint states because it's not safe to delete them during
 * long non-finality: we don't know the state of the chain and there could be a deep (hundreds of epochs) reorg
 * if there are two competing chains with similar weight. In that case we wouldn't have a close enough state to
 * pivot to the other chain and would instead require a resync from the last finalized checkpoint state, which
 * could be very far in the past.
 */
export const DEFAULT_MAX_CP_STATE_ON_DISK = Infinity;

// TODO GLOAS: re-evaluate this timing
const PROCESS_CHECKPOINT_STATES_BPS = 6667;

/**
 * An implementation of CheckpointStateCache that keeps checkpoint states of up to n epochs in memory and persists the rest to disk
 * - If it's more than `maxEpochsInMemory` epochs old, it will persist n last epochs to disk based on the view of the block
 * - Once a chain gets finalized we'll prune all states from memory and disk for epochs < finalizedEpoch
 * - In get*() apis if shouldReload is true, it will reload from disk. The reload() api is expensive and should only be called in some important flows:
 *   - Get state for block processing
 *   - updateHeadState
 *   - as with any cache, the state could be evicted from memory at any time, so we should always check if the state is in memory or not
 * - Each time we process a state, we only persist exactly 1 checkpoint state per epoch based on the view of block and prune all others. The persisted
 * checkpoint state could be finalized and used later in archive task, it's also used to regen states.
 * - When we process multiple states in the same epoch, we could persist different checkpoint states of the same epoch because each block could have its
 * own view. See unit test of this file `packages/beacon-node/test/unit/chain/stateCache/persistentCheckpointsCache.test.ts` for more details.
 *
 * The below diagram shows Previous Root Checkpoint State is persisted for epoch (n-2) and Current Root Checkpoint State is persisted for epoch (n-1)
 * while at epoch (n) and (n+1) we have both of them in memory
 *
 * ╔════════════════════════════════════╗═══════════════╗
 * ║      persisted to db or fs         ║   in memory   ║
 * ║        reload if needed            ║               ║
 * ║ -----------------------------------║---------------║
 * ║        epoch:       (n-2)   (n-1)  ║  n     (n+1)  ║
 * ║               |-------|-------|----║--|-------|----║
 * ║                      ^        ^    ║ ^       ^     ║
 * ║                                    ║  ^       ^    ║
 * ╚════════════════════════════════════╝═══════════════╝
 *
 * The "in memory" checkpoint states are similar to the old implementation: we have both Previous Root Checkpoint State and Current Root Checkpoint State per epoch.
 * However in the "persisted to db or fs" part
 *   - if there is no reorg, we only store 1 checkpoint state per epoch, the one that could potentially be justified/finalized later based on the view of the state
 *   - if there is reorg, we may store >=2 checkpoint states per epoch, including any checkpoints with unknown roots to the processed state
 *   - the goal is to make sure we can regen any states later if needed, and we have the checkpoint state that could be justified/finalized later
 */
export class PersistentCheckpointStateCache implements CheckpointStateCache {
  private readonly cache: MapTracker<CacheKey, CacheItem>;
  /** Epoch -> Set<blockRoot> */
  private readonly epochIndex = new MapDef<Epoch, Set<RootHex>>(() => new Set<string>());
  private readonly config: BeaconConfig;
  private readonly metrics: Metrics | null | undefined;
  private readonly logger: Logger;
  private readonly clock: IClock | null | undefined;
  private readonly signal: AbortSignal | undefined;
  private preComputedCheckpoint: string | null = null;
  private preComputedCheckpointHits: number | null = null;
  private readonly maxEpochsInMemory: number;
  private readonly maxEpochsOnDisk: number;
  private readonly datastore: CPStateDatastore;
  private readonly blockStateCache: BlockStateCache;
  private readonly bufferPool?: BufferPool;

  constructor(modules: PersistentCheckpointStateCacheModules, opts: PersistentCheckpointStateCacheOpts) {
    const {metrics} = modules;

    this.cache = new MapTracker(metrics?.cpStateCache);
    this.config = modules.config;
    if (metrics) {
      this.metrics = metrics;
      const cpMetrics = metrics.cpStateCache;
      // Item and epoch counts are recomputed from the cache every time the size gauge is collected
      cpMetrics.size.addCollect(() => {
        const epochsOnDisk = new Set<Epoch>();
        const epochsInMemory = new Set<Epoch>();
        let itemsOnDisk = 0;
        let itemsInMemory = 0;
        for (const [cacheKey, item] of this.cache.entries()) {
          const {epoch} = fromCacheKey(cacheKey);
          if (!isPersistedCacheItem(item)) {
            itemsInMemory++;
            epochsInMemory.add(epoch);
            continue;
          }
          itemsOnDisk++;
          epochsOnDisk.add(epoch);
        }
        cpMetrics.size.set({type: CacheItemType.persisted}, itemsOnDisk);
        cpMetrics.size.set({type: CacheItemType.inMemory}, itemsInMemory);
        cpMetrics.epochSize.set({type: CacheItemType.persisted}, epochsOnDisk.size);
        cpMetrics.epochSize.set({type: CacheItemType.inMemory}, epochsInMemory.size);
      });
    }
    this.logger = modules.logger;
    this.clock = modules.clock;
    this.signal = modules.signal;

    // Negative limits make no sense, reject them up front
    const {maxCPStateEpochsInMemory, maxCPStateEpochsOnDisk} = opts;
    if (maxCPStateEpochsInMemory !== undefined && maxCPStateEpochsInMemory < 0) {
      throw new Error("maxEpochsInMemory must be >= 0");
    }
    if (maxCPStateEpochsOnDisk !== undefined && maxCPStateEpochsOnDisk < 0) {
      throw new Error("maxCPStateEpochsOnDisk must be >= 0");
    }

    this.maxEpochsInMemory = maxCPStateEpochsInMemory ?? DEFAULT_MAX_CP_STATE_EPOCHS_IN_MEMORY;
    this.maxEpochsOnDisk = maxCPStateEpochsOnDisk ?? DEFAULT_MAX_CP_STATE_ON_DISK;
    // The datastore is injected so that tests can supply a different one
    this.datastore = modules.datastore;
    this.blockStateCache = modules.blockStateCache;
    this.bufferPool = modules.bufferPool;
  }

  /**
   * Reload checkpoint state keys from the last run.
   */
  async init(): Promise<void> {
    const {datastore} = this;
    // init() is optional on the datastore
    if (datastore?.init) {
      await datastore.init();
    }

    // Checkpoint states left over from the last run are not trusted, so drop every one of them.
    // Otherwise a bad checkpoint state from the last run could leave the node stuck.
    // This was found during mekong devnet, see https://github.com/ChainSafe/lodestar/pull/7255
    const staleKeys = await datastore.readKeys();
    const removals = staleKeys.map((staleKey) => datastore.remove(staleKey));
    await Promise.all(removals);

    this.logger.info("Removed persisted checkpoint states from the last run", {
      count: staleKeys.length,
      maxEpochsInMemory: this.maxEpochsInMemory,
    });
  }

  /**
   * Get a state from cache, it may reload from disk.
   * This is an expensive api, should only be called in some important flows:
   * - Validate a gossip block
   * - Get block for processing
   * - Regen head state
   */
  async getOrReload(cp: CheckpointHex): Promise<IBeaconStateView | null> {
    const loaded = await this.getStateOrLoadDb(cp);
    // unknown checkpoint, or its bytes could not be read
    if (loaded === null) {
      return null;
    }
    // already in memory, nothing to reload
    if (isBeaconStateView(loaded)) {
      return loaded;
    }

    // from here on we only hold the serialized bytes read from the datastore
    const {metrics, clock} = this;
    const {persistedKey, stateBytes} = loaded;
    const logMeta = {persistedKey: toHex(persistedKey)};
    this.logger.debug("Reload: read state successful", logMeta);
    metrics?.cpStateCache.stateReloadSecFromSlot.observe(clock?.secFromSlot(clock?.currentSlot ?? 0) ?? 0);

    const seedState = this.findSeedStateToReload(cp);
    metrics?.cpStateCache.stateReloadEpochDiff.observe(Math.abs(seedState.epoch - cp.epoch));
    this.logger.debug("Reload: found seed state", {...logMeta, seedSlot: seedState.slot});

    try {
      // 80% of the validators serialization time is memory allocation, the buffer pool is there to avoid it
      const stopSerializeTimer = metrics?.cpStateCache.stateReloadValidatorsSerializeDuration.startTimer();
      // `using` releases the pooled buffer automatically when this scope exits
      using pooledValidatorsBytes = this.serializeStateValidators(seedState);
      let validatorsBytes = pooledValidatorsBytes?.buffer;
      if (validatorsBytes == null) {
        // the buffer pool could not be used, fall back to a fresh allocation
        metrics?.cpStateCache.stateReloadValidatorsSerializeAllocCount.inc();
        validatorsBytes = seedState.serializeValidators();
      }
      stopSerializeTimer?.();

      const stopReloadTimer = metrics?.cpStateCache.stateReloadDuration.startTimer();
      // preload validators and balances for faster state transition
      const reloadedState = seedState.loadOtherState(stateBytes, validatorsBytes, {
        preloadValidatorsAndBalances: true,
      });
      // hashTreeRoot() commits internally; the state is not modified, we only want all roots computed and cached
      const stateRoot = toRootHex(reloadedState.hashTreeRoot());
      stopReloadTimer?.();

      this.logger.debug("Reload: cached state load successful", {
        ...logMeta,
        stateSlot: reloadedState.slot,
        stateRoot,
        seedSlot: seedState.slot,
      });

      // switch the entry to in-memory only after a successful reload, keeping track of the persisted key
      this.cache.set(toCacheKey(cp), {type: CacheItemType.inMemory, state: reloadedState, persistedKey});
      this.epochIndex.getOrDefault(cp.epoch).add(cp.rootHex);
      // don't prune from memory here, call it at the last 1/3 of slot 0 of an epoch
      return reloadedState;
    } catch (e) {
      this.logger.debug("Reload: error loading cached state", logMeta, e as Error);
      return null;
    }
  }

  /**
   * Return either state or state bytes loaded from db.
   */
  async getStateOrBytes(cp: CheckpointHex): Promise<IBeaconStateView | Uint8Array | null> {
    const loaded = await this.getStateOrLoadDb(cp);
    if (loaded === null) {
      return null;
    }
    // in-memory states are returned as is, loaded data is reduced to its raw bytes
    return isBeaconStateView(loaded) ? loaded : loaded.stateBytes;
  }

  /**
   * Return either state or state bytes with persisted key loaded from db.
   */
  async getStateOrLoadDb(cp: CheckpointHex): Promise<IBeaconStateView | LoadedStateBytesData | null> {
    const key = toCacheKey(cp);

    // fast path: the state is still in memory
    const stateInMemory = this.get(key);
    if (stateInMemory) {
      return stateInMemory;
    }

    const item = this.cache.get(key);
    if (item === undefined) {
      // checkpoint is not tracked by this cache at all
      return null;
    }
    if (isInMemoryCacheItem(item)) {
      // should not happen, in-memory state is handled above
      throw new Error("Expected persistent key");
    }

    // only the datastore key is left, read the serialized state back
    const {value: persistedKey} = item;
    const stopDbReadTimer = this.metrics?.cpStateCache.stateReloadDbReadTime.startTimer();
    const stateBytes = await this.datastore.read(persistedKey);
    stopDbReadTimer?.();

    return stateBytes === null ? null : {persistedKey, stateBytes};
  }

  /**
   * Similar to get() api without reloading from disk
   */
  get(cpOrKey: CheckpointHex | CacheKey): IBeaconStateView | null {
    const cpMetrics = this.metrics?.cpStateCache;
    cpMetrics?.lookups.inc();

    const key = typeof cpOrKey === "string" ? cpOrKey : toCacheKey(cpOrKey);
    const item = this.cache.get(key);
    if (item === undefined) {
      return null;
    }

    // any tracked entry counts as a hit, whether it is in memory or persisted
    cpMetrics?.hits.inc();
    if (this.preComputedCheckpoint === key) {
      this.preComputedCheckpointHits = (this.preComputedCheckpointHits ?? 0) + 1;
    }

    // persisted entries are never reloaded by this api
    if (!isInMemoryCacheItem(item)) {
      return null;
    }

    cpMetrics?.stateClonedCount.observe(item.state.clonedCount);
    return item.state;
  }

  /**
   * Add a state of a checkpoint to this cache, prune from memory if necessary.
   */
  add(cp: phase0.Checkpoint, state: IBeaconStateView): void {
    const cpHex = toCheckpointHex(cp);
    const key = toCacheKey(cpHex);
    const cacheItem = this.cache.get(key);
    this.metrics?.cpStateCache.adds.inc();

    // Carry over the datastore key of the entry being replaced, no matter whether that entry is a persisted item
    // or an in-memory item that was reloaded from disk. Dropping it would lose track of the copy on disk and
    // make the very same checkpoint state be serialized and written again later.
    let persistedKey: DatastoreKey | undefined;
    if (cacheItem !== undefined) {
      persistedKey = isPersistedCacheItem(cacheItem) ? cacheItem.value : cacheItem.persistedKey;
    }

    if (persistedKey !== undefined) {
      // a copy exists on disk, (re)set the state in memory and keep the link to that copy
      this.cache.set(key, {type: CacheItemType.inMemory, state, persistedKey});
      this.logger.verbose("Added checkpoint state to memory but a persisted key existed", {
        epoch: cp.epoch,
        rootHex: cpHex.rootHex,
        persistedKey: toHex(persistedKey),
      });
    } else {
      this.cache.set(key, {type: CacheItemType.inMemory, state});
      this.logger.verbose("Added checkpoint state to memory", {epoch: cp.epoch, rootHex: cpHex.rootHex});
    }

    this.epochIndex.getOrDefault(cp.epoch).add(cpHex.rootHex);
    this.prunePersistedStates();
  }

  /**
   * Searches the in-memory states for the latest cached state with the given `rootHex`, without reloading from disk, starting with `maxEpoch` and descending
   */
  getLatest(rootHex: RootHex, maxEpoch: Epoch): IBeaconStateView | null {
    // only epochs lte `maxEpoch` are candidates, visited from the most recent one down
    const candidateEpochs = Array.from(this.epochIndex.keys())
      .filter((e) => e <= maxEpoch)
      .sort((a, b) => b - a);

    for (const epoch of candidateEpochs) {
      if (!this.epochIndex.get(epoch)?.has(rootHex)) {
        continue;
      }
      // get() returns null for persisted entries, in that case keep looking in older epochs
      const stateInMemory = this.get({rootHex, epoch});
      if (stateInMemory) {
        return stateInMemory;
      }
    }
    return null;
  }

  /**
   * Searches for the latest cached state with the given `rootHex`, reloading from disk if needed, starting with `maxEpoch` and descending
   * This is expensive api, should only be called in some important flows:
   * - Validate a gossip block
   * - Get block for processing
   * - Regen head state
   */
  async getOrReloadLatest(rootHex: RootHex, maxEpoch: Epoch): Promise<IBeaconStateView | null> {
    // only epochs lte `maxEpoch` are candidates, visited from the most recent one down
    const candidateEpochs = Array.from(this.epochIndex.keys())
      .filter((e) => e <= maxEpoch)
      .sort((a, b) => b - a);

    for (const epoch of candidateEpochs) {
      if (!this.epochIndex.get(epoch)?.has(rootHex)) {
        continue;
      }
      try {
        const state = await this.getOrReload({rootHex, epoch});
        if (state) {
          return state;
        }
      } catch (e) {
        // a failed reload is not fatal, fall through to the next older epoch
        this.logger.debug("Error get or reload state", {epoch, rootHex}, e as Error);
      }
    }
    return null;
  }

  /**
   * Update the precomputed checkpoint and return the number of hits for the
   * previous one (if any).
   */
  updatePreComputedCheckpoint(rootHex: RootHex, epoch: Epoch): number | null {
    const nextKey = toCacheKey({rootHex, epoch});
    // hits collected for the checkpoint being replaced, null if none was ever set
    const hitsOfPrevious = this.preComputedCheckpointHits;

    this.preComputedCheckpoint = nextKey;
    this.preComputedCheckpointHits = 0;
    return hitsOfPrevious;
  }

  /**
   * This is just to conform to the old implementation
   */
  prune(): void {
    // intentionally a no-op: nothing is pruned by this method
  }

  /**
   * Prune all checkpoint states before the provided finalized epoch.
   */
  pruneFinalized(finalizedEpoch: Epoch): void {
    for (const epoch of this.epochIndex.keys()) {
      if (epoch >= finalizedEpoch) {
        continue;
      }
      // fire and forget: the deletion is not awaited, a failure is only logged
      this.deleteAllEpochItems(epoch).catch((e) =>
        this.logger.debug("Error delete all epoch items", {epoch, finalizedEpoch}, e as Error)
      );
    }
  }

  /**
   * After processing a block, prune from memory based on the view of that block.
   * This will likely persist 1 state per epoch, at the last 1/3 of slot 0 of an epoch, although it is called at the last 1/3 of every slot.
   * Given the following block b was processed with b2, b1, b0 are ancestors in epoch (n-2), (n-1), n respectively
   *
   *       epoch:          (n-2)       (n-1)         n         (n+1)
   *             |-----------|-----------|-----------|-----------|
   *                        ^            ^           ^    ^
   *                        |            |           |    |
   * block chain:           b2---------->b1--------->b0-->b
   *
   * After processing block b, if maxEpochsInMemory is:
   * - 2 then we'll persist {root: b2, epoch n-2} checkpoint state to disk
   * - 1 then we'll persist {root: b2, epoch n-2} and {root: b1, epoch n-1} checkpoint state to disk
   * - 0 then we'll persist {root: b2, epoch n-2} and {root: b1, epoch n-1} and {root: b0, epoch n} checkpoint state to disk
   *   - if any old epochs checkpoint states are persisted, no need to do it again
   *
   * Note that for each epoch there could be multiple checkpoint states, usually 2, one for Previous Root Checkpoint State and one for Current Root Checkpoint State.
   * We normally only persist 1 checkpoint state per epoch, the one that could potentially be justified/finalized later based on the view of the block.
   * Other checkpoint states are pruned from memory.
   *
   * This design also covers the reorg scenario. Given block c in the same epoch n where c.slot > b.slot, c is not descendant of b, and c is built on top of c0
   * instead of b0 (epoch (n - 1))
   *
   *       epoch:          (n-2)       (n-1)         n         (n+1)
   *             |-----------|-----------|-----------|-----------|
   *                        ^            ^       ^   ^    ^   ^
   *                        |            |       |   |    |   |
   * block chain:           b2---------->b1----->c0->b0-->b   |
   *                                             ║            |
   *                                             ╚═══════════>c (reorg)
   *
   * After processing block c, if maxEpochsInMemory is:
   * - 0 then we'll persist {root: c0, epoch: n} checkpoint state to disk. Note that regen should populate {root: c0, epoch: n} checkpoint state before.
   *
   *                           epoch:      (n-1)                                                           n                                                           (n+1)
   *                                         |-------------------------------------------------------------|-------------------------------------------------------------|
   *                                         ^                               ^                             ^                             ^
   *   _______                               |                               |                             |                             |
   *  |       |                              |                               |                             |                             |
   *  |  db   |====== reload ======> {root: b1, epoch: n-1} cp state ======> c0 block state ======> {root: c0, epoch: n} cp state =====> c block state
   *  |_______|
   *
   *
   *
   * - 1 then we'll persist {root: b1, epoch n-1} checkpoint state to disk. Note that at epoch n there is both {root: b0, epoch: n} and {root: c0, epoch: n} checkpoint states in memory
   * - 2 then we'll persist {root: b2, epoch n-2} checkpoint state to disk, there are also 2 checkpoint states in memory at epoch n, same to the above (maxEpochsInMemory=1)
   *
   * As of Mar 2024, it takes <=350ms to persist a holesky state on fast server
   */
  async processState(blockRootHex: RootHex, state: IBeaconStateView): Promise<number> {
    // it's important to sort the epochs in ascending order, in case of big reorg we always want to keep the most recent checkpoint states
    const epochsAsc = Array.from(this.epochIndex.keys()).sort((a, b) => a - b);
    if (epochsAsc.length <= this.maxEpochsInMemory) {
      // everything still fits in memory, nothing to persist
      return 0;
    }

    const blockSlot = state.slot;
    const cutoffMs = this.config.getSlotComponentDurationMs(PROCESS_CHECKPOINT_STATES_BPS);
    // we always have a clock in production; without one (tests) act as if the cutoff is already reached
    const elapsedMs = this.clock?.msFromSlot(blockSlot) ?? cutoffMs;
    const waitMs = cutoffMs - elapsedMs;
    if (waitMs > 0) {
      // ~67% of the slot is the most idle time of every slot, wait for it before persisting checkpoint states;
      // normally checkpoint states are only persisted at ~67% of slot 0 of an epoch
      await sleep(waitMs, this.signal);
    }
    // If we're already past the cutoff there is no wait at all:
    // - while syncing it's critical to persist checkpoint states as soon as possible to avoid OOM during unfinality time
    // - if the node is synced this is not a hot time because the block came late, we likely missed the attestation
    //   already, or the block is orphaned

    // the oldest epochs are persisted, the `maxEpochsInMemory` most recent ones stay in memory
    const persistEpochs = epochsAsc.slice(0, epochsAsc.length - this.maxEpochsInMemory);
    let persistCount = 0;
    for (const epoch of persistEpochs) {
      const logCtx = {epoch, slot: blockSlot, root: blockRootHex};
      try {
        // getBlockRootAtSlot() may fail, see https://github.com/ChainSafe/lodestar/issues/7495
        if (state.slot < computeStartSlotAtEpoch(epoch)) {
          // this state is older than the epoch: it has no view of this epoch or of any later one
          break;
        }
        // usually there is only 0 or 1 epoch to persist in this loop
        persistCount += await this.processPastEpoch(blockRootHex, state, epoch);
        this.logger.verbose("Processed past epoch", logCtx);
      } catch (e) {
        this.logger.debug("Error processing past epoch", logCtx, e as Error);
      }
    }

    if (persistCount > 0) {
      this.logger.verbose("Persisted checkpoint states", {
        slot: blockSlot,
        root: blockRootHex,
        persistCount,
        persistEpochs: persistEpochs.length,
      });
    }
    return persistCount;
  }

  /**
   * Find a seed state to reload the state of provided checkpoint. Based on the design of n-historical state:
   *
   * ╔════════════════════════════════════╗═══════════════╗
   * ║      persisted to db or fs         ║   in memory   ║
   * ║        reload if needed            ║               ║
   * ║ -----------------------------------║---------------║
   * ║        epoch:       (n-2)   (n-1)  ║  n     (n+1)  ║
   * ║               |-------|-------|----║--|-------|----║
   * ║                      ^        ^    ║ ^       ^     ║
   * ║                                    ║  ^       ^    ║
   * ╚════════════════════════════════════╝═══════════════╝
   *
   * we always reload an epoch in the past. We'll start with epoch n then (n+1) prioritizing ones with the same view of `reloadedCp`.
   *
   * Use seed state from the block cache if cannot find any seed states within this cache.
   */
  findSeedStateToReload(reloadedCp: CheckpointHex): IBeaconStateView {
    // Compute the range of indexed epochs with a plain loop instead of `Math.max(...keys)`:
    // - `Math.max()` of an empty list is -Infinity, and since `-Infinity + 1 === -Infinity` the scan below
    //   would never terminate on an empty index
    // - it also avoids spreading a potentially large key list into function arguments
    let minEpoch = Infinity;
    let maxEpoch = -Infinity;
    for (const indexedEpoch of this.epochIndex.keys()) {
      if (indexedEpoch < minEpoch) {
        minEpoch = indexedEpoch;
      }
      if (indexedEpoch > maxEpoch) {
        maxEpoch = indexedEpoch;
      }
    }

    const reloadedCpSlot = computeStartSlotAtEpoch(reloadedCp.epoch);
    let firstState: IBeaconStateView | null = null;
    const logCtx = {reloadedCpEpoch: reloadedCp.epoch, reloadedCpRoot: reloadedCp.rootHex};

    // no need to check epochs before `maxEpoch - this.maxEpochsInMemory + 1` because they are all persisted.
    // The start is clamped to the lowest indexed epoch: epochs below it have no entries, so skipping them does not
    // change the result, while it keeps the scan finite for any `maxEpochsInMemory` (including Infinity).
    // With an empty index `startEpoch` is Infinity and the loop body never runs.
    const startEpoch = Math.max(minEpoch, maxEpoch - this.maxEpochsInMemory + 1);
    for (let epoch = startEpoch; epoch <= maxEpoch; epoch++) {
      // if a previous epoch had at least 1 state in memory but none with the same view, just return its 1st one
      // NOTE(review): a first state found only in the last scanned epoch is not returned here, the block state
      // cache seed is used instead - confirm this is intended
      if (firstState !== null) {
        return firstState;
      }

      for (const rootHex of this.epochIndex.get(epoch) || []) {
        const cpKey = toCacheKey({rootHex, epoch});
        const cacheItem = this.cache.get(cpKey);
        if (cacheItem === undefined) {
          continue;
        }
        if (isInMemoryCacheItem(cacheItem)) {
          const {state} = cacheItem;
          if (firstState === null) {
            firstState = state;
          }
          const cpLog = {cpEpoch: epoch, cpRoot: rootHex};

          try {
            // amongst states of the same epoch, choose the one with the same view of reloadedCp
            if (
              reloadedCpSlot < state.slot &&
              toRootHex(state.getBlockRootAtSlot(reloadedCpSlot)) === reloadedCp.rootHex
            ) {
              this.logger.verbose("Reload: use checkpoint state as seed state", {...cpLog, ...logCtx});
              return state;
            }
          } catch (e) {
            // getBlockRootAtSlot may throw error
            this.logger.debug("Error finding checkpoint state to reload", {...cpLog, ...logCtx}, e as Error);
          }
        }
      }
    }

    // fallback to using the default seed state from block state cache
    const seedBlockState = this.blockStateCache.getSeedState();
    this.logger.verbose("Reload: use default block state as seed state", {stateSlot: seedBlockState.slot, ...logCtx});
    return seedBlockState;
  }

  /** Drop every cache entry together with the epoch index. */
  clear(): void {
    this.epochIndex.clear();
    this.cache.clear();
  }

  /** ONLY FOR DEBUGGING PURPOSES. For lodestar debug API */
  dumpSummary(): routes.lodestar.StateCacheItem[] {
    const summary: routes.lodestar.StateCacheItem[] = [];
    for (const key of this.cache.keys()) {
      const {epoch, rootHex} = fromCacheKey(key);
      // TODO: add checkpoint key and persistent key to the summary
      summary.push({
        // a checkpoint state is reported at the start slot of its epoch
        slot: computeStartSlotAtEpoch(epoch),
        root: rootHex,
        reads: this.cache.readCount.get(key) ?? 0,
        lastRead: this.cache.lastRead.get(key) ?? 0,
        checkpointState: true,
      });
    }
    return summary;
  }

  /** Iterate over the states currently held in memory; persisted entries are skipped. */
  getStates(): IterableIterator<IBeaconStateView> {
    const statesInMemory: IBeaconStateView[] = [];
    for (const item of this.cache.values()) {
      if (isInMemoryCacheItem(item)) {
        statesInMemory.push(item.state);
      }
    }
    return statesInMemory.values();
  }

  /** ONLY FOR DEBUGGING PURPOSES. For spec tests on error */
  dumpCheckpointKeys(): string[] {
    // snapshot of every cache key, for in-memory and persisted entries alike
    return [...this.cache.keys()];
  }

  /**
   * Prune or persist checkpoint states in an epoch
   * 1) If there is 1 checkpoint state with known root, persist it. This is when there is skipped slot at block 0 of epoch
   *     slot:                           n
   *             |-----------------------|-----------------------|
   *     PRCS root                      |
   *
   * 2) If there are 2 checkpoint states, PRCS and CRCS and both roots are known to this state, persist CRCS. If the block is reorged,
   * PRCS is regen and populated to this cache again.
   *     slot:                           n
   *             |-----------------------|-----------------------|
   *     PRCS root - prune              |
   *     CRCS root - persist             |
   *
   * 3) If there are any roots that are unknown to this state, persist their cp states. This is to handle the case where the current block is reorged later
   *
   * 4) (derived from above) If there are 2 checkpoint states, PRCS and an unknown root, persist both.
   *      - In the example below block slot (n + 1) reorged n
   *      - If we process state n + 1, CRCS is unknown to it
   *      - we need to also store CRCS to handle the case (n+2) switches to n again
   *
   *                     PRCS - persist
   *                       |  processState()
   *                       |       |
   *                 -------------n+1
   *               /       |
   *             n-1 ------n------------n+2
   *                       |
   *                     CRCS - persist
   *
   *   - PRCS is the checkpoint state that could be justified/finalized later based on the view of the state
   *   - unknown root checkpoint state is persisted to handle the reorg back to that branch later
   *
   * Performance note:
   *   - In normal condition, we persist 1 checkpoint state per epoch.
   *   - In reorged condition, we may persist multiple (most likely 2) checkpoint states per epoch.
   */
  private async processPastEpoch(blockRootHex: RootHex, state: IBeaconStateView, epoch: Epoch): Promise<number> {
    let persistCount = 0;
    const epochBoundarySlot = computeStartSlotAtEpoch(epoch);
    const epochBoundaryRoot =
      epochBoundarySlot === state.slot ? fromHex(blockRootHex) : state.getBlockRootAtSlot(epochBoundarySlot);
    const epochBoundaryHex = toRootHex(epochBoundaryRoot);
    const prevEpochRoot = toRootHex(state.getBlockRootAtSlot(epochBoundarySlot - 1));

    // for each epoch, usually there are 2 rootHexes respective to the 2 checkpoint states: Previous Root Checkpoint State and Current Root Checkpoint State
    const cpRootHexes = this.epochIndex.get(epoch) ?? [];
    const persistedRootHexes = new Set<RootHex>();

    // 1) if there is no CRCS, persist PRCS (block 0 of epoch is skipped). In this case prevEpochRoot === epochBoundaryHex
    // 2) if there are PRCS and CRCS, persist CRCS => persist CRCS
    // => this is simplified to always persist epochBoundaryHex
    persistedRootHexes.add(epochBoundaryHex);

    // 3) persist any states with unknown roots to this state
    for (const rootHex of cpRootHexes) {
      if (rootHex !== epochBoundaryHex && rootHex !== prevEpochRoot) {
        persistedRootHexes.add(rootHex);
      }
    }

    for (const rootHex of cpRootHexes) {
      const cpKey = toCacheKey({epoch: epoch, rootHex});
      const cacheItem = this.cache.get(cpKey);

      if (cacheItem !== undefined && isInMemoryCacheItem(cacheItem)) {
        let {persistedKey} = cacheItem;
        const {state} = cacheItem;
        const logMeta = {
          stateSlot: state.slot,
          rootHex,
          epochBoundaryHex,
          persistedKey: persistedKey ? toHex(persistedKey) : "",
        };

        if (persistedRootHexes.has(rootHex)) {
          if (persistedKey) {
            // we don't care if the checkpoint state is already persisted
            this.logger.verbose("Pruned checkpoint state from memory but no need to persist", logMeta);
          } else {
            // persist and do not update epochIndex
            this.metrics?.cpStateCache.statePersistSecFromSlot.observe(
              this.clock?.secFromSlot(this.clock?.currentSlot ?? 0) ?? 0
            );
            const cpPersist = {epoch: epoch, root: fromHex(rootHex)};
            // It's not sustainable to allocate ~240MB for each state every epoch, so we use buffer pool to reuse the memory.
            // As monitored on holesky as of Jan 2024:
            //   - This does not increase heap allocation while gc time is the same
            //   - It helps stabilize persist time and save ~300ms in average (1.5s vs 1.2s)
            //   - It also helps the state reload to save ~500ms in average (4.3s vs 3.8s)
            //   - Also `serializeState.test.ts` perf test shows a lot of differences allocating ~240MB once vs per state serialization
            const timer = this.metrics?.stateSerializeDuration.startTimer({
              source: AllocSource.PERSISTENT_CHECKPOINTS_CACHE_STATE,
            });
            persistedKey = await serializeState(
              state,
              AllocSource.PERSISTENT_CHECKPOINTS_CACHE_STATE,
              (stateBytes) => {
                timer?.();
                return this.datastore.write(cpPersist, stateBytes);
              },
              this.bufferPool
            );

            persistCount++;
            this.logger.verbose("Pruned checkpoint state from memory and persisted to disk", {
              ...logMeta,
              persistedKey: toHex(persistedKey),
            });
          }
          // overwrite cpKey, this means the state is deleted from memory
          this.cache.set(cpKey, {type: CacheItemType.persisted, value: persistedKey});
        } else {
          if (persistedKey) {
            // persisted file will be eventually deleted by the archive task
            // this also means the state is deleted from memory
            this.cache.set(cpKey, {type: CacheItemType.persisted, value: persistedKey});
            // do not update epochIndex
          } else {
            // delete the state from memory
            this.cache.delete(cpKey);
            const rootSet = this.epochIndex.get(epoch);
            if (rootSet) {
              rootSet.delete(rootHex);
              if (rootSet.size === 0) {
                this.epochIndex.delete(epoch);
              }
            }
          }
          this.metrics?.cpStateCache.statePruneFromMemoryCount.inc();
          this.logger.verbose("Pruned checkpoint state from memory", logMeta);
        }
      }
    }

    return persistCount;
  }

  /**
   * Remove every checkpoint state tracked for `epoch`: the copy in the datastore (if any) is removed,
   * the cache entry is dropped, and finally the epoch itself is dropped from `epochIndex`.
   */
  private async deleteAllEpochItems(epoch: Epoch): Promise<void> {
    const epochRootHexes = this.epochIndex.get(epoch) ?? [];
    let removedFromDisk = 0;

    for (const rootHex of epochRootHexes) {
      const cacheKey = toCacheKey({rootHex, epoch});
      const item = this.cache.get(cacheKey);

      // a persisted item always carries a datastore key, an in-memory item only optionally
      let datastoreKey: DatastoreKey | undefined;
      if (item) {
        datastoreKey = isPersistedCacheItem(item) ? item.value : item.persistedKey;
      }

      if (datastoreKey) {
        await this.datastore.remove(datastoreKey);
        removedFromDisk++;
        this.metrics?.cpStateCache.persistedStateRemoveCount.inc();
      }

      // the cache entry goes away whether or not anything had to be removed from the datastore
      this.cache.delete(cacheKey);
    }

    this.epochIndex.delete(epoch);
    this.logger.verbose("Pruned checkpoint states for epoch", {
      epoch,
      persistCount: removedFromDisk,
      rootHexes: [...epochRootHexes].join(","),
    });
  }

  /**
   * Drop the oldest epochs once more epochs are tracked than `maxEpochsOnDisk + maxEpochsInMemory`.
   * Deletions are started without being awaited and a failed deletion is only logged,
   * because this method should handle all possible errors and not throw.
   */
  private prunePersistedStates(): void {
    //                epochsOnDisk                                   epochsInMemory
    // |----------------------------------------------------------|----------------------|
    const epochBudget = this.maxEpochsOnDisk + this.maxEpochsInMemory;
    const excessEpochCount = this.epochIndex.size - epochBudget;
    if (excessEpochCount <= 0) {
      return;
    }

    // ascending order, so the leading entries are the lowest (oldest) epochs
    const oldestFirst = Array.from(this.epochIndex.keys()).sort((a, b) => a - b);
    for (const epoch of oldestFirst.slice(0, excessEpochCount)) {
      this.deleteAllEpochItems(epoch).catch((e) => {
        const logContext = {epoch, maxEpochsOnDisk: this.maxEpochsOnDisk, maxEpochsInMemory: this.maxEpochsInMemory};
        this.logger.debug("Error delete all epoch items", logContext, e as Error);
      });
    }
  }

  /**
   * Serialize the validators of `state` into a buffer taken from the buffer pool, to save memory allocation.
   * Returns null when there is no buffer pool or the pool did not provide a buffer.
   *   - As monitored on holesky as of Jan 2024, it helps save ~500ms state reload time (4.3s vs 3.8s)
   *   - Also `serializeState.test.ts` perf test shows a lot of differences allocating validators bytes once vs every time,
   * This is 2x - 3x faster than allocating memory every time.
   */
  private serializeStateValidators(state: IBeaconStateView): BufferWithKey | null {
    const validatorsSize = state.serializedValidatorsSize();
    const pooled = this.bufferPool?.alloc(validatorsSize, AllocSource.PERSISTENT_CHECKPOINTS_CACHE_VALIDATORS);
    if (!pooled) {
      return null;
    }

    const {buffer: target} = pooled;
    // the DataView covers exactly the byte range of the pooled buffer
    const view = new DataView(target.buffer, target.byteOffset, target.byteLength);
    state.serializeValidatorsToBytes({uint8Array: target, dataView: view}, 0);
    return pooled;
  }
}

/** Convert a checkpoint to its hex form: the epoch is copied, the root is converted with `toRootHex`. */
export function toCheckpointHex(checkpoint: phase0.Checkpoint): CheckpointHex {
  const {epoch, root} = checkpoint;
  return {epoch, rootHex: toRootHex(root)};
}

/** Build the string key of a checkpoint in the form `<rootHex>:<epoch>`. */
export function toCheckpointKey(cp: CheckpointHex): string {
  const {rootHex, epoch} = cp;
  return `${rootHex}:${epoch}`;
}

/** Build the cache key of a checkpoint in the form `<rootHex>_<epoch>`. */
function toCacheKey(cp: CheckpointHex): CacheKey {
  const {rootHex, epoch} = cp;
  return `${rootHex}_${epoch}`;
}

/**
 * Parse a `<rootHex>_<epoch>` cache key back into a checkpoint.
 * Only the first two "_"-separated parts are used; the epoch part is converted with `Number`.
 */
function fromCacheKey(key: CacheKey): CheckpointHex {
  const parts = key.split("_");
  return {rootHex: parts[0], epoch: Number(parts[1])};
}

/** Type guard: the value is treated as a state view when its `slot` property is not undefined. */
function isBeaconStateView(stateOrBytes: IBeaconStateView | LoadedStateBytesData): stateOrBytes is IBeaconStateView {
  const {slot} = stateOrBytes as IBeaconStateView;
  return slot !== undefined;
}

/** Type guard: true when the cache item's `type` is `CacheItemType.inMemory`. */
function isInMemoryCacheItem(cacheItem: CacheItem): cacheItem is InMemoryCacheItem {
  const {type} = cacheItem;
  return type === CacheItemType.inMemory;
}

/** Type guard: true when the cache item's `type` is `CacheItemType.persisted`. */
function isPersistedCacheItem(cacheItem: CacheItem): cacheItem is PersistedCacheItem {
  const {type} = cacheItem;
  return type === CacheItemType.persisted;
}