/*
 * Copyright 2013 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.template.soy.msgs.restricted;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;

import java.util.Random;

/**
 * An memory-efficient object canonicalizer.
 *
 * <p> This functions similarly to string.intern(). It has extremely low memory overhead of a
 * little over one reference per item interned. Standard HashMap based interners use many bytes
 * per object interned, partially defeating the memory savings of interning.
 *
 */
final class CompactInterner {

  /** Initial size of the table. */
  @VisibleForTesting static final int INITIAL_SIZE = 1024;

  /**
   * The maximum expected number of collisions to tolerate before growing.
   *
   * <p> Increasing this number reduces memory usage at the expense of more equals() checks
   * for both cache hits and misses. The max load factor of the table is
   * 1 / (MAX_EXPECTED_COLLISION_COUNT + 1).
   */
  private static final int MAX_EXPECTED_COLLISION_COUNT = 4;

  /**
   * The denominator of the current size when growing the table.
   *
   * <p> Increasing this number results in better memory utilization at the expense of more
   * frequent rehashing.
   *
   * <p> Increasing this value results in a linear increase in the amortized number of equals()
   * checks for a miss because of the increased frequency of rehashes, but has no effect on cache
   * hits. Proof: Let D be GROWTH_DENOMINATOR. Right before a rehash, the total number of times
   * any item has been hashed is computed by assuming all items in the table have been hashed once,
   * and D/(D+1) have been hashed at least twice, and (D/D+1)^2 have been hashed thrice, etc.
   * Since the sum of a geometric series on x is 1/(1-x), the number of hashes per item inserted is
   * exactly 1/(1 - (D/D+1)), or (D+1)/(D + 1 - D) or exactly D+1. The number of rehashes per item
   * is exactly D, since its first insertion was not a rehash.
   */
  private static final int GROWTH_DENOMINATOR = 4;

  /** Hash table of all the items interned. */
  private Object[] table;

  /** Number of items in the table. */
  private int count;

  /** The total number of collisions, including collisions incurred during a rehash. */
  private long collisions;

  public CompactInterner() {
    table = new Object[INITIAL_SIZE];
    count = 0;
    collisions = 0;
  }


  /**
   * Returns either the input, or an instance that equals it that was previously passed to this
   * method.
   *
   * <p> This operation performs in amortized constant time.
   */
  @SuppressWarnings("unchecked")  // If a.equals(b) then a and b have the same type.
  public synchronized <T> T intern(T value) {
    Preconditions.checkNotNull(value);

    // Use a pseudo-random number generator to mix up the high and low bits of the hash code.
    Random generator = new java.util.Random(value.hashCode());
    int tries = 0;

    while (true) {
      int index = generator.nextInt(table.length);
      Object candidate = table[index];

      if (candidate == null) {
        // Found a good place to hash it.
        count++;
        collisions += tries;
        table[index] = value;
        rehashIfNeeded();
        return value;
      }

      if (candidate.equals(value)) {
        Preconditions.checkArgument(value.getClass() == candidate.getClass(),
            "Interned objects are equals() but different classes: %s and %s", value, candidate);
        return (T) candidate;
      }

      tries++;
    }
  }


  /**
   * Doubles the table size.
   */
  private void rehashIfNeeded() {
    int currentSize = table.length;
    if (currentSize - count >= currentSize / (MAX_EXPECTED_COLLISION_COUNT + 1)) {
      // Still enough overhead.
      return;
    }

    Object[] oldTable = table;
    // Grow the table so it increases by 1 / GROWTH_DENOMINATOR.
    int newSize = currentSize + currentSize / GROWTH_DENOMINATOR;

    table = new Object[newSize];
    count = 0;

    for (Object element : oldTable) {
      if (element != null) {
        intern(element);
      }
    }
  }


  @VisibleForTesting double getAverageCollisions() {
    return 1.0 * collisions / count;
  }


  @VisibleForTesting static double getAverageCollisionsBound() {
    // NOTE: I'm sure there are research papers dedicated to open addressed hashing and load
    // factors but I've not been able to find them quickly. This is some rough empirical work
    // to make sure we get reasonable performance in tests.
    double x = Math.max(MAX_EXPECTED_COLLISION_COUNT, GROWTH_DENOMINATOR);
    return x * Math.log1p(x) + 1;
  }


  @VisibleForTesting double getOverhead() {
    return 1.0 * (table.length - count) / count;
  }


  @VisibleForTesting static final double getWorstCaseOverhead() {
    // This is the proportion of null entries to non-null entries right after a rehash.
    return (1 + 1.0 / MAX_EXPECTED_COLLISION_COUNT) * (1 + 1.0 / GROWTH_DENOMINATOR) - 1;
  }
}
