/**
 * Policy gradient agent
 */
export default class PGAgent {
    /** Underlying SoftmaxPolicyGradient instance held by this agent. */
    _table: SoftmaxPolicyGradient;
    /** NOTE(review): presumably the steps recorded since the last reset/update — confirm against the implementation. */
    _history: any[];
    /**
     * Creates an agent for the given environment.
     * @param {RLEnvironmentBase} env Environment the agent acts in
     * @param {number} [resolution] Optional resolution value
     */
    constructor(env: RLEnvironmentBase, resolution?: number);
    /**
     * Resets the agent.
     */
    reset(): void;
    /**
     * Gets the score values of the agent.
     * @returns {Array<Array<Array<number>>>} Score values as a three-level nested array of numbers
     */
    get_score(): number[][][];
    /**
     * Gets an action for the given state.
     * @param {*[]} state Current states
     * @returns {*[]} Action
     */
    get_action(state: any[]): any[];
    /**
     * Updates the model with the outcome of a step.
     * @param {*[]} action Action that was taken
     * @param {*[]} state Next states
     * @param {number} reward Reward
     * @param {boolean} done Whether the epoch is done or not
     * @param {number} learning_rate Learning rate
     */
    update(action: any[], state: any[], reward: number, done: boolean, learning_rate: number): void;
}
/**
 * Type declaration for the model stored in `PGAgent._table`.
 *
 * Only signatures are declared here; most parameters and results are typed
 * `any`. NOTE(review): the member descriptions below that go beyond the
 * declared types are inferred from names — confirm against the implementation.
 */
declare class SoftmaxPolicyGradient {
    /**
     * @param {*} env Environment (untyped in this declaration)
     * @param {number} [resolution] Optional resolution value
     */
    constructor(env: any, resolution?: number);
    /** Parameter storage, typed as `QTableBase` from './q_learning.js'. */
    _params: QTableBase;
    /** Numeric epoch field. NOTE(review): presumably a counter of completed updates — confirm. */
    _epoch: number;
    /** Getter-only accessor (no setter is declared). NOTE(review): presumably the sizes of the state dimensions — confirm. */
    get _state_sizes(): any;
    /** Getter-only accessor (no setter is declared). NOTE(review): presumably the sizes of the action dimensions — confirm. */
    get _action_sizes(): any;
    /**
     * NOTE(review): presumably maps a state to its index in `_params` — confirm.
     * @param {*} state State
     * @returns {*} Untyped result
     */
    _state_index(state: any): any;
    /**
     * NOTE(review): presumably maps an action to its index in `_params` — confirm.
     * @param {*} action Action
     * @returns {*} Untyped result
     */
    _action_index(action: any): any;
    /**
     * NOTE(review): presumably the action probabilities for the given state — confirm.
     * @param {*} state State
     * @returns {*} Untyped result
     */
    probability(state: any): any;
    /**
     * Returns an array representation of this object.
     * @returns {*[]} Array (element type not declared)
     */
    toArray(): any[];
    /**
     * Returns an action for the given state.
     * @param {*} state State
     * @returns {*} Action (untyped)
     */
    get_action(state: any): any;
    /**
     * Updates the model; returns nothing.
     * @param {*} actions Actions (untyped; NOTE(review): presumably the recorded history of steps — confirm)
     * @param {*} learning_rate Learning rate (typed `any` here)
     */
    update(actions: any, learning_rate: any): void;
}
import { RLEnvironmentBase } from '../rl/base.js';
import { QTableBase } from './q_learning.js';
export {};