package org.chocosolver.util.bandit;

import java.util.Arrays;

/* loaded from: input_file:org/chocosolver/util/bandit/UCB1.class */
/**
 * Bandit policy implementing the UCB1 selection rule: each action is scored by its mean
 * observed payoff plus an exploration bonus (see {@link #upperBound(int, int)}), and the
 * action with the highest score is selected.
 *
 * <p>During the first {@code numActions} rounds the actions are returned in index order,
 * one per round. Instances keep mutable per-action statistics and perform no
 * synchronisation.
 */
public class UCB1 implements Policy {
    /** Number of actions this policy chooses between. */
    int numActions;
    /** Per-action sum of the payoffs passed to {@link #update(int, double)}. */
    double[] payoffSums;
    /** Per-action play counter: set to 1 by {@link #init()}, incremented by each update. */
    int[] numPlays;

    /**
     * Creates a policy over {@code numActions} actions with all statistics at zero.
     *
     * @param numActions number of actions; a negative value makes the array allocation
     *     throw {@link NegativeArraySizeException}
     */
    public UCB1(int numActions) {
        this.numActions = numActions;
        this.payoffSums = new double[numActions];
        this.numPlays = new int[numActions];
    }

    /**
     * Resets the statistics: every play counter restarts at 1 and every payoff sum at 0.
     *
     * <p>The sums are cleared together with the counters; otherwise a second call would
     * divide payoffs accumulated earlier by a counter of 1 and inflate the means.
     */
    @Override
    public void init() {
        Arrays.fill(this.numPlays, 1);
        Arrays.fill(this.payoffSums, 0.0d);
    }

    /**
     * Selects the action to play at the given round.
     *
     * @param round index of the current round; values below {@code numActions} are returned
     *     unchanged, so the first rounds walk through the actions in order
     * @return {@code round} itself while {@code round < numActions}; afterwards the first
     *     action whose play counter is still 0 if there is one, else the lowest-indexed
     *     action with the highest score (0 when there are no actions)
     */
    @Override
    public int nextAction(int round) {
        if (round < this.numActions) {
            return round;
        }
        int bestAction = 0;
        // Start below every finite score; a finite sentinel would make arbitrarily low
        // (very negative) mean payoffs lose the comparison and fall back to action 0.
        double bestScore = Double.NEGATIVE_INFINITY;
        for (int action = 0; action < this.numActions; action++) {
            int plays = this.numPlays[action];
            if (plays == 0) {
                // A zero counter would turn the score into 0.0 / 0 = NaN, which never wins
                // a comparison; an action without any recorded play is tried first instead.
                return action;
            }
            double score = (this.payoffSums[action] / plays) + upperBound(round, plays);
            if (bestScore < score) {
                bestScore = score;
                bestAction = action;
            }
        }
        return bestAction;
    }

    /**
     * Exploration bonus added to the mean payoff of an action.
     *
     * @param i current round index
     * @param i2 play counter of the action being scored
     * @return {@code sqrt(2 * ln(i + 1) / i2)}
     */
    protected double upperBound(int i, int i2) {
        // Add in double arithmetic: i + 1 as an int overflows for Integer.MAX_VALUE and
        // would yield the logarithm of a negative number (NaN).
        return Math.sqrt((2.0d * Math.log(i + 1.0d)) / i2);
    }

    /**
     * Records the payoff observed after playing an action.
     *
     * @param i index of the action that was played
     * @param d payoff obtained for it
     * @throws ArrayIndexOutOfBoundsException if {@code i} is not a valid action index
     */
    @Override
    public void update(int i, double d) {
        this.numPlays[i]++;
        this.payoffSums[i] += d;
    }
}
