package org.tweetyproject.machinelearning.rl.mdp.algorithms;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.tweetyproject.machinelearning.rl.mdp.Action;
import org.tweetyproject.machinelearning.rl.mdp.MarkovDecisionProcess;
import org.tweetyproject.machinelearning.rl.mdp.Policy;
import org.tweetyproject.machinelearning.rl.mdp.State;

/* loaded from: input_file:org.tweetyproject.machinelearning-1.24.jar:org/tweetyproject/machinelearning/rl/mdp/algorithms/IterativePolicyEvaluation.class */
/**
 * Evaluates a fixed policy on a Markov decision process by repeatedly applying
 * the expected-utility update for a fixed number of sweeps over the state set.
 *
 * <p>Every sweep computes a complete new utility table from the table of the
 * previous sweep (a synchronous update); the table of the last sweep is returned.
 *
 * @param <S> the type of states
 * @param <A> the type of actions
 */
public class IterativePolicyEvaluation<S extends State, A extends Action> implements PolicyEvaluation<S, A> {
    /** Number of sweeps over the state set performed by {@link #getUtilities}. */
    private final long numIterations;

    /**
     * Creates a new evaluator that performs the given number of sweeps.
     *
     * @param numIterations the number of sweeps; if it is zero or negative, no
     *        sweep is performed and every state keeps its initial utility of 0
     */
    public IterativePolicyEvaluation(long numIterations) {
        this.numIterations = numIterations;
    }

    /**
     * Computes the utility of every state of the given process under the given policy.
     *
     * <p>All utilities start at 0. In each sweep a terminal state is assigned 0, and
     * a non-terminal state {@code s} with {@code a = policy.execute(s)} is assigned
     * the sum over all states {@code s'} of
     * {@code getProb(s, a, s') * (getReward(s, a, s') + gamma * U(s'))},
     * where {@code U} is the utility table of the previous sweep.
     *
     * @param markovDecisionProcess the process supplying states, transition
     *        probabilities, rewards and the terminal-state test
     * @param policy the policy that selects the action for each non-terminal state
     * @param gamma the discount factor multiplied with the utility of the successor state
     * @return a map assigning each state of the process its utility after the last sweep
     */
    @Override // org.tweetyproject.machinelearning.rl.mdp.algorithms.PolicyEvaluation
    public Map<S, Double> getUtilities(MarkovDecisionProcess<S, A> markovDecisionProcess, Policy<S, A> policy, double gamma) {
        Map<S, Double> utilities = new HashMap<>();
        for (S state : markovDecisionProcess.getStates()) {
            utilities.put(state, 0.0d);
        }
        // The counter must be a long: an int counter would overflow and loop forever
        // once numIterations exceeds Integer.MAX_VALUE.
        for (long sweep = 0; sweep < this.numIterations; sweep++) {
            // Write into a fresh table so that every update of this sweep reads
            // only values from the previous sweep.
            Map<S, Double> updated = new HashMap<>();
            for (S state : markovDecisionProcess.getStates()) {
                if (markovDecisionProcess.isTerminal(state)) {
                    updated.put(state, 0.0d);
                    continue;
                }
                // The chosen action depends only on the current state, so query the
                // policy once instead of twice per successor state.
                A action = policy.execute(state);
                double expected = 0.0d;
                for (S successor : markovDecisionProcess.getStates()) {
                    expected += markovDecisionProcess.getProb(state, action, successor)
                            * (markovDecisionProcess.getReward(state, action, successor)
                                    + (gamma * utilities.get(successor)));
                }
                updated.put(state, expected);
            }
            utilities = updated;
        }
        return utilities;
    }
}
