package org.apache.mahout.classifier.df.mapreduce.partial;

import com.google.common.base.Preconditions;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.mahout.classifier.df.DFUtils;
import org.apache.mahout.classifier.df.DecisionForest;
import org.apache.mahout.classifier.df.builder.TreeBuilder;
import org.apache.mahout.classifier.df.mapreduce.Builder;
import org.apache.mahout.classifier.df.mapreduce.MapredOutput;
import org.apache.mahout.classifier.df.node.Node;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;

/* loaded from: input_file:WEB-INF/lib/mahout-core-0.7.jar:org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilder.class */
/**
 * {@link Builder} that configures a map-only job (mapper {@link Step1Mapper}, zero reduce tasks)
 * and assembles a {@link DecisionForest} from the trees the job writes to its output files.
 */
public class PartialBuilder extends Builder {
    /**
     * Creates a builder backed by a fresh default {@link Configuration}.
     *
     * <p>All arguments are forwarded unchanged to
     * {@link #PartialBuilder(TreeBuilder, Path, Path, Long, Configuration)}.
     */
    public PartialBuilder(TreeBuilder treeBuilder, Path path, Path path2, Long l) {
        this(treeBuilder, path, path2, l, new Configuration());
    }

    /**
     * Creates a builder; every argument is forwarded unchanged, in the same order, to the
     * {@link Builder} constructor.
     */
    public PartialBuilder(TreeBuilder treeBuilder, Path path, Path path2, Long l, Configuration configuration) {
        super(treeBuilder, path, path2, l, configuration);
    }

    /**
     * Configures {@code job} as a map-only job: text input read from {@code getDataPath()},
     * {@link TreeID}/{@link MapredOutput} pairs written as sequence files to
     * {@code getOutputPath(...)}.
     *
     * @param job the job to configure
     * @throws IOException declared by the overridden method; may originate from the path lookups
     */
    @Override // org.apache.mahout.classifier.df.mapreduce.Builder
    protected void configureJob(Job job) throws IOException {
        Configuration configuration = job.getConfiguration();
        job.setJarByClass(PartialBuilder.class);

        FileInputFormat.setInputPaths(job, getDataPath());
        FileOutputFormat.setOutputPath(job, getOutputPath(configuration));

        job.setOutputKeyClass(TreeID.class);
        job.setOutputValueClass(MapredOutput.class);

        job.setMapperClass(Step1Mapper.class);
        // Map-only job: the mappers' output is the job's output.
        job.setNumReduceTasks(0);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
    }

    /**
     * Reads the job output and wraps the collected trees in a {@link DecisionForest}.
     *
     * <p>The number of expected trees is taken from {@code Builder.getNbTrees(...)} on the job's
     * configuration; {@link #processOutput} fails if the output does not contain exactly that
     * many records.
     *
     * @param job the job whose output should be parsed
     * @return a forest built from the trees found in the output, in the order they were read
     * @throws IOException if the output files cannot be listed or read
     */
    @Override // org.apache.mahout.classifier.df.mapreduce.Builder
    protected DecisionForest parseOutput(Job job) throws IOException {
        Configuration configuration = job.getConfiguration();
        int nbTrees = Builder.getNbTrees(configuration);
        Path outputPath = getOutputPath(configuration);

        // The keys are not used here, but passing a non-null key array enables the
        // record-count verification in processOutput.
        TreeID[] treeIDArr = new TreeID[nbTrees];
        Node[] nodeArr = new Node[nbTrees];
        processOutput(job, outputPath, treeIDArr, nodeArr);

        return new DecisionForest(Arrays.asList(nodeArr));
    }

    /**
     * Reads every key/value record from the output files under {@code path} and stores them,
     * in read order, into the supplied arrays.
     *
     * @param jobContext supplies the {@link Configuration} used to access the file system
     * @param path       directory whose output files are read
     * @param treeIDArr  receives the record keys; may be {@code null} only if {@code nodeArr} is
     *                   also {@code null}
     * @param nodeArr    receives the tree of each record value; must have the same length as
     *                   {@code treeIDArr}
     * @throws IOException              if the output files cannot be listed or read
     * @throws IllegalArgumentException if exactly one of the arrays is {@code null}, or if their
     *                                  lengths differ
     * @throws IllegalStateException    if the arrays are non-null and the number of records in the
     *                                  output differs from the array length
     */
    protected static void processOutput(JobContext jobContext, Path path, TreeID[] treeIDArr, Node[] nodeArr) throws IOException {
        Preconditions.checkArgument((treeIDArr == null) == (nodeArr == null), "if keys is null, trees should also be null");
        Preconditions.checkArgument(treeIDArr == null || treeIDArr.length == nodeArr.length, "keys.length != trees.length");

        Configuration configuration = jobContext.getConfiguration();

        int index = 0;
        for (Path outputFile : DFUtils.listOutputFiles(path.getFileSystem(configuration), path)) {
            for (Pair<TreeID, MapredOutput> record : new SequenceFileIterable<TreeID, MapredOutput>(outputFile, configuration)) {
                // Both arrays are either null or of equal length (checked above), so testing
                // one of them is enough. Fail with a clear message instead of an
                // ArrayIndexOutOfBoundsException when the output holds too many records.
                if (treeIDArr != null && index >= treeIDArr.length) {
                    throw new IllegalStateException(
                        "The output contains more key/values than the " + treeIDArr.length + " expected");
                }
                if (treeIDArr != null) {
                    treeIDArr[index] = record.getFirst();
                }
                if (nodeArr != null) {
                    nodeArr[index] = record.getSecond().getTree();
                }
                index++;
            }
        }

        // Make sure every expected key/value was found.
        if (treeIDArr != null && index != treeIDArr.length) {
            throw new IllegalStateException("Some key/values are missing from the output");
        }
    }
}
