package ml.shifu.shifu.core.yarn.appmaster;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import ml.shifu.shifu.util.HDFSUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

/* loaded from: input_file:ml/shifu/shifu/core/yarn/appmaster/TrainingDataSet.class */
/**
 * Process-wide singleton that lists the training data files under a root path and
 * distributes them round-robin across a fixed number of workers.
 *
 * <p>The split is computed once, on the first successful call to
 * {@link #getSplitedFilePaths(Configuration, int, String)}, and the same list is returned by
 * every later call regardless of the arguments passed.
 *
 * <p>The instance is created lazily through the initialization-on-demand holder
 * {@link TrainingDataSetHolder}. {@code getSplitedFilePaths} itself is not synchronized.
 */
public class TrainingDataSet {
    /** Worker count the cached split was computed for; 0 until a split succeeds. */
    private int workerNum;
    /** Root path the cached split was computed from; {@code null} until a split succeeds. */
    private String rootDataPath;
    /** Cached result: one comma-separated path list per worker; {@code null} until a split succeeds. */
    private List<StringBuilder> splitedFilePaths;
    /** Configuration handed to the call that produced the cached split. */
    private Configuration globalConf;
    /** File system used to list the training data files. */
    private FileSystem hdfs;

    /** Holder whose class initialization creates the single {@link TrainingDataSet} instance. */
    private static class TrainingDataSetHolder {
        private static final TrainingDataSet INSTANCE = new TrainingDataSet();

        private TrainingDataSetHolder() {
        }
    }

    /** Creates the empty, not-yet-split instance and obtains the file system from {@code HDFSUtils}. */
    private TrainingDataSet() {
        this.workerNum = 0;
        this.rootDataPath = null;
        this.splitedFilePaths = null;
        this.hdfs = HDFSUtils.getFS();
    }

    /**
     * Returns the shared {@code TrainingDataSet} instance.
     *
     * @return the singleton instance
     */
    public static final TrainingDataSet getInstance() {
        return TrainingDataSetHolder.INSTANCE;
    }

    /**
     * Returns the per-worker training file lists, computing them on the first successful call.
     *
     * <p>All files found under {@code str} (listed with the recursive flag set) whose name does
     * not start with {@code "."} or {@code "_"} are assigned to workers in round-robin order.
     * Each element of the result holds the comma-separated paths assigned to one worker, so a
     * successful result always has exactly {@code i} elements.
     *
     * <p>Once a split has been computed it is cached; later calls return the cached list and
     * ignore their arguments. The cache is only populated when the split succeeds, so a call
     * that fails can be retried. The returned list is the cached instance itself, not a copy.
     *
     * @param configuration configuration to remember alongside the cached split
     * @param i number of workers to split the files across; must be positive
     * @param str root path of the training data
     * @return one {@link StringBuilder} of comma-separated file paths per worker
     * @throws FileNotFoundException if the file system reports that {@code str} does not exist
     * @throws IllegalArgumentException if {@code i} is not positive, or if the path is rejected
     * @throws IOException if listing the files fails
     * @throws RuntimeException if fewer eligible files than workers were found
     */
    public List<StringBuilder> getSplitedFilePaths(Configuration configuration, int i, String str) throws FileNotFoundException, IllegalArgumentException, IOException {
        if (this.splitedFilePaths != null) {
            return this.splitedFilePaths;
        }
        // A non-positive worker count would otherwise fail in the modulo below (zero) or be
        // silently treated like its absolute value (negative).
        if (i <= 0) {
            throw new IllegalArgumentException("Worker number must be positive, but was " + i);
        }

        // Build into a local list so that a failure part-way through never leaves a
        // partial split behind in the cache.
        List<StringBuilder> splits = new ArrayList<>();
        RemoteIterator<LocatedFileStatus> files = this.hdfs.listFiles(new Path(str), true);
        int slot = 0;
        while (files.hasNext()) {
            Path filePath = files.next().getPath();
            String fileName = filePath.getName();
            if (fileName.startsWith(".") || fileName.startsWith("_")) {
                continue;
            }
            if (slot >= splits.size()) {
                // First pass over the workers: this worker has no list yet.
                splits.add(new StringBuilder(filePath.toString()));
            } else {
                splits.get(slot).append(",").append(filePath.toString());
            }
            slot = (slot + 1) % i;
        }
        if (splits.size() < i) {
            throw new RuntimeException("Training data file count is smaller than worker number, this will make some workers do not have training data!");
        }

        // Publish the cache only after the split is known to be complete and valid.
        this.globalConf = configuration;
        this.workerNum = i;
        this.rootDataPath = str;
        this.splitedFilePaths = splits;
        return this.splitedFilePaths;
    }
}
