package com.kotlinnlp.neuraltokenizer.utils;

import com.jsoniter.JsonIterator;
import com.kotlinnlp.simplednn.dataset.Shuffler;
import com.kotlinnlp.simplednn.helpers.training.utils.ExamplesIndices;
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.Iterator;
import kotlin.Metadata;
import kotlin.Pair;
import kotlin.jvm.internal.Intrinsics;
import org.jetbrains.annotations.NotNull;

/* compiled from: DatasetUtils.kt */
@Metadata(mv = {1, 1, 8}, bv = {1, 0, 2}, k = 2, d1 = {"��4\n��\n\u0002\u0018\u0002\n\u0002\u0010\u000e\n\u0002\u0018\u0002\n\u0002\u0010\b\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n��\u001aD\u0010��\u001a\u0018\u0012\u0004\u0012\u00020\u0002\u0012\u000e\u0012\f\u0012\u0004\u0012\u00020\u00040\u0003j\u0002`\u00050\u00012&\u0010\u0006\u001a\"\u0012\u001a\u0012\u0018\u0012\u0004\u0012\u00020\u0002\u0012\u000e\u0012\f\u0012\u0004\u0012\u00020\u00040\u0003j\u0002`\u00050\u00010\u0003j\u0002`\u0007\u001a.\u0010\b\u001a\"\u0012\u001a\u0012\u0018\u0012\u0004\u0012\u00020\u0002\u0012\u000e\u0012\f\u0012\u0004\u0012\u00020\u00040\u0003j\u0002`\u00050\u00010\u0003j\u0002`\u00072\u0006\u0010\t\u001a\u00020\u0002\u001aV\u0010\n\u001a\"\u0012\u001a\u0012\u0018\u0012\u0004\u0012\u00020\u0002\u0012\u000e\u0012\f\u0012\u0004\u0012\u00020\u00040\u0003j\u0002`\u00050\u00010\u0003j\u0002`\u00072&\u0010\u0006\u001a\"\u0012\u001a\u0012\u0018\u0012\u0004\u0012\u00020\u0002\u0012\u000e\u0012\f\u0012\u0004\u0012\u00020\u00040\u0003j\u0002`\u00050\u00010\u0003j\u0002`\u00072\u0006\u0010\u000b\u001a\u00020\f*,\u0010\r\"\u000e\u0012\u0004\u0012\u00020\u0002\u0012\u0004\u0012\u0002`\u00050\u00012\u0018\u0012\u0004\u0012\u00020\u0002\u0012\u000e\u0012\f\u0012\u0004\u0012\u00020\u00040\u0003j\u0002`\u00050\u0001*\u0016\u0010\u000e\"\b\u0012\u0004\u0012\u00020\u0004`\u000f2\b\u0012\u0004\u0012\u00020\u00040\u0003*,\u0010\u0010\"\b\u0012\u0004\u0012\u0002`\u0011`\u000f2\u001e\u0012\u001a\u0012\u0018\u0012\u0004\u0012\u00020\u0002\u0012\u000e\u0012\f\u0012\u0004\u0012\u00020\u00040\u0003j\u0002`\u00050\u00010\u0003¨\u0006\u0012"}, d2 = {"mergeDataset", "Lkotlin/Pair;", "", "Ljava/util/ArrayList;", "", "Lcom/kotlinnlp/neuraltokenizer/utils/CharsClassification;", "dataset", "Lcom/kotlinnlp/neuraltokenizer/utils/Dataset;", "readDataset", "filename", "shuffleDataset", "shuffler", 
"Lcom/kotlinnlp/simplednn/dataset/Shuffler;", "AnnotatedSentence", "CharsClassification", "Lkotlin/collections/ArrayList;", "Dataset", "Lcom/kotlinnlp/neuraltokenizer/utils/AnnotatedSentence;", "neuraltokenizer"})
/* loaded from: input_file:com/kotlinnlp/neuraltokenizer/utils/DatasetUtilsKt.class */
public final class DatasetUtilsKt {
    /** Second argument passed to {@code JsonIterator.parse(InputStream, int)} when reading a dataset file. */
    private static final int PARSER_BUFFER_SIZE = 2048;

    /**
     * Reads an annotated dataset from a JSON file.
     *
     * <p>The parsing loop consumes, for every entry, one string followed by a sequence of integers,
     * and stores them as a {@code Pair(sentence, classification)}. This appears to correspond to a
     * JSON layout of the form {@code [["text", [0, 1, ...]], ...]} (TODO confirm against a real
     * dataset file).
     *
     * <p>The underlying file stream is closed before returning, whether parsing succeeds or fails.
     * I/O and parsing failures raised by the stream or by the JSON parser propagate to the caller.
     *
     * @param filename path of the JSON file to read
     * @return the list of (sentence, chars classification) pairs, in file order
     */
    @NotNull
    public static final ArrayList<Pair<String, ArrayList<Integer>>> readDataset(@NotNull String filename) {
        Intrinsics.checkParameterIsNotNull(filename, "filename");
        ArrayList<Pair<String, ArrayList<Integer>>> sentences = new ArrayList<>();
        // try-with-resources: previously the file stream was opened and never closed.
        try (BufferedInputStream input = new BufferedInputStream(new FileInputStream(filename))) {
            JsonIterator parser = JsonIterator.parse(input, PARSER_BUFFER_SIZE);
            while (parser.readArray()) {
                while (parser.readArray()) {
                    String text = parser.readString();
                    Intrinsics.checkExpressionValueIsNotNull(text, "iterator.readString()");
                    ArrayList<Integer> labels = new ArrayList<>();
                    // Result deliberately ignored: presumably this steps from the sentence string to
                    // the classification array that follows it (confirm with jsoniter's readArray contract).
                    parser.readArray();
                    while (parser.readArray()) {
                        labels.add(Integer.valueOf(parser.readInt()));
                    }
                    sentences.add(new Pair<>(text, labels));
                }
            }
        }
        return sentences;
    }

    /**
     * Merges all the annotated sentences of a dataset into a single one.
     *
     * <p>The sentences are concatenated in dataset order (no separator is inserted) and their
     * classifications are appended in the same order.
     *
     * @param dataset the annotated sentences to merge
     * @return a pair holding the concatenated text and the concatenated chars classification
     * @throws InvalidDataset if, for any sentence, {@code String.length()} differs from the size of
     *     its chars classification
     */
    @NotNull
    public static final Pair<String, ArrayList<Integer>> mergeDataset(@NotNull ArrayList<Pair<String, ArrayList<Integer>>> dataset) {
        Intrinsics.checkParameterIsNotNull(dataset, "dataset");
        // StringBuilder: the buffer never leaves this method, so StringBuffer's locking is unnecessary.
        StringBuilder mergedText = new StringBuilder();
        ArrayList<Integer> mergedLabels = new ArrayList<>();
        for (Pair<String, ArrayList<Integer>> sentence : dataset) {
            String text = sentence.component1();
            ArrayList<Integer> labels = sentence.component2();
            if (text.length() != labels.size()) {
                throw new InvalidDataset("Sentence and chars classification have different lengths");
            }
            mergedText.append(text);
            mergedLabels.addAll(labels);
        }
        return new Pair<>(mergedText.toString(), mergedLabels);
    }

    /**
     * Returns a reordered copy of a dataset.
     *
     * <p>The result contains {@code dataset.get(i)} for every index {@code i} produced by
     * {@code new ExamplesIndices(dataset.size(), shuffler)}, in the order the indices are produced
     * (presumably a shuffled permutation of {@code 0..size-1}; confirm against ExamplesIndices).
     * The input list itself is not modified.
     *
     * @param dataset the annotated sentences to reorder
     * @param shuffler the shuffler handed to {@code ExamplesIndices}
     * @return a new list with the reordered sentences
     */
    @NotNull
    public static final ArrayList<Pair<String, ArrayList<Integer>>> shuffleDataset(@NotNull ArrayList<Pair<String, ArrayList<Integer>>> dataset, @NotNull Shuffler shuffler) {
        Intrinsics.checkParameterIsNotNull(dataset, "dataset");
        Intrinsics.checkParameterIsNotNull(shuffler, "shuffler");
        ArrayList<Pair<String, ArrayList<Integer>>> shuffled = new ArrayList<>(dataset.size());
        Iterator<Integer> indices = new ExamplesIndices(dataset.size(), shuffler).iterator();
        while (indices.hasNext()) {
            shuffled.add(dataset.get(indices.next().intValue()));
        }
        // Return the reordered list; returning the input here would silently discard the shuffle.
        return shuffled;
    }
}
