package com.kotlinnlp.tokenslabeler.helpers;

import com.kotlinnlp.linguisticdescription.sentence.RealSentence;
import com.kotlinnlp.linguisticdescription.sentence.token.RealToken;
import com.kotlinnlp.linguisticdescription.sentence.token.properties.Position;
import com.kotlinnlp.tokenslabeler.language.AnnotatedToken;
import com.kotlinnlp.tokenslabeler.language.IOBTag;
import com.kotlinnlp.tokenslabeler.language.Label;
import com.kotlinnlp.tokenslabeler.language.SentenceUtilsKt;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import kotlin.Metadata;
import kotlin.Pair;
import kotlin.Unit;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.Ref;
import kotlin.text.Regex;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

/* compiled from: DatasetReader.kt */
@Metadata(mv = {1, 1, 15}, bv = {1, 0, 3}, k = 1, d1 = {"��T\n\u0002\u0018\u0002\n\u0002\u0010��\n��\n\u0002\u0010\u000e\n\u0002\b\u0002\n\u0002\u0010\u000b\n��\n\u0002\u0010\"\n��\n\u0002\u0010\b\n\u0002\b\u0003\n\u0002\u0010 \n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0010\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n��\u0018��2\u00020\u0001B=\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\u0006\u0010\u0004\u001a\u00020\u0003\u0012\b\b\u0002\u0010\u0005\u001a\u00020\u0006\u0012\u0010\b\u0002\u0010\u0007\u001a\n\u0012\u0004\u0012\u00020\u0003\u0018\u00010\b\u0012\n\b\u0002\u0010\t\u001a\u0004\u0018\u00010\n¢\u0006\u0002\u0010\u000bJ\u001c\u0010\r\u001a\b\u0012\u0004\u0012\u00020\u000f0\u000e2\f\u0010\u0010\u001a\b\u0012\u0004\u0012\u00020\u00030\u000eH\u0002J\u001c\u0010\u0011\u001a\b\u0012\u0004\u0012\u00020\u00130\u00122\f\u0010\u0014\u001a\b\u0012\u0004\u0012\u00020\u00030\u000eH\u0002J\u001c\u0010\u0015\u001a\b\u0012\u0004\u0012\u00020\u00160\u00122\f\u0010\u0017\u001a\b\u0012\u0004\u0012\u00020\u00030\u000eH\u0002J\u0012\u0010\u0018\u001a\u000e\u0012\n\u0012\b\u0012\u0004\u0012\u00020\u00160\u00120\u000eJ\u001c\u0010\u0019\u001a\b\u0012\u0004\u0012\u00020\u001a0\u000e2\f\u0010\u001b\u001a\b\u0012\u0004\u0012\u00020\u000f0\u000eH\u0002J\f\u0010\u001c\u001a\u00020\u001d*\u00020\u0003H\u0002R\u000e\u0010\u0004\u001a\u00020\u0003X\u0082\u0004¢\u0006\u0002\n��R\u0016\u0010\u0007\u001a\n\u0012\u0004\u0012\u00020\u0003\u0018\u00010\bX\u0082\u0004¢\u0006\u0002\n��R\u0012\u0010\t\u001a\u0004\u0018\u00010\nX\u0082\u0004¢\u0006\u0004\n\u0002\u0010\fR\u000e\u0010\u0002\u001a\u00020\u0003X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u0005\u001a\u00020\u0006X\u0082\u0004¢\u0006\u0002\n��¨\u0006\u001e"}, d2 = {"Lcom/kotlinnlp/tokenslabeler/helpers/DatasetReader;", Label.EMPTY_VALUE, "type", Label.EMPTY_VALUE, "filePath", "useOPlus", Label.EMPTY_VALUE, "includes", Label.EMPTY_VALUE, 
"maxSentences", Label.EMPTY_VALUE, "(Ljava/lang/String;Ljava/lang/String;ZLjava/util/Set;Ljava/lang/Integer;)V", "Ljava/lang/Integer;", "buildLabels", Label.EMPTY_VALUE, "Lcom/kotlinnlp/tokenslabeler/language/Label;", "annotations", "buildRealSentence", "Lcom/kotlinnlp/linguisticdescription/sentence/RealSentence;", "Lcom/kotlinnlp/linguisticdescription/sentence/token/RealToken;", "forms", "buildSentence", "Lcom/kotlinnlp/tokenslabeler/language/AnnotatedToken;", "lines", "loadSentences", "setOPlus", Label.EMPTY_VALUE, "labels", "getIOBTag", "Lcom/kotlinnlp/tokenslabeler/language/IOBTag;", "tokenslabeler"})
/* loaded from: input_file:com/kotlinnlp/tokenslabeler/helpers/DatasetReader.class */
public final class DatasetReader {
    // Value passed as the "type" constructor argument. Not read by any method body visible in this
    // file (the body of loadSentences() was not decompiled).
    private final String type;
    // Value passed as the "filePath" constructor argument. Not read by any method body visible in
    // this file. NOTE(review): presumably the dataset file consumed by loadSentences() — confirm.
    private final String filePath;
    // When true, buildSentence() runs setOPlus() on the labels before annotating the sentence.
    private final boolean useOPlus;
    // Optional whitelist of label values; when null, buildLabels() keeps every non-empty value.
    private final Set<String> includes;
    // Optional, nullable limit. Not read by any method body visible in this file.
    // NOTE(review): presumably caps the number of sentences returned by loadSentences() — confirm.
    private final Integer maxSentences;

    /* JADX WARN: Code restructure failed: missing block: B:4:0x006d, code lost:
    
        if (r3 != null) goto L8;
     */
    /**
     * Decompilation stub: the decompiler could not reconstruct this method (404 bytecode
     * instructions were skipped), so in this file it unconditionally throws
     * {@link UnsupportedOperationException}.
     *
     * <p>NOTE(review): the real implementation lives only in the compiled class / the original
     * {@code DatasetReader.kt}. It presumably reads {@code filePath} and builds each sentence with
     * {@code buildSentence}, but that cannot be verified from this source.
     *
     * @return declared as a list of annotated sentences; this stub never returns
     * @throws UnsupportedOperationException always, in this decompiled form
     */
    @org.jetbrains.annotations.NotNull
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    public final java.util.List<com.kotlinnlp.linguisticdescription.sentence.RealSentence<com.kotlinnlp.tokenslabeler.language.AnnotatedToken>> loadSentences() {
        /*
            Method dump skipped, instructions count: 404
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: com.kotlinnlp.tokenslabeler.helpers.DatasetReader.loadSentences():java.util.List");
    }

    /**
     * Builds an annotated sentence from the raw lines describing its tokens.
     *
     * <p>Each line is split on the tab character; the first column is taken as the token form and
     * the second as its annotation. Any further columns are ignored. The annotations are converted
     * with {@code buildLabels}, optionally post-processed with {@code setOPlus} (when
     * {@code useOPlus} is set), and attached to the sentence built from the forms.
     *
     * @param list the lines of one sentence, one token per line
     * @return the sentence built from the forms, annotated with the labels
     * @throws IndexOutOfBoundsException if a line has fewer than two tab-separated columns
     */
    private final RealSentence<AnnotatedToken> buildSentence(List<String> list) {
        final List<String> forms = new ArrayList<>(list.size());
        final List<String> annotations = new ArrayList<>(list.size());
        for (String line : list) {
            final List<String> columns = StringsKt.split$default(line, new String[]{"\t"}, false, 0, 6, (Object) null);
            if (columns.size() < 2) {
                // Same exception type as the unchecked columns.get(1) would raise, but naming the bad line.
                throw new IndexOutOfBoundsException(
                    "Expected \"<form>\\t<annotation>\" but found: \"" + line + "\"");
            }
            forms.add(columns.get(0));
            annotations.add(columns.get(1));
        }
        final List<Label> labels = buildLabels(annotations);
        if (this.useOPlus) {
            setOPlus(labels);
        }
        return SentenceUtilsKt.annotate(buildRealSentence(forms), labels);
    }

    /**
     * Builds a sentence of {@link RealToken}s from a list of token forms, assigning each token a
     * position made of its index and its start/end character offsets.
     *
     * <p>Offsets are computed as if consecutive forms were separated by exactly one character:
     * the first token starts at 0, each token ends at {@code start + form.length() - 1}, and the
     * next token starts at {@code previousEnd + 2}. The sentence position uses index 0, the start
     * of the first token and the end of the last token.
     *
     * @param list the token forms, in sentence order
     * @return a sentence exposing the built tokens and the overall position
     * @throws java.util.NoSuchElementException if {@code list} is empty (raised by
     *     {@code CollectionsKt.first} while computing the sentence position)
     */
    private final RealSentence<RealToken> buildRealSentence(List<String> list) {
        final List<RealToken> tokens = new ArrayList<>(list.size());
        // Starting at -2 makes the first token begin at offset 0 (-2 + 2).
        int previousEnd = -2;
        int index = 0;
        for (final String word : list) {
            final int start = previousEnd + 2;
            final int end = (start + word.length()) - 1;
            final Position tokenPosition = new Position(index, start, end);
            tokens.add(new RealToken() {

                @NotNull
                public String getForm() {
                    return word;
                }

                @NotNull
                public Position getPosition() {
                    return tokenPosition;
                }

                public boolean isComma() {
                    return RealToken.DefaultImpls.isComma(this);
                }

                public boolean isNumber() {
                    return RealToken.DefaultImpls.isNumber(this);
                }

                public boolean isPunct() {
                    return RealToken.DefaultImpls.isPunct(this);
                }

                @NotNull
                public String getNormalizedForm() {
                    return RealToken.DefaultImpls.getNormalizedForm(this);
                }
            });
            previousEnd = end;
            index++;
        }
        // Computed eagerly so that an empty input fails here, before the sentence object exists.
        final Position sentencePosition = new Position(
            0,
            CollectionsKt.first(tokens).getPosition().getStart(),
            CollectionsKt.last(tokens).getPosition().getEnd());
        return new RealSentence<RealToken>() {

            @NotNull
            public List<RealToken> getTokens() {
                return tokens;
            }

            @NotNull
            public Position getPosition() {
                return sentencePosition;
            }

            @NotNull
            public String buildText() {
                return RealSentence.DefaultImpls.buildText(this);
            }
        };
    }

    /**
     * Converts raw annotation strings into {@link Label}s.
     *
     * <p>For each annotation, a leading {@code "B-"}, {@code "I-"} or {@code "O"} is stripped to
     * obtain the label value. If the value is non-empty and allowed (either {@code includes} is
     * null or it contains the value), the label gets the tag returned by {@code getIOBTag} for the
     * original annotation together with that value. Otherwise the label is
     * {@code IOBTag.Outside} with {@code Label.EMPTY_VALUE}.
     *
     * <p>NOTE(review): because the pattern strips any leading {@code "O"}, an annotation such as
     * {@code "Oxyz"} yields the value {@code "xyz"} with the Outside tag — confirm this is intended.
     *
     * @param list the raw annotations, one per token
     * @return the labels, in the same order as {@code list}
     * @throws IllegalArgumentException if a kept annotation has no recognised IOB prefix
     */
    private final List<Label> buildLabels(List<String> list) {
        // Compiled once: the pattern does not depend on the element being processed.
        final Regex iobPrefix = new Regex("^B-|^I-|^O");
        final List<Label> labels = new ArrayList<>(list.size());
        for (String annotation : list) {
            final String value = iobPrefix.replace(annotation, Label.EMPTY_VALUE);
            final boolean keep = (value.length() > 0) && (this.includes == null || this.includes.contains(value));
            if (keep) {
                labels.add(new Label(getIOBTag(annotation), value));
            } else {
                labels.add(new Label(IOBTag.Outside, Label.EMPTY_VALUE));
            }
        }
        return labels;
    }

    /**
     * Maps the prefix of a raw annotation to its IOB tag: {@code "O"} gives Outside,
     * {@code "B-"} gives Beginning and {@code "I-"} gives Inside.
     *
     * @param str the raw annotation string
     * @return the tag matching the annotation's prefix
     * @throws IllegalArgumentException if the annotation starts with none of the known prefixes
     */
    private final IOBTag getIOBTag(@NotNull String str) {
        final IOBTag tag;
        if (StringsKt.startsWith$default(str, "O", false, 2, (Object) null)) {
            tag = IOBTag.Outside;
        } else if (StringsKt.startsWith$default(str, "B-", false, 2, (Object) null)) {
            tag = IOBTag.Beginning;
        } else if (StringsKt.startsWith$default(str, "I-", false, 2, (Object) null)) {
            tag = IOBTag.Inside;
        } else {
            throw new IllegalArgumentException("Unexpected tag");
        }
        return tag;
    }

    /**
     * Walks every pair of adjacent labels and, when the earlier label is Outside and the later one
     * is not, copies the later label's value onto the earlier one (mutating it in place). The tag
     * type of the earlier label is left unchanged.
     *
     * @param list the labels of one sentence, in order; its elements may be modified
     * @return a list with one {@code Unit} per adjacent pair (empty for fewer than two labels);
     *     for an empty input the Kotlin empty list is returned
     */
    private final List<Unit> setOPlus(List<? extends Label> list) {
        final Iterator<? extends Label> labels = list.iterator();
        if (!labels.hasNext()) {
            return CollectionsKt.emptyList();
        }
        final List<Unit> results = new ArrayList<>();
        Label previous = labels.next();
        while (labels.hasNext()) {
            final Label current = labels.next();
            if (previous.getType() == IOBTag.Outside && current.getType() != IOBTag.Outside) {
                previous.setValue(current.getValue());
            }
            results.add(Unit.INSTANCE);
            previous = current;
        }
        return results;
    }

    /**
     * Creates a reader, storing every argument in the field of the same name.
     *
     * @param type stored in {@code type}; must not be null
     * @param filePath stored in {@code filePath}; must not be null
     * @param useOPlus whether {@code buildSentence} applies {@code setOPlus} to the labels
     * @param includes optional set of label values to keep; null keeps every non-empty value
     * @param maxSentences optional limit stored in {@code maxSentences}; may be null
     */
    public DatasetReader(
            @NotNull String type,
            @NotNull String filePath,
            boolean useOPlus,
            @Nullable Set<String> includes,
            @Nullable Integer maxSentences) {
        // Kotlin-generated null checks for the two non-null parameters.
        Intrinsics.checkParameterIsNotNull(type, "type");
        Intrinsics.checkParameterIsNotNull(filePath, "filePath");

        this.type = type;
        this.filePath = filePath;
        this.useOPlus = useOPlus;
        this.includes = includes;
        this.maxSentences = maxSentences;
    }

    /**
     * Compiler-generated constructor that applies default argument values. Each set bit in
     * {@code defaultsMask} replaces the corresponding argument with its default before delegating
     * to the primary constructor: bit 4 gives {@code useOPlus = false}, bit 8 gives
     * {@code includes = null} and bit 16 gives {@code maxSentences = null}.
     * The trailing marker parameter is not used.
     */
    public /* synthetic */ DatasetReader(String type, String filePath, boolean useOPlus, Set includes, Integer maxSentences, int defaultsMask, DefaultConstructorMarker marker) {
        this(
            type,
            filePath,
            (defaultsMask & 4) == 0 && useOPlus,
            (defaultsMask & 8) == 0 ? includes : null,
            (defaultsMask & 16) == 0 ? maxSentences : null);
    }
}
