package org.carrot2.text.preprocessing;

import com.carrotsearch.hppc.BitSet;
import com.carrotsearch.hppc.ByteArrayList;
import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.IntStack;
import com.carrotsearch.hppc.ShortArrayList;
import com.carrotsearch.hppc.sorting.IndirectSort;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.carrot2.language.Tokenizer;
import org.carrot2.util.CharArrayComparators;

/* loaded from: input_file:org/carrot2/text/preprocessing/CaseNormalizer.class */
/**
 * Groups tokens whose images compare equal under
 * {@link CharArrayComparators#CASE_INSENSITIVE_CHAR_ARRAY_COMPARATOR} into words and
 * stores per-word data (representative image, type, total frequency, per-document
 * encoding, field bits) in the {@link PreprocessingContext}.
 */
final class CaseNormalizer {
    /**
     * Builds the {@code allWords} arrays and {@code allTokens.wordIndex} from the token
     * arrays already present in the context.
     *
     * <p>Token indices are sorted by image, then adjacent entries are compared pairwise:
     * exactly equal images belong to the same case variant, images equal ignoring case
     * belong to the same word, anything else starts a new word. The image of the most
     * frequent case variant represents the word.
     *
     * <p>NOTE(review): a word is only emitted while comparing it with the following
     * sorted entry, so a run that extends to the very last sorted position is never
     * emitted. Presumably the token list always ends with a {@code null}-image
     * terminator that sorts last; confirm against the tokenizer.
     *
     * @param preprocessingContext context whose {@code allTokens} and {@code allFields}
     *     are read and whose {@code allTokens.wordIndex} and {@code allWords} arrays are
     *     overwritten
     * @param i minimum value that both the number of collected document entries and half
     *     the length of the word's sparse encoding must reach for the word to be emitted
     *     (presumably a document-frequency threshold; confirm against
     *     {@code SparseArray.toSparseEncoding})
     */
    public void normalize(PreprocessingContext preprocessingContext, int i) {
        final int threshold = i;

        // Local references to the token arrays.
        final char[][] tokenImages = preprocessingContext.allTokens.image;
        final short[] tokenTypes = preprocessingContext.allTokens.type;
        final int[] tokenDocuments = preprocessingContext.allTokens.documentIndex;
        final byte[] tokenFields = preprocessingContext.allTokens.fieldIndex;
        final int tokenCount = tokenImages.length;

        // Indirect sort: order[k] is the token index at sorted position k.
        final int[] order =
                IndirectSort.mergesort(
                        tokenImages,
                        0,
                        tokenCount,
                        CharArrayComparators.NORMALIZING_CHAR_ARRAY_COMPARATOR);

        // Holders for the word arrays; entry k of each list describes word k.
        final List<char[]> wordImages = new ArrayList<char[]>();
        final IntArrayList wordTf = new IntArrayList();
        final List<int[]> wordTfByDocument = new ArrayList<int[]>();
        final ByteArrayList wordFieldIndices = new ByteArrayList();
        final ShortArrayList wordTypes = new ShortArrayList();

        // Tokens that do not end up in any emitted word keep -1.
        final int[] wordIndexes = new int[tokenCount];
        Arrays.fill(wordIndexes, -1);

        int variantTf = 1; // occurrences of the current case variant
        int maxVariantTf = 1; // occurrences of the most frequent variant of the current word
        int totalTf = 1; // occurrences of the current word, all variants together
        int runStart = 0; // sorted position at which the current word starts
        // Token index of the most frequent variant; guarded so an empty token array
        // does not fail on order[0] (the loop below does not run in that case).
        int bestVariantToken = tokenCount > 0 ? order[0] : -1;

        final BitSet fieldsSeen = new BitSet(preprocessingContext.allFields.name.length);
        final IntStack documents = new IntStack();
        if (tokenCount > 0 && tokenDocuments[order[0]] >= 0) {
            documents.push(tokenDocuments[order[0]]);
        }

        for (int pos = 0; pos < order.length - 1; pos++) {
            final int token = order[pos];
            final int nextToken = order[pos + 1];
            final char[] image = tokenImages[token];

            // A null image ends processing.
            if (image == null) {
                break;
            }

            // Tokens of non-indexed types never contribute to a word: restart the run
            // at the next sorted position.
            if (isNotIndexed(tokenTypes[token])) {
                runStart = pos + 1;
                bestVariantToken = nextToken;
                resetForNewTokenImage(tokenDocuments, order, fieldsSeen, documents, pos);
                continue;
            }

            fieldsSeen.set(tokenFields[token]);

            final char[] nextImage = tokenImages[nextToken];
            final int nextDocument = tokenDocuments[nextToken];

            // Same case variant continues.
            if (CharArrayComparators.FAST_CHAR_ARRAY_COMPARATOR.compare(image, nextImage) == 0) {
                variantTf++;
                totalTf++;
                documents.push(nextDocument);
                continue;
            }

            // The case variant ends here; remember it if it is the most frequent so far.
            if (maxVariantTf < variantTf) {
                maxVariantTf = variantTf;
                bestVariantToken = token;
            }
            // Always restart the per-variant counter. Resetting it only when a new
            // maximum was found would let a less frequent variant's count leak into the
            // next variant and could select the wrong representative image.
            variantTf = 1;

            // Same word, different case variant.
            if (CharArrayComparators.CASE_INSENSITIVE_CHAR_ARRAY_COMPARATOR.compare(image, nextImage) == 0) {
                totalTf++;
                documents.push(nextDocument);
                continue;
            }

            // The word ends here. The stack size is checked first as a cheap filter
            // before the sparse encoding is computed and checked.
            if (documents.size() >= threshold) {
                final int[] tfByDocument = SparseArray.toSparseEncoding(documents);
                if ((tfByDocument.length >> 1) >= threshold) {
                    wordTfByDocument.add(tfByDocument);
                    wordImages.add(tokenImages[bestVariantToken]);
                    wordTypes.add(tokenTypes[bestVariantToken]);
                    wordTf.add(totalTf);
                    // Only the first word of the bit set, narrowed to a byte, is kept.
                    wordFieldIndices.add((byte) fieldsSeen.bits[0]);

                    // Point every token of the finished run at the new word.
                    final int wordIndex = wordImages.size() - 1;
                    for (int j = runStart; j <= pos; j++) {
                        wordIndexes[order[j]] = wordIndex;
                    }
                }
            }

            // Start a new word at the next sorted position.
            totalTf = 1;
            maxVariantTf = 1;
            bestVariantToken = nextToken;
            runStart = pos + 1;
            resetForNewTokenImage(tokenDocuments, order, fieldsSeen, documents, pos);
        }

        preprocessingContext.allTokens.wordIndex = wordIndexes;
        preprocessingContext.allWords.image = wordImages.toArray(new char[wordImages.size()][]);
        preprocessingContext.allWords.tf = wordTf.toArray();
        preprocessingContext.allWords.tfByDocument =
                wordTfByDocument.toArray(new int[wordTfByDocument.size()][]);
        preprocessingContext.allWords.fieldIndices = wordFieldIndices.toArray();
        preprocessingContext.allWords.type = wordTypes.toArray();
    }

    /**
     * Clears the per-word field bits and document stack, then seeds the stack with the
     * document index of the token at sorted position {@code i + 1} when that index is
     * non-negative.
     *
     * @param iArr document index of each token
     * @param iArr2 token indices in sorted order
     * @param bitSet field bits collected for the current word
     * @param intStack document indices collected for the current word
     * @param i sorted position of the token that ends the current run
     */
    private void resetForNewTokenImage(int[] iArr, int[] iArr2, BitSet bitSet, IntStack intStack, int i) {
        bitSet.clear();
        intStack.clear();
        final int nextDocument = iArr[iArr2[i + 1]];
        if (nextDocument >= 0) {
            intStack.push(nextDocument);
        }
    }

    /**
     * Tells whether tokens of the given type are excluded from word building.
     *
     * @param i token type value
     * @return {@code true} for type values 3 and 6, and for any type carrying the
     *     {@link Tokenizer#TF_SEPARATOR_SENTENCE} flag
     */
    private boolean isNotIndexed(int i) {
        // NOTE(review): 3 and 6 are presumably inlined Tokenizer token-type constants
        // (punctuation / full URL?); confirm against Tokenizer and name them.
        return i == 3 || i == 6 || (i & Tokenizer.TF_SEPARATOR_SENTENCE) != 0;
    }
}
