package org.carrot2.text.vsm;

import com.carrotsearch.hppc.BitSet;
import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.sorting.IndirectComparator;
import com.carrotsearch.hppc.sorting.IndirectSort;
import org.carrot2.attrs.AttrComposite;
import org.carrot2.attrs.AttrDouble;
import org.carrot2.attrs.AttrInteger;
import org.carrot2.attrs.AttrObject;
import org.carrot2.language.TokenTypeUtils;
import org.carrot2.math.mahout.matrix.DoubleMatrix2D;
import org.carrot2.math.mahout.matrix.impl.DenseDoubleMatrix2D;
import org.carrot2.math.mahout.matrix.impl.SparseDoubleMatrix2D;
import org.carrot2.math.matrix.MatrixUtils;
import org.carrot2.text.preprocessing.PreprocessingContext;

/* loaded from: input_file:org/carrot2/text/vsm/TermDocumentMatrixBuilder.class */
/**
 * Builds the term-document and term-phrase matrices stored in a
 * {@link VectorSpaceModelContext}, based on the stems, words, phrases and labels found in the
 * associated {@link PreprocessingContext}.
 */
public class TermDocumentMatrixBuilder extends AttrComposite {
    /**
     * Multiplier applied to the weight of stems whose field bit mask includes the field named
     * {@code "title"}. Range 0..10, default 2.
     */
    public final AttrDouble titleWordsBoost = this.attributes.register("titleWordsBoost", AttrDouble.builder().label2("Title word boost").min(0.0d).max(10.0d).defaultValue(Double.valueOf(2.0d)));

    /**
     * Upper bound on the number of cells of the term-document matrix: the number of rows is
     * limited to this value divided by the document count. Minimum 5000, default 37500.
     */
    public final AttrInteger maximumMatrixSize = this.attributes.register("maximumMatrixSize", AttrInteger.builder().label2("Maximum term-document matrix size").min(5000).defaultValue(37500));

    /**
     * Maximum fraction of documents (0..1, default 0.9) a stem may occur in to be considered for
     * the term-document matrix.
     */
    public final AttrDouble maxWordDf = this.attributes.register("maxWordDf", AttrDouble.builder().label2("Maximum word document frequency").min(0.0d).max(1.0d).defaultValue(Double.valueOf(0.9d)));

    /**
     * Term weighting used for both matrices. Exposed through the {@code "termWeighting"} attribute
     * registered in the constructor, whose default supplier is {@code LogTfIdfTermWeighting::new}.
     */
    public TermWeighting termWeighting;

    /* JADX WARN: Type inference failed for: r3v1, types: [org.carrot2.attrs.AttrDouble$Builder] */
    /* JADX WARN: Type inference failed for: r3v10, types: [org.carrot2.attrs.AttrDouble$Builder] */
    /* JADX WARN: Type inference failed for: r3v6, types: [org.carrot2.attrs.AttrInteger$Builder] */
    /** Registers the {@code "termWeighting"} attribute backed by the {@link #termWeighting} field. */
    public TermDocumentMatrixBuilder() {
        this.attributes.register("termWeighting", AttrObject.builder(TermWeighting.class).label2("Term weighting for term-document matrix").getset(() -> {
            return this.termWeighting;
        }, termWeighting -> {
            this.termWeighting = termWeighting;
        }).defaultValue(LogTfIdfTermWeighting::new));
    }

    /**
     * Builds the term-document matrix and the stem-to-row mapping and stores them in
     * {@code vectorSpaceModelContext.termDocumentMatrix} and
     * {@code vectorSpaceModelContext.stemToRowIndex}.
     *
     * <p>Rows correspond to the highest-weighted stems returned by
     * {@link #computeRequiredStemIndices(PreprocessingContext)}, columns to documents. When the
     * document count is zero an empty matrix and an empty mapping are stored.
     *
     * @param vectorSpaceModelContext context providing the preprocessing data and receiving the
     *     results
     */
    public void buildTermDocumentMatrix(VectorSpaceModelContext vectorSpaceModelContext) {
        PreprocessingContext preprocessingContext = vectorSpaceModelContext.preprocessingContext;
        int documentCount = preprocessingContext.documentCount;
        int[] stemsTf = preprocessingContext.allStems.tf;
        int[][] stemsTfByDocument = preprocessingContext.allStems.tfByDocument;
        byte[] stemsFieldIndices = preprocessingContext.allStems.fieldIndices;
        if (documentCount == 0) {
            vectorSpaceModelContext.termDocumentMatrix = new DenseDoubleMatrix2D(0, 0);
            vectorSpaceModelContext.stemToRowIndex = new IntIntHashMap();
            return;
        }

        int titleFieldIndex = findTitleFieldIndex(preprocessingContext.allFields.name);
        int[] stemIndices = computeRequiredStemIndices(preprocessingContext);
        TermWeighting weighting = this.termWeighting;

        // Global weight of each candidate stem; used only to rank the stems.
        // The arguments are presumably (term frequency, document frequency, document count).
        double[] stemWeights = new double[stemIndices.length];
        for (int k = 0; k < stemIndices.length; k++) {
            int stemIndex = stemIndices[k];
            // tfByDocument holds (document index, tf) pairs, hence length / 2 documents.
            int documentFrequency = stemsTfByDocument[stemIndex].length / 2;
            stemWeights[k] = weighting.calculateTermWeight(stemsTf[stemIndex], documentFrequency, documentCount)
                    * getWeightBoost(titleFieldIndex, stemsFieldIndices[stemIndex]);
        }
        int[] order = IndirectSort.mergesort(0, stemWeights.length, new IndirectComparator.DescendingDoubleComparator(stemWeights));

        // Keep only as many top-weighted stems as fit into the configured matrix size.
        int maxRows = this.maximumMatrixSize.get().intValue() / documentCount;
        int rowCount = Math.min(maxRows, stemIndices.length);
        DenseDoubleMatrix2D matrix = new DenseDoubleMatrix2D(rowCount, documentCount);
        IntIntHashMap stemToRow = new IntIntHashMap();
        for (int row = 0; row < rowCount; row++) {
            int stemIndex = stemIndices[order[row]];
            int[] tfByDocument = stemsTfByDocument[stemIndex];
            int documentFrequency = tfByDocument.length / 2;
            // The boost depends only on the stem, so compute it once per row.
            double boost = getWeightBoost(titleFieldIndex, stemsFieldIndices[stemIndex]);
            for (int pair = 0; pair < documentFrequency; pair++) {
                int documentIndex = tfByDocument[pair * 2];
                int tfInDocument = tfByDocument[(pair * 2) + 1];
                matrix.set(row, documentIndex, weighting.calculateTermWeight(tfInDocument, documentFrequency, documentCount) * boost);
            }
            stemToRow.put(stemIndex, row);
        }
        vectorSpaceModelContext.termDocumentMatrix = matrix;
        vectorSpaceModelContext.stemToRowIndex = stemToRow;
    }

    /**
     * Builds the term-phrase matrix for all label features starting at
     * {@code allLabels.firstPhraseIndex} and stores its transposed ({@code viewDice()}) view in
     * {@code vectorSpaceModelContext.termPhraseMatrix}. Columns of the aligned matrix are
     * L2-normalized before transposition.
     *
     * <p>Does nothing when {@code firstPhraseIndex} is negative or the stem-to-row mapping is
     * empty. Reads {@code vectorSpaceModelContext.stemToRowIndex}, so
     * {@link #buildTermDocumentMatrix(VectorSpaceModelContext)} is expected to have run first.
     *
     * @param vectorSpaceModelContext context providing the input data and receiving the result
     */
    public void buildTermPhraseMatrix(VectorSpaceModelContext vectorSpaceModelContext) {
        PreprocessingContext preprocessingContext = vectorSpaceModelContext.preprocessingContext;
        IntIntHashMap stemToRowIndex = vectorSpaceModelContext.stemToRowIndex;
        int[] labelsFeatureIndex = preprocessingContext.allLabels.featureIndex;
        int firstPhraseIndex = preprocessingContext.allLabels.firstPhraseIndex;
        if (firstPhraseIndex < 0 || stemToRowIndex.size() <= 0) {
            return;
        }
        int[] phraseFeatureIndices = new int[labelsFeatureIndex.length - firstPhraseIndex];
        System.arraycopy(labelsFeatureIndex, firstPhraseIndex, phraseFeatureIndices, 0, phraseFeatureIndices.length);

        DoubleMatrix2D alignedMatrix = buildAlignedMatrix(vectorSpaceModelContext, phraseFeatureIndices, this.termWeighting);
        MatrixUtils.normalizeColumnL2(alignedMatrix, null);
        vectorSpaceModelContext.termPhraseMatrix = alignedMatrix.viewDice();
    }

    /**
     * Returns the index of the first field named {@code "title"}.
     *
     * @param fieldNames names of all fields
     * @return index of the title field, or -1 when there is none
     */
    private static int findTitleFieldIndex(String[] fieldNames) {
        for (int k = 0; k < fieldNames.length; k++) {
            if ("title".equals(fieldNames[k])) {
                return k;
            }
        }
        return -1;
    }

    /**
     * Returns the weight multiplier for a stem with the given field bit mask.
     *
     * @param titleFieldIndex index of the title field, or -1 when there is no such field
     * @param fieldIndices bit mask of the fields the stem occurred in (bit {@code n} = field {@code n})
     * @return the value of {@link #titleWordsBoost} when the title bit is set, otherwise 1
     */
    private double getWeightBoost(int titleFieldIndex, byte fieldIndices) {
        // Java masks shift distances to 5 bits, so an index of -1 would shift by 31 and match the
        // sign-extended bits of a negative byte. Check the range and drop the sign extension.
        boolean hasTitleBit = titleFieldIndex >= 0
                && titleFieldIndex < Byte.SIZE
                && ((fieldIndices & 0xFF) & (1 << titleFieldIndex)) != 0;
        if (hasTitleBit) {
            return this.titleWordsBoost.get().doubleValue();
        }
        return 1.0d;
    }

    /**
     * Collects the indices of stems that should become rows of the term-document matrix: stems
     * of single-word labels and stems of the non-common words of phrase labels, limited to those
     * whose document frequency ratio does not exceed {@link #maxWordDf}.
     *
     * @param preprocessingContext source of labels, words, phrases and stems
     * @return stem indices as produced by the bit set's int lookup container
     */
    private int[] computeRequiredStemIndices(PreprocessingContext preprocessingContext) {
        int[] labelsFeatureIndex = preprocessingContext.allLabels.featureIndex;
        int[] wordsStemIndex = preprocessingContext.allWords.stemIndex;
        short[] wordsType = preprocessingContext.allWords.type;
        int[][] phrasesWordIndices = preprocessingContext.allPhrases.wordIndices;
        int wordCount = wordsStemIndex.length;
        int[][] stemsTfByDocument = preprocessingContext.allStems.tfByDocument;
        int documentCount = preprocessingContext.documentCount;
        BitSet requiredStemIndices = new BitSet(labelsFeatureIndex.length);
        double maxDf = this.maxWordDf.get().doubleValue();
        for (int featureIndex : labelsFeatureIndex) {
            if (featureIndex < wordCount) {
                // Feature indices below the word count refer to single words.
                addStemIndex(wordsStemIndex, documentCount, stemsTfByDocument, requiredStemIndices, featureIndex, maxDf);
            } else {
                // Remaining feature indices refer to phrases, offset by the word count.
                for (int wordIndex : phrasesWordIndices[featureIndex - wordCount]) {
                    if (!TokenTypeUtils.isCommon(wordsType[wordIndex])) {
                        addStemIndex(wordsStemIndex, documentCount, stemsTfByDocument, requiredStemIndices, wordIndex, maxDf);
                    }
                }
            }
        }
        return requiredStemIndices.asIntLookupContainer().toArray();
    }

    /**
     * Marks the stem of the given word as required, unless the stem occurs in a larger fraction
     * of documents than {@code maxDf} allows.
     *
     * @param wordsStemIndex stem index of each word
     * @param documentCount total number of documents
     * @param stemsTfByDocument per stem, (document index, tf) pairs
     * @param requiredStemIndices bit set receiving accepted stem indices
     * @param wordIndex index of the word whose stem is considered
     * @param maxDf maximum allowed document frequency ratio
     */
    private void addStemIndex(int[] wordsStemIndex, int documentCount, int[][] stemsTfByDocument, BitSet requiredStemIndices, int wordIndex, double maxDf) {
        int stemIndex = wordsStemIndex[wordIndex];
        int documentFrequency = stemsTfByDocument[stemIndex].length / 2;
        // Divide in floating point: integer division would truncate the ratio to 0 for every stem
        // that does not occur in all documents, effectively disabling the threshold.
        if ((double) documentFrequency / documentCount <= maxDf) {
            requiredStemIndices.set(stemIndex);
        }
    }

    /**
     * Builds a matrix whose rows are aligned with the rows of the term-document matrix (through
     * {@code vectorSpaceModelContext.stemToRowIndex}) and whose columns correspond to the given
     * features. A cell is set to the weight of a stem when that stem belongs to the column's
     * feature and has a row in the mapping; stems without a row are skipped.
     *
     * @param vectorSpaceModelContext context providing the stem-to-row mapping and preprocessing data
     * @param iArr feature indices, one per column; values below the word count denote words,
     *     the remaining ones phrases offset by the word count
     * @param termWeighting weighting used to compute the cell values
     * @return a dense matrix with zero columns when {@code iArr} is empty, otherwise a sparse
     *     matrix with {@code stemToRowIndex.size()} rows and {@code iArr.length} columns
     */
    static DoubleMatrix2D buildAlignedMatrix(VectorSpaceModelContext vectorSpaceModelContext, int[] iArr, TermWeighting termWeighting) {
        IntIntHashMap stemToRowIndex = vectorSpaceModelContext.stemToRowIndex;
        if (iArr.length == 0) {
            return new DenseDoubleMatrix2D(stemToRowIndex.size(), 0);
        }
        SparseDoubleMatrix2D matrix = new SparseDoubleMatrix2D(stemToRowIndex.size(), iArr.length);
        PreprocessingContext preprocessingContext = vectorSpaceModelContext.preprocessingContext;
        int[] wordsStemIndex = preprocessingContext.allWords.stemIndex;
        int[] stemsTf = preprocessingContext.allStems.tf;
        int[][] stemsTfByDocument = preprocessingContext.allStems.tfByDocument;
        int[][] phrasesWordIndices = preprocessingContext.allPhrases.wordIndices;
        int documentCount = preprocessingContext.documentCount;
        int wordCount = wordsStemIndex.length;
        for (int column = 0; column < iArr.length; column++) {
            int featureIndex = iArr[column];
            // A single word is treated as a one-word phrase.
            int[] wordIndices = featureIndex < wordCount
                    ? new int[]{featureIndex}
                    : phrasesWordIndices[featureIndex - wordCount];
            for (int wordIndex : wordIndices) {
                int stemIndex = wordsStemIndex[wordIndex];
                int slot = stemToRowIndex.indexOf(stemIndex);
                if (stemToRowIndex.indexExists(slot)) {
                    double weight = termWeighting.calculateTermWeight(stemsTf[stemIndex], stemsTfByDocument[stemIndex].length / 2, documentCount);
                    matrix.setQuick(stemToRowIndex.indexGet(slot), column, weight);
                }
            }
        }
        return matrix;
    }
}
