package org.fnlp.nlp.duplicate;

import gnu.trove.iterator.TIntIntIterator;
import gnu.trove.list.array.TIntArrayList;
import gnu.trove.map.hash.TIntIntHashMap;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.fnlp.nlp.duplicate.FingerPrint;

/* loaded from: input_file:org/fnlp/nlp/duplicate/Similarity.class */
/**
 * Groups documents whose feature sets overlap strongly.
 *
 * <p>Processing happens in three steps, driven by {@link #duplicate(ArrayList)}:
 * <ol>
 *   <li>{@link #feature()} builds an inverted index from every feature string to the
 *       indices of the documents containing it;</li>
 *   <li>{@link #similarity()} walks that index on a thread pool and counts, for every
 *       pair of documents, how many features they share;</li>
 *   <li>{@code duplicate} turns the shared counts into a score
 *       {@code 2 * shared / (lenA + lenB)} and groups documents whose score exceeds
 *       {@link #thres}.</li>
 * </ol>
 *
 * <p>Instances are not safe for concurrent use by several callers; the only internal
 * concurrency is the worker pool created by {@link #similarity()}.
 */
public class Similarity {
    /**
     * For document {@code i}, maps the index of a higher-numbered document to the number
     * of features the two share. Slots are created lazily and stay {@code null} for
     * documents that never appear as the lower index of a counted pair.
     */
    public TIntIntHashMap[] similarityMap;

    /** Groups produced by the last call to {@link #duplicate(ArrayList)}. */
    public TreeSet<DocSim> dsMap;

    /**
     * Maximum allowed relative difference between the feature counts of two documents
     * ({@code |lenA - lenB| / max(lenA, lenB)}) for the pair to be counted at all.
     */
    private static final double lenDiffThresh = 0.3d;

    /** Inverted index: feature string to the indices of the documents containing it. */
    public TreeMap<String, TIntArrayList> locationMap;

    /** Fingerprint type handed to {@code FingerPrint.featureset}. */
    public FingerPrint.Type type;

    /** Number of distinct features of each document, indexed like {@link #docs}. */
    public int[] featureLen;

    /** The documents being compared. */
    public ArrayList<String> docs;

    /** Core and maximum size of the worker pool used by {@link #similarity()}. */
    private int numThreads;

    /** Capacity of the bounded task queue used by {@link #similarity()}. */
    public int capacity = 100;

    /** A pair is reported as duplicate when its score is strictly greater than this. */
    public double thres = 0.7d;

    /**
     * Task that counts shared features for every pair of documents in one posting list
     * of the inverted index.
     */
    public class CalcSimilarity implements Runnable {
        /** Indices of the documents that contain one particular feature. */
        private TIntArrayList al;

        /** Creates a task without a posting list; running it does nothing. */
        public CalcSimilarity() {
        }

        /**
         * @param tIntArrayList indices of the documents sharing one feature
         */
        public CalcSimilarity(TIntArrayList tIntArrayList) {
            this.al = tIntArrayList;
        }

        /**
         * Adds one shared feature to the count of every document pair in the posting
         * list whose feature counts differ by at most {@code lenDiffThresh} (relative).
         */
        @Override // java.lang.Runnable
        public void run() {
            if (this.al == null) {
                // Built with the no-arg constructor: nothing to process.
                return;
            }
            final int[] lengths = Similarity.this.featureLen;
            final int count = this.al.size();
            for (int a = 0; a < count; a++) {
                final int first = this.al.get(a);
                for (int b = a + 1; b < count; b++) {
                    final int second = this.al.get(b);
                    // Floating-point division is essential: with int operands the ratio
                    // truncates to 0 and the filter never rejects anything. If both
                    // lengths are 0 the ratio is NaN, the comparison is false and the
                    // pair is skipped instead of raising ArithmeticException.
                    final double lenDiff = Math.abs(lengths[first] - lengths[second])
                            / (double) Math.max(lengths[first], lengths[second]);
                    if (lenDiff <= Similarity.lenDiffThresh) {
                        // Always store the pair under its lower index.
                        Similarity.this.addSharedFeature(Math.min(first, second), Math.max(first, second));
                    }
                }
            }
        }
    }

    /**
     * @param i    number of worker threads used by {@link #similarity()}
     * @param type fingerprint type used when extracting features
     */
    public Similarity(int i, FingerPrint.Type type) {
        this.type = type;
        this.numThreads = i;
    }

    /**
     * Increments the shared-feature count of the pair {@code (lower, higher)}.
     *
     * <p>The per-document map is created while holding the lock on the array, so two
     * workers cannot both install a fresh map and lose each other's counts; the update
     * itself only locks the individual map.
     */
    private void addSharedFeature(int lower, int higher) {
        final TIntIntHashMap[] maps = this.similarityMap;
        TIntIntHashMap counts;
        synchronized (maps) {
            counts = maps[lower];
            if (counts == null) {
                counts = new TIntIntHashMap();
                maps[lower] = counts;
            }
        }
        synchronized (counts) {
            counts.adjustOrPutValue(higher, 1, 1);
        }
    }

    /**
     * Extracts the feature set of every document in {@link #docs}, records its size in
     * {@link #featureLen} and rebuilds {@link #locationMap}.
     */
    public void feature() {
        this.locationMap = new TreeMap<>();
        this.featureLen = new int[this.docs.size()];
        // Start at 0: every document, including the first, must be indexed.
        for (int doc = 0; doc < this.docs.size(); doc++) {
            Set<String> features = FingerPrint.featureset(this.docs.get(doc), this.type);
            this.featureLen[doc] = features.size();
            for (String feature : features) {
                TIntArrayList postings = this.locationMap.get(feature);
                if (postings == null) {
                    postings = new TIntArrayList();
                    this.locationMap.put(feature, postings);
                }
                postings.add(doc);
            }
        }
    }

    /**
     * Runs the whole pipeline on {@code arrayList} and stores the resulting groups in
     * {@link #dsMap}.
     *
     * <p>Each group starts with a document that has not already been absorbed into an
     * earlier group and has at least one counted pair; it is followed by every
     * higher-numbered document whose score against it exceeds {@link #thres}. A group
     * may therefore contain only its first document.
     *
     * @param arrayList the documents to compare
     * @throws Exception if the similarity computation is interrupted or fails
     */
    public void duplicate(ArrayList<String> arrayList) throws Exception {
        this.docs = arrayList;
        this.dsMap = new TreeSet<>();
        feature();
        similarity();
        System.out.println("去重复");
        boolean[] absorbed = new boolean[arrayList.size()];
        for (int doc = 0; doc < arrayList.size(); doc++) {
            TIntIntHashMap counts = this.similarityMap[doc];
            if (absorbed[doc] || counts == null) {
                continue;
            }
            ArrayList<Integer> group = new ArrayList<Integer>();
            group.add(Integer.valueOf(doc));
            TIntIntIterator it = counts.iterator();
            for (int remaining = counts.size(); remaining > 0; remaining--) {
                it.advance();
                int other = it.key();
                // Floating-point division: the int form only ever yields 0 or 1, which
                // made the threshold accept identical feature sets only.
                double score = (it.value() * 2.0d) / (this.featureLen[doc] + this.featureLen[other]);
                if (score > this.thres) {
                    absorbed[other] = true;
                    group.add(Integer.valueOf(other));
                }
            }
            this.dsMap.add(new DocSim(group));
        }
    }

    /** Prints every group in {@link #dsMap} to standard output, one per line. */
    public void printDocSim() {
        for (DocSim group : this.dsMap) {
            System.out.println(group.toString());
        }
    }

    /**
     * Fills {@link #similarityMap} by running one {@link CalcSimilarity} task per entry
     * of {@link #locationMap} on a fixed-size thread pool, and returns only after every
     * task has finished.
     *
     * @throws InterruptedException if the calling thread is interrupted while waiting
     *                              for queue space or for the pool to terminate
     */
    public void similarity() throws InterruptedException {
        System.out.println("相似度");
        ThreadPoolExecutor pool = new ThreadPoolExecutor(this.numThreads, this.numThreads, 1000L,
                TimeUnit.SECONDS, new ArrayBlockingQueue<Runnable>(this.capacity));
        this.similarityMap = new TIntIntHashMap[this.docs.size()];
        try {
            for (TIntArrayList postings : this.locationMap.values()) {
                // Back-pressure: this thread is the only submitter, so waiting for a
                // free queue slot guarantees execute() is not rejected.
                while (pool.getQueue().remainingCapacity() == 0) {
                    Thread.sleep(10L);
                }
                pool.execute(new CalcSimilarity(postings));
            }
        } finally {
            // Always release the worker threads, even if submission fails.
            pool.shutdown();
        }
        // getActiveCount() is only approximate and can read 0 while tasks are still
        // queued; awaitTermination waits for real completion and makes the workers'
        // writes visible to this thread.
        pool.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
    }

    /**
     * Empty entry point.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) throws IOException, ClassNotFoundException {
    }
}
