package org.fnlp.nlp.duplicate;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Scanner;
import java.util.TreeMap;
import java.util.TreeSet;
import org.fnlp.nlp.duplicate.FingerPrint;

/* loaded from: input_file:org/fnlp/nlp/duplicate/DupDetector.class */
/**
 * Loads a text file as one document per line, groups the documents with a
 * {@link SimilaritySlow} similarity computation, and writes the leading groups
 * (or their features) to an output file.
 *
 * <p>Typical use is {@link #sort2File} or {@link #sortFeats2File}; both call
 * {@link #sort} first, which populates the internal state that
 * {@link #getTopMessage} reads.
 */
public class DupDetector {
    /** Documents read by the most recent successful {@link #loadData} call. */
    private ArrayList<Documents> docs;
    /** Thread count handed to the similarity implementation. */
    private final int numThreads;
    /** Result of the most recent {@link #sort} call; {@code null} until then. */
    private TreeSet<DocSim> dsMap;
    public TreeMap<Integer, DocSim> map = new TreeMap<>();
    public TreeSet<DocSim> resultMap = new TreeSet<>();
    private ISimilarity sim = null;
    /** When true, each message from {@link #getTopMessage} is prefixed with its group size. */
    private boolean bFreq = false;
    /** Lines shorter than this (after trimming) are ignored by {@link #loadData}. */
    private int minLen = 30;

    /**
     * Creates a detector.
     *
     * @param i number of threads passed to the similarity implementation
     */
    public DupDetector(int i) {
        this.numThreads = i;
    }

    /**
     * Reads the given UTF-8 file line by line and keeps every trimmed line whose
     * length is at least the minimum length as a {@link Documents} instance.
     *
     * <p>The document list is replaced only after the whole file has been read,
     * so a failed load leaves any previously loaded documents untouched.
     *
     * @param str path of the input file
     * @throws IOException if the file cannot be opened or a read error occurs
     */
    public void loadData(String str) throws IOException {
        ArrayList<Documents> loaded = new ArrayList<>();
        // try-with-resources closes both the scanner and the stream even when
        // reading or document construction throws.
        try (FileInputStream fileInputStream = new FileInputStream(str);
                Scanner scanner = new Scanner(fileInputStream, "UTF8")) {
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine().trim();
                if (line.length() >= this.minLen) {
                    loaded.add(new Documents(line));
                }
            }
            // Scanner swallows read errors and just reports end of input;
            // surface them so a truncated read is not mistaken for success.
            IOException readError = scanner.ioException();
            if (readError != null) {
                throw readError;
            }
        }
        this.docs = loaded;
    }

    /**
     * Returns the content of the first document of each of the first {@code i}
     * groups, in the iteration order of the sorted result set.
     *
     * <p>Must be called after {@link #sort}; before that the result set is
     * {@code null} and this method fails with a {@link NullPointerException}.
     *
     * @param i maximum number of messages to return; values {@code <= 0} yield an empty list
     * @return a new mutable list with at most {@code i} entries
     */
    public List<String> getTopMessage(int i) {
        List<String> messages = new ArrayList<>();
        for (DocSim group : this.dsMap) {
            if (messages.size() >= i) {
                break;
            }
            String message = this.docs.get(group.ids.get(0).intValue()).content;
            if (this.bFreq) {
                message = String.valueOf(group.ids.size()) + "\n" + message;
            }
            messages.add(message);
        }
        return messages;
    }

    /**
     * Loads the input file and computes the duplicate groups with a
     * {@link SimilaritySlow} instance built from the configured thread count and
     * the given fingerprint type.
     *
     * @param str  path of the input file
     * @param type fingerprint type passed to the similarity implementation
     * @throws Exception if loading fails or the similarity computation throws
     */
    public void sort(String str, FingerPrint.Type type) throws Exception {
        loadData(str);
        this.sim = new SimilaritySlow(this.numThreads, type);
        this.dsMap = this.sim.duplicate(this.docs);
    }

    /**
     * Runs {@link #sort} on the input file and writes the top {@code i} messages
     * (see {@link #getTopMessage}) to the output file, one {@code println} per message.
     *
     * @param str  path of the input file
     * @param str2 path of the output file, written as UTF-8
     * @param type fingerprint type passed to {@link #sort}
     * @param i    maximum number of messages to write
     * @throws Exception if sorting fails or the output file cannot be opened
     */
    public void sort2File(String str, String str2, FingerPrint.Type type, int i) throws Exception {
        sort(str, type);
        List<String> topMessage = getTopMessage(i);
        // The writer is opened only after sorting succeeded, and is closed
        // even if writing throws.
        try (PrintWriter printWriter = new PrintWriter(str2, "utf8")) {
            for (String message : topMessage) {
                printWriter.println(message);
            }
        }
    }

    /**
     * Runs {@link #sort} on the input file and, for each group in result order,
     * collects the union of the features of all its documents. Groups whose
     * feature union has between 2 and 10 entries (inclusive) are written as the
     * group size on one line followed by the space-separated features on the
     * next; other groups are skipped and do not count toward the limit.
     *
     * @param str  path of the input file
     * @param str2 path of the output file, written as UTF-8
     * @param type fingerprint type used for sorting and feature extraction
     * @param i    maximum number of groups to write
     * @throws Exception if sorting fails or the output file cannot be opened
     */
    public void sortFeats2File(String str, String str2, FingerPrint.Type type, int i) throws Exception {
        sort(str, type);
        try (PrintWriter printWriter = new PrintWriter(str2, "utf8")) {
            int written = 0;
            Iterator<DocSim> it = this.dsMap.iterator();
            while (it.hasNext() && written < i) {
                DocSim group = it.next();
                // Element type is Object because the declared return type of
                // FingerPrint.featureset is not visible from this file.
                TreeSet<Object> features = new TreeSet<>();
                for (int k = 0; k < group.ids.size(); k++) {
                    String content = this.docs.get(group.ids.get(k).intValue()).content;
                    features.addAll(FingerPrint.featureset(content, type));
                }
                int featureCount = features.size();
                if (featureCount > 1 && featureCount <= 10) {
                    printWriter.println(group.ids.size());
                    for (Object feature : features) {
                        printWriter.print((String) feature);
                        printWriter.print(" ");
                    }
                    printWriter.println();
                    written++;
                }
            }
        }
    }

    /**
     * Command-line entry point: sorts the file named by the first argument (or a
     * built-in default path) with character fingerprints and writes the top 500
     * messages to the input path with {@code ".s"} appended.
     *
     * @param strArr optional single argument: the input file path
     * @throws Exception if sorting or writing fails
     */
    public static void main(String[] strArr) throws Exception {
        DupDetector dupDetector = new DupDetector(8);
        String str = strArr.length > 0 ? strArr[0] : "./tmp/filterByTopic/filterhealth.y";
        dupDetector.sort2File(str, str + ".s", FingerPrint.Type.Char, 500);
        System.out.println("done");
    }
}
