package org.fnlp.nlp.duplicate;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.fnlp.nlp.pipe.NGram;

/* loaded from: input_file:org/fnlp/nlp/duplicate/FingerPrint.class */
/**
 * Static helpers that turn a piece of text into features (strings or sets of
 * strings) for duplicate detection.
 *
 * <p>Three feature styles are implemented via {@link #feature(String, Type)} and
 * {@link #featureset(String, Type)}: whitespace tokens, character bigrams and
 * single characters. {@link #print(String)} is a separate, regex-based
 * extractor and is not reachable through {@code feature}/{@code featureset}.
 */
public class FingerPrint {
    /**
     * Matches one character, then a run of one or more "separator" items
     * (Unicode punctuation, Unicode symbols, the ideographic space U+3000, an
     * ASCII space, any whitespace, or one of the characters 的 / 把 / 和 / 也),
     * then one more character. Compiled once because it is reused by every
     * call to {@link #print(String)}.
     */
    private static final Pattern BOUNDARY_PATTERN = Pattern.compile(".(\\pP|\\pS|\u3000| |\\s|的|把|和|也)+.");

    /** Selects which kind of feature to extract. */
    public enum Type {
        Print,
        Char,
        NGram,
        WhiteSpace
    }

    /**
     * Small demo: prints the result of {@link #print(String)} for three sample
     * sentences to standard output.
     *
     * @param strArr command-line arguments; ignored
     */
    public static void main(String[] strArr) {
        System.out.println(print("心  把那些挫折好好分析下  让自己的心态能更好  以后遇到事情也不会这么难受  别自己和自己过不去  那叫没事找事儿…  快睡吧"));
        System.out.println(print("明天我问他。你们早点睡觉。明天场地呢"));
        System.out.println(print("自己的心态"));
    }

    /**
     * Collects every non-overlapping match of the separator pattern in
     * {@code str}: a character, the separator run that follows it, and the
     * character after that run.
     *
     * @param str text to scan; must not be {@code null}
     * @return the distinct matches in natural (sorted) string order; empty if
     *         nothing matched
     */
    public static TreeSet<String> print(String str) {
        TreeSet<String> matches = new TreeSet<>();
        Matcher matcher = BOUNDARY_PATTERN.matcher(str);
        while (matcher.find()) {
            matches.add(matcher.group());
        }
        return matches;
    }

    /**
     * Joins the n-grams that {@code NGram.ngram} produces for {@code str} into
     * one string, writing a single space after every n-gram (including the
     * last one).
     *
     * @param str text to split into n-grams
     * @param i   n-gram size handed to {@code NGram.ngram}
     * @return the n-grams in the order returned by {@code NGram.ngram}, each
     *         followed by a space; the empty string if there are none
     */
    public static String ngram(String str, int i) {
        ArrayList<String> grams = NGram.ngram(str, new int[]{i});
        StringBuilder joined = new StringBuilder();
        for (String gram : grams) {
            joined.append(gram).append(" ");
        }
        return joined.toString();
    }

    /**
     * Returns the set of n-grams of {@code str}; a thin wrapper around
     * {@code NGram.ngramSet}.
     *
     * @param str text to split into n-grams
     * @param i   n-gram size handed to {@code NGram.ngramSet}
     * @return whatever {@code NGram.ngramSet} returns for a single size {@code i}
     */
    public static Set<String> ngramSet(String str, int i) {
        return NGram.ngramSet(str, new int[]{i});
    }

    /**
     * Produces the string-valued feature of {@code str} for the given type.
     *
     * <ul>
     *   <li>{@code WhiteSpace}: {@code str} itself, unchanged</li>
     *   <li>{@code NGram}: space-joined 2-grams</li>
     *   <li>{@code Char}: space-joined 1-grams</li>
     * </ul>
     *
     * @param str  text to extract the feature from
     * @param type feature kind
     * @return the feature string, or {@code null} when {@code type} is
     *         {@code Print}, {@code null} or otherwise unhandled (kept as-is
     *         for backward compatibility; callers must null-check)
     */
    public static String feature(String str, Type type) {
        if (type == Type.WhiteSpace) {
            return whitespace(str);
        }
        if (type == Type.NGram) {
            return ngram(str, 2);
        }
        if (type == Type.Char) {
            return ngram(str, 1);
        }
        return null;
    }

    /**
     * Whitespace-separated text is already in the "space-joined tokens" form
     * the other string features use, so it is returned untouched.
     */
    private static String whitespace(String str) {
        return str;
    }

    /**
     * Produces the set-valued feature of {@code str} for the given type.
     *
     * <ul>
     *   <li>{@code WhiteSpace}: the distinct non-empty whitespace-separated tokens</li>
     *   <li>{@code NGram}: the set of 2-grams</li>
     *   <li>{@code Char}: the set of 1-grams</li>
     * </ul>
     *
     * @param str  text to extract the features from
     * @param type feature kind
     * @return the feature set, or {@code null} when {@code type} is
     *         {@code Print}, {@code null} or otherwise unhandled (kept as-is
     *         for backward compatibility; callers must null-check)
     */
    public static Set<String> featureset(String str, Type type) {
        if (type == Type.WhiteSpace) {
            return whitespaceSet(str);
        }
        if (type == Type.NGram) {
            return ngramSet(str, 2);
        }
        if (type == Type.Char) {
            return ngramSet(str, 1);
        }
        return null;
    }

    /**
     * Splits {@code str} on runs of whitespace and returns the distinct tokens.
     *
     * <p>{@code String.split} yields an empty first element when the input
     * starts with whitespace, and a single empty element for the empty string.
     * Those empty strings are not tokens, so they are skipped; otherwise
     * unrelated texts would share a spurious "" feature.
     *
     * @param str text to tokenize; must not be {@code null}
     * @return mutable set of non-empty tokens; empty if {@code str} is empty or
     *         all whitespace
     */
    private static Set<String> whitespaceSet(String str) {
        Set<String> tokens = new HashSet<>();
        for (String token : str.split("\\s+")) {
            if (!token.isEmpty()) {
                tokens.add(token);
            }
        }
        return tokens;
    }
}
