package net.clementlevallois.stopwords;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/* loaded from: input_file:net/clementlevallois/stopwords/StopWordsRemover.class */
/**
 * Decides whether a term (a single word or a space-separated multi-word
 * expression) should be discarded as a stopword.
 *
 * <p>The general stopword lists come from {@code Stopwords.getStopWords(lang)}.
 * Callers can add field-specific stopwords, replace or extend the lists with
 * their own stopwords, force some terms to be kept and force others to be
 * removed.
 *
 * <p>Whole-term lookups (keep list, remove list, general stopword set) are
 * case-sensitive; only the individual tokens of a multi-word term are
 * lower-cased and trimmed before being looked up.
 */
public final class StopWordsRemover {

    /** Upper bound on how many entries of the "long" list feed the general stopword set. */
    private static final int MAX_GENERAL_STOPWORDS = 5000;

    /** Upper bound on how many entries of the "long" list are used as fallback short stopwords. */
    private static final int MAX_SHORT_STOPWORDS = 500;

    /** A term of three or more tokens is removed once its interior tokens score above this value. */
    private static final int MAX_ACCEPTED_GARBAGE = 3;

    /** Score added for an interior token found in the short stopword set. */
    private static final int SHORT_STOPWORD_PENALTY = 3;

    /** Score added for an interior token found in the field-specific stopword set. */
    private static final int FIELD_SPECIFIC_PENALTY = 2;

    private final int minWordLength;
    private final Map<String, Set<String>> stopWordsLongAndShort;
    private final List<String> stopwordsLong;

    private Set<String> setStopWordsFieldSpecificOrShort = new HashSet<>();
    private Set<String> setStopWordsShort = new HashSet<>();
    // NOTE(review): only populated by useUSerSuppliedStopwords(..., true);
    // addFieldSpecificStopWords does not feed this set - confirm this is intended.
    private Set<String> setStopwordsFieldSpecific = new HashSet<>();
    private Set<String> setStopWords = new HashSet<>();
    private final Set<String> setKeepWords = new HashSet<>();
    private final Set<String> setRemoveWords = new HashSet<>();

    /** Small manual demo: prints the verdict for the term "of textual". */
    public static void main(String[] strArr) throws Exception {
        Set<String> customStopWords = new HashSet<>();
        customStopWords.add("twitter");
        StopWordsRemover stopWordsRemover = new StopWordsRemover(3, "en");
        stopWordsRemover.addFieldSpecificStopWords(customStopWords);
        stopWordsRemover.addFieldSpecificStopWords(Stopwords.getScientificStopwordsInEnglish());
        System.out.println(stopWordsRemover.shouldItBeRemoved("of textual"));
    }

    /**
     * Creates a remover backed by the stopword lists returned by
     * {@code Stopwords.getStopWords(lang)}.
     *
     * @param minWordLength in a multi-word term, any token shorter than this
     *                      causes the whole term to be removed
     * @param lang          value handed to {@code Stopwords.getStopWords}
     *                      (for example {@code "en"})
     */
    public StopWordsRemover(int minWordLength, String lang) {
        this.minWordLength = minWordLength;
        this.stopWordsLongAndShort = Stopwords.getStopWords(lang);
        this.stopwordsLong = new ArrayList<>(stopWordsFor("long"));
        try {
            init();
        } catch (IOException e) {
            // Best effort: the failure is reported and the remover stays usable
            // with whatever was loaded before the exception.
            System.out.println("ex: " + e);
        }
    }

    /**
     * Registers terms that must never be removed, unless they are also
     * registered through {@link #addWordsToRemove(Set)}.
     *
     * @param set terms to keep; {@code null} is ignored
     */
    public void addStopWordsToKeep(Set<String> set) {
        if (set != null) {
            this.setKeepWords.addAll(set);
        }
    }

    /**
     * Registers terms that must always be removed. This takes precedence over
     * the keep list.
     *
     * @param set terms to remove; {@code null} is ignored
     */
    public void addWordsToRemove(Set<String> set) {
        if (set != null) {
            this.setRemoveWords.addAll(set);
        }
    }

    /**
     * Installs user-supplied stopwords.
     *
     * @param set     the user's stopwords; {@code null} is ignored, like in the
     *                other mutators of this class
     * @param replace when {@code true}, the four stopword sets (field-specific
     *                or short, short, field-specific, general) are each replaced
     *                by a copy of {@code set}; when {@code false}, {@code set}
     *                is added to the field-specific-or-short, short and general
     *                sets and the field-specific set is left untouched
     */
    public void useUSerSuppliedStopwords(Set<String> set, boolean replace) {
        if (set == null) {
            return;
        }
        if (replace) {
            this.setStopWordsFieldSpecificOrShort = new HashSet<>(set);
            this.setStopWordsShort = new HashSet<>(set);
            this.setStopwordsFieldSpecific = new HashSet<>(set);
            this.setStopWords = new HashSet<>(set);
        } else {
            this.setStopWordsFieldSpecificOrShort.addAll(set);
            this.setStopWordsShort.addAll(set);
            this.setStopWords.addAll(set);
        }
    }

    /**
     * Adds field-specific stopwords to the field-specific-or-short set and to
     * the general stopword set.
     *
     * @param set stopwords to add; {@code null} is ignored
     */
    public void addFieldSpecificStopWords(Set<String> set) {
        if (set != null) {
            this.setStopWordsFieldSpecificOrShort.addAll(set);
            this.setStopWords.addAll(set);
        }
    }

    /**
     * Fills the general and short stopword sets from the loaded lists.
     *
     * @throws IOException kept in the signature so the constructor's handler
     *                     stays valid whether or not the {@code Stopwords}
     *                     helper declares it
     */
    private void init() throws IOException {
        // subList's upper bound is exclusive, so the cap is min(limit, size):
        // capping at size - 1 would silently drop one stopword of the list.
        int available = this.stopwordsLong.size();
        List<String> generalLarge = this.stopwordsLong.subList(0, Math.min(MAX_GENERAL_STOPWORDS, available));
        List<String> generalShort = this.stopwordsLong.subList(0, Math.min(MAX_SHORT_STOPWORDS, available));

        this.setStopWords.addAll(generalLarge);
        this.setStopWords.addAll(Stopwords.getStopwordsValidForAllLanguages());

        // Use the dedicated "short" list when there is one, otherwise fall back
        // to the head of the "long" list.
        Set<String> dedicatedShort = stopWordsFor("short");
        if (dedicatedShort.isEmpty()) {
            this.setStopWordsShort.addAll(generalShort);
        } else {
            this.setStopWordsShort.addAll(dedicatedShort);
        }
        this.setStopWordsFieldSpecificOrShort.addAll(this.setStopWordsShort);
    }

    /** Returns the loaded stopwords stored under {@code key}, or an empty set when there are none. */
    private Set<String> stopWordsFor(String key) {
        Set<String> words = null;
        if (this.stopWordsLongAndShort != null) {
            words = this.stopWordsLongAndShort.get(key);
        }
        if (words == null) {
            return new HashSet<>();
        }
        return words;
    }

    /**
     * Tells whether a term should be discarded.
     *
     * <p>Order of precedence: a term on the remove list is always removed;
     * otherwise a term on the keep list is always kept; otherwise a term
     * without a space is removed when it is in the general stopword set, and a
     * term containing a space is judged token by token.
     *
     * <p>This method does not modify the state of the remover.
     *
     * @param str the term to evaluate; {@code null} is reported as removable
     * @return {@code true} if the term should be removed
     */
    public boolean shouldItBeRemoved(String str) {
        if (str == null) {
            return true;
        }
        if (this.setRemoveWords.contains(str)) {
            return true;
        }
        if (this.setKeepWords.contains(str)) {
            return false;
        }
        if (str.contains(" ")) {
            return isMultiWordTermStopword(str);
        }
        return this.setStopWords.contains(str);
    }

    /** Applies the token-level rules to a term that contains at least one space. */
    private boolean isMultiWordTermStopword(String term) {
        String[] tokens = term.split(" ");

        // Consecutive spaces produce empty tokens, which are caught here too.
        for (String token : tokens) {
            if (token.length() < this.minWordLength) {
                return true;
            }
        }

        if (tokens.length < 2) {
            // Only trailing spaces followed the word(s): no token-level rule applies.
            return false;
        }
        if (tokens.length == 2) {
            return this.setStopWordsFieldSpecificOrShort.contains(normalize(tokens[0]))
                    || this.setStopWordsFieldSpecificOrShort.contains(normalize(tokens[1]));
        }

        int lastIndex = tokens.length - 1;
        int garbage = 0;
        for (int index = 0; index <= lastIndex; index++) {
            String token = normalize(tokens[index]);
            boolean atEdge = index == 0 || index == lastIndex;
            if (atEdge) {
                // A stopword as first or last token disqualifies the whole term.
                if (this.setStopWordsFieldSpecificOrShort.contains(token)
                        || this.setStopWordsShort.contains(token)) {
                    return true;
                }
                if (this.setStopwordsFieldSpecific.contains(token)) {
                    garbage += FIELD_SPECIFIC_PENALTY;
                }
            } else if (this.setStopWordsShort.contains(token)) {
                garbage += SHORT_STOPWORD_PENALTY;
            } else if (this.setStopwordsFieldSpecific.contains(token)) {
                garbage += FIELD_SPECIFIC_PENALTY;
            }
        }
        return garbage > MAX_ACCEPTED_GARBAGE || this.setStopWords.contains(term);
    }

    /** Lower-cases and trims a token before it is looked up in a stopword set. */
    private static String normalize(String token) {
        return token.toLowerCase().trim();
    }
}
