package org.apache.lucene.analysis.hebrew;

import com.code972.hebmorph.DictionaryLoader;
import com.code972.hebmorph.HebrewUtils;
import com.code972.hebmorph.LookupTolerators;
import com.code972.hebmorph.MorphData;
import com.code972.hebmorph.WordType;
import com.code972.hebmorph.datastructures.DictHebMorph;
import com.code972.hebmorph.datastructures.DictRadix;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;

/* loaded from: input_file:org/apache/lucene/analysis/hebrew/HebrewAnalyzer.class */
/**
 * Abstract base for Hebrew analyzers backed by a {@link DictHebMorph} dictionary.
 *
 * <p>Besides holding the dictionary and the optional custom tokenization cases, this class
 * offers helpers that classify a word as Hebrew / non-Hebrew / unrecognized by consulting the
 * dictionary, optionally stripping a known prefix and optionally using a tolerant lookup.
 */
public abstract class HebrewAnalyzer extends Analyzer {
    /**
     * Placeholder value stored with every custom tokenization case; this class never reads
     * the value back.
     */
    private static final Byte dummyData = (byte) 0;

    /** Longest word (in chars) for which the tolerant lookup is attempted. */
    private static final int MAX_TOLERANT_LOOKUP_LENGTH = 20;

    /** Dictionary used by {@link #isRecognizedWord(String, boolean)}. */
    protected DictHebMorph dict;

    /** Marker character available to subclasses (not used in this class). */
    protected final char originalTermSuffix = '$';

    /** Custom tokenization cases; {@code null} until {@link #setCustomTokenizationCases} loads them. */
    protected DictRadix<Byte> SPECIAL_TOKENIZATION_CASES;

    /** Common-word set available to subclasses; never assigned in this class. */
    protected CharArraySet commonWords;

    /**
     * Loads custom tokenization cases from a UTF-8 word list and installs them on this analyzer.
     *
     * <p>The stream is parsed with {@link WordlistLoader#getSnowballWordSet}; every resulting
     * word becomes a key in a fresh {@link DictRadix}. When {@code inputStream} is {@code null}
     * nothing is loaded and the currently installed cases are returned unchanged.
     *
     * @param inputStream word list to read, or {@code null} to leave the current cases as they are
     * @return the tokenization cases now installed on this analyzer (may be {@code null})
     * @throws IOException if reading the word list fails
     */
    public DictRadix<Byte> setCustomTokenizationCases(InputStream inputStream) throws IOException {
        if (inputStream != null) {
            CharArraySet words = WordlistLoader.getSnowballWordSet(
                    IOUtils.getDecodingReader(inputStream, StandardCharsets.UTF_8));
            DictRadix<Byte> cases = new DictRadix<>(false);
            // CharArraySet iterates as Object; the original code relies on each element
            // being a char[].
            for (Object word : words) {
                cases.addNode((char[]) word, dummyData);
            }
            this.SPECIAL_TOKENIZATION_CASES = cases;
        }
        return this.SPECIAL_TOKENIZATION_CASES;
    }

    /**
     * Creates an analyzer that uses the given dictionary.
     *
     * @param dictHebMorph dictionary to consult
     * @throws IOException declared for compatibility with existing callers and subclasses
     */
    public HebrewAnalyzer(DictHebMorph dictHebMorph) throws IOException {
        this.dict = dictHebMorph;
    }

    /**
     * Creates an analyzer that uses whatever {@link DictionaryLoader#lookForDefaultDictionary()}
     * returns.
     *
     * @throws IOException if the default dictionary cannot be loaded
     */
    public HebrewAnalyzer() throws IOException {
        this(DictionaryLoader.lookForDefaultDictionary());
    }

    /**
     * Tells whether the given text contains at least one Hebrew letter, as decided by
     * {@link HebrewUtils#isHebrewLetter}.
     *
     * @param charSequence text to inspect
     * @return {@code true} if any character is a Hebrew letter, {@code false} otherwise
     *     (including for an empty sequence)
     */
    public static boolean isHebrewWord(CharSequence charSequence) {
        for (int i = 0; i < charSequence.length(); i++) {
            if (HebrewUtils.isHebrewLetter(charSequence.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Classifies a word using this analyzer's dictionary.
     *
     * @param str word to classify
     * @param z whether to fall back to tolerant lookups when exact lookups fail
     * @return the classification, see {@link #isRecognizedWord(String, boolean, DictHebMorph)}
     */
    public WordType isRecognizedWord(String str, boolean z) {
        return isRecognizedWord(str, z, this.dict);
    }

    /**
     * Classifies a word against the given dictionary.
     *
     * <p>Checks are made in this order, returning on the first hit:
     * <ol>
     *   <li>no Hebrew letter at all: {@link WordType#NON_HEBREW};</li>
     *   <li>exact dictionary hit, or exact hit after dropping a trailing {@code '}:
     *       {@link WordType#HEBREW};</li>
     *   <li>a leading substring found in the prefix table whose remainder is an exact hit that
     *       permits that prefix: {@link WordType#HEBREW_WITH_PREFIX};</li>
     *   <li>only when {@code z} is set and the word is at most 20 chars long: tolerant hit on the
     *       whole word ({@link WordType#HEBREW_TOLERATED}) or on the remainder after a known
     *       prefix ({@link WordType#HEBREW_TOLERATED_WITH_PREFIX});</li>
     *   <li>otherwise {@link WordType#UNRECOGNIZED}.</li>
     * </ol>
     *
     * @param str word to classify
     * @param z whether to fall back to tolerant lookups when exact lookups fail
     * @param dictHebMorph dictionary to consult
     * @return the classification of {@code str}
     */
    public static WordType isRecognizedWord(String str, boolean z, DictHebMorph dictHebMorph) {
        HashMap<String, Integer> pref = dictHebMorph.getPref();
        DictRadix<MorphData> radix = dictHebMorph.getRadix();
        if (!isHebrewWord(str)) {
            return WordType.NON_HEBREW;
        }

        if (lookupOrNull(dictHebMorph, str) != null) {
            return WordType.HEBREW;
        }
        if (str.endsWith("'")
                && lookupOrNull(dictHebMorph, str.substring(0, str.length() - 1)) != null) {
            return WordType.HEBREW;
        }

        // Try every leading substring as a prefix, always leaving at least one char for the
        // remainder. Stop at the first length that is not in the prefix table.
        for (int prefixLength = 1; prefixLength < str.length(); prefixLength++) {
            Integer prefixMask = pref.get(str.substring(0, prefixLength));
            if (prefixMask == null) {
                break;
            }
            MorphData morphData = lookupOrNull(dictHebMorph, str.substring(prefixLength));
            if (morphData != null && (morphData.getPrefixes() & prefixMask.intValue()) > 0) {
                for (int i = 0; i < morphData.getLemmas().length; i++) {
                    if ((morphData.getLemmas()[i].getPrefix().getValue() & prefixMask.intValue()) > 0) {
                        return WordType.HEBREW_WITH_PREFIX;
                    }
                }
            }
        }

        if (!z || str.length() > MAX_TOLERANT_LOOKUP_LENGTH) {
            return WordType.UNRECOGNIZED;
        }

        List<DictRadix<MorphData>.LookupResult> tolerated =
                radix.lookupTolerant(str, LookupTolerators.TolerateEmKryiaAll);
        if (tolerated != null && tolerated.size() > 0) {
            return WordType.HEBREW_TOLERATED;
        }

        // Same prefix walk as above, but matching the remainder tolerantly.
        for (int prefixLength = 1; prefixLength < str.length(); prefixLength++) {
            Integer prefixMask = pref.get(str.substring(0, prefixLength));
            if (prefixMask == null) {
                break;
            }
            List<DictRadix<MorphData>.LookupResult> toleratedRemainder =
                    radix.lookupTolerant(str.substring(prefixLength), LookupTolerators.TolerateEmKryiaAll);
            if (toleratedRemainder == null) {
                continue;
            }
            for (DictRadix<MorphData>.LookupResult result : toleratedRemainder) {
                for (int i = 0; i < result.getData().getLemmas().length; i++) {
                    if ((result.getData().getLemmas()[i].getPrefix().getValue() & prefixMask.intValue()) > 0) {
                        return WordType.HEBREW_TOLERATED_WITH_PREFIX;
                    }
                }
            }
        }
        return WordType.UNRECOGNIZED;
    }

    /**
     * Exact dictionary lookup that reports an {@link IllegalArgumentException} from the
     * dictionary as "not found" instead of propagating it.
     */
    private static MorphData lookupOrNull(DictHebMorph dictHebMorph, String word) {
        try {
            return dictHebMorph.lookup(word);
        } catch (IllegalArgumentException ignored) {
            // Deliberate best-effort: a key the dictionary rejects is simply not a match.
            return null;
        }
    }
}
