package org.apache.lucene.analysis.hebrew;

import com.code972.hebmorph.HebrewToken;
import com.code972.hebmorph.MorphData;
import com.code972.hebmorph.Reference;
import com.code972.hebmorph.StreamLemmatizer;
import com.code972.hebmorph.Token;
import com.code972.hebmorph.Tokenizer;
import com.code972.hebmorph.datastructures.DictRadix;
import com.code972.hebmorph.lemmafilters.LemmaFilterBase;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.Version;

/* loaded from: input_file:org/apache/lucene/analysis/hebrew/StreamLemmasFilter.class */
/**
 * Tokenizer that pulls words from a {@link StreamLemmatizer} and emits, for each word, either
 * the word itself or the lemmas the lemmatizer produced for it.
 *
 * <p>When a word yields several lemmas, the first one is emitted with the default position
 * increment and the remaining distinct lemmas are emitted on subsequent
 * {@link #incrementToken()} calls with a position increment of 0 and the offsets of the word
 * they were derived from.
 *
 * <p>NOTE(review): this file imports two types named {@code Tokenizer} (the HebMorph one and the
 * Lucene one). The superclass is presumably the Lucene type and {@code Tokenizer.TokenType} the
 * HebMorph one; the clash has to be resolved at file level (for example by fully qualifying one
 * of them) before this class compiles.
 */
public class StreamLemmasFilter extends Tokenizer {
    // Indices handed to HebrewTokenizer.tokenTypeSignature(int). The names are inferred from the
    // branches that use them (2 is used when Token.isNumeric() is true, 5 when the Mixed flag is
    // set) — TODO confirm against the constants declared by HebrewTokenizer.
    private static final int TYPE_HEBREW = 0;
    private static final int TYPE_NON_HEBREW = 1;
    private static final int TYPE_NUMERIC = 2;
    private static final int TYPE_MIXED = 5;

    private final StreamLemmatizer _streamLemmatizer;
    private final CharArraySet commonWords;
    private final CharTermAttribute termAtt;
    private final OffsetAttribute offsetAtt;
    private final PositionIncrementAttribute posIncrAtt;
    private final TypeAttribute typeAtt;
    private final KeywordAttribute keywordAtt;
    private final CharacterUtils charUtils;
    private final LemmaFilterBase lemmaFilter;
    /** Tokens produced by the lemmatizer for the current word that are still to be emitted. */
    private final List<Token> stack;
    /** Scratch list handed to the lemma filter as its output collection. */
    private final List<Token> filterCache;
    /** Position of the next entry of {@link #stack} to examine. */
    private int index;
    /** Lemmas already emitted for the current word; used to suppress duplicates. */
    private final Set<String> previousLemmas;
    private boolean keepOriginalWord;
    /** Receives the text of the word most recently read from the lemmatizer. */
    private final Reference<String> tempRefObject;
    private int currentStartOffset;
    private int currentEndOffset;

    /**
     * Creates a filter without a special-cases dictionary, common-word set or lemma filter.
     *
     * @param reader    the character source
     * @param dictRadix dictionary passed through to the {@link StreamLemmatizer}
     * @param hashMap   map passed through to the {@link StreamLemmatizer}
     */
    public StreamLemmasFilter(Reader reader, DictRadix<MorphData> dictRadix, HashMap<String, Integer> hashMap) {
        this(reader, dictRadix, hashMap, null, null, null);
    }

    /**
     * Creates a filter with an optional lemma filter and no common-word set.
     *
     * @param reader          the character source
     * @param dictRadix       dictionary passed through to the {@link StreamLemmatizer}
     * @param hashMap         map passed through to the {@link StreamLemmatizer}
     * @param lemmaFilterBase filter applied to the lemmas of each word, or {@code null} for none
     */
    public StreamLemmasFilter(Reader reader, DictRadix<MorphData> dictRadix, HashMap<String, Integer> hashMap, LemmaFilterBase lemmaFilterBase) {
        this(reader, dictRadix, hashMap, null, null, lemmaFilterBase);
    }

    /**
     * Creates a filter with an optional common-word set and lemma filter.
     *
     * @param reader          the character source
     * @param dictRadix       dictionary passed through to the {@link StreamLemmatizer}
     * @param hashMap         map passed through to the {@link StreamLemmatizer}
     * @param charArraySet    words emitted as-is without their lemmas, or {@code null} for none
     * @param lemmaFilterBase filter applied to the lemmas of each word, or {@code null} for none
     */
    public StreamLemmasFilter(Reader reader, DictRadix<MorphData> dictRadix, HashMap<String, Integer> hashMap, CharArraySet charArraySet, LemmaFilterBase lemmaFilterBase) {
        this(reader, dictRadix, hashMap, null, charArraySet, lemmaFilterBase);
    }

    /**
     * Creates a fully configured filter.
     *
     * @param reader          the character source
     * @param dictRadix       dictionary passed through to the {@link StreamLemmatizer}
     * @param hashMap         map passed through to the {@link StreamLemmatizer}
     * @param dictRadix2      second dictionary passed through to the {@link StreamLemmatizer};
     *                        the shorter constructors pass {@code null}
     * @param charArraySet    words emitted as-is without their lemmas; {@code null} is replaced
     *                        by {@link CharArraySet#EMPTY_SET}
     * @param lemmaFilterBase filter applied to the lemmas of each word, or {@code null} for none
     */
    public StreamLemmasFilter(Reader reader, DictRadix<MorphData> dictRadix, HashMap<String, Integer> hashMap, DictRadix<Byte> dictRadix2, CharArraySet charArraySet, LemmaFilterBase lemmaFilterBase) {
        super(reader);
        this.termAtt = addAttribute(CharTermAttribute.class);
        this.offsetAtt = addAttribute(OffsetAttribute.class);
        this.posIncrAtt = addAttribute(PositionIncrementAttribute.class);
        this.typeAtt = addAttribute(TypeAttribute.class);
        this.keywordAtt = addAttribute(KeywordAttribute.class);
        this.stack = new ArrayList<>();
        this.filterCache = new ArrayList<>();
        this.index = 0;
        this.previousLemmas = new HashSet<>();
        this.tempRefObject = new Reference<>("");
        this._streamLemmatizer = new StreamLemmatizer(this.input, dictRadix, hashMap, dictRadix2);
        this.commonWords = charArraySet != null ? charArraySet : CharArraySet.EMPTY_SET;
        this.lemmaFilter = lemmaFilterBase;
        this.charUtils = CharacterUtils.getInstance(Version.LUCENE_46);
    }

    /**
     * Forwards the exact-match suffix character to the underlying lemmatizer.
     *
     * @param ch the suffix character, passed through unchanged
     */
    public void setSuffixForExactMatch(Character ch) {
        this._streamLemmatizer.setSuffixForExactMatch(ch);
    }

    /**
     * Forwards a custom-words dictionary to the underlying lemmatizer.
     *
     * @param dictRadix the custom-words dictionary, passed through unchanged
     */
    public void setCustomWords(DictRadix<MorphData> dictRadix) {
        this._streamLemmatizer.setCustomWords(dictRadix);
    }

    /**
     * Advances to the next token.
     *
     * <p>Lemmas still pending for the previous word are drained first; only when none remain is
     * the next word read from the lemmatizer.
     *
     * @return {@code false} when the lemmatizer reports token type 0, {@code true} otherwise
     * @throws IOException if reading from the underlying lemmatizer fails
     */
    @Override
    public final boolean incrementToken() throws IOException {
        clearAttributes();

        if (emitPendingLemma()) {
            return true;
        }

        // Nothing left from the previous word: start over with the next one.
        this.index = 0;
        this.stack.clear();
        this.previousLemmas.clear();

        int tokenType = this._streamLemmatizer.getLemmatizeNextToken(this.tempRefObject, this.stack);
        if (tokenType == 0) {
            return false;
        }

        this.currentStartOffset = correctOffset(this._streamLemmatizer.getStartOffset());
        this.currentEndOffset = correctOffset(this._streamLemmatizer.getEndOffset());
        this.offsetAtt.setOffset(this.currentStartOffset, this.currentEndOffset);

        String word = this.tempRefObject.ref;
        boolean exact = hasFlag(tokenType, Tokenizer.TokenType.Exact);

        if (this.commonWords.contains(word)) {
            // Common words are emitted verbatim; whatever the lemmatizer found is discarded.
            setTerm(word);
            this.typeAtt.setType(HebrewTokenizer.tokenTypeSignature(TYPE_HEBREW));
            this.stack.clear();
            if (this.keepOriginalWord || exact) {
                this.keywordAtt.setKeyword(true);
            }
            if (this.keepOriginalWord && !exact) {
                // Queue the same text so the next call emits it again without the keyword flag.
                queueOriginalWord(word);
            }
            return true;
        }

        if (exact) {
            this.keywordAtt.setKeyword(true);
        }

        if (this.stack.size() == 1 && !(this.stack.get(0) instanceof HebrewToken)) {
            // A single token that is not a HebrewToken: emit the lower-cased word itself.
            setTerm(word);
            int typeIndex = this.stack.get(0).isNumeric() ? TYPE_NUMERIC : TYPE_NON_HEBREW;
            this.typeAtt.setType(HebrewTokenizer.tokenTypeSignature(typeIndex));
            applyLowercaseFilter();
            this.stack.clear();
            return true;
        }

        this.typeAtt.setType(HebrewTokenizer.tokenTypeSignature(TYPE_HEBREW));

        // A non-null result from the filter means filterCache holds the lemmas to use instead.
        if (this.lemmaFilter != null && this.lemmaFilter.filterCollection(word, this.stack, this.filterCache) != null) {
            this.stack.clear();
            this.stack.addAll(this.filterCache);
        }

        if (!this.stack.isEmpty()) {
            if (this.keepOriginalWord) {
                // Emit the original word now; every lemma stays queued for the following calls.
                setTerm(word);
                this.keywordAtt.setKeyword(true);
                return true;
            }
            HebrewToken first = (HebrewToken) this.stack.get(0);
            if (this.stack.size() == 1) {
                this.stack.clear();
            } else {
                // Resume after the first lemma and remember it so it is not emitted twice.
                this.index = 1;
                this.previousLemmas.add(first.getLemma());
            }
            createHebrewToken(first);
            return true;
        }

        // No lemmas at all: fall back to the word as read.
        setTerm(word);
        if (this.keepOriginalWord) {
            this.keywordAtt.setKeyword(true);
        }
        if (hasFlag(tokenType, Tokenizer.TokenType.Mixed)) {
            this.typeAtt.setType(HebrewTokenizer.tokenTypeSignature(TYPE_MIXED));
            applyLowercaseFilter();
            return true;
        }
        if (exact) {
            applyLowercaseFilter();
            return true;
        }
        if (this.keepOriginalWord) {
            queueOriginalWord(word);
        }
        return true;
    }

    /**
     * Emits the next not-yet-seen lemma queued in {@link #stack}, if there is one.
     *
     * @return {@code true} if a token was emitted, {@code false} if the queue is exhausted
     */
    private boolean emitPendingLemma() {
        while (this.index < this.stack.size()) {
            Token candidate = this.stack.get(this.index);
            this.index++;
            if (!(candidate instanceof HebrewToken)) {
                continue;
            }
            HebrewToken hebrewToken = (HebrewToken) candidate;
            // Set.add returns false for a lemma that was already emitted for this word.
            if (this.previousLemmas.add(hebrewToken.getLemma())) {
                createHebrewToken(hebrewToken);
                this.offsetAtt.setOffset(this.currentStartOffset, this.currentEndOffset);
                this.typeAtt.setType(HebrewTokenizer.tokenTypeSignature(TYPE_HEBREW));
                this.posIncrAtt.setPositionIncrement(0);
                return true;
            }
        }
        return false;
    }

    /**
     * Tests whether a flag is present in a token-type bit set.
     * Assumes the flag constants are positive bit masks — TODO confirm.
     */
    private static boolean hasFlag(int tokenType, int flag) {
        return (tokenType & flag) != 0;
    }

    /** Copies {@code text} into the term attribute. */
    private void setTerm(String text) {
        this.termAtt.copyBuffer(text.toCharArray(), 0, text.length());
    }

    /** Queues a token whose text and lemma are both {@code word}, to be emitted by a later call. */
    private void queueOriginalWord(String word) {
        this.stack.add(new HebrewToken(word, (byte) 0, 0, word, 1.0f));
    }

    /** Lower-cases the current contents of the term attribute in place. */
    private void applyLowercaseFilter() {
        this.charUtils.toLowerCase(this.termAtt.buffer(), 0, this.termAtt.length());
    }

    /**
     * Writes the term for a Hebrew token: its lemma, or, when the lemma is {@code null}, its text
     * with the first {@code getPrefixLength()} characters removed.
     *
     * @param hebrewToken the token to emit
     */
    protected void createHebrewToken(HebrewToken hebrewToken) {
        String lemma = hebrewToken.getLemma();
        String term = lemma != null ? lemma : hebrewToken.getText().substring(hebrewToken.getPrefixLength());
        this.termAtt.copyBuffer(term.toCharArray(), 0, term.length());
    }

    /**
     * Sets the final offset: start and end are both the corrected end offset reported by the
     * lemmatizer.
     *
     * @throws IOException if the superclass fails
     */
    @Override
    public final void end() throws IOException {
        super.end();
        int finalOffset = correctOffset(this._streamLemmatizer.getEndOffset());
        this.currentEndOffset = finalOffset;
        this.currentStartOffset = finalOffset;
        this.offsetAtt.setOffset(finalOffset, finalOffset);
    }

    /**
     * Closes the tokenizer and discards all per-word state.
     *
     * @throws IOException if the superclass fails to close
     */
    @Override
    public void close() throws IOException {
        super.close();
        this.stack.clear();
        this.filterCache.clear();
        this.previousLemmas.clear();
        this.index = 0;
        // NOTE(review): `input` is read after super.close(); confirm what the superclass leaves
        // in that field at this point and that the lemmatizer tolerates it.
        this._streamLemmatizer.reset(this.input);
    }

    /**
     * Discards all per-word state, zeroes the cached offsets and points the lemmatizer at the
     * current input.
     *
     * @throws IOException if the superclass fails to reset
     */
    @Override
    public void reset() throws IOException {
        super.reset();
        this.stack.clear();
        this.filterCache.clear();
        this.previousLemmas.clear();
        this.index = 0;
        this.currentEndOffset = 0;
        this.currentStartOffset = 0;
        this._streamLemmatizer.reset(this.input);
    }

    /**
     * Chooses whether the original word is emitted (flagged as a keyword) ahead of its lemmas.
     *
     * @param z {@code true} to emit the original word as well
     */
    public void setKeepOriginalWord(boolean z) {
        this.keepOriginalWord = z;
    }
}
