package com.code972.hebmorph;

import com.code972.hebmorph.Tokenizer;
import com.code972.hebmorph.datastructures.DictHebMorph;
import com.code972.hebmorph.datastructures.DictRadix;
import java.io.IOException;
import java.io.Reader;
import java.util.Iterator;
import java.util.List;

/* loaded from: input_file:com/code972/hebmorph/StreamLemmatizer.class */
/**
 * Lemmatizer that pulls its input from a {@link Reader} through a {@link Tokenizer}
 * and remembers the source offsets of the token it read most recently.
 */
public class StreamLemmatizer extends Lemmatizer {
    private final Tokenizer _tokenizer;
    private int _startOffset;
    private int _endOffset;
    private boolean tolerateWhenLemmatizingStream;

    /**
     * Creates a lemmatizer over {@code reader}; the tokenizer receives {@code null}
     * as its third argument.
     */
    public StreamLemmatizer(Reader reader, DictHebMorph dictHebMorph) {
        this(reader, dictHebMorph, null);
    }

    /**
     * Creates a lemmatizer over {@code reader}.
     *
     * @param reader       source handed to the tokenizer
     * @param dictHebMorph dictionary passed to the superclass; its {@code getPref()} result is
     *                     given to the tokenizer, or {@code null} when the dictionary itself is {@code null}
     * @param dictRadix    passed to the tokenizer unchanged
     */
    public StreamLemmatizer(Reader reader, DictHebMorph dictHebMorph, DictRadix<Byte> dictRadix) {
        super(dictHebMorph);
        tolerateWhenLemmatizingStream = true;
        _tokenizer = new Tokenizer(reader, dictHebMorph == null ? null : dictHebMorph.getPref(), dictRadix);
    }

    /** Points the tokenizer at a new reader and zeroes both recorded offsets. */
    public void reset(Reader reader) {
        _tokenizer.reset(reader);
        _startOffset = 0;
        _endOffset = 0;
    }

    /** @return the tokenizer offset recorded for the most recently read token */
    public int getStartOffset() {
        return _startOffset;
    }

    /** @return the start offset plus the tokenizer's reported length-in-source of that token */
    public int getEndOffset() {
        return _endOffset;
    }

    /** Forwards the exact-match suffix character to the tokenizer. */
    public void setSuffixForExactMatch(Character ch) {
        _tokenizer.setSuffixForExactMatch(ch);
    }

    /**
     * Reads the next token from the stream and fills {@code list} with the tokens derived from it.
     *
     * <p>The list is cleared first. Unless the token carries the {@code Exact} flag, the word left in
     * {@code reference.ref} is lemmatized and the resulting tokens are appended. If the list is still
     * empty afterwards, either a single acronym token is added (when the {@code Acronym} flag is set)
     * or the tolerant lemmatizer is tried.
     *
     * @param reference holder that receives the (possibly rewritten) token text
     * @param list      output list; cleared on entry
     * @return the token-type bit mask, possibly adjusted (acronym bit cleared, or replaced by
     *         {@code NonHebrew} after a prefix was split off)
     * @throws IOException propagated from the tokenizer
     */
    public final int getLemmatizeNextToken(Reference<String> reference, List<Token> list) throws IOException {
        list.clear();

        // NOTE(review): a tokenizer result of 0 also continues into the steps below — presumably
        // that value marks end of stream; confirm the fall-through is intended.
        int tokenType = pullNextCandidate(reference, list);

        if ((tokenType & Tokenizer.TokenType.Exact) > 0) {
            return tokenType;
        }

        if ((tokenType & Tokenizer.TokenType.Mixed) > 0 || (tokenType & Tokenizer.TokenType.Custom) > 0) {
            tokenType = splitOffLegalPrefix(reference, list, tokenType);
        }

        if ((tokenType & Tokenizer.TokenType.Acronym) > 0) {
            reference.ref = tryStrippingPrefix(reference.ref);
            // Without a double-quote character left in the word, drop the acronym bit.
            if (reference.ref.indexOf('"') == -1) {
                tokenType &= ~Tokenizer.TokenType.Acronym;
            }
        }

        appendAll(lemmatize(reference.ref), list);
        if (!list.isEmpty()) {
            return tokenType;
        }

        if ((tokenType & Tokenizer.TokenType.Acronym) > 0) {
            list.add(new HebrewToken(reference.ref, (byte) 0, DescFlag.D_ACRONYM, reference.ref, PrefixType.PS_NONDEF, 1.0f));
        } else if (tolerateWhenLemmatizingStream) {
            appendAll(lemmatizeTolerant(reference.ref), list);
        }
        return tokenType;
    }

    /**
     * Reads tokens until one should be handed on for lemmatization, updating the recorded offsets
     * after every read. Non-Hebrew tokens are appended to {@code list} as they are read. Reading
     * continues only while the current text is a legal prefix and the token is either non-Hebrew
     * or a Hebrew construct/acronym.
     */
    private int pullNextCandidate(Reference<String> reference, List<Token> list) throws IOException {
        while (true) {
            final int tokenType = _tokenizer.nextToken(reference);
            _startOffset = _tokenizer.getOffset();
            _endOffset = _startOffset + _tokenizer.getLengthInSource();

            if (tokenType == 0) {
                return tokenType;
            }

            if ((tokenType & Tokenizer.TokenType.Hebrew) > 0) {
                reference.ref = removeNiqqud(reference.ref);
                final boolean constructOrAcronym = (tokenType & Tokenizer.TokenType.Construct) > 0
                        || (tokenType & Tokenizer.TokenType.Acronym) > 0;
                if (!constructOrAcronym) {
                    return tokenType;
                }
            } else if ((tokenType & Tokenizer.TokenType.Numeric) > 0) {
                list.add(new Token(reference.ref, true));
            } else {
                list.add(new Token(reference.ref));
            }

            if (!isLegalPrefix(reference.ref)) {
                return tokenType;
            }
        }
    }

    /**
     * For a word that starts with a run of Hebrew letters forming a legal prefix and is followed
     * only by non-Hebrew characters, replaces {@code reference.ref} with the part after the prefix,
     * appends it to {@code list} and reports it as {@code NonHebrew}. Otherwise leaves everything
     * untouched and returns {@code tokenType}.
     */
    private int splitOffLegalPrefix(Reference<String> reference, List<Token> list, int tokenType) {
        final String word = reference.ref;
        final int length = word.length();

        int prefixEnd = 0;
        while (prefixEnd < length && Tokenizer.isHebrewLetter(word.charAt(prefixEnd))) {
            prefixEnd++;
        }
        if (prefixEnd == 0 || prefixEnd >= length - 1 || !isLegalPrefix(word.substring(0, prefixEnd))) {
            return tokenType;
        }

        int cursor = prefixEnd;
        while (cursor < length && !Tokenizer.isHebrewLetter(word.charAt(cursor))) {
            cursor++;
        }
        if (cursor != length) {
            // A Hebrew letter shows up again after the non-Hebrew part: not a simple prefix + remainder.
            return tokenType;
        }

        reference.ref = word.substring(prefixEnd);
        list.add(new Token(reference.ref));
        return Tokenizer.TokenType.NonHebrew;
    }

    /** Appends every element of {@code source} to {@code target}; a null or empty source is a no-op. */
    private static void appendAll(List<HebrewToken> source, List<Token> target) {
        if (source == null || source.isEmpty()) {
            return;
        }
        for (HebrewToken lemma : source) {
            target.add(lemma);
        }
    }
}
