package org.carrot2.text.linguistic.lucene;

import java.io.IOException;
import java.io.Reader;
import java.text.BreakIterator;
import java.util.Locale;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.th.ThaiWordFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;
import org.carrot2.text.analysis.ITokenizer;
import org.carrot2.text.util.MutableCharArray;
import org.carrot2.util.ExceptionUtils;

/* loaded from: input_file:org/carrot2/text/linguistic/lucene/ThaiTokenizerAdapter.class */
/**
 * Adapts a Lucene analysis chain (a {@link StandardTokenizer} wrapped in a
 * {@link ThaiWordFilter}) to the {@link ITokenizer} interface.
 *
 * <p>Usage: bind an input with {@link #reset(Reader)}, then call {@link #nextToken()}
 * until it returns {@code -1}. After each successful call,
 * {@link #setTermBuffer(MutableCharArray)} exposes the text of the current token.
 *
 * <p>An instance keeps mutable per-input state and performs no synchronization.
 */
public final class ThaiTokenizerAdapter implements ITokenizer {
    /*
     * Token codes returned by nextToken().
     * NOTE(review): these values presumably mirror token-type constants declared by
     * ITokenizer - confirm, and reference those constants directly if so.
     */
    /** Returned when the underlying stream has no more tokens. */
    private static final short TOKEN_EOF = -1;
    /** Returned for the {@code <SOUTHEAST_ASIAN>} and {@code <ALPHANUM>} Lucene types. */
    private static final short TOKEN_TERM = 1;
    /** Returned for the {@code <NUM>} Lucene type. */
    private static final short TOKEN_NUMERIC = 2;
    /** Returned for every other Lucene type. */
    private static final short TOKEN_OTHER = 3;

    /* Lucene token type labels this adapter distinguishes. */
    private static final String TYPE_SOUTHEAST_ASIAN = "<SOUTHEAST_ASIAN>";
    private static final String TYPE_ALPHANUM = "<ALPHANUM>";
    private static final String TYPE_NUM = "<NUM>";

    /** Analysis chain for the current input; {@code null} until {@link #reset(Reader)} succeeds. */
    private TokenStream wordTokenFilter;
    /** Text of the current token, obtained from {@link #wordTokenFilter}. */
    private CharTermAttribute term;
    /** Lucene type label of the current token, obtained from {@link #wordTokenFilter}. */
    private TypeAttribute type;

    /**
     * Creates an adapter with no input bound.
     *
     * @throws RuntimeException if {@link #platformSupportsThai()} returns {@code false}
     */
    public ThaiTokenizerAdapter() {
        if (!platformSupportsThai()) {
            throw new RuntimeException("Thai segmentation not supported on this platform.");
        }
    }

    /**
     * Advances to the next token of the current input.
     *
     * @return {@code -1} when the stream is exhausted; {@code 1} for tokens typed
     *     {@code <SOUTHEAST_ASIAN>} or {@code <ALPHANUM>}; {@code 2} for {@code <NUM>};
     *     {@code 3} for any other type
     * @throws IOException if the underlying token stream fails
     * @throws IllegalStateException if {@link #reset(Reader)} has not been called
     */
    @Override
    public short nextToken() throws IOException {
        requireInput();
        if (!wordTokenFilter.incrementToken()) {
            return TOKEN_EOF;
        }

        // Constant-first comparison, so an unexpected null type label falls through to
        // TOKEN_OTHER instead of failing.
        final String tokenType = type.type();
        if (TYPE_SOUTHEAST_ASIAN.equals(tokenType) || TYPE_ALPHANUM.equals(tokenType)) {
            return TOKEN_TERM;
        }
        if (TYPE_NUM.equals(tokenType)) {
            return TOKEN_NUMERIC;
        }
        return TOKEN_OTHER;
    }

    /**
     * Points the given array at the current token's characters. The term attribute's
     * own buffer is passed through, not a copy.
     *
     * @param mutableCharArray receiver of the token's buffer, offset 0 and length
     * @throws IllegalStateException if {@link #reset(Reader)} has not been called
     */
    @Override
    public void setTermBuffer(MutableCharArray mutableCharArray) {
        requireInput();
        mutableCharArray.reset(term.buffer(), 0, term.length());
    }

    /**
     * Binds a new input by building a fresh analysis chain over {@code reader}.
     *
     * <p>Any exception raised while building the chain is rethrown through
     * {@link ExceptionUtils#wrapAsRuntimeException}; in that case the previously bound
     * input, if any, is left untouched.
     *
     * @param reader source of the text to tokenize
     * @throws IOException declared by the method signature; failures in this body are
     *     rethrown wrapped as described above
     */
    @Override
    public void reset(Reader reader) throws IOException {
        TokenStream stream;
        CharTermAttribute termAttribute;
        TypeAttribute typeAttribute;
        try {
            stream = new ThaiWordFilter(
                    Version.LUCENE_CURRENT,
                    new StandardTokenizer(Version.LUCENE_CURRENT, reader));
            termAttribute = stream.addAttribute(CharTermAttribute.class);
            typeAttribute = stream.addAttribute(TypeAttribute.class);
        } catch (Exception e) {
            throw ExceptionUtils.wrapAsRuntimeException(e);
        }

        // Publish all three together so the stream and its attributes can never be
        // out of step with each other.
        // NOTE(review): some Lucene versions require TokenStream.reset() to be called
        // before incrementToken(); confirm against the Lucene version on the classpath.
        this.wordTokenFilter = stream;
        this.term = termAttribute;
        this.type = typeAttribute;
    }

    /**
     * Probes whether the JDK's word {@link BreakIterator} for the {@code th} locale
     * reports a boundary at offset 4 of a short Thai sample string.
     *
     * @return the result of that boundary check, or {@code false} if the probe throws
     */
    public static boolean platformSupportsThai() {
        try {
            final BreakIterator breaker = BreakIterator.getWordInstance(new Locale("th"));
            breaker.setText("ภาษาไทย");
            return breaker.isBoundary(4);
        } catch (Throwable ignored) {
            // Best-effort capability probe: any failure is reported as "not supported".
            return false;
        }
    }

    /** Fails fast with a descriptive error when no input has been bound yet. */
    private void requireInput() {
        if (wordTokenFilter == null) {
            throw new IllegalStateException("reset(Reader) must be called before reading tokens.");
        }
    }
}
