package org.elasticsearch.xpack.esql.expression.function.grouping;

import java.io.IOException;
import java.util.List;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.util.BytesRefHash;
import org.elasticsearch.compute.operator.EvalOperator;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.xpack.esql.capabilities.Validatable;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.TypeResolutions;
import org.elasticsearch.xpack.esql.core.tree.Node;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper;
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
import org.elasticsearch.xpack.esql.expression.function.Param;
import org.elasticsearch.xpack.esql.expression.function.grouping.CategorizeEvaluator;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
import org.elasticsearch.xpack.ml.aggs.categorization.CategorizationBytesRefHash;
import org.elasticsearch.xpack.ml.aggs.categorization.CategorizationPartOfSpeechDictionary;
import org.elasticsearch.xpack.ml.aggs.categorization.TokenListCategorizer;
import org.elasticsearch.xpack.ml.job.categorization.CategorizationAnalyzer;

/* loaded from: input_file:org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.class */
/**
 * Grouping function that assigns an integer category id to each string value of {@link #field()}.
 *
 * <p>Each value is tokenized with a whitespace tokenizer and handed to a
 * {@link TokenListCategorizer.CloseableTokenListCategorizer}; the id of the category it reports is
 * the result of the function. The analyzer and categorizer are created by the factories passed to
 * {@link CategorizeEvaluator.Factory} in {@link #toEvaluator}.
 */
public class Categorize extends GroupingFunction implements Validatable {
    /** Registry entry used to deserialize this expression under the name {@code "Categorize"}. */
    public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Categorize", Categorize::new);

    /** The expression whose string values are categorized; also the only child of this node. */
    private final Expression field;

    /**
     * Creates the function over a single string-valued expression.
     *
     * @param source     location of this function in the query text
     * @param expression the expression to categorize
     */
    @FunctionInfo(returnType = {"integer"}, description = "Categorizes text messages.")
    public Categorize(Source source, @Param(name = "field", type = {"text", "keyword"}, description = "Expression to categorize") Expression expression) {
        super(source, List.of(expression));
        this.field = expression;
    }

    /**
     * Deserialization constructor: reads the source followed by the field expression, i.e. the
     * same order in which {@link #writeTo(StreamOutput)} writes them.
     */
    private Categorize(StreamInput streamInput) throws IOException {
        this(Source.readFrom((PlanStreamInput) streamInput), streamInput.readNamedWriteable(Expression.class));
    }

    /** Writes the source followed by the field expression. */
    @Override
    public void writeTo(StreamOutput streamOutput) throws IOException {
        source().writeTo(streamOutput);
        streamOutput.writeNamedWriteable(this.field);
    }

    /** Returns the name registered in {@link #ENTRY}. */
    @Override
    public String getWriteableName() {
        return ENTRY.name;
    }

    /** This function is foldable exactly when its field expression is. */
    @Override
    public boolean foldable() {
        return this.field.foldable();
    }

    /**
     * Computes the category id of a single value.
     *
     * @param bytesRef                      UTF-8 encoded text to categorize
     * @param categorizationAnalyzer        analyzer used to tokenize the text
     * @param closeableTokenListCategorizer categorizer that maps the token stream to a category
     * @return the id of the category reported by the categorizer
     * @throws RuntimeException wrapping any {@link IOException} raised while analyzing the text
     */
    public static int process(BytesRef bytesRef, CategorizationAnalyzer categorizationAnalyzer, TokenListCategorizer.CloseableTokenListCategorizer closeableTokenListCategorizer) {
        String text = bytesRef.utf8ToString();
        // try-with-resources guarantees the stream is closed even when categorization throws;
        // the previous hand-rolled close only ran on the success path.
        try (TokenStream tokenStream = categorizationAnalyzer.tokenStream("text", text)) {
            return closeableTokenListCategorizer.computeCategory(tokenStream, text.length(), 1L).getId();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Builds the evaluator factory for this function from the evaluator of the field expression,
     * a factory for the analyzer and a factory for the categorizer.
     */
    @Override // org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper
    public EvalOperator.ExpressionEvaluator.Factory toEvaluator(EvaluatorMapper.ToEvaluator toEvaluator) {
        return new CategorizeEvaluator.Factory(
            source(),
            toEvaluator.apply(this.field),
            // Whitespace tokenizer with no char filters and no token filters.
            // NOTE(review): the trailing `true` presumably tells CategorizationAnalyzer to close the wrapped analyzer - confirm.
            driverContext -> new CategorizationAnalyzer(
                new CustomAnalyzer(TokenizerFactory.newFactory("whitespace", WhitespaceTokenizer::new), new CharFilterFactory[0], new TokenFilterFactory[0]),
                true
            ),
            // The hash backing the categorizer is allocated from the driver's BigArrays.
            // NOTE(review): 2048 looks like an initial capacity and 0.7f like a similarity threshold - confirm
            // against BytesRefHash / TokenListCategorizer.
            driverContext2 -> new TokenListCategorizer.CloseableTokenListCategorizer(
                new CategorizationBytesRefHash(new BytesRefHash(2048L, driverContext2.bigArrays())),
                CategorizationPartOfSpeechDictionary.getInstance(),
                0.7f
            )
        );
    }

    /** Resolves successfully only when the field is of a string type. */
    @Override
    protected Expression.TypeResolution resolveType() {
        return TypeResolutions.isString(field(), sourceText(), TypeResolutions.ParamOrdinal.DEFAULT);
    }

    /** Category ids are integers. */
    @Override
    public DataType dataType() {
        return DataType.INTEGER;
    }

    /** Returns a copy of this function whose field is the first element of {@code list}. */
    @Override
    public Expression replaceChildren(List<Expression> list) {
        return new Categorize(source(), list.get(0));
    }

    @Override
    protected NodeInfo<? extends Expression> info() {
        return NodeInfo.create(this, Categorize::new, this.field);
    }

    /** Returns the expression being categorized. */
    public Expression field() {
        return this.field;
    }

    @Override
    public String toString() {
        return "Categorize{field=" + this.field + "}";
    }

    /* renamed from: replaceChildren, reason: collision with other method in class */
    // Decompiler-renamed bridge method; kept so the class surface stays unchanged. It only delegates
    // to the typed replaceChildren above.
    public /* bridge */ /* synthetic */ Node m103replaceChildren(List list) {
        return replaceChildren((List<Expression>) list);
    }
}
