package org.carrot2.text.preprocessing.pipeline;

import java.util.List;
import org.carrot2.core.Document;
import org.carrot2.core.LanguageCode;
import org.carrot2.core.attribute.Init;
import org.carrot2.core.attribute.Internal;
import org.carrot2.core.attribute.Processing;
import org.carrot2.text.linguistic.DefaultLexicalDataFactory;
import org.carrot2.text.linguistic.DefaultStemmerFactory;
import org.carrot2.text.linguistic.DefaultTokenizerFactory;
import org.carrot2.text.linguistic.ILexicalDataFactory;
import org.carrot2.text.linguistic.IStemmerFactory;
import org.carrot2.text.linguistic.ITokenizerFactory;
import org.carrot2.text.linguistic.LanguageModel;
import org.carrot2.text.preprocessing.CaseNormalizer;
import org.carrot2.text.preprocessing.LanguageModelStemmer;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.text.preprocessing.StopListMarker;
import org.carrot2.text.preprocessing.Tokenizer;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.AttributeLevel;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.DefaultGroups;
import org.carrot2.util.attribute.Group;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Level;
import org.carrot2.util.attribute.constraint.ImplementingClasses;

/**
 * A preprocessing pipeline that runs four steps over a {@link PreprocessingContext},
 * in this fixed order: tokenization, case normalization, stemming and stop list
 * marking. Once all steps have run, the context is notified through
 * {@link PreprocessingContext#preprocessingFinished()}.
 *
 * <p>The three factory fields are exposed as bindable attributes and are handed to
 * {@link LanguageModel#create} whenever a new context is built by
 * {@link #preprocess(List, String, LanguageCode)}.
 */
@Bindable(prefix = "PreprocessingPipeline")
public class BasicPreprocessingPipeline implements IPreprocessingPipeline {
    /** First pipeline step: tokenizes the context. */
    public final Tokenizer tokenizer = new Tokenizer();

    /** Second pipeline step: normalizes case in the context. */
    public final CaseNormalizer caseNormalizer = new CaseNormalizer();

    /** Third pipeline step: stems the context. */
    public final LanguageModelStemmer languageModelStemmer = new LanguageModelStemmer();

    /** Fourth pipeline step: marks stop list entries in the context. */
    public final StopListMarker stopListMarker = new StopListMarker();

    /**
     * Tokenizer factory supplied to {@link LanguageModel#create} when a context is
     * built. Defaults to {@link DefaultTokenizerFactory}.
     */
    @Level(AttributeLevel.ADVANCED)
    @Input
    @Attribute
    @ImplementingClasses(classes = {}, strict = false)
    @Init
    @Group(DefaultGroups.PREPROCESSING)
    @Processing
    @Internal
    public ITokenizerFactory tokenizerFactory = new DefaultTokenizerFactory();

    /**
     * Stemmer factory supplied to {@link LanguageModel#create} when a context is
     * built. Defaults to {@link DefaultStemmerFactory}.
     */
    @Level(AttributeLevel.ADVANCED)
    @Input
    @Attribute
    @ImplementingClasses(classes = {}, strict = false)
    @Init
    @Group(DefaultGroups.PREPROCESSING)
    @Processing
    @Internal
    public IStemmerFactory stemmerFactory = new DefaultStemmerFactory();

    /**
     * Lexical data factory supplied to {@link LanguageModel#create} when a context
     * is built. Defaults to {@link DefaultLexicalDataFactory}.
     */
    @Level(AttributeLevel.ADVANCED)
    @Input
    @Attribute
    @ImplementingClasses(classes = {}, strict = false)
    @Init
    @Group(DefaultGroups.PREPROCESSING)
    @Processing
    @Internal
    public ILexicalDataFactory lexicalDataFactory = new DefaultLexicalDataFactory();

    /**
     * Builds a new {@link PreprocessingContext} for the given input and runs the
     * full pipeline on it.
     *
     * @param list the documents passed to the new context
     * @param str the string passed to the new context alongside the documents
     *     (presumably the query the documents were fetched for; confirm against
     *     {@link PreprocessingContext})
     * @param languageCode the language for which the {@link LanguageModel} is created
     * @return the newly created context, after {@link #preprocess(PreprocessingContext)}
     *     has been applied to it
     */
    @Override
    public PreprocessingContext preprocess(List<Document> list, String str, LanguageCode languageCode) {
        final LanguageModel languageModel =
                LanguageModel.create(languageCode, stemmerFactory, tokenizerFactory, lexicalDataFactory);
        final PreprocessingContext context = new PreprocessingContext(languageModel, list, str);

        preprocess(context);
        return context;
    }

    /**
     * Runs every pipeline step on an existing context and then signals completion
     * to it.
     *
     * @param preprocessingContext the context each step operates on
     */
    @Override
    public void preprocess(PreprocessingContext preprocessingContext) {
        // The steps run in a fixed sequence; keep this order when editing.
        tokenizer.tokenize(preprocessingContext);
        caseNormalizer.normalize(preprocessingContext);
        languageModelStemmer.stem(preprocessingContext);
        stopListMarker.mark(preprocessingContext);

        // Tell the context that no further preprocessing steps will follow.
        preprocessingContext.preprocessingFinished();
    }
}
