package org.wikibrain.lucene;

import com.typesafe.config.Config;
import java.io.IOException;
import java.io.StringReader;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;
import org.wikibrain.conf.Configuration;
import org.wikibrain.conf.ConfigurationException;
import org.wikibrain.conf.Configurator;
import org.wikibrain.core.lang.Language;
import org.wikibrain.core.lang.LocalString;
import org.wikibrain.core.lang.StringNormalizer;
import org.wikibrain.lucene.tokenizers.LanguageTokenizer;

/* loaded from: input_file:org/wikibrain/lucene/LuceneStringNormalizer.class */
public class LuceneStringNormalizer implements StringNormalizer {
    private final Version version;
    private final TokenizerOptions options;
    private final Map<Language, LanguageTokenizer> tokenizers = new ConcurrentHashMap();

    /* loaded from: input_file:org/wikibrain/lucene/LuceneStringNormalizer$Provider.class */
    public static class Provider extends org.wikibrain.conf.Provider<StringNormalizer> {
        public Provider(Configurator configurator, Configuration configuration) throws ConfigurationException {
            super(configurator, configuration);
        }

        public Class<StringNormalizer> getType() {
            return StringNormalizer.class;
        }

        public String getPath() {
            return "stringnormalizers";
        }

        public StringNormalizer get(String str, Config config, Map<String, String> map) throws ConfigurationException {
            if (!config.getString("type").equals("lucene")) {
                return null;
            }
            return new LuceneStringNormalizer(new TokenizerOptions(config.getBoolean("caseInsensitive"), config.getBoolean("useStopWords"), config.getBoolean("useStem")), Version.parseLeniently(config.getString("version")));
        }

        /* renamed from: get, reason: collision with other method in class */
        public /* bridge */ /* synthetic */ Object m3get(String str, Config config, Map map) throws ConfigurationException {
            return get(str, config, (Map<String, String>) map);
        }
    }

    public LuceneStringNormalizer(TokenizerOptions tokenizerOptions, Version version) {
        this.options = tokenizerOptions;
        this.version = version;
    }

    public LanguageTokenizer getTokenizer(Language language) {
        if (!this.tokenizers.containsKey(language)) {
            this.tokenizers.put(language, LanguageTokenizer.getLanguageTokenizer(language, this.options, this.version));
        }
        return this.tokenizers.get(language);
    }

    public String normalize(LocalString localString) {
        return normalize(localString.getLanguage(), localString.getString());
    }

    public String normalize(Language language, String str) {
        StringBuilder sb = new StringBuilder();
        try {
            TokenStream tokenStream = getTokenizer(language).getTokenStream(new StringReader(str));
            CharTermAttribute addAttribute = tokenStream.addAttribute(CharTermAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                if (sb.length() > 0) {
                    sb.append(' ');
                }
                sb.append(addAttribute.toString());
            }
            tokenStream.end();
            tokenStream.close();
            return sb.toString();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
