package net.ontopia.topicmaps.classify;

import java.util.Collection;
import java.util.Collections;
import net.ontopia.topicmaps.classify.TopicMapAnalyzer;
import net.ontopia.topicmaps.core.TopicIF;
import net.ontopia.topicmaps.core.TopicMapIF;

/* loaded from: input_file:net/ontopia/topicmaps/classify/TopicMapClassification.class */
/**
 * Runs a tokenize-then-classify pipeline over classifiable content and exposes the
 * {@link TermDatabase} the pipeline was given.
 *
 * <p>An instance may be created with or without a topic map. Without one, no
 * {@link TopicMapAnalyzer} is registered during {@link #classify(ClassifiableContentIF)} and the
 * topic-map lookups ({@link #getTopics(Variant)}, {@link #getCandidateTypes()},
 * {@link #getAssociationTypes()}) return empty collections.
 */
public class TopicMapClassification {
    /** Term database handed to the tokenizer and classifier on every {@link #classify} call. */
    TermDatabase tdb = new TermDatabase();

    /** Topic map analyzer; {@code null} when the instance was created without a topic map. */
    TopicMapAnalyzer ta;

    /** Optional caller-supplied term analyzer; {@code null} until one is set. */
    TermAnalyzerIF customTermAnalyzer;

    /** Creates a classification that is not backed by a topic map. */
    public TopicMapClassification() {
    }

    /**
     * Creates a classification backed by the given topic map.
     *
     * @param topicMapIF the topic map passed to the {@link TopicMapAnalyzer}
     */
    public TopicMapClassification(TopicMapIF topicMapIF) {
        this.ta = new TopicMapAnalyzer(topicMapIF);
    }

    /**
     * Sets an additional term analyzer to be registered by {@link #classify}.
     *
     * @param termAnalyzerIF the analyzer to register, or {@code null} for none
     */
    public void setCustomTermAnalyzer(TermAnalyzerIF termAnalyzerIF) {
        this.customTermAnalyzer = termAnalyzerIF;
    }

    /**
     * Reads the content into a new document, tokenizes it, detects its language and runs the
     * document and term analyzers over it, using this instance's term database.
     *
     * @param classifiableContentIF the content to classify
     */
    public void classify(ClassifiableContentIF classifiableContentIF) {
        // Read the content into a fresh document.
        Document document = new Document();
        new FormatModule().readContent(classifiableContentIF, document);

        // Tokenize; the same normalizer instance serves as delimiter trimmer and term normalizer.
        DocumentTokenizer documentTokenizer = new DocumentTokenizer(this.tdb);
        documentTokenizer.setTokenizer(new DefaultTokenizer());
        SpecialCharNormalizer specialCharNormalizer = new SpecialCharNormalizer();
        documentTokenizer.setDelimiterTrimmer(specialCharNormalizer);
        documentTokenizer.addTermNormalizer(new JunkNormalizer());
        documentTokenizer.addTermNormalizer(specialCharNormalizer);
        documentTokenizer.tokenize(document);

        // The detected language supplies the stemmer, frequency analyzer and stop list.
        Language detectLanguage = Language.detectLanguage(document);

        DocumentClassifier documentClassifier = new DocumentClassifier(this.tdb);
        TermStemmerIF stemmer = detectLanguage.getStemmer();
        documentClassifier.setTermStemmer(stemmer);

        // Document analyzers.
        documentClassifier.addDocumentAnalyzer(new DistanceAnalyzer());
        CompoundAnalyzer compoundAnalyzer = new CompoundAnalyzer();
        compoundAnalyzer.setTermStemmer(stemmer);
        documentClassifier.addDocumentAnalyzer(compoundAnalyzer);

        // NOTE(review): this booster is configured but never registered with the classifier,
        // so it does not take part in the analysis set up here. Kept as-is to preserve
        // behavior; confirm whether it should be added as a document analyzer or removed.
        new RegionBooster().addBoost("title", 1.15d);

        // Term analyzers. The registration order below is preserved exactly; the stop-list and
        // custom analyzers are deliberately registered again after the compound analyzer.
        // NOTE(review): presumably analyzers run in registration order - confirm.
        documentClassifier.addTermAnalyzer(CharacterAnalyzer.getInstance());
        documentClassifier.addTermAnalyzer(detectLanguage.getFrequencyAnalyzer());
        documentClassifier.addTermAnalyzer(new RegexpTermAnalyzer());
        documentClassifier.addTermAnalyzer(detectLanguage.getStopListAnalyzer());
        if (this.customTermAnalyzer != null) {
            documentClassifier.addTermAnalyzer(this.customTermAnalyzer);
        }
        documentClassifier.addTermAnalyzer(compoundAnalyzer);
        documentClassifier.addTermAnalyzer(detectLanguage.getStopListAnalyzer());
        if (this.customTermAnalyzer != null) {
            documentClassifier.addTermAnalyzer(this.customTermAnalyzer);
        }
        // The topic map analyzer only exists when a topic map was supplied.
        if (this.ta != null) {
            documentClassifier.addTermAnalyzer(this.ta);
        }
        documentClassifier.addTermAnalyzer(new RelativeScore());

        documentClassifier.analyzeDocument(document);
        documentClassifier.analyzeTerms();
    }

    /**
     * Returns the term database used by this classification.
     *
     * @return the term database instance passed to the tokenizer and classifier
     */
    public TermDatabase getTermDatabase() {
        return this.tdb;
    }

    /**
     * Returns the topics the topic map analyzer reports for the given variant.
     *
     * @param variant the variant to look up
     * @return the analyzer's topics, or an empty immutable collection when this instance was
     *     created without a topic map
     */
    public Collection<TopicIF> getTopics(Variant variant) {
        if (this.ta == null) {
            // No topic map was supplied, so there is nothing to look up.
            return Collections.<TopicIF>emptyList();
        }
        return this.ta.getTopics(variant);
    }

    /**
     * Returns the candidate types reported by the topic map analyzer.
     *
     * @return the analyzer's candidate types, or an empty immutable collection when this
     *     instance was created without a topic map
     */
    public Collection<TopicIF> getCandidateTypes() {
        if (this.ta == null) {
            return Collections.<TopicIF>emptyList();
        }
        return this.ta.getCandidateTypes();
    }

    /**
     * Returns the association types reported by the topic map analyzer.
     *
     * @return the analyzer's association types, or an empty immutable collection when this
     *     instance was created without a topic map
     */
    public Collection<TopicMapAnalyzer.AssociationType> getAssociationTypes() {
        if (this.ta == null) {
            return Collections.<TopicMapAnalyzer.AssociationType>emptyList();
        }
        return this.ta.getAssociationTypes();
    }
}
