package net.ontopia.topicmaps.classify;

import java.util.ArrayList;
import java.util.List;

/* loaded from: input_file:net/ontopia/topicmaps/classify/DocumentTokenizer.class */
/**
 * Walks the region tree of a {@link Document} and converts the text of every
 * {@link TextBlock} it finds into tokens that are appended to that block.
 *
 * <p>Raw tokens come from the configured {@link TokenizerIF}. Each raw token is
 * split by the configured {@link DelimiterTrimmerIF} into an optional leading
 * delimiter, a term, and an optional trailing delimiter; the term is then passed
 * through the registered {@link TermNormalizerIF}s in registration order. All
 * tokens are created through the {@link TermDatabase}.
 *
 * <p>A tokenizer and a delimiter trimmer must be set before calling
 * {@link #tokenize(Document)}; this class dereferences both without a null check.
 */
public class DocumentTokenizer {
    TermDatabase tdb;
    TokenizerIF tokenizer;
    DelimiterTrimmerIF delimiterTrimmer;
    List<TermNormalizerIF> termNormalizers = new ArrayList<TermNormalizerIF>();

    /**
     * Creates a tokenizer that builds its tokens through the given term database.
     *
     * @param termDatabase the database used to create delimiter and variant tokens
     */
    public DocumentTokenizer(TermDatabase termDatabase) {
        this.tdb = termDatabase;
    }

    /**
     * @return the term database currently used to create tokens
     */
    public TermDatabase getTermDatabase() {
        return this.tdb;
    }

    /**
     * Replaces the term database used to create tokens.
     *
     * @param termDatabase the new term database
     */
    public void setTermDatabase(TermDatabase termDatabase) {
        this.tdb = termDatabase;
    }

    /**
     * Sets the tokenizer that splits text block content into raw tokens.
     *
     * @param tokenizerIF the tokenizer to use
     */
    public void setTokenizer(TokenizerIF tokenizerIF) {
        this.tokenizer = tokenizerIF;
    }

    /**
     * Sets the trimmer that locates leading and trailing delimiters of a raw token.
     *
     * @param delimiterTrimmerIF the delimiter trimmer to use
     */
    public void setDelimiterTrimmer(DelimiterTrimmerIF delimiterTrimmerIF) {
        this.delimiterTrimmer = delimiterTrimmerIF;
    }

    /**
     * Appends a normalizer to the chain applied to every trimmed term.
     * Normalizers run in the order in which they were added.
     *
     * @param termNormalizerIF the normalizer to append
     */
    public void addTermNormalizer(TermNormalizerIF termNormalizerIF) {
        this.termNormalizers.add(termNormalizerIF);
    }

    /**
     * Tokenizes the whole document, starting at its root region, and then
     * flags the document as tokenized.
     *
     * @param document the document to tokenize
     */
    public void tokenize(Document document) {
        tokenize(document.getRoot());
        document.setTokenized(true);
    }

    /**
     * Tokenizes every child of the given region. Children that are
     * {@link TextBlock}s are tokenized directly; every other child is cast to
     * {@link Region} and processed recursively.
     *
     * @param region the region whose children are tokenized
     */
    protected void tokenize(Region region) {
        for (Object child : region.getChildren()) {
            if (child instanceof TextBlock) {
                tokenize(region, (TextBlock) child);
            } else {
                tokenize((Region) child);
            }
        }
    }

    /**
     * Feeds the text of a text block to the tokenizer and processes every raw
     * token it yields.
     *
     * @param region the region containing the text block (not used by this implementation)
     * @param textBlock the text block whose text is tokenized and which receives the tokens
     */
    protected void tokenize(Region region, TextBlock textBlock) {
        this.tokenizer.setText(textBlock.getText());
        while (this.tokenizer.next()) {
            tokenize(textBlock, this.tokenizer.getToken());
        }
    }

    /**
     * Splits one raw token into leading delimiter, term and trailing delimiter,
     * normalizes the term, and appends the resulting tokens to the text block in
     * that order. A {@code null} raw token is ignored.
     *
     * @param textBlock the text block that receives the tokens
     * @param str the raw token; may be {@code null}
     */
    protected void tokenize(TextBlock textBlock, String str) {
        if (str == null) {
            return;
        }

        // The trimmer results are used as the index of the first character of the
        // term and the (inclusive) index of its last character.
        int first = this.delimiterTrimmer.trimStart(str);
        int last = this.delimiterTrimmer.trimEnd(str);
        boolean hasLeading = first > 0;
        boolean hasTrailing = last < str.length() - 1;

        String leading = null;
        String trailing = null;
        String term = str;
        if (hasLeading && hasTrailing && last >= first) {
            // last == first is a one-character term (e.g. "(a)"); it must be
            // accepted here, otherwise the trailing delimiter stays glued to the term.
            leading = str.substring(0, first);
            trailing = str.substring(last + 1);
            term = str.substring(first, last + 1);
        } else if (hasLeading) {
            leading = str.substring(0, first);
            term = str.substring(first);
        } else if (hasTrailing) {
            trailing = str.substring(last + 1);
            term = str.substring(0, last + 1);
        }

        // Run the normalizer chain; a null result stops the chain immediately.
        String normalized = term;
        if (this.termNormalizers != null) {
            for (TermNormalizerIF normalizer : this.termNormalizers) {
                normalized = normalizer.normalize(normalized);
                if (normalized == null) {
                    break;
                }
            }
        }

        // The main token is created before the surrounding delimiters so that the
        // sequence of term database calls stays the same as it has always been.
        Token main;
        if (normalized == null) {
            // NOTE(review): a term rejected by a normalizer becomes a delimiter
            // created from the null value itself, not from the trimmed text;
            // presumably intentional - confirm against TermDatabase.createDelimiter.
            main = this.tdb.createDelimiter(normalized);
        } else {
            main = this.tdb.createVariant(normalized);
        }

        if (leading != null) {
            textBlock.addToken(this.tdb.createDelimiter(leading));
        }
        textBlock.addToken(main);
        if (trailing != null) {
            textBlock.addToken(this.tdb.createDelimiter(trailing));
        }
    }
}
