package nlp4j.krmj.annotator;

import com.atilika.kuromoji.ipadic.Token;
import com.atilika.kuromoji.ipadic.Tokenizer;
import java.util.Iterator;
import java.util.regex.Pattern;
import nlp4j.AbstractDocumentAnnotator;
import nlp4j.Document;
import nlp4j.DocumentAnnotator;
import nlp4j.impl.DefaultKeyword;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

/* loaded from: input_file:nlp4j/krmj/annotator/KuromojiAnnotator.class */
/**
 * Document annotator that runs the Kuromoji (IPADIC) tokenizer over the configured target
 * attributes of a document and adds one keyword per token to that document.
 */
public class KuromojiAnnotator extends AbstractDocumentAnnotator implements DocumentAnnotator {
    private static final Logger logger = LogManager.getLogger(KuromojiAnnotator.class);

    /**
     * Value the base form and reading are compared against before falling back to the surface.
     * Presumably Kuromoji's placeholder for an unavailable feature -- confirm against the
     * dictionary documentation.
     */
    private static final String UNKNOWN_FEATURE = "*";

    /** Compiled once so the regex is not recompiled for every token. */
    private static final Pattern ASCII_LETTERS = Pattern.compile("[a-zA-Z]*");

    /**
     * Tokenizes every target attribute of {@code document} whose value is a {@link String} and
     * adds a keyword for each token. Target attributes that are missing or hold a non-string
     * value are skipped. The keyword sequence number restarts at 1 for each attribute.
     *
     * @param document the document to read attributes from and add keywords to
     * @throws Exception declared by the method signature; no checked exception is raised
     *         explicitly in this body
     */
    public void annotate(Document document) throws Exception {
        logger.info("processing document");
        // NOTE(review): a new Tokenizer is built on every call; reusing one instance may be
        // cheaper, but that depends on Tokenizer being safe to share -- confirm before changing.
        Tokenizer tokenizer = new Tokenizer();
        for (Object target : this.targets) {
            Object value = document.getAttribute((String) target);
            // instanceof is false for null, so this also skips missing attributes.
            if (!(value instanceof String)) {
                continue;
            }
            int sequence = 1;
            for (Token token : tokenizer.tokenize((String) value)) {
                // Guarded so the feature string is only built when it will be logged.
                if (logger.isDebugEnabled()) {
                    logger.debug(token.getAllFeatures());
                }
                document.addKeyword(toKeyword(token, sequence));
                sequence++;
            }
        }
    }

    /**
     * Builds the keyword for a single token: lex from the base form, str from the surface,
     * reading, begin/end offsets derived from the token position and surface length, facet from
     * the level-1 part of speech, and the given sequence number.
     */
    private static DefaultKeyword toKeyword(Token token, int sequence) {
        String surface = token.getSurface();
        int begin = token.getPosition();

        DefaultKeyword keyword = new DefaultKeyword();
        keyword.setLex(token.getBaseForm());
        keyword.setStr(surface);
        keyword.setReading(token.getReading());
        if (lacksBaseFormAndReading(keyword) && isAsciiLetters(keyword.getStr())) {
            // No base form and no reading, and the text is plain ASCII letters:
            // use the surface text itself as the lex.
            keyword.setLex(keyword.getStr());
        }
        keyword.setBegin(begin);
        keyword.setEnd(begin + surface.length());
        keyword.setFacet(token.getPartOfSpeechLevel1());
        keyword.setSequence(sequence);
        return keyword;
    }

    /** Returns true when both lex and reading equal the "*" marker; null values yield false. */
    private static boolean lacksBaseFormAndReading(DefaultKeyword keyword) {
        return UNKNOWN_FEATURE.equals(keyword.getLex())
                && UNKNOWN_FEATURE.equals(keyword.getReading());
    }

    /** Returns true when {@code text} is non-null and consists solely of ASCII letters. */
    private static boolean isAsciiLetters(String text) {
        return text != null && ASCII_LETTERS.matcher(text).matches();
    }
}
