package nlp4j.annotator;

import java.lang.invoke.MethodHandles;
import java.util.Iterator;
import nlp4j.AbstractDocumentAnnotator;
import nlp4j.Document;
import nlp4j.DocumentAnnotator;
import nlp4j.FieldAnnotator;
import nlp4j.impl.DefaultKeyword;
import nlp4j.util.CharacterUtils;
import nlp4j.util.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

/* loaded from: input_file:nlp4j/annotator/EmojiAnnotator.class */
/**
 * Annotator that adds emoji keywords to a {@link Document}.
 *
 * <p>For every attribute named in {@code targets}, the attribute text is converted to an array
 * of code points. Each code point whose Unicode block (as reported by
 * {@link CharacterUtils#getUnicodeBlock}) is {@code EMOTICONS} or
 * {@code MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS}, and whose name is not excluded, yields up to
 * three keywords on the document:
 * <ul>
 *   <li>facet {@code "emoji"}: the character name with spaces replaced by underscores;</li>
 *   <li>facet {@code "emojiblock"}: the Unicode block name;</li>
 *   <li>facet {@code "emojichar"}: the value returned by {@link CharacterUtils#toChar}, added
 *       only when that value is non-null.</li>
 * </ul>
 *
 * <p>NOTE(review): keyword {@code begin}/{@code end} are indices into the code point array, not
 * indices into the original {@code String}. Presumably these differ from UTF-16 offsets when the
 * text contains supplementary characters; confirm this matches what consumers of the keywords
 * expect before changing it.
 */
public class EmojiAnnotator extends AbstractDocumentAnnotator implements DocumentAnnotator, FieldAnnotator {
    private static final Logger logger = LogManager.getLogger(MethodHandles.lookup().lookupClass());

    /** Unicode block names (as returned by {@code CharacterUtils.getUnicodeBlock}) that are accepted. */
    private static final String BLOCK_EMOTICONS = "EMOTICONS";
    private static final String BLOCK_SYMBOLS_AND_PICTOGRAPHS = "MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS";

    /** Facets assigned to the generated keywords. */
    private static final String FACET_EMOJI = "emoji";
    private static final String FACET_EMOJI_BLOCK = "emojiblock";
    private static final String FACET_EMOJI_CHAR = "emojichar";

    /**
     * Stores a configuration property by delegating to the superclass; this annotator defines no
     * properties of its own.
     *
     * @param str  property key
     * @param str2 property value
     */
    @Override
    public void setProperty(String str, String str2) {
        super.setProperty(str, str2);
    }

    /**
     * Scans every target attribute of {@code document} and adds emoji keywords to it.
     *
     * <p>Logs at info level and returns without touching the document when no targets are
     * configured. A target whose text is {@code null} or blank is logged and skipped.
     *
     * @param document document whose target attributes are read and which receives the keywords
     * @throws Exception declared by the annotator contract; propagated from the helpers used here
     */
    @Override
    public void annotate(Document document) throws Exception {
        if (this.targets == null || this.targets.isEmpty()) {
            logger.info("target is empty");
            return;
        }
        for (String target : this.targets) {
            String text = document.getAttributeAsString(target);
            if (text == null || text.trim().isEmpty()) {
                logger.info("text is empty");
                continue;
            }
            int[] codePoints = StringUtils.toCodePointArray(text);
            for (int index = 0; index < codePoints.length; index++) {
                int codePoint = codePoints[index];

                // Check the block first: most code points are rejected here, so the name and
                // character lookups below are only performed for real candidates.
                // This assumes the CharacterUtils lookups are side-effect free.
                String block = CharacterUtils.getUnicodeBlock(codePoint);
                if (!isEmojiBlock(block)) {
                    continue;
                }
                String rawName = CharacterUtils.getName(codePoint);
                if (rawName == null) {
                    continue;
                }
                String name = rawName.replace(" ", "_");
                if (isExcludedName(name)) {
                    continue;
                }

                // Parameterized logging avoids building the message when info is disabled.
                logger.info("block={},emoji={}", block, name);

                document.addKeyword(newKeyword(index, name, FACET_EMOJI));
                document.addKeyword(newKeyword(index, block, FACET_EMOJI_BLOCK));
                String symbol = CharacterUtils.toChar(codePoint);
                if (symbol != null) {
                    document.addKeyword(newKeyword(index, symbol, FACET_EMOJI_CHAR));
                }
            }
        }
    }

    /** Returns whether {@code block} is one of the two accepted Unicode block names. */
    private static boolean isEmojiBlock(String block) {
        return BLOCK_EMOTICONS.equals(block) || BLOCK_SYMBOLS_AND_PICTOGRAPHS.equals(block);
    }

    /**
     * Returns whether an underscore-separated character name must be skipped even though its
     * block is accepted (for example names containing {@code LATIN}, or variation selectors).
     */
    private static boolean isExcludedName(String name) {
        return name.contains("LATIN")
                || name.contains("KATAKANA")
                || name.contains("HIRAGANA")
                || name.startsWith("VARIATION_SELECTOR")
                || name.equals("HORIZONTAL_ELLIPSIS")
                || name.equals("FULLWIDTH_LEFT_PARENTHESIS")
                || name.equals("FULLWIDTH_RIGHT_PARENTHESIS");
    }

    /**
     * Builds a keyword spanning the single code point at {@code index}, using {@code value} as
     * both its lex and str.
     */
    private static DefaultKeyword newKeyword(int index, String value, String facet) {
        DefaultKeyword keyword = new DefaultKeyword();
        keyword.setBegin(index);
        keyword.setEnd(index + 1);
        keyword.setLex(value);
        keyword.setStr(value);
        keyword.setFacet(facet);
        return keyword;
    }
}
