package org.corpus_tools.peppermodules.nlpModules;

import com.neovisionaries.i18n.LanguageCode;
import java.util.HashSet;
import java.util.List;
import org.corpus_tools.pepper.common.DOCUMENT_STATUS;
import org.corpus_tools.pepper.impl.PepperManipulatorImpl;
import org.corpus_tools.pepper.impl.PepperMapperImpl;
import org.corpus_tools.pepper.modules.PepperMapper;
import org.corpus_tools.salt.common.STextualDS;
import org.corpus_tools.salt.common.tokenizer.AbbreviationDE;
import org.corpus_tools.salt.common.tokenizer.AbbreviationEN;
import org.corpus_tools.salt.common.tokenizer.AbbreviationFR;
import org.corpus_tools.salt.common.tokenizer.AbbreviationIT;
import org.corpus_tools.salt.graph.Identifier;
import org.corpus_tools.salt.util.DataSourceSequence;
import org.eclipse.emf.common.util.URI;
import org.osgi.service.component.annotations.Component;

/**
 * Pepper manipulator that bundles the tokens of a document into sentences.
 *
 * <p>The per-document work is done by {@link SentenceMapper}, a new instance of which is
 * returned from {@link #createPepperMapper(Identifier)}.
 */
@Component(name = "SentencerComponent", factory = "PepperManipulatorComponentFactory")
public class Sentencer extends PepperManipulatorImpl {

    /**
     * Mapper that walks every textual data source of the document graph and creates one span,
     * annotated with {@code sentence=sentence}, for each detected sentence.
     *
     * <p>A sentence ends at {@code '.'}, {@code '!'} or {@code '?'} unless the space-delimited
     * word ending at that character is contained in the abbreviation set chosen for the text.
     * Text following the last accepted terminator is not assigned to any sentence span.
     */
    public static class SentenceMapper extends PepperMapperImpl {

        /**
         * Creates sentence spans for every non-empty textual data source of the document.
         *
         * @return always {@link DOCUMENT_STATUS#COMPLETED}
         */
        public DOCUMENT_STATUS mapSDocument() {
            if (getDocument().getDocumentGraph() != null) {
                for (STextualDS textualDS : getDocument().getDocumentGraph().getTextualDSs()) {
                    String text = textualDS.getText();
                    if (text != null && !text.isEmpty()) {
                        markSentences(textualDS, text);
                    }
                }
            }
            return DOCUMENT_STATUS.COMPLETED;
        }

        /**
         * Scans {@code text} character by character and creates a sentence span for each
         * terminator that does not close a known abbreviation.
         *
         * @param textualDS the data source the character offsets refer to
         * @param text the non-empty text of {@code textualDS}
         */
        private void markSentences(STextualDS textualDS, String text) {
            HashSet<String> abbreviations = abbreviationsFor(text);
            char[] chars = text.toCharArray();
            // Offset of the first character that is not yet part of a sentence span.
            int sentenceStart = 0;
            // Characters collected since the last space (or since the last created sentence).
            StringBuilder currentWord = new StringBuilder();
            for (int pos = 0; pos < chars.length; pos++) {
                char current = chars[pos];
                currentWord.append(current);
                if (current == ' ') {
                    currentWord.setLength(0);
                } else if (isSentenceTerminator(current) && !abbreviations.contains(currentWord.toString())) {
                    // Raw types are kept because the generic signatures of the Salt API are
                    // not visible from this file.
                    DataSourceSequence sequence = new DataSourceSequence();
                    sequence.setDataSource(textualDS);
                    sequence.setStart(Integer.valueOf(sentenceStart));
                    sequence.setEnd(Integer.valueOf(pos + 1));
                    List tokens = getDocument().getDocumentGraph().getTokensBySequence(sequence);
                    // An empty selection is treated like a missing one: there is nothing to
                    // bundle, and the span result is dereferenced right away. The sentence
                    // start is deliberately left untouched in that case, as for null.
                    if (tokens != null && !tokens.isEmpty()) {
                        getDocument().getDocumentGraph().createSpan(tokens).createAnnotation((String) null, "sentence", "sentence");
                        sentenceStart = pos + 1;
                        currentWord.setLength(0);
                    }
                }
            }
        }

        /**
         * Selects the abbreviation set matching the language code that
         * {@code Tokenizer.checkLanguage} reports for {@code text}.
         *
         * @param text the text whose language is checked
         * @return the German, English, French or Italian abbreviation set, or an empty set for
         *     any other result, so that callers never have to deal with {@code null}
         */
        private static HashSet<String> abbreviationsFor(String text) {
            LanguageCode language = org.corpus_tools.salt.common.tokenizer.Tokenizer.checkLanguage(text);
            HashSet<String> abbreviations = null;
            if (LanguageCode.de.equals(language)) {
                abbreviations = AbbreviationDE.createAbbriviations();
            } else if (LanguageCode.en.equals(language)) {
                abbreviations = AbbreviationEN.createAbbriviations();
            } else if (LanguageCode.fr.equals(language)) {
                abbreviations = AbbreviationFR.createAbbriviations();
            } else if (LanguageCode.it.equals(language)) {
                abbreviations = AbbreviationIT.createAbbriviations();
            }
            // Without this fallback the first terminator in a text of any other language
            // caused a NullPointerException.
            return abbreviations != null ? abbreviations : new HashSet<String>();
        }

        /** Returns whether {@code c} is one of the sentence-ending characters '.', '!' or '?'. */
        private static boolean isSentenceTerminator(char c) {
            return c == '.' || c == '!' || c == '?';
        }
    }

    /** Sets the module name, supplier contact, supplier homepage and description. */
    public Sentencer() {
        setName("Sentencer");
        setSupplierContact(URI.createURI("saltnpepper@lists.hu-berlin.de"));
        setSupplierHomepage(URI.createURI("https://github.com/korpling/pepperModules-nlpModules"));
        setDesc("The sentencer is a Pepper module to bundle tokens to sentences. Therefore it creates a {@link SSpan} object for each sentence and connects that sentence with a set of tokens, belonging to the sentence. A sentence is identified as being determined by punctuations ('.', '!' and * '?'). The sentencer uses the abbreviation lists of Salt to identify abbreviations. ");
    }

    /**
     * Creates the mapper that processes a single document.
     *
     * @param identifier not used by this implementation
     * @return a new {@link SentenceMapper}
     */
    public PepperMapper createPepperMapper(Identifier identifier) {
        return new SentenceMapper();
    }
}
