package org.corpus_tools.peppermodules.toolboxModules;

import com.neovisionaries.i18n.LanguageCode;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.corpus_tools.pepper.common.DOCUMENT_STATUS;
import org.corpus_tools.pepper.impl.PepperImporterImpl;
import org.corpus_tools.pepper.impl.PepperMapperImpl;
import org.corpus_tools.pepper.modules.PepperImporter;
import org.corpus_tools.pepper.modules.PepperMapper;
import org.corpus_tools.salt.SaltFactory;
import org.corpus_tools.salt.common.SDocument;
import org.corpus_tools.salt.common.SDocumentGraph;
import org.corpus_tools.salt.common.SSpan;
import org.corpus_tools.salt.common.STextualDS;
import org.corpus_tools.salt.common.SToken;
import org.corpus_tools.salt.common.tokenizer.Tokenizer;
import org.corpus_tools.salt.core.SAnnotation;
import org.corpus_tools.salt.graph.Identifier;
import org.eclipse.emf.common.util.URI;
import org.osgi.service.component.annotations.Component;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.ext.DefaultHandler2;

/**
 * Pepper importer that reads XML documents made of {@code <article>},
 * {@code <s>}, {@code <wol>} and {@code <en>} elements and maps them onto a
 * Salt document graph.
 *
 * <p>The mapping itself is performed by {@link DocumentStructureReader}, a SAX
 * handler that is fed by the mapper returned from
 * {@link #createPepperMapper(Identifier)}.
 */
@Component(name = "WolofImporterComponent", factory = "PepperImporterComponentFactory")
public class WolofImporter extends PepperImporterImpl implements PepperImporter {
    public static final Logger logger = LoggerFactory.getLogger(WolofImporter.class);

    /**
     * SAX handler that builds the document structure while the XML is parsed.
     *
     * <ul>
     * <li>The text of every {@code <wol>} element is appended to a single
     * primary text (no separator is inserted between consecutive elements),
     * tokenized, and the resulting tokens are remembered for the current
     * sentence and for the current article.</li>
     * <li>Every {@code <en>} element produces a span over the tokens of the
     * most recent {@code <wol>} element, annotated with the element's text
     * under the name {@code en} and with {@code sentence=sentence}.</li>
     * <li>Every {@code <article>} element produces a span over all tokens
     * collected since the previous article ended, annotated with the XML
     * attributes of the {@code <article>} start tag.</li>
     * </ul>
     *
     * <p>{@link #structure} must be set before parsing starts.
     */
    public class DocumentStructureReader extends DefaultHandler2 {
        public static final String TAG_ARTICLE = "article";
        public static final String TAG_SENTENCE = "s";
        public static final String TAG_WOL = "wol";
        public static final String TAG_EN = "en";

        /** Target graph; nodes, tokens and spans are created in here. */
        public SDocumentGraph structure = null;
        /** Character data collected since the last start or end tag. */
        private StringBuilder currentText = new StringBuilder();
        /** Primary text shared by all tokens; created lazily on the first {@code <wol>}. */
        STextualDS primaryText = null;
        /** Tokens of the most recent {@code <wol>} element. */
        List<SToken> currentTokList = new ArrayList<>();
        /** Tokens collected since the current article started. */
        List<SToken> articleTokList = new ArrayList<>();
        /** Annotations built from the attributes of the open {@code <article>} tag. */
        Set<SAnnotation> annoList = new HashSet<>();
        // The following two fields are not used by this handler; they are kept
        // because they are visible to other classes in this package.
        HashMap<String, String> annoListForSegmentElem = new HashMap<>();
        List<String> annosToAssociateWithWholeSegmentList = new ArrayList<>();

        public DocumentStructureReader() {
        }

        /**
         * Resets the text buffer and prepares per-element state.
         *
         * @param uri        namespace URI (unused)
         * @param localName  local name (unused)
         * @param qName      qualified element name; decides how the element is handled
         * @param attributes attributes of the start tag; for {@code <article>} each
         *                   one becomes a pending annotation
         */
        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            this.currentText = new StringBuilder();
            if (TAG_WOL.equals(qName)) {
                // A fresh list (not clear()) so earlier references stay untouched.
                this.currentTokList = new ArrayList<>();
                return;
            }
            if (TAG_ARTICLE.equals(qName)) {
                for (int i = 0; i < attributes.getLength(); i++) {
                    SAnnotation annotation = SaltFactory.createSAnnotation();
                    annotation.setName(attributes.getQName(i));
                    annotation.setValue(attributes.getValue(i));
                    this.annoList.add(annotation);
                }
            }
        }

        /**
         * Appends the reported character data to the text buffer. The parser may
         * deliver one text node in several chunks, so the data is accumulated.
         *
         * @param ch     character array owned by the parser
         * @param start  index of the first character to read
         * @param length number of characters to read
         */
        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            this.currentText.append(ch, start, length);
        }

        /**
         * Maps the element that just ended onto the document graph and resets the
         * text buffer.
         *
         * @param uri       namespace URI (unused)
         * @param localName local name (unused)
         * @param qName     qualified element name; decides how the element is handled
         */
        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if (TAG_WOL.equals(qName)) {
                appendAndTokenize(this.currentText.toString());
            } else if (TAG_SENTENCE.equals(qName)) {
                this.currentTokList.clear();
            } else if (TAG_EN.equals(qName)) {
                SSpan sentenceSpan = createSpanOrNull(this.currentTokList, TAG_EN);
                if (sentenceSpan != null) {
                    sentenceSpan.createAnnotation((String) null, TAG_EN, this.currentText.toString());
                    sentenceSpan.createAnnotation((String) null, "sentence", "sentence");
                }
            } else if (TAG_ARTICLE.equals(qName)) {
                SSpan articleSpan = createSpanOrNull(this.articleTokList, TAG_ARTICLE);
                if (articleSpan != null) {
                    for (SAnnotation annotation : this.annoList) {
                        articleSpan.addAnnotation(annotation);
                    }
                }
                // Reset article state even when no span could be created, so the
                // next article does not inherit stale annotations or tokens.
                this.annoList.clear();
                this.articleTokList.clear();
            }
            this.currentText = new StringBuilder();
        }

        /**
         * Appends {@code text} to the primary text, tokenizes it and registers one
         * token per tokenizer result in the current and the article token lists.
         */
        private void appendAndTokenize(String text) {
            if (this.primaryText == null) {
                this.primaryText = SaltFactory.createSTextualDS();
                this.primaryText.setText("");
                this.structure.addNode(this.primaryText);
            }
            List<String> tokenTexts = new Tokenizer().tokenizeToString(text, (LanguageCode) null);
            String previousText = this.primaryText.getText();
            // Offset of `text` inside the primary text once it has been appended.
            int base = previousText.length();
            this.primaryText.setText(previousText + text);

            // Position in `text` right after the previously located token; tokens
            // are searched left to right so repeated words map to distinct offsets.
            int cursor = 0;
            for (String tokenText : tokenTexts) {
                int found = text.indexOf(tokenText, cursor);
                if (found < 0) {
                    // Without this guard the computed offsets would be off by the
                    // -1 sentinel and corrupt every following token as well.
                    logger.warn("Token '{}' was not found in text '{}' and is skipped.", tokenText, text);
                    continue;
                }
                int tokenStart = base + found;
                int tokenEnd = tokenStart + tokenText.length();
                this.currentTokList.add(this.structure.createToken(this.primaryText, Integer.valueOf(tokenStart), Integer.valueOf(tokenEnd)));
                cursor = found + tokenText.length();
            }
            this.articleTokList.addAll(this.currentTokList);
        }

        /**
         * Creates a span over {@code tokens}, or returns {@code null} (after logging
         * a warning) when there are no tokens or the graph did not create a span.
         *
         * @param tokens      tokens to be covered by the span
         * @param elementName name of the XML element being mapped, used for logging
         */
        private SSpan createSpanOrNull(List<SToken> tokens, String elementName) {
            if (tokens.isEmpty()) {
                logger.warn("Element <{}> has no tokens to span; no span is created.", elementName);
                return null;
            }
            SSpan span = this.structure.createSpan(tokens);
            if (span == null) {
                logger.warn("No span could be created for element <{}>.", elementName);
            }
            return span;
        }
    }

    /**
     * Registers the module's name, version, description, supplier information
     * and the supported format ({@code xml} 1.0, file ending {@code xml}).
     */
    public WolofImporter() {
        setName("WolofImporter");
        setVersion("0.0.1");
        setDesc("This importer transforms data of an unknown format to salt. ");
        setSupplierContact(URI.createURI("saltnpepper@lists.hu-berlin.de"));
        setSupplierHomepage(URI.createURI("https://github.com/korpling/pepperModules-ModuleBox"));
        addSupportedFormat("xml", "1.0", null);
        getDocumentEndings().add("xml");
    }

    /**
     * Creates the mapper that imports a single document by parsing its XML
     * resource with a {@link DocumentStructureReader}.
     *
     * @param identifier identifier of the element to be mapped; the resource URI
     *                   is only set when it identifies an {@link SDocument}
     * @return a mapper whose {@code mapSDocument()} fills the document graph
     */
    public PepperMapper createPepperMapper(Identifier identifier) {
        PepperMapperImpl mapper = new PepperMapperImpl() {
            public DOCUMENT_STATUS mapSDocument() {
                // Defensive: the reader dereferences its graph on the first <wol>
                // element, so make sure the document actually has one.
                if (getDocument().getDocumentGraph() == null) {
                    getDocument().setDocumentGraph(SaltFactory.createSDocumentGraph());
                }
                DocumentStructureReader reader = new DocumentStructureReader();
                reader.structure = getDocument().getDocumentGraph();
                readXMLResource(reader, getResourceURI());
                return DOCUMENT_STATUS.COMPLETED;
            }
        };
        // instanceof is false for null, so no separate null check is needed.
        if (identifier.getIdentifiableElement() instanceof SDocument) {
            mapper.setResourceURI((URI) getIdentifier2ResourceTable().get(identifier));
        }
        return mapper;
    }
}
