package org.intermine.bio.dataconversion;

import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.apache.commons.collections.keyvalue.MultiKey;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.apache.tools.ant.BuildException;
import org.intermine.dataconversion.ItemWriter;
import org.intermine.metadata.ConstraintOp;
import org.intermine.metadata.Model;
import org.intermine.model.bio.Organism;
import org.intermine.model.bio.Protein;
import org.intermine.objectstore.ObjectStoreException;
import org.intermine.objectstore.ObjectStoreFactory;
import org.intermine.objectstore.query.BagConstraint;
import org.intermine.objectstore.query.ConstraintSet;
import org.intermine.objectstore.query.ContainsConstraint;
import org.intermine.objectstore.query.Query;
import org.intermine.objectstore.query.QueryClass;
import org.intermine.objectstore.query.QueryField;
import org.intermine.objectstore.query.QueryObjectReference;
import org.intermine.objectstore.query.ResultsRow;
import org.intermine.util.FormattedTextParser;
import org.intermine.xml.full.Item;

/* loaded from: input_file:org/intermine/bio/dataconversion/Protein2iprConverter.class */
/**
 * File converter that turns a tab-delimited InterPro protein-to-domain mapping
 * into {@code ProteinDomainRegion} items, each referencing a {@code Protein}
 * and a {@code ProteinDomain} item.
 *
 * <p>Only lines whose first column is a protein accession already present in
 * the object store named by {@link #setOsAlias(String)} are converted; all
 * other lines are counted as skipped.
 */
public class Protein2iprConverter extends BioFileConverter {
    private static final Logger LOG = Logger.getLogger(Protein2iprConverter.class);
    private static final String DATASET_TITLE = "InterPro data set";
    private static final String DATA_SOURCE_NAME = "InterPro";

    /** Columns 0..5 are indexed for every converted line, so at least six are required. */
    private static final int MIN_COLUMNS = 6;

    /**
     * Ordered {identifier prefix, database name} pairs used by
     * {@link #getSource(String)}; the first matching prefix wins.
     */
    private static final String[][] SOURCE_DATABASES = {
        {"PF", "Pfam"},
        {"SM", "SMART"},
        {"SSF", "SUPERFAMILY"},
        {"SFLD", "Structure-Function Linkage Database (SFLD)"},
        {"PS", "PROSITE"},
        {"PR", "PRINTS"},
        {"PTHR", "PANTHER"},
        {"G3DSA", "Gene3D"},
        {"TIGR", "TIGRFAMs"},
        {"PD", "ProDom"},
        {"PIRSF", "PIRSF"},
        {"MF_", "HAMAP"},
        {"cd", "Conserved Domain Database"},
    };

    /** Taxon ids used to restrict the protein lookup; empty means no restriction. */
    private Collection<String> taxonIds;
    /** Primary accessions of the proteins returned by the object store query. */
    private Set<String> proteinIds;
    /** Not read or written anywhere in this class. */
    private Set<MultiKey> xrefs;
    /** Protein primary accession -> identifier of the stored Protein item. */
    private Map<String, String> proteinMap;
    /** Domain primary identifier -> identifier of the stored ProteinDomain item. */
    private Map<String, String> proteinDomainMap;
    /** Alias of the object store queried for protein accessions; must be set before processing. */
    private String osAlias;

    /**
     * Adds the organisms whose proteins should be processed.
     *
     * @param taxonIdList space-separated taxon ids; {@code null} or an empty
     *                    string adds nothing
     */
    public void setProtein2iprOrganisms(String taxonIdList) {
        // StringUtils.split returns null for null input, so guard before iterating.
        String[] ids = StringUtils.split(taxonIdList, " ");
        if (ids != null) {
            for (String id : ids) {
                this.taxonIds.add(id);
            }
        }
        LOG.info("Setting list of organisms to " + this.taxonIds);
    }

    /**
     * Sets the alias of the object store that is queried for protein accessions.
     *
     * @param alias the object store alias
     */
    public void setOsAlias(String alias) {
        this.osAlias = alias;
    }

    /**
     * Creates the converter with the "InterPro" data source and
     * "InterPro data set" data set titles.
     *
     * @param itemWriter the writer passed on to the superclass
     * @param model the model passed on to the superclass
     */
    public Protein2iprConverter(ItemWriter itemWriter, Model model) {
        super(itemWriter, model, DATA_SOURCE_NAME, DATASET_TITLE);
        this.taxonIds = new ArrayList<String>();
        this.proteinIds = new HashSet<String>();
        this.xrefs = new HashSet<MultiKey>();
        this.proteinMap = new HashMap<String, String>();
        this.proteinDomainMap = new HashMap<String, String>();
        this.osAlias = null;
    }

    /**
     * Reads the tab-delimited input and stores one {@code ProteinDomainRegion}
     * item per line whose first column is a known protein accession.
     *
     * <p>Columns read from each converted line: 0 = protein accession,
     * 1 = protein domain identifier, 3 = region identifier (also used to derive
     * the source database), 4 = start, 5 = end. Column 2 is not read.
     *
     * @param reader the tab-delimited input
     * @throws BuildException if the object store alias has not been set
     * @throws IllegalArgumentException if a line for a known protein has fewer
     *         than six columns
     * @throws Exception if querying the object store or storing an item fails
     */
    public void process(Reader reader) throws Exception {
        if (this.osAlias == null) {
            throw new BuildException("osAlias attribute is not set");
        }
        getProteinIds();
        LOG.info("Found " + this.proteinIds.size() + " protein ids.");

        Iterator<?> lineIter = FormattedTextParser.parseTabDelimitedReader(reader);
        int processedLines = 0;
        int skippedLines = 0;
        while (lineIter.hasNext()) {
            String[] columns = (String[]) lineIter.next();
            if (columns.length == 0 || !this.proteinIds.contains(columns[0])) {
                skippedLines++;
                continue;
            }
            // Fail with context instead of a bare ArrayIndexOutOfBoundsException.
            if (columns.length < MIN_COLUMNS) {
                throw new IllegalArgumentException("Malformed line for protein " + columns[0]
                        + ": expected at least " + MIN_COLUMNS + " columns but found "
                        + columns.length);
            }

            String accession = columns[0];
            String domainId = columns[1];
            String regionId = columns[3];
            String start = columns[4];
            String end = columns[5];

            // Referenced items are created (and stored) before the region itself.
            String proteinRefId = getProtein(accession);
            String domainRefId = getProteinDomain(domainId);

            Item region = createItem("ProteinDomainRegion");
            region.setAttribute("identifier", regionId);
            region.setAttribute("database", getSource(regionId));
            region.setAttribute("start", start);
            region.setAttribute("end", end);
            region.setReference("protein", proteinRefId);
            region.setReference("proteinDomain", domainRefId);
            store(region);
            processedLines++;
        }
        LOG.info("Number of processed lines: " + processedLines);
        LOG.info("Number of skipped lines: " + skippedLines);
    }

    /**
     * Returns the item identifier of the {@code Protein} with the given
     * accession, creating and storing the item on first use.
     *
     * @param accession the protein primary accession
     * @return the identifier of the stored Protein item
     * @throws ObjectStoreException if storing the new item fails
     */
    private String getProtein(String accession) throws ObjectStoreException {
        String refId = this.proteinMap.get(accession);
        if (refId == null) {
            Item protein = createItem("Protein");
            protein.setAttribute("primaryAccession", accession);
            store(protein);
            refId = protein.getIdentifier();
            this.proteinMap.put(accession, refId);
        }
        return refId;
    }

    /**
     * Returns the item identifier of the {@code ProteinDomain} with the given
     * primary identifier, creating and storing the item on first use.
     *
     * @param domainId the protein domain primary identifier
     * @return the identifier of the stored ProteinDomain item
     * @throws ObjectStoreException if storing the new item fails
     */
    private String getProteinDomain(String domainId) throws ObjectStoreException {
        String refId = this.proteinDomainMap.get(domainId);
        if (refId == null) {
            Item domain = createItem("ProteinDomain");
            domain.setAttribute("primaryIdentifier", domainId);
            store(domain);
            refId = domain.getIdentifier();
            this.proteinDomainMap.put(domainId, refId);
        }
        return refId;
    }

    /**
     * Maps a region identifier to the name of its source database, based on
     * the identifier's prefix.
     *
     * @param identifier the region identifier
     * @return the source database name
     * @throws RuntimeException if the identifier matches no known prefix
     */
    private String getSource(String identifier) {
        for (String[] database : SOURCE_DATABASES) {
            if (identifier.startsWith(database[0])) {
                return database[1];
            }
        }
        throw new RuntimeException("Unknown DB found. ID: " + identifier);
    }

    /**
     * Fills {@link #proteinIds} with the primary accessions of all proteins in
     * the object store named by {@link #osAlias} that are linked to an organism
     * through their {@code organism} reference, restricted to the configured
     * taxon ids when any were set.
     *
     * @throws Exception if the object store cannot be obtained or queried
     */
    private void getProteinIds() throws Exception {
        QueryClass proteinClass = new QueryClass(Protein.class);
        QueryClass organismClass = new QueryClass(Organism.class);
        QueryField accessionField = new QueryField(proteinClass, "primaryAccession");
        QueryField taxonIdField = new QueryField(organismClass, "taxonId");

        Query query = new Query();
        query.addFrom(proteinClass);
        query.addFrom(organismClass);
        query.addToSelect(accessionField);

        ConstraintSet constraints = new ConstraintSet(ConstraintOp.AND);
        if (!this.taxonIds.isEmpty()) {
            constraints.addConstraint(
                    new BagConstraint(taxonIdField, ConstraintOp.IN, this.taxonIds));
        }
        // Join each protein to its organism.
        constraints.addConstraint(new ContainsConstraint(
                new QueryObjectReference(proteinClass, "organism"),
                ConstraintOp.CONTAINS, organismClass));
        query.setConstraint(constraints);

        Iterator<?> rowIter =
                ObjectStoreFactory.getObjectStore(this.osAlias).execute(query).iterator();
        while (rowIter.hasNext()) {
            ResultsRow<?> row = (ResultsRow<?>) rowIter.next();
            this.proteinIds.add((String) row.get(0));
        }
    }
}
