package org.intermine.bio.dataconversion;

import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.intermine.bio.util.OrganismRepository;
import org.intermine.dataconversion.ItemWriter;
import org.intermine.metadata.Model;
import org.intermine.metadata.StringUtil;
import org.intermine.objectstore.ObjectStoreException;
import org.intermine.util.SAXParser;
import org.intermine.xml.full.Item;
import org.intermine.xml.full.ReferenceList;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:org/intermine/bio/dataconversion/UniprotConverter.class */
public class UniprotConverter extends BioDirectoryConverter {
    private Map<String, String> pubs;
    // Synonym and cross-reference items collected while a protein is being processed.
    // They are stored by processSynonyms and the set is replaced once the protein has been stored.
    private Set<Item> synonymsAndXrefs;
    // Keyed by taxon id; the inner map is keyed by sequence md5 checksum. The inner value is used
    // as the subject of a synonym when a duplicate sequence is met
    // (presumably the item identifier of the protein already stored - confirm against addSeenSequence).
    private Map<String, Map<String, String>> sequences;
    // Sequence md5 checksum -> identifier of the Sequence item already stored for it.
    private Map<String, String> allSequences;
    private Map<String, String> ontologies;
    private Map<String, String> keywords;
    private Map<String, String> genes;
    private Map<String, String> goterms;
    private Map<String, String> goEvidenceCodes;
    // EC number -> identifier of the ECNumber item created for it.
    private Map<String, String> ecNumbers;
    // Primary accession of a non-isoform protein -> identifier of its Protein item;
    // isoform entries look their parent up here by UniProt accession.
    private Map<String, String> proteins;
    // Maximum length of a Comment description; longer comment text is cut and suffixed with "...".
    private static final int POSTGRES_INDEX_SIZE = 2712;
    private Set<String> identifiers;
    // When true, processGene is invoked for every stored protein.
    private boolean creategenes;
    private boolean creatego;
    // When false, entries flagged as fragments are skipped.
    private boolean loadfragments;
    // When false, a non-isoform entry whose sequence was already seen for its taxon is not stored
    // again; a synonym pointing at the earlier protein is queued instead.
    private boolean allowduplicates;
    private boolean loadtrembl;
    private Set<String> taxonIds;
    protected IdResolver rslv;
    private static final String FLY = "7227";
    // Passed to getDataSet(...) together with LICENCE when the data set of an entry is resolved.
    private String datasourceRefId;
    private static final String LICENCE = "http://creativecommons.org/licenses/by/4.0/";
    // Queried for subspecies mapping, gene designation, feature types and cross-reference sources.
    private static final UniprotConfig CONFIG = new UniprotConfig();
    private static final Logger LOG = Logger.getLogger(UniprotConverter.class);
    private static final Map<String, String> GENE_PREFIXES = new HashMap();

    /* loaded from: input_file:org/intermine/bio/dataconversion/UniprotConverter$DiseaseHolder.class */
    /**
     * Collects the pieces of a disease description (identifier, name, acronym and
     * description) so that they can be rendered as a single prefix for a comment.
     */
    protected class DiseaseHolder {
        private String name = null;
        private String acronym = null;
        private String description = null;
        private String identifier = null;

        protected DiseaseHolder() {
        }

        /**
         * @return the disease name, or {@code null} if none has been set
         */
        protected String getName() {
            return this.name;
        }

        /**
         * Sets one of the textual disease fields.
         *
         * @param str which field to set: {@code "name"}, {@code "acronym"} or
         *            {@code "description"}; any other value is ignored
         * @param str2 the value to store
         */
        protected void setDisease(String str, String str2) {
            if ("name".equals(str)) {
                this.name = str2;
            } else if ("acronym".equals(str)) {
                this.acronym = str2;
            } else if ("description".equals(str)) {
                this.description = str2;
            }
        }

        /**
         * @param str identifier of the disease
         */
        protected void setIdentifier(String str) {
            this.identifier = str;
        }

        /**
         * Renders the fields that are set as {@code "identifier; name; acronym; description "}.
         * Fields that are null or empty are left out, so a holder without an identifier
         * no longer produces the literal text {@code "null; "}.
         *
         * @return the concatenated disease text, possibly empty
         */
        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            // The identifier may legitimately be missing: the holder is also created when
            // only name/acronym/description have been seen.
            if (StringUtils.isNotEmpty(this.identifier)) {
                sb.append(this.identifier).append("; ");
            }
            if (StringUtils.isNotEmpty(this.name)) {
                sb.append(this.name).append("; ");
            }
            if (StringUtils.isNotEmpty(this.acronym)) {
                sb.append(this.acronym).append("; ");
            }
            if (StringUtils.isNotEmpty(this.description)) {
                sb.append(this.description).append(" ");
            }
            return sb.toString();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/intermine/bio/dataconversion/UniprotConverter$UniprotHandler.class */
    public class UniprotHandler extends DefaultHandler {
        // Entry currently being parsed; replaced whenever an <entry> element starts.
        private UniprotEntry entry;
        // Names of the elements that are currently open, innermost on top.
        private Stack<String> stack;
        // Marks what the text of the current element is to be used for; null when the text is not collected.
        private String attName;
        // Text collected for the current element; replaced by a fresh buffer on every startElement.
        private StringBuffer attValue;
        // Number of entries passed to processEntry so far (used for progress logging).
        private int entryCount;
        // Disease details gathered inside the current comment; cleared when the comment element ends.
        private DiseaseHolder disease;

        /**
         * Creates a handler with an empty element stack, no collected text,
         * no disease in progress and an entry counter of zero.
         */
        private UniprotHandler() {
            entryCount = 0;
            attName = null;
            attValue = null;
            disease = null;
            stack = new Stack<>();
        }

        /**
         * Handles the start of an XML element.
         *
         * <p>Depending on the element name and the name of its parent (the top of
         * {@code stack}), this either reads values straight from the element's attributes
         * into the current entry, or sets {@code attName} so that the element's text is
         * collected by {@code characters} and consumed in {@code endElement}. The branches
         * are order-dependent: the first matching condition wins.
         *
         * <p>On every call the element name is pushed on the stack and the text buffer is
         * replaced with an empty one.
         *
         * @param uri namespace URI, passed on to the superclass
         * @param localName local name, passed on to the superclass
         * @param qName qualified element name; this is what all the matching is done on
         * @param attrs attributes of the element
         * @throws SAXException if the superclass implementation throws it
         */
        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException {
            // Name of the enclosing element, or null for the document root.
            String parent = this.stack.isEmpty() ? null : this.stack.peek();
            this.attName = null;
            // Stack.search is 1-based from the top, so "== 2" below means the named element
            // is directly underneath the parent element.
            if ("entry".equals(qName)) {
                this.entry = new UniprotEntry();
                this.entry.setDatasetRefId(UniprotConverter.this.getDataSet(UniprotConverter.getAttrValue(attrs, "dataset") + " data set", UniprotConverter.this.datasourceRefId, UniprotConverter.LICENCE));
            } else if ("fullName".equals(qName) && this.stack.search("protein") == 2 && ("recommendedName".equals(parent) || "submittedName".equals(parent))) {
                this.attName = "proteinName";
            } else if (("fullName".equals(qName) || "shortName".equals(qName)) && this.stack.search("protein") == 2 && ("alternativeName".equals(parent) || "recommendedName".equals(parent) || "submittedName".equals(parent))) {
                this.attName = "synonym";
            } else if ("fullName".equals(qName) && "recommendedName".equals(parent) && this.stack.search("component") == 2) {
                this.attName = "component";
            } else if ("name".equals(qName) && "entry".equals(parent)) {
                this.attName = "primaryIdentifier";
            } else if ("ecNumber".equals(qName)) {
                this.attName = "ecNumber";
            } else if ("accession".equals(qName)) {
                this.attName = "value";
            } else if ("dbReference".equals(qName) && "organism".equals(parent)) {
                this.entry.setTaxonId(parseTaxonId(UniprotConverter.getAttrValue(attrs, "id")));
            } else if ("name".equals(qName) && "isoform".equals(parent)) {
                this.attName = "isoformname";
            } else if ("id".equals(qName) && "isoform".equals(parent)) {
                this.attName = "isoform";
            } else if ("sequence".equals(qName) && "isoform".equals(parent)) {
                String sequenceType = UniprotConverter.getAttrValue(attrs, "type");
                if ("displayed".equals(sequenceType)) {
                    this.entry.addCanonicalIsoform(this.entry.getAttribute());
                } else if ("described".equals(sequenceType)) {
                    this.entry.addIsoform(this.entry.getAttribute());
                }
            } else if ("sequence".equals(qName)) {
                String length = UniprotConverter.getAttrValue(attrs, "length");
                String mass = UniprotConverter.getAttrValue(attrs, "mass");
                if (length != null) {
                    this.entry.setLength(length);
                    // Only a sequence element that carries a length has its text collected.
                    this.attName = "residues";
                }
                if (mass != null) {
                    this.entry.setMolecularWeight(mass);
                }
                // The mere presence of the "fragment" attribute marks the entry as a fragment.
                this.entry.setFragment(UniprotConverter.getAttrValue(attrs, "fragment") != null);
            } else if ("feature".equals(qName) && UniprotConverter.getAttrValue(attrs, "type") != null) {
                this.entry.addFeature(UniprotConverter.this.getFeature(UniprotConverter.getAttrValue(attrs, "type"), UniprotConverter.getAttrValue(attrs, "description"), UniprotConverter.getAttrValue(attrs, "status")));
            } else if (("begin".equals(qName) || "end".equals(qName)) && this.entry.processingFeature() && UniprotConverter.getAttrValue(attrs, "position") != null) {
                this.entry.addFeatureLocation(qName, UniprotConverter.getAttrValue(attrs, "position"));
            } else if ("position".equals(qName) && this.entry.processingFeature() && UniprotConverter.getAttrValue(attrs, "position") != null) {
                // A single position is recorded as both the begin and the end of the feature.
                this.entry.addFeatureLocation("begin", UniprotConverter.getAttrValue(attrs, "position"));
                this.entry.addFeatureLocation("end", UniprotConverter.getAttrValue(attrs, "position"));
            } else if ("dbReference".equals(qName) && "citation".equals(parent) && "PubMed".equals(UniprotConverter.getAttrValue(attrs, "type"))) {
                this.entry.addPub(UniprotConverter.this.getPub(UniprotConverter.getAttrValue(attrs, "id")));
            } else if ("comment".equals(qName) && StringUtils.isNotEmpty(UniprotConverter.getAttrValue(attrs, "type"))) {
                this.entry.setCommentType(UniprotConverter.getAttrValue(attrs, "type"));
            } else if ("text".equals(qName) && "comment".equals(parent)) {
                this.attName = "text";
                String evidence = UniprotConverter.getAttrValue(attrs, "evidence");
                if (StringUtils.isNotEmpty(evidence)) {
                    this.entry.setCommentEvidence(evidence);
                }
            } else if ("keyword".equals(qName)) {
                this.attName = "keyword";
            } else if ("dbReference".equals(qName) && "entry".equals(parent)) {
                this.entry.addDbref(UniprotConverter.getAttrValue(attrs, "type"), UniprotConverter.getAttrValue(attrs, "id"));
            } else if ("property".equals(qName) && "dbReference".equals(parent)) {
                String propertyType = UniprotConverter.getAttrValue(attrs, "type");
                String geneDesignation = UniprotConverter.CONFIG.getGeneDesignation(this.entry.getTaxonId());
                // The type attribute may be absent; guard so that a property without a type
                // is ignored instead of raising a NullPointerException.
                if (propertyType != null && propertyType.equals(geneDesignation)) {
                    this.entry.addGeneDesignation(UniprotConverter.getAttrValue(attrs, "value"));
                } else if ("evidence".equals(propertyType)) {
                    this.entry.addGOEvidence(this.entry.getDbref(), UniprotConverter.getAttrValue(attrs, "value"));
                }
            } else if ("name".equals(qName) && "gene".equals(parent)) {
                // For gene names the marker is the name type itself; endElement passes it on.
                this.attName = UniprotConverter.getAttrValue(attrs, "type");
            } else if ("evidence".equals(qName) && "entry".equals(parent)) {
                String evidenceKey = UniprotConverter.getAttrValue(attrs, "key");
                String evidenceAttribute = UniprotConverter.getAttrValue(attrs, "attribute");
                if (StringUtils.isNotEmpty(evidenceKey) && StringUtils.isNotEmpty(evidenceAttribute)) {
                    this.entry.addPubEvidence(evidenceKey, UniprotConverter.this.getEvidence(evidenceAttribute));
                }
            } else if ("disease".equals(parent) && ("name".equals(qName) || "acronym".equals(qName) || "description".equals(qName))) {
                this.attName = "disease";
            } else if ("dbReference".equals(qName) && "disease".equals(parent)) {
                if (this.disease == null) {
                    this.disease = new DiseaseHolder();
                }
                this.disease.setIdentifier(UniprotConverter.getAttrValue(attrs, "type") + ":" + UniprotConverter.getAttrValue(attrs, "id"));
            } else if ("scope".equals(qName) && "reference".equals(parent)) {
                this.attName = "scope";
            } else if ("dbreference".equals(qName) || "comment".equals(qName) || "isoform".equals(qName) || "gene".equals(qName)) {
                // NOTE(review): "dbreference" is all lower case while every other branch
                // matches "dbReference", so that alternative never matches a dbReference
                // element. Left untouched on purpose: correcting the spelling would reset the
                // entry's temporary state for dbReference elements nested in other elements -
                // confirm the intent before changing it.
                this.entry.reset();
            }
            super.startElement(uri, localName, qName, attrs);
            this.stack.push(qName);
            this.attValue = new StringBuffer();
        }

        /**
         * Maps a taxon id through the configured subspecies lookup.
         *
         * @param str taxon id as read from the XML
         * @return the value the configuration returns for {@code str} when that is
         *         non-empty, otherwise {@code str} itself
         */
        private String parseTaxonId(String str) {
            String mapped = UniprotConverter.CONFIG.getSubspecies(str);
            if (StringUtils.isNotEmpty(mapped)) {
                return mapped;
            }
            return str;
        }

        /**
         * Handles the end of an XML element: pops it from the element stack and consumes the
         * text collected for it (if any) according to the element name, the name of its
         * parent and the {@code attName} marker set in {@code startElement}. The checks are
         * order-dependent; the first one that matches handles the element.
         *
         * <p>When an {@code entry} element ends, the entry and every isoform entry derived
         * from it are processed and stored.
         *
         * @param uri namespace URI, passed on to the superclass
         * @param localName local name, passed on to the superclass
         * @param qName qualified element name; this is what all the matching is done on
         * @throws SAXException if storing an item fails or the superclass throws it
         */
        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void endElement(String uri, String localName, String qName) throws SAXException {
            super.endElement(uri, localName, qName);
            this.stack.pop();
            // After the pop the top of the stack is the parent of the element that just ended.
            String parent = this.stack.isEmpty() ? null : this.stack.peek();
            // The buffer is not modified in this method, so one snapshot serves every branch.
            // (A former "attName == null && attValue.toString() == null" guard was removed:
            // toString() never returns null, so that guard could never trigger.)
            String text = this.attValue.toString();

            if ("sequence".equals(qName)) {
                this.entry.setSequence(text.replaceAll("\n", ""));
                return;
            }
            if ("proteinName".equals(this.attName)) {
                this.entry.setName(text);
                return;
            }
            if ("synonym".equals(this.attName)) {
                this.entry.addProteinName(text);
                return;
            }
            if ("ecNumber".equals(this.attName)) {
                this.entry.addECNumber(text);
                return;
            }
            if ("text".equals(qName) && "comment".equals(parent)) {
                if (text.isEmpty()) {
                    return;
                }
                Item comment = UniprotConverter.this.createItem("Comment");
                String commentType = this.entry.getCommentType();
                comment.setAttribute("type", commentType);
                // Build the complete description first and truncate afterwards, so that the
                // disease prefix is kept for long comments and the stored value never
                // exceeds POSTGRES_INDEX_SIZE characters.
                String description = text;
                if ("disease".equals(commentType) && this.disease != null) {
                    description = this.disease.toString() + text;
                }
                if (description.length() > UniprotConverter.POSTGRES_INDEX_SIZE) {
                    String ellipsis = "...";
                    description = description.substring(0, UniprotConverter.POSTGRES_INDEX_SIZE - ellipsis.length()) + ellipsis;
                }
                comment.setAttribute("description", description);
                try {
                    this.entry.addCommentRefId(comment.getIdentifier(), UniprotConverter.this.store(comment));
                } catch (ObjectStoreException e) {
                    throw new SAXException(e);
                }
                return;
            }
            if ("name".equals(qName) && "gene".equals(parent)) {
                // attName holds the gene name type captured in startElement.
                this.entry.addGeneName(this.attName, text);
                return;
            }
            if ("keyword".equals(qName)) {
                this.entry.addKeyword(UniprotConverter.this.getKeyword(text));
                return;
            }
            if ("primaryIdentifier".equals(this.attName)) {
                this.entry.setPrimaryIdentifier(text);
                return;
            }
            if ("accession".equals(qName)) {
                this.entry.addAccession(text);
                // Only the primary accession is checked for uniqueness.
                if (text.equals(this.entry.getPrimaryAccession())) {
                    UniprotConverter.this.checkUniqueIdentifier(this.entry, text);
                }
                return;
            }
            if ("component".equals(this.attName) && "fullName".equals(qName) && "recommendedName".equals(parent) && this.stack.search("component") == 2) {
                this.entry.addComponent(text);
                return;
            }
            if ("disease".equals(this.attName) && ("name".equals(qName) || "acronym".equals(qName) || "description".equals(qName)) && "disease".equals(parent)) {
                if (this.disease == null) {
                    this.disease = new DiseaseHolder();
                }
                // qName is one of "name", "acronym" or "description" here, which are
                // exactly the field keys setDisease understands.
                this.disease.setDisease(qName, text);
                return;
            }
            if ("id".equals(qName) && "isoform".equals(parent)) {
                String isoformId = text;
                if (isoformId.contains(",")) {
                    // Several ids in one element: the first is the id, the rest are synonyms.
                    String[] ids = isoformId.split("[, ]+");
                    isoformId = ids[0];
                    for (int i = 1; i < ids.length; i++) {
                        this.entry.addIsoformSynonym(ids[i]);
                    }
                }
                if (this.entry.getAttribute() == null) {
                    this.entry.addAttribute(isoformId);
                } else {
                    this.entry.addIsoformSynonym(isoformId);
                }
                return;
            }
            if ("comment".equals(qName)) {
                this.disease = null;
                return;
            }
            if ("scope".equals(qName)) {
                if (text.contains("RETRACTED")) {
                    this.entry.deleteLastPub();
                }
                return;
            }
            if ("name".equals(qName) && "isoform".equals(parent)) {
                // Purely numeric isoform names are not recorded as synonyms.
                if (!text.matches("[0-9]+")) {
                    this.entry.addIsoformSynonym(text);
                }
            } else if ("entry".equals(qName)) {
                try {
                    processCommentEvidence(this.entry);
                    List<UniprotEntry> isoformEntries = processEntry(this.entry);
                    if (isoformEntries != null) {
                        for (UniprotEntry isoformEntry : isoformEntries) {
                            processEntry(isoformEntry);
                        }
                    }
                } catch (ObjectStoreException e) {
                    throw new SAXException(e);
                }
            }
        }

        /**
         * Collects element text for the element currently being read.
         *
         * <p>Text is only kept while {@code attName} is set. Leading whitespace (space, tab,
         * carriage return, line feed) of each delivered chunk is skipped and the remainder
         * is appended to {@code attValue}; the parser may deliver the text of one element in
         * several chunks, so the value is only consumed in {@code endElement}.
         *
         * <p>This body was reconstructed from the bytecode listing that the decompiler
         * emitted in place of source; the previous body unconditionally threw
         * {@code UnsupportedOperationException}.
         *
         * @param ch buffer holding the characters
         * @param start index of the first character of the chunk in {@code ch}
         * @param length number of characters in the chunk
         */
        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void characters(char[] ch, int start, int length) {
            if (this.attName == null) {
                return;
            }
            int offset = start;
            int remaining = length;
            // Skip leading whitespace of this chunk.
            while (remaining > 0 && isSkippedWhitespace(ch[offset])) {
                offset++;
                remaining--;
            }
            if (remaining > 0) {
                this.attValue.append(ch, offset, remaining);
            }
        }

        /** Tells whether {@code c} is one of the four characters skipped at the start of a text chunk. */
        private boolean isSkippedWhitespace(char c) {
            return c == ' ' || c == '\t' || c == '\n' || c == '\r';
        }

        /**
         * Converts one parsed entry into a Protein item, together with its dependent items,
         * and stores them.
         *
         * <p>Nothing is stored when the entry is a non-isoform whose sequence was already
         * seen for its taxon (and duplicates are not allowed), when it lacks a data set or a
         * primary accession, when it is flagged as duplicate, or when it is a fragment and
         * fragments are not loaded.
         *
         * @param uniprotEntry the entry to process
         * @return entries created for the isoforms of {@code uniprotEntry}; empty when the
         *         entry was skipped
         * @throws SAXException if storing an item fails
         * @throws ObjectStoreException if storing an item fails
         */
        private List<UniprotEntry> processEntry(UniprotEntry uniprotEntry) throws SAXException, ObjectStoreException {
            this.entryCount++;
            if (this.entryCount % 10000 == 0) {
                UniprotConverter.LOG.info("Processed " + this.entryCount + " entries.");
            }
            List<UniprotEntry> isoformEntries = new ArrayList<>();

            // Duplicate sequence: queue a synonym on the earlier protein instead of storing again.
            boolean sequenceAlreadyStored = !uniprotEntry.isIsoform()
                    && !UniprotConverter.this.allowduplicates
                    && UniprotConverter.this.seenSequence(uniprotEntry.getTaxonId(), uniprotEntry.getMd5checksum());
            if (sequenceAlreadyStored) {
                Map<String, String> seenForTaxon = UniprotConverter.this.sequences.get(uniprotEntry.getTaxonId());
                if (seenForTaxon != null && seenForTaxon.containsKey(uniprotEntry.getMd5checksum())) {
                    UniprotConverter.this.synonymsAndXrefs.add(UniprotConverter.this.createSynonym(seenForTaxon.get(uniprotEntry.getMd5checksum()), uniprotEntry.getPrimaryAccession(), false));
                }
                return isoformEntries;
            }

            if (!uniprotEntry.hasDatasetRefId() || !uniprotEntry.hasPrimaryAccession() || uniprotEntry.isDuplicate()) {
                return isoformEntries;
            }
            if (!UniprotConverter.this.loadfragments && "true".equalsIgnoreCase(uniprotEntry.isFragment())) {
                return Collections.emptyList();
            }

            UniprotConverter.this.setDataSet(uniprotEntry.getDatasetRefId());
            for (String isoformAccession : uniprotEntry.getIsoforms()) {
                isoformEntries.add(uniprotEntry.createIsoformEntry(isoformAccession));
            }

            Item protein = UniprotConverter.this.createItem("Protein");
            processIdentifiers(protein, uniprotEntry);
            processECNumbers(protein, uniprotEntry);
            if (uniprotEntry.isIsoform()) {
                protein.setAttribute("isUniprotCanonical", "false");
            } else {
                protein.setAttribute("isUniprotCanonical", "true");
                // Sequence data is only attached to non-isoform proteins.
                processSequence(protein, uniprotEntry);
            }
            protein.setReference("organism", UniprotConverter.this.getOrganism(uniprotEntry.getTaxonId()));
            if (uniprotEntry.getPubs() != null) {
                protein.setCollection("publications", uniprotEntry.getPubs());
            }
            if (uniprotEntry.hasComments()) {
                protein.setCollection("comments", uniprotEntry.getComments());
                processCommentEvidence(uniprotEntry);
            }
            if (uniprotEntry.getKeywords() != null) {
                protein.setCollection("keywords", uniprotEntry.getKeywords());
            }
            processFeatures(protein, uniprotEntry);
            if (uniprotEntry.getComponents() != null && !uniprotEntry.getComponents().isEmpty()) {
                processComponents(protein, uniprotEntry);
            }
            UniprotConverter.this.addSeenSequence(uniprotEntry.getTaxonId(), uniprotEntry.getMd5checksum(), protein.getIdentifier());

            if (uniprotEntry.isIsoform()) {
                // An isoform must point at a parent protein that was stored earlier.
                String parentAccession = uniprotEntry.getUniprotAccession();
                String parentRefId = UniprotConverter.this.proteins.get(parentAccession);
                if (parentRefId == null) {
                    throw new RuntimeException("parsing an isoform without a parent " + parentAccession);
                }
                protein.setReference("canonicalProtein", parentRefId);
            } else {
                UniprotConverter.this.proteins.put(uniprotEntry.getPrimaryAccession(), protein.getIdentifier());
            }

            try {
                processDbrefs(protein, uniprotEntry);
                if (UniprotConverter.this.creategenes) {
                    processGene(protein, uniprotEntry);
                }
                UniprotConverter.this.store(protein);
                processSynonyms(protein.getIdentifier(), uniprotEntry);
                // Start the next protein with an empty set of pending synonyms/xrefs.
                UniprotConverter.this.synonymsAndXrefs = new HashSet<>();
            } catch (ObjectStoreException e) {
                throw new SAXException(e);
            }
            return isoformEntries;
        }

        /**
         * For every comment of the entry that has evidence codes, resolves the codes to
         * publication references and stores them as the comment's "publications" list.
         * Codes that cannot be resolved are logged and left out; comments for which no code
         * resolves get no list at all.
         *
         * @param uniprotEntry the entry whose comment evidence is processed
         * @throws ObjectStoreException if storing a reference list fails
         */
        private void processCommentEvidence(UniprotEntry uniprotEntry) throws ObjectStoreException {
            for (Map.Entry<Integer, List<String>> evidence : uniprotEntry.getCommentEvidence().entrySet()) {
                List<String> pubRefIds = new ArrayList<String>();
                for (String code : evidence.getValue()) {
                    String pubRefId = uniprotEntry.getPubRefId(code);
                    if (pubRefId == null) {
                        UniprotConverter.LOG.error("bad evidence code:" + code + " for " + uniprotEntry.getPrimaryAccession());
                        continue;
                    }
                    pubRefIds.add(pubRefId);
                }
                if (pubRefIds.isEmpty()) {
                    continue;
                }
                // The map key identifies the stored comment the list is attached to.
                ReferenceList publications = new ReferenceList("publications", new ArrayList<String>(pubRefIds));
                UniprotConverter.this.store(publications, evidence.getKey());
            }
        }

        /**
         * Attaches sequence data to a protein item: length, a reference to the (shared)
         * Sequence item, molecular weight and md5 checksum.
         *
         * @param item the protein item to update
         * @param uniprotEntry the entry the sequence data is read from
         */
        private void processSequence(Item item, UniprotEntry uniprotEntry) {
            // Resolve (and, if needed, store) the Sequence item before touching the protein.
            String sequenceRefId = getSequenceIdentfier(
                    uniprotEntry.getMd5checksum(),
                    uniprotEntry.getSequence(),
                    uniprotEntry.getLength());

            item.setAttribute("length", uniprotEntry.getLength());
            item.setReference("sequence", sequenceRefId);
            item.setAttribute("molecularWeight", uniprotEntry.getMolecularWeight());
            item.setAttribute("md5checksum", uniprotEntry.getMd5checksum());
        }

        /**
         * Returns the identifier of the Sequence item for a checksum, creating and storing
         * the item the first time the checksum is seen.
         *
         * @param str md5 checksum of the sequence; used as the cache key
         * @param str2 residues of the sequence
         * @param str3 length of the sequence
         * @return identifier of the Sequence item
         * @throws RuntimeException if the new item cannot be stored
         */
        private String getSequenceIdentfier(String str, String str2, String str3) {
            if (UniprotConverter.this.allSequences.containsKey(str)) {
                return UniprotConverter.this.allSequences.get(str);
            }
            Item sequence = UniprotConverter.this.createItem("Sequence");
            sequence.setAttribute("residues", str2);
            sequence.setAttribute("length", str3);
            sequence.setAttribute("md5checksum", str);
            try {
                UniprotConverter.this.store(sequence);
            } catch (ObjectStoreException e) {
                throw new RuntimeException(e);
            }
            // Only remembered once the store succeeded.
            UniprotConverter.this.allSequences.put(str, sequence.getIdentifier());
            return UniprotConverter.this.allSequences.get(str);
        }

        /**
         * Copies name, fragment flag, accessions and identifiers from the entry to the
         * protein item. For isoform entries the primary identifier is derived via
         * {@code getIsoformIdentifier}.
         *
         * @param item the protein item to update
         * @param uniprotEntry the entry the values are read from
         */
        private void processIdentifiers(Item item, UniprotEntry uniprotEntry) {
            item.setAttribute("name", uniprotEntry.getName());
            item.setAttribute("isFragment", uniprotEntry.isFragment());
            item.setAttribute("uniprotAccession", uniprotEntry.getUniprotAccession());

            // The primary accession doubles as the secondary identifier.
            String accession = uniprotEntry.getPrimaryAccession();
            item.setAttribute("primaryAccession", accession);
            item.setAttribute("secondaryIdentifier", accession);

            String uniprotName = uniprotEntry.getPrimaryIdentifier();
            item.setAttribute("uniprotName", uniprotName);

            String primaryIdentifier = uniprotEntry.isIsoform()
                    ? getIsoformIdentifier(accession, uniprotName)
                    : uniprotName;
            item.setAttribute("primaryIdentifier", primaryIdentifier);
        }

        /**
         * Adds the entry's EC numbers to the protein's "ecNumbers" collection, creating and
         * storing an ECNumber item for each number that has not been seen before.
         *
         * @param item the protein item to update
         * @param uniprotEntry the entry the EC numbers are read from
         * @throws SAXException if a new ECNumber item cannot be stored
         */
        private void processECNumbers(Item item, UniprotEntry uniprotEntry) throws SAXException {
            List<String> codes = uniprotEntry.getECNumbers();
            if (codes == null) {
                return;
            }
            // An empty list simply yields no iterations.
            for (String code : codes) {
                String ecRefId = UniprotConverter.this.ecNumbers.get(code);
                if (ecRefId == null) {
                    Item ecItem = UniprotConverter.this.createItem("ECNumber");
                    ecItem.setAttribute("identifier", code);
                    ecRefId = ecItem.getIdentifier();
                    // Registered before the store call, as in the original flow.
                    UniprotConverter.this.ecNumbers.put(code, ecRefId);
                    try {
                        UniprotConverter.this.store(ecItem);
                    } catch (ObjectStoreException e) {
                        throw new SAXException(e);
                    }
                }
                item.addToCollection("ecNumbers", ecRefId);
            }
        }

        /**
         * Builds the identifier of an isoform from its accession and the base identifier.
         *
         * @param str accession; when splitting it on '-' yields exactly two parts, the
         *            second part is used as the suffix
         * @param str2 base identifier
         * @return {@code str2 + "-" + suffix} when the accession has exactly two parts,
         *         otherwise {@code str2} unchanged
         */
        private String getIsoformIdentifier(String str, String str2) {
            String[] accessionParts = str.split("\\-");
            return accessionParts.length == 2 ? str2 + "-" + accessionParts[1] : str2;
        }

        /**
         * Creates and stores one Component item, referencing the protein, for every
         * component name of the entry.
         *
         * @param item the protein item the components belong to
         * @param uniprotEntry the entry the component names are read from
         * @throws SAXException if a Component item cannot be stored
         */
        private void processComponents(Item item, UniprotEntry uniprotEntry) throws SAXException {
            for (String componentName : uniprotEntry.getComponents()) {
                Item component = UniprotConverter.this.createItem("Component");
                component.setAttribute("name", componentName);
                component.setReference("protein", item);
                try {
                    UniprotConverter.this.store(component);
                } catch (ObjectStoreException e) {
                    throw new SAXException(e);
                }
            }
        }

        /**
         * Stores the entry's feature items, each with a reference to the protein. When the
         * configuration lists feature types, only features of a listed type are stored; an
         * empty list means every feature is stored.
         *
         * @param item the protein item the features belong to
         * @param uniprotEntry the entry the features are read from
         * @throws SAXException if a feature item cannot be stored
         */
        private void processFeatures(Item item, UniprotEntry uniprotEntry) throws SAXException {
            List<String> wantedTypes = UniprotConverter.CONFIG.getFeatureTypes();
            for (Item feature : uniprotEntry.getFeatures()) {
                if (!wantedTypes.isEmpty() && !wantedTypes.contains(feature.getAttribute("type").getValue())) {
                    continue;
                }
                feature.setReference("protein", item);
                try {
                    UniprotConverter.this.store(feature);
                } catch (ObjectStoreException e) {
                    throw new SAXException(e);
                }
            }
        }

        /**
         * Creates the synonyms and UniProt cross-references of a stored protein and then
         * stores every item queued in {@code synonymsAndXrefs}.
         *
         * <p>Synonyms are created for each accession, for the isoform identifier (isoform
         * entries only), for each protein name and for each canonical isoform accession.
         *
         * @param str identifier of the protein item the synonyms refer to
         * @param uniprotEntry the entry the values are read from
         * @throws ObjectStoreException if storing an item fails
         */
        private void processSynonyms(String str, UniprotEntry uniprotEntry) throws ObjectStoreException {
            for (String accession : uniprotEntry.getAccessions()) {
                UniprotConverter.this.createSynonym(str, accession, true);
                UniprotConverter.this.createCrossReference(str, accession, "UniProt", true);
            }

            if (uniprotEntry.isIsoform()) {
                String isoformIdentifier = getIsoformIdentifier(uniprotEntry.getPrimaryAccession(), uniprotEntry.getPrimaryIdentifier());
                UniprotConverter.this.createSynonym(str, isoformIdentifier, true);
            }

            for (String proteinName : uniprotEntry.getProteinNames()) {
                UniprotConverter.this.createSynonym(str, proteinName, true);
            }

            List<String> canonicalIsoforms = uniprotEntry.getCollection("canonicalIsoformAccessions");
            if (canonicalIsoforms != null) {
                for (String canonicalIsoform : canonicalIsoforms) {
                    UniprotConverter.this.createSynonym(str, canonicalIsoform, true);
                }
            }

            // Flush everything queued while the protein was processed; null entries are skipped.
            for (Item pending : UniprotConverter.this.synonymsAndXrefs) {
                if (pending == null) {
                    continue;
                }
                UniprotConverter.this.store(pending);
            }
        }

        /**
         * Requests a cross reference for every database reference of the entry; the key
         * of each dbref map entry is used as the data source name.
         *
         * @param item the protein item the cross references point to
         * @param uniprotEntry the entry supplying the database references
         * @throws ObjectStoreException if a cross reference cannot be created
         */
        private void processDbrefs(Item item, UniprotEntry uniprotEntry) throws ObjectStoreException {
            for (Map.Entry<String, Set<String>> dbref : uniprotEntry.getDbrefs().entrySet()) {
                String sourceName = dbref.getKey();
                for (String xrefIdentifier : dbref.getValue()) {
                    setCrossReference(item.getIdentifier(), xrefIdentifier, sourceName, false);
                }
            }
        }

        /**
         * Creates a cross reference and queues it in synonymsAndXrefs, provided the data
         * source is accepted by the configuration (an empty configured list accepts all).
         *
         * @param str identifier of the subject item
         * @param str2 identifier value in the external database
         * @param str3 name of the external data source
         * @param z flag forwarded unchanged to createCrossReference
         * @throws ObjectStoreException if the cross reference cannot be created
         */
        private void setCrossReference(String str, String str2, String str3, boolean z) throws ObjectStoreException {
            List<String> allowedSources = UniprotConverter.CONFIG.getCrossReferences();
            if (!allowedSources.isEmpty() && !allowedSources.contains(str3)) {
                return;
            }
            Item xref = UniprotConverter.this.createCrossReference(str, str2, str3, z);
            if (xref != null) {
                UniprotConverter.this.synonymsAndXrefs.add(xref);
            }
        }

        /**
         * Creates and stores a GOAnnotation plus its GOEvidence for every "GO" database
         * reference of the entry, and links each annotation to the given item.
         *
         * NOTE(review): the evidence is read from the handler's current entry
         * (this.entry), not from the uniprotEntry parameter — confirm this is intended.
         *
         * @param uniprotEntry the entry whose dbrefs are scanned for GO terms
         * @param item the item that becomes the subject of the annotations
         * @throws SAXException wrapping an ObjectStoreException raised while storing
         */
        private void processGoAnnotation(UniprotEntry uniprotEntry, Item item) throws SAXException {
            for (Map.Entry<String, Set<String>> dbref : uniprotEntry.getDbrefs().entrySet()) {
                if (!"GO".equalsIgnoreCase(dbref.getKey())) {
                    continue;
                }
                for (String goIdentifier : dbref.getValue()) {
                    String evidenceCodeRef = UniprotConverter.this.getGOEvidenceCode(this.entry.getGOEvidence(goIdentifier));

                    Item evidence = UniprotConverter.this.createItem("GOEvidence");
                    evidence.setReference("code", evidenceCodeRef);

                    Item annotation = UniprotConverter.this.createItem("GOAnnotation");
                    annotation.setReference("subject", item);
                    annotation.setReference("ontologyTerm", UniprotConverter.this.getGoTerm(goIdentifier));
                    annotation.addToCollection("evidence", evidence);

                    item.addToCollection("goAnnotation", annotation);
                    try {
                        UniprotConverter.this.store(evidence);
                        UniprotConverter.this.store(annotation);
                    } catch (ObjectStoreException storeFailure) {
                        throw new SAXException(storeFailure);
                    }
                }
            }
        }

        /**
         * Creates the gene(s) of a protein from the identifiers configured for its taxon.
         *
         * The values of the taxon's unique identifier field are looked up first. When
         * there are several, each newly created gene is stored immediately. When there is
         * exactly one, the gene is additionally populated from the remaining configured
         * identifier fields before it is stored.
         *
         * @param item the protein item the genes are linked to
         * @param uniprotEntry the entry supplying gene identifiers and publications
         * @throws ObjectStoreException if a gene cannot be stored
         */
        private void processGene(Item item, UniprotEntry uniprotEntry) throws ObjectStoreException {
            final String taxonId = uniprotEntry.getTaxonId();
            final String uniqueField = getUniqueField(taxonId);
            final Set<String> uniqueValues = getGeneIdentifiers(uniprotEntry, uniqueField);
            if (uniqueValues == null || uniqueValues.isEmpty()) {
                UniprotConverter.LOG.error("no valid gene identifiers found for " + uniprotEntry.getPrimaryAccession());
                return;
            }

            final boolean multipleGenes = uniqueValues.size() != 1;
            Item gene = null;
            for (String rawIdentifier : uniqueValues) {
                if (StringUtils.isEmpty(rawIdentifier)) {
                    continue;
                }
                String geneIdentifier = UniprotConverter.GENE_PREFIXES.containsKey(taxonId)
                        ? ((String) UniprotConverter.GENE_PREFIXES.get(taxonId)) + rawIdentifier
                        : rawIdentifier;
                gene = getGene(item, uniprotEntry, geneIdentifier, taxonId, uniqueField);
                // with several genes there is nothing more to add, so store right away
                if (gene != null && multipleGenes) {
                    addPubs2Gene(uniprotEntry, gene);
                    UniprotConverter.this.store(gene);
                }
            }

            if (gene == null || multipleGenes) {
                return;
            }

            // single gene: fill in the other configured identifier fields, then store
            for (String field : getOtherFields(taxonId)) {
                Set<String> fieldValues = getGeneIdentifiers(uniprotEntry, field);
                if (fieldValues == null) {
                    continue;
                }
                for (String rawValue : fieldValues) {
                    if (StringUtils.isEmpty(rawValue)) {
                        continue;
                    }
                    String value = UniprotConverter.GENE_PREFIXES.containsKey(taxonId)
                            ? ((String) UniprotConverter.GENE_PREFIXES.get(taxonId)) + rawValue
                            : rawValue;
                    if (!"primaryIdentifier".equals(field)) {
                        gene.setAttribute(field, value);
                        continue;
                    }
                    String resolved = resolveGene(taxonId, value);
                    if (resolved == null) {
                        UniprotConverter.LOG.info("Can not resolve " + value);
                    } else {
                        gene.setAttribute(field, resolved);
                    }
                }
            }
            addPubs2Gene(uniprotEntry, gene);
            UniprotConverter.this.store(gene);
        }

        /**
         * Adds every publication reference of the entry to the item's "publications"
         * collection; does nothing when the entry has no publication list.
         *
         * @param uniprotEntry the entry supplying publication references
         * @param item the item receiving the references
         */
        private void addPubs2Gene(UniprotEntry uniprotEntry, Item item) {
            if (uniprotEntry.getPubs() == null) {
                return;
            }
            for (String publicationRef : uniprotEntry.getPubs()) {
                item.addToCollection("publications", publicationRef);
            }
        }

        /**
         * Links the protein to the gene with the given identifier, creating the gene item
         * when it has not been seen before.
         *
         * @param item the protein item; the gene is added to its "genes" collection
         * @param uniprotEntry the entry being processed (source of GO annotations)
         * @param str the gene identifier, resolved via resolveGene before use
         * @param str2 the taxon id of the organism
         * @param str3 name of the gene attribute that receives the resolved identifier
         * @return the newly created, not yet stored gene item, or null when the
         *         identifier could not be resolved or the gene was already created
         */
        private Item getGene(Item item, UniprotEntry uniprotEntry, String str, String str2, String str3) {
            String resolveGene = resolveGene(str2, str);
            if (resolveGene == null) {
                return null;
            }
            String knownGeneRef = (String) UniprotConverter.this.genes.get(resolveGene);
            if (knownGeneRef != null) {
                // gene already created for an earlier protein: only add the link
                item.addToCollection("genes", knownGeneRef);
                return null;
            }
            Item gene = UniprotConverter.this.createItem("Gene");
            gene.setAttribute(str3, resolveGene);
            gene.setReference("organism", UniprotConverter.this.getOrganism(str2));
            if (UniprotConverter.this.creatego) {
                try {
                    processGoAnnotation(uniprotEntry, gene);
                } catch (SAXException e) {
                    // best effort: the gene is still created, but keep the cause in the log
                    UniprotConverter.LOG.error("couldn't process GO annotation for gene - " + resolveGene, e);
                }
            }
            String geneRef = gene.getIdentifier();
            UniprotConverter.this.genes.put(resolveGene, geneRef);
            item.addToCollection("genes", geneRef);
            return gene;
        }

        /**
         * Looks up the gene identifiers of an entry for one gene field, using the lookup
         * method ("name", "gene-designation" or "dbref") configured for the entry's taxon.
         *
         * @param uniprotEntry the entry to read identifiers from
         * @param str the gene field whose configuration is consulted
         * @return the identifiers found; may be null when the "name" or "dbref" lookup
         *         finds nothing, and is empty for an unrecognised method
         */
        private Set<String> getGeneIdentifiers(UniprotEntry uniprotEntry, String str) {
            String taxonId = uniprotEntry.getTaxonId();
            String method = getGeneConfigMethod(taxonId, str);
            String value = getGeneConfigValue(taxonId, str);

            if ("name".equals(method)) {
                return getByName(uniprotEntry, taxonId, value);
            }
            if ("dbref".equals(method)) {
                return getByDbref(uniprotEntry, value);
            }

            Set<String> identifiers = new HashSet<String>();
            if ("gene-designation".equals(method)) {
                identifiers.addAll(uniprotEntry.getGeneDesignation(value));
            } else {
                UniprotConverter.LOG.error("error processing config for organism " + taxonId);
            }
            return identifiers;
        }

        /**
         * Returns the identifier lookup method configured for a taxon and gene field,
         * falling back to the "default" configuration.
         *
         * @param str the taxon id
         * @param str2 the gene field
         * @return the configured method, never null
         * @throws RuntimeException if neither the taxon nor "default" is configured
         */
        private String getGeneConfigMethod(String str, String str2) {
            String taxonSpecific = UniprotConverter.CONFIG.getIdentifierMethod(str, str2);
            if (taxonSpecific != null) {
                return taxonSpecific;
            }
            String fallback = UniprotConverter.CONFIG.getIdentifierMethod("default", str2);
            if (fallback == null) {
                throw new RuntimeException("error processing line in config file for organism " + str);
            }
            return fallback;
        }

        /**
         * Returns the identifier value configured for a taxon and gene field, falling
         * back to the "default" configuration.
         *
         * @param str the taxon id
         * @param str2 the gene field
         * @return the configured value, never null
         * @throws RuntimeException if neither the taxon nor "default" is configured
         */
        private String getGeneConfigValue(String str, String str2) {
            String taxonSpecific = UniprotConverter.CONFIG.getIdentifierValue(str, str2);
            if (taxonSpecific != null) {
                return taxonSpecific;
            }
            String fallback = UniprotConverter.CONFIG.getIdentifierValue("default", str2);
            if (fallback == null) {
                throw new RuntimeException("error processing line in config file for organism " + str);
            }
            return fallback;
        }

        /**
         * Returns the gene names the entry holds under the given name type.
         *
         * @param uniprotEntry the entry to read gene names from
         * @param str the taxon id, used only in the error message
         * @param str2 the key looked up in the entry's gene-name map
         * @return the names stored under that key, or null when the entry has no gene
         *         names at all (an error is logged) or none under that key
         */
        private Set<String> getByName(UniprotEntry uniprotEntry, String str, String str2) {
            if (uniprotEntry.getGeneNames() == null || uniprotEntry.getGeneNames().isEmpty()) {
                UniprotConverter.LOG.error("No gene names for " + str + ". protein accession:" + uniprotEntry.getPrimaryAccession());
                return null;
            }
            return uniprotEntry.getGeneNames().get(str2);
        }

        /**
         * Returns the gene identifiers an entry carries for the given external database.
         *
         * "Ensembl" is read from the entry's gene designations and copied into a new
         * set; every other database is read from the entry's dbref map and that map's
         * own set is returned.
         *
         * @param uniprotEntry the entry to read identifiers from
         * @param str the database name
         * @return the identifiers, or null (after logging an error) when the dbref map
         *         has no non-empty entry for the database
         */
        private Set<String> getByDbref(UniprotEntry uniprotEntry, String str) {
            if ("Ensembl".equals(str)) {
                Set<String> designations = new HashSet<String>();
                designations.addAll(uniprotEntry.getGeneDesignation(str));
                return designations;
            }

            Map<String, Set<String>> dbrefs = uniprotEntry.getDbrefs();
            String notFoundMessage = "no " + str + " identifier found for gene attached to protein: " + uniprotEntry.getPrimaryAccession();
            Set<String> identifiers = (dbrefs == null || dbrefs.isEmpty()) ? null : dbrefs.get(str);
            if (identifiers == null || identifiers.isEmpty()) {
                UniprotConverter.LOG.error(notFoundMessage);
                return null;
            }
            return identifiers;
        }

        /**
         * Returns the unique gene identifier field configured for a taxon, or the one
         * configured for "default" when the taxon has none.
         *
         * @param str the taxon id
         * @return the configured field name; null if "default" is not configured either
         */
        private String getUniqueField(String str) {
            String taxonSpecific = UniprotConverter.CONFIG.getUniqueIdentifier(str);
            return taxonSpecific != null
                    ? taxonSpecific
                    : UniprotConverter.CONFIG.getUniqueIdentifier("default");
        }

        /**
         * Returns the gene identifier fields configured for a taxon, or those configured
         * for "default" when the taxon has none.
         *
         * @param str the taxon id
         * @return the configured fields; null if "default" is not configured either
         */
        private Set<String> getOtherFields(String str) {
            Set<String> taxonSpecific = UniprotConverter.CONFIG.getGeneIdentifierFields(str);
            return taxonSpecific != null
                    ? taxonSpecific
                    : UniprotConverter.CONFIG.getGeneIdentifierFields("default");
        }

        /**
         * Resolves a gene identifier: identifiers of the FLY taxon go through the id
         * resolver, all others are returned unchanged.
         *
         * @param str the taxon id
         * @param str2 the gene identifier
         * @return the identifier to use, or null when a fly gene cannot be resolved
         */
        private String resolveGene(String str, String str2) {
            if (UniprotConverter.FLY.equals(str)) {
                return resolveFlyGene(str, str2);
            }
            return str2;
        }

        /**
         * Resolves an identifier through the converter's id resolver.
         *
         * @param str the taxon id
         * @param str2 the identifier to resolve
         * @return str2 unchanged when no resolver is set or it does not cover the taxon;
         *         the single resolved identifier when there is exactly one resolution;
         *         otherwise null (the failure is logged)
         */
        private String resolveFlyGene(String str, String str2) {
            IdResolver resolver = UniprotConverter.this.rslv;
            if (resolver == null || !resolver.hasTaxon(str)) {
                return str2;
            }
            int resolutionCount = resolver.countResolutions(str, str2);
            if (resolutionCount != 1) {
                UniprotConverter.LOG.info("RESOLVER: failed to resolve gene to one identifier, ignoring gene: " + str2 + " count: " + resolutionCount + " FBgn: " + resolver.resolveId(str, str2));
                return null;
            }
            return (String) resolver.resolveId(str, str2).iterator().next();
        }
    }

    /**
     * Creates a converter registered under data source "UniProt" and data set
     * "Swiss-Prot data set", with empty caches and the default options: genes are
     * created, TrEMBL is loaded, GO annotations, fragments and duplicates are off.
     *
     * @param itemWriter the writer passed to the superclass
     * @param model the data model passed to the superclass
     */
    public UniprotConverter(ItemWriter itemWriter, Model model) {
        super(itemWriter, model, "UniProt", "Swiss-Prot data set", LICENCE);

        // caches keyed by an external value, holding item identifiers
        this.pubs = new HashMap<String, String>();
        this.ontologies = new HashMap<String, String>();
        this.keywords = new HashMap<String, String>();
        this.genes = new HashMap<String, String>();
        this.goterms = new HashMap<String, String>();
        this.goEvidenceCodes = new HashMap<String, String>();
        this.ecNumbers = new HashMap<String, String>();
        this.proteins = new LinkedHashMap<String, String>();

        // sequence bookkeeping and pending synonym/cross-reference items
        this.sequences = new HashMap<String, Map<String, String>>();
        this.allSequences = new HashMap<String, String>();
        this.synonymsAndXrefs = new HashSet<Item>();

        // option defaults
        this.creategenes = true;
        this.loadtrembl = true;
        this.creatego = false;
        this.loadfragments = false;
        this.allowduplicates = false;

        // set later, on demand
        this.identifiers = null;
        this.taxonIds = null;
        this.datasourceRefId = null;

        OrganismRepository.getOrganismRepository();
    }

    /**
     * Converts the UniProt XML files found in the given directory.
     *
     * When a list of organisms has been set, the files named per taxon (see
     * parseFileNames) are processed for each of those taxa. Otherwise the combined
     * files "uniprot_sprot.xml" and, if TrEMBL loading is enabled,
     * "uniprot_trembl.xml" are processed.
     *
     * @param file the directory containing the UniProt data files
     * @throws IllegalArgumentException if the directory cannot be listed
     * @throws RuntimeException wrapping any SAXException raised during set-up
     * @throws Exception as declared by the overridden method
     */
    public void process(File file) throws Exception {
        try {
            this.datasourceRefId = getDataSource("UniProt");
            setOntology("UniProtKeyword");

            // listFiles() returns null when the path is not a directory or is unreadable;
            // fail with a clear message instead of a NullPointerException further down
            File[] dataFiles = file.listFiles();
            if (dataFiles == null) {
                throw new IllegalArgumentException("Cannot list UniProt data directory: " + file);
            }

            Map<String, File[]> filesByTaxon = parseFileNames(dataFiles);
            if (this.rslv == null) {
                this.rslv = IdResolverService.getFlyIdResolver();
            }

            if (this.taxonIds != null) {
                for (String taxonId : this.taxonIds) {
                    File[] taxonFiles = filesByTaxon.get(taxonId);
                    if (taxonFiles == null) {
                        LOG.error("no files found for " + taxonId);
                    }
                    processFiles(taxonFiles);
                }
                return;
            }

            // no organism filter: slot 0 is Swiss-Prot, slot 1 is TrEMBL
            File[] combinedFiles = new File[2];
            for (File candidate : dataFiles) {
                String name = candidate.getName();
                if ("uniprot_sprot.xml".equals(name)) {
                    combinedFiles[0] = candidate;
                } else if ("uniprot_trembl.xml".equals(name) && this.loadtrembl) {
                    combinedFiles[1] = candidate;
                }
            }
            processFiles(combinedFiles);
        } catch (SAXException e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    }

    /**
     * Parses each non-null file of the array with a fresh UniprotHandler, then resets
     * the sequences, genes and proteins caches.
     *
     * NOTE(review): FileReader decodes with the platform default charset; confirm this
     * matches the encoding of the data files.
     *
     * @param fileArr the files to parse; a null array is logged and ignored, null
     *                elements are skipped
     * @throws RuntimeException wrapping any exception raised while reading or parsing
     */
    private void processFiles(File[] fileArr) {
        if (fileArr == null) {
            LOG.error("no data files found ");
            return;
        }
        for (File file : fileArr) {
            if (file == null) {
                continue;
            }
            UniprotHandler uniprotHandler = new UniprotHandler();
            System.out.println("Processing file: " + file.getPath());
            // try-with-resources so the reader is closed even when parsing fails
            try (FileReader reader = new FileReader(file)) {
                SAXParser.parse(new InputSource(reader), uniprotHandler);
            } catch (Exception e) {
                e.printStackTrace();
                throw new RuntimeException(e);
            }
        }
        // start with empty caches for the next set of files
        this.sequences = new HashMap();
        this.genes = new HashMap();
        this.proteins = new HashMap();
    }

    /**
     * Groups data files named like "7227_uniprot_sprot.xml" by their first
     * underscore-separated part (the taxon id).
     *
     * Each map value is a two-element array: index 0 holds the "sprot" file, index 1
     * the "trembl" file. File names that do not consist of exactly three
     * underscore-separated parts, or whose last part is neither "sprot" nor "trembl"
     * (ignoring ".xml"), are logged and skipped. TrEMBL files are skipped when TrEMBL
     * loading is disabled.
     *
     * @param fileArr the files to group; may be null
     * @return the files grouped by taxon id, or null when fileArr is null
     */
    protected Map<String, File[]> parseFileNames(File[] fileArr) {
        if (fileArr == null) {
            return null;
        }
        Map<String, File[]> filesByTaxon = new HashMap<String, File[]>();
        for (File file : fileArr) {
            String[] parts = file.getName().split("_");
            if (parts.length != 3) {
                LOG.info("Bad file found:  " + file.getName() + ", expected a filename like 7227_uniprot_sprot.xml");
                continue;
            }
            // read parts[0] only after the length check: a name such as "_" splits into
            // an empty array
            String taxonId = parts[0];
            String source = parts[2].replace(".xml", "");
            boolean isSprot = "sprot".equals(source);
            boolean isTrembl = "trembl".equals(source);
            if (!isSprot && !isTrembl) {
                LOG.info("Bad file found:  " + file.getName() + " (" + parts[2] + "), expecting sprot or trembl ");
                continue;
            }
            if (isTrembl && !this.loadtrembl) {
                continue;
            }
            File[] pair = filesByTaxon.get(taxonId);
            if (pair == null) {
                pair = new File[2];
                filesByTaxon.put(taxonId, pair);
            }
            pair[isSprot ? 0 : 1] = file;
        }
        return filesByTaxon;
    }

    /**
     * Rejects the retired "createinterpro" option.
     *
     * @param str ignored
     * @throws IllegalArgumentException always
     * @deprecated protein domains are no longer created by this data source
     */
    @Deprecated
    public void setCreateinterpro(String str) {
        final String message = "UniProt data source does not create protein domains any longer. Please use the InterPro data source instead.";
        throw new IllegalArgumentException(message);
    }

    /**
     * Sets the creategenes option.
     *
     * @param str "true" (case-insensitive) enables the option; anything else,
     *            including null, disables it
     */
    public void setCreategenes(String str) {
        this.creategenes = "true".equalsIgnoreCase(str);
    }

    /**
     * Sets the creatego option, which getGene checks before processing GO annotations.
     *
     * @param str "true" (case-insensitive) enables the option; anything else,
     *            including null, disables it
     */
    public void setCreatego(String str) {
        this.creatego = "true".equalsIgnoreCase(str);
    }

    /**
     * Sets whether TrEMBL files are picked up when the data directory is scanned.
     *
     * @param str "true" (case-insensitive) enables the option; anything else,
     *            including null, disables it
     */
    public void setLoadtrembl(String str) {
        this.loadtrembl = "true".equalsIgnoreCase(str);
    }

    /**
     * Sets the allowduplicates option.
     *
     * @param str "true" (case-insensitive) enables the option; anything else,
     *            including null, disables it
     */
    public void setAllowduplicates(String str) {
        this.allowduplicates = "true".equalsIgnoreCase(str);
    }

    /**
     * Sets the organisms to load from a space-separated list of taxon ids, then adds
     * any subspecies configured for them.
     *
     * @param str space-separated taxon ids
     */
    public void setUniprotOrganisms(String str) {
        Set<String> requestedTaxa = new HashSet<String>(Arrays.asList(StringUtil.split(str, " ")));
        this.taxonIds = requestedTaxa;
        LOG.info("Setting list of organisms to " + requestedTaxa);
        addSubspecies();
    }

    /**
     * Sets the loadfragments option.
     *
     * @param str "true" (case-insensitive) enables the option; anything else,
     *            including null, disables it
     */
    public void setLoadfragments(String str) {
        this.loadfragments = "true".equalsIgnoreCase(str);
    }

    /**
     * Adds to taxonIds the subspecies configured for each taxon id already in the set.
     * A copy of the set is iterated so the set itself can be extended during the loop.
     */
    private void addSubspecies() {
        for (String taxonId : new HashSet<String>(this.taxonIds)) {
            String subspecies = CONFIG.getSubspecies(taxonId);
            if (!StringUtils.isNotEmpty(subspecies)) {
                continue;
            }
            this.taxonIds.add(subspecies);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Records a value under a two-level key in the sequences map, unless the inner
     * key is already present (the first recorded value wins).
     *
     * @param str the outer key
     * @param str2 the inner key
     * @param str3 the value to record
     */
    public void addSeenSequence(String str, String str2, String str3) {
        Map<String, String> seenForKey = this.sequences.get(str);
        if (seenForKey == null) {
            seenForKey = new HashMap<String, String>();
            this.sequences.put(str, seenForKey);
        }
        if (!seenForKey.containsKey(str2)) {
            seenForKey.put(str2, str3);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Tells whether the inner key has been recorded under the outer key in the
     * sequences map. An empty inner map is registered for the outer key if none
     * exists yet.
     *
     * @param str the outer key
     * @param str2 the inner key
     * @return true if the inner key is present
     */
    public boolean seenSequence(String str, String str2) {
        Map<String, String> seenForKey = this.sequences.get(str);
        if (seenForKey != null) {
            return seenForKey.containsKey(str2);
        }
        Map<String, String> created = new HashMap<String, String>();
        this.sequences.put(str, created);
        return created.containsKey(str2);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Returns the identifier of the OntologyTerm item for a keyword, creating and
     * storing the term (in the "UniProtKeyword" ontology) the first time it is seen.
     *
     * @param str the keyword name
     * @return the identifier of the term item
     * @throws SAXException wrapping an ObjectStoreException raised while storing
     */
    public String getKeyword(String str) throws SAXException {
        String cachedRef = this.keywords.get(str);
        if (cachedRef != null) {
            return cachedRef;
        }
        Item term = createItem("OntologyTerm");
        term.setAttribute("name", str);
        term.setReference("ontology", this.ontologies.get("UniProtKeyword"));
        String termRef = term.getIdentifier();
        this.keywords.put(str, termRef);
        try {
            store(term);
        } catch (ObjectStoreException storeFailure) {
            throw new SAXException(storeFailure);
        }
        return termRef;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Extracts the publication from an evidence string of the form "key=value" and
     * returns the identifier of the matching Publication item.
     *
     * @param str the evidence string
     * @return the publication item identifier, or null when the string does not split
     *         into exactly two parts around "=" or the value part is empty
     * @throws SAXException if the publication cannot be stored
     */
    public String getEvidence(String str) throws SAXException {
        String[] parts = str.contains("=") ? str.split("=") : null;
        if (parts == null || parts.length != 2) {
            return null;
        }
        String pubMedId = parts[1];
        return StringUtils.isNotEmpty(pubMedId) ? getPub(pubMedId) : null;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Returns the identifier of the Publication item for a PubMed id, creating and
     * storing the item the first time the id is seen.
     *
     * @param str the PubMed id
     * @return the identifier of the publication item
     * @throws SAXException wrapping an ObjectStoreException raised while storing
     */
    public String getPub(String str) throws SAXException {
        String cachedRef = this.pubs.get(str);
        if (cachedRef != null) {
            return cachedRef;
        }
        Item publication = createItem("Publication");
        publication.setAttribute("pubMedId", str);
        String publicationRef = publication.getIdentifier();
        this.pubs.put(str, publicationRef);
        try {
            store(publication);
        } catch (ObjectStoreException storeFailure) {
            throw new SAXException(storeFailure);
        }
        return publicationRef;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Returns the identifier of the GOEvidenceCode item for an evidence string,
     * creating and storing the item the first time the code is seen. The code is the
     * part of the string before the first ':'.
     *
     * @param str the evidence string; must not be null
     * @return the identifier of the evidence code item
     * @throws SAXException wrapping an ObjectStoreException raised while storing
     */
    public String getGOEvidenceCode(String str) throws SAXException {
        // split() never returns null, but it returns an empty array when the input
        // consists only of ':' characters; use the whole string in that case
        String[] parts = str.split(":");
        String code = parts.length == 0 ? str : parts[0];
        String codeRef = this.goEvidenceCodes.get(code);
        if (codeRef == null) {
            Item codeItem = createItem("GOEvidenceCode");
            codeItem.setAttribute("code", code);
            codeRef = codeItem.getIdentifier();
            this.goEvidenceCodes.put(code, codeRef);
            try {
                store(codeItem);
            } catch (ObjectStoreException e) {
                throw new SAXException(e);
            }
        }
        return codeRef;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Returns the identifier of the GOTerm item for a GO identifier, creating and
     * storing the item the first time the identifier is seen.
     *
     * @param str the GO term identifier
     * @return the identifier of the term item
     * @throws SAXException wrapping an ObjectStoreException raised while storing
     */
    public String getGoTerm(String str) throws SAXException {
        String cachedRef = this.goterms.get(str);
        if (cachedRef != null) {
            return cachedRef;
        }
        Item term = createItem("GOTerm");
        term.setAttribute("identifier", str);
        String termRef = term.getIdentifier();
        this.goterms.put(str, termRef);
        try {
            store(term);
        } catch (ObjectStoreException storeFailure) {
            throw new SAXException(storeFailure);
        }
        return termRef;
    }

    /**
     * Returns the identifier of the Ontology item with the given name, creating,
     * registering and storing the item the first time the name is seen.
     *
     * @param str the ontology name
     * @return the identifier of the ontology item (also on the creating call)
     * @throws SAXException wrapping an ObjectStoreException raised while storing
     */
    private String setOntology(String str) throws SAXException {
        String ontologyRef = this.ontologies.get(str);
        if (ontologyRef == null) {
            Item ontology = createItem("Ontology");
            ontology.setAttribute("name", str);
            // keep the new identifier so the creating call does not return null
            ontologyRef = ontology.getIdentifier();
            this.ontologies.put(str, ontologyRef);
            try {
                store(ontology);
            } catch (ObjectStoreException e) {
                throw new SAXException(e);
            }
        }
        return ontologyRef;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Builds (without storing) a UniProtFeature item of the given type, referencing
     * the keyword term of the same name.
     *
     * The description is str2, str3, or "str2 (str3)" depending on which of the two
     * are non-null; it is only set when non-empty.
     *
     * @param str the feature type
     * @param str2 first description part, may be null
     * @param str3 second description part, may be null
     * @return the new feature item
     * @throws SAXException if the keyword term cannot be stored
     */
    public Item getFeature(String str, String str2, String str3) throws SAXException {
        Item feature = createItem("UniProtFeature");
        feature.setAttribute("type", str);
        feature.setReference("feature", getKeyword(str));

        String description;
        if (str3 == null) {
            description = str2;
        } else if (str2 == null) {
            description = str3;
        } else {
            description = str2 + " (" + str3 + ")";
        }

        if (StringUtils.isEmpty(description)) {
            return feature;
        }
        feature.setAttribute("description", description);
        return feature;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Returns the trimmed value of an XML attribute.
     *
     * @param attributes the attributes of the current element
     * @param str the qualified attribute name
     * @return the value with surrounding whitespace removed, or null when the
     *         attribute is absent
     */
    public static String getAttrValue(Attributes attributes, String str) {
        String rawValue = attributes.getValue(str);
        return rawValue == null ? null : rawValue.trim();
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Flags the entry as a duplicate when the identifier is non-empty and has already
     * been registered; a new non-empty identifier is registered as a side effect.
     *
     * @param uniprotEntry the entry to flag
     * @param str the identifier to check; empty or null values are ignored
     */
    public void checkUniqueIdentifier(UniprotEntry uniprotEntry, String str) {
        if (StringUtils.isNotEmpty(str) && !isUniqueIdentifier(str)) {
            uniprotEntry.setDuplicate(true);
        }
    }

    /**
     * Registers an identifier and reports whether it was new. The identifier set is
     * created on first use.
     *
     * @param str the identifier
     * @return true if the identifier had not been registered before; false (after
     *         logging an error) if it had
     */
    private boolean isUniqueIdentifier(String str) {
        if (this.identifiers == null) {
            this.identifiers = new HashSet<String>();
        }
        // add() returns false when the set already contains the value
        if (!this.identifiers.add(str)) {
            LOG.error("not assigning duplicate identifier:  " + str);
            return false;
        }
        return true;
    }

    // Registers gene identifier prefixes by taxon id: processGene prepends the mapped
    // prefix to every gene identifier of an entry whose taxon id is a key of this map.
    // NOTE(review): 10116 is presumably the taxon id for rat and "RGD:" its database prefix — confirm.
    static {
        GENE_PREFIXES.put("10116", "RGD:");
    }
}
